Merge branch 'main' of https://code.sdsdev.co.kr/SDSRV-IDP/sbt-idp into vietanh99-request-update

This commit is contained in:
daovietanh99 2024-02-01 15:23:15 +07:00
commit 528809ab70
218 changed files with 14927 additions and 6960 deletions

View File

@ -8,10 +8,17 @@ RUN groupadd --gid ${GID} ${USERNAME} \
&& apt-get install -y sudo bash gettext poppler-utils \
&& echo ${USERNAME} ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/${USERNAME} \
&& chmod 0440 /etc/sudoers.d/${USERNAME}
RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y
RUN yes | apt install postgresql gcc musl-dev
RUN pip install --upgrade pip
RUN pip install uvicorn gunicorn Celery
# For integration with sdskvu
RUN pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
RUN pip install -U openmim==0.3.7 --no-cache-dir
RUN mim install mmcv-full==1.7.2
# End integration with sdskvu
USER ${UID}
ADD --chown=${UID}:${GID} fwd /app
COPY --chown=${UID}:${GID} requirements.txt /app
@ -21,4 +28,27 @@ RUN pip install -r requirements.txt --no-cache-dir
COPY --chown=${UID}:${GID} . /app
RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsv_dewarp && pip3 install -v -e . --no-cache-dir
RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsvtd && pip3 install -v -e . --no-cache-dir
RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsvtr && pip3 install -v -e . --no-cache-dir
RUN cd /app/fwd_api/utils/sdsvkvu && pip3 install -v -e . --no-cache-dir
# For integration with sdskvu
RUN python -m pip install paddlepaddle-gpu==2.4.2.post116 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html --no-cache-dir
ENV TZ="Asia/Ho_Chi_Minh"
# FROM cope2n-api-base AS builder
# ARG UID=1000
# ARG GID=1000
# ARG USERNAME=container-user
# # Create a new user
# RUN groupadd --gid ${GID} ${USERNAME} \
# && useradd --uid ${UID} --gid ${GID} -m ${USERNAME} \
# && echo ${USERNAME} ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/${USERNAME} \
# && chmod 0440 /etc/sudoers.d/${USERNAME}
# WORKDIR /app
# COPY --chown=${UID}:${GID} . /app

View File

@ -0,0 +1,17 @@
FROM python:3.9.17-buster
RUN apt-get update \
&& apt-get install -y sudo bash gettext poppler-utils postgresql gcc musl-dev
COPY requirements.txt /tmp
COPY ./fwd_api/utils/sdsvkvu /app/fwd_api/utils/sdsvkvu
RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsv_dewarp && pip3 install -v -e . --no-cache-dir
RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsvtd && pip3 install -v -e . --no-cache-dir
RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsvtr && pip3 install -v -e . --no-cache-dir
RUN cd /app/fwd_api/utils/sdsvkvu && pip3 install -v -e . --no-cache-dir
RUN pip install --upgrade pip && pip install uvicorn gunicorn Celery
RUN pip install -r /tmp/requirements.txt --no-cache-dir
ENV TZ="Asia/Ho_Chi_Minh"

View File

@ -2,18 +2,21 @@ from rest_framework import status, viewsets
from rest_framework.decorators import action
from rest_framework.response import Response
from django.core.paginator import Paginator
from django.http import JsonResponse
from django.http import JsonResponse, FileResponse, HttpResponse
from datetime import datetime
from django.utils import timezone
from django.db.models import Q
import uuid
import os
from fwd import settings
from drf_spectacular.utils import extend_schema, OpenApiParameter, OpenApiTypes
# from drf_spectacular.types import OpenApiString
from ..models import SubscriptionRequest
from ..exception.exceptions import RequiredFieldException, NotFoundException
import json
from ..exception.exceptions import InvalidException, RequiredFieldException, NotFoundException
from ..models import SubscriptionRequest, Report, ReportFile
from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list, extract_report_detail_list, IterAvg
from ..utils.file import download_from_S3
from ..utils.process import string_to_boolean
from ..celery_worker.client_connector import c_connector
class AccuracyViewSet(viewsets.ViewSet):
lookup_field = "username"
@ -23,16 +26,16 @@ class AccuracyViewSet(viewsets.ViewSet):
OpenApiParameter(
name='start_date',
location=OpenApiParameter.QUERY,
description='Start date (YYYY-mm-DDTHH:MM:SS)',
description='Start date (YYYY-mm-DDTHH:MM:SSZ)',
type=OpenApiTypes.DATE,
default='2023-01-02T00:00:00',
default='2023-01-02T00:00:00+0700',
),
OpenApiParameter(
name='end_date',
location=OpenApiParameter.QUERY,
description='End date (YYYY-mm-DDTHH:MM:SS)',
description='End date (YYYY-mm-DDTHH:MM:SSZ)',
type=OpenApiTypes.DATE,
default='2024-01-10T00:00:00',
default='2024-01-10T00:00:00+0700',
),
OpenApiParameter(
name='include_test',
@ -59,13 +62,6 @@ class AccuracyViewSet(viewsets.ViewSet):
description='Specific redemption id',
type=OpenApiTypes.STR,
),
OpenApiParameter(
name='quality',
location=OpenApiParameter.QUERY,
description='One or more of [bad, good, all]',
type=OpenApiTypes.STR,
enum=['bad', 'good', 'all'],
),
OpenApiParameter(
name='page',
location=OpenApiParameter.QUERY,
@ -84,7 +80,7 @@ class AccuracyViewSet(viewsets.ViewSet):
responses=None, tags=['Accuracy']
)
@action(detail=False, url_path="request_list", methods=["GET"])
def get_subscription_requests(self, request):
def get_request_list(self, request):
if request.method == 'GET':
start_date_str = request.GET.get('start_date')
end_date_str = request.GET.get('end_date')
@ -94,13 +90,12 @@ class AccuracyViewSet(viewsets.ViewSet):
redemption_id = request.GET.get('redemption_id', None)
is_reviewed = request.GET.get('is_reviewed', None)
include_test = request.GET.get('include_test', False)
quality = request.GET.get('quality', None)
try:
start_date = datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S')
start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z')
end_date = datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S')
end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z')
except ValueError:
return JsonResponse({'error': 'Invalid date format. Please use YYYY-MM-DD.'}, status=400)
raise InvalidException(excArgs="Date format")
base_query = Q(created_at__range=(start_date, end_date))
if request_id:
@ -124,13 +119,6 @@ class AccuracyViewSet(viewsets.ViewSet):
base_query &= Q(is_reviewed=False)
elif is_reviewed == "all":
pass
if isinstance(quality, str):
if quality == "good":
base_query &= Q(is_bad_image_quality=False)
elif quality == "bad":
base_query &= Q(is_bad_image_quality=True)
elif quality == "all":
pass
subscription_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at')
@ -185,20 +173,405 @@ class AccuracyViewSet(viewsets.ViewSet):
return JsonResponse({'error': 'Invalid request method.'}, status=405)
@extend_schema(
parameters=[
OpenApiParameter(
name='is_daily_report',
location=OpenApiParameter.QUERY,
description='Whether this is a daily report or not',
type=OpenApiTypes.BOOL,
),
OpenApiParameter(
name='start_date',
location=OpenApiParameter.QUERY,
description='Start date (YYYY-mm-DDTHH:MM:SSZ)',
type=OpenApiTypes.DATE,
default='2023-01-02T00:00:00+0700',
),
OpenApiParameter(
name='end_date',
location=OpenApiParameter.QUERY,
description='End date (YYYY-mm-DDTHH:MM:SSZ)',
type=OpenApiTypes.DATE,
default='2024-01-10T00:00:00+0700',
),
OpenApiParameter(
name='include_test',
location=OpenApiParameter.QUERY,
description='Whether to include test record or not',
type=OpenApiTypes.BOOL,
),
OpenApiParameter(
name='is_reviewed',
location=OpenApiParameter.QUERY,
description='Which records to query',
type=OpenApiTypes.STR,
enum=['reviewed', 'not reviewed', 'all'],
),
OpenApiParameter(
name='request_id',
location=OpenApiParameter.QUERY,
description='Specific request id',
type=OpenApiTypes.STR,
),
OpenApiParameter(
name='redemption_id',
location=OpenApiParameter.QUERY,
description='Specific redemption id',
type=OpenApiTypes.STR,
),
OpenApiParameter(
name='subsidiary',
location=OpenApiParameter.QUERY,
description='Subsidiary',
type=OpenApiTypes.STR,
),
],
responses=None, tags=['Accuracy']
)
@action(detail=False, url_path="make_report", methods=["GET"])
def make_report(self, request):
if request.method == 'GET':
start_date_str = request.GET.get('start_date')
end_date_str = request.GET.get('end_date')
request_id = request.GET.get('request_id', None)
redemption_id = request.GET.get('redemption_id', None)
is_reviewed = string_to_boolean(request.GET.get('is_reviewed', "false"))
include_test = string_to_boolean(request.GET.get('include_test', "false"))
subsidiary = request.GET.get("subsidiary", "all")
is_daily_report = string_to_boolean(request.GET.get('is_daily_report', "false"))
try:
start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z')
end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z')
except ValueError:
raise InvalidException(excArgs="Date format")
query_set = {"start_date_str": start_date_str,
"end_date_str": end_date_str,
"request_id": request_id,
"redemption_id": redemption_id,
"is_reviewed": is_reviewed,
"include_test": include_test,
"subsidiary": subsidiary,
"is_daily_report": is_daily_report,
}
report_id = "report" + "_" + timezone.datetime.now().strftime("%Y%m%d%H%M%S%z") + "_" + uuid.uuid4().hex
new_report: Report = Report(
report_id=report_id,
is_daily_report=is_daily_report,
subsidiary=subsidiary.lower().replace(" ", ""),
include_test=include_test,
include_reviewed=is_reviewed,
start_at=start_date,
end_at=end_date,
status="Processing",
)
if is_daily_report:
new_report.created_at = end_date
new_report.save()
# Background job to calculate accuracy
shadow_report(report_id, query_set)
return JsonResponse(status=status.HTTP_200_OK, data={"report_id": report_id})
@extend_schema(
parameters=[
OpenApiParameter(
name='report_id',
location=OpenApiParameter.QUERY,
description='Specific report id',
type=OpenApiTypes.STR,
),
OpenApiParameter(
name='page',
location=OpenApiParameter.QUERY,
description='Page number',
type=OpenApiTypes.INT,
required=False
),
OpenApiParameter(
name='page_size',
location=OpenApiParameter.QUERY,
description='Number of items per page',
type=OpenApiTypes.INT,
required=False
),
],
responses=None, tags=['Accuracy']
)
@action(detail=False, url_path="report_detail_list", methods=["GET"])
def get_report_detail_list(self, request):
if request.method == 'GET':
report_id = request.GET.get('report_id', None)
page_number = int(request.GET.get('page', 1))
page_size = int(request.GET.get('page_size', 10))
report = Report.objects.filter(report_id=report_id).first()
report_files = ReportFile.objects.filter(report=report)
paginator = Paginator(report_files, page_size)
page = paginator.get_page(page_number)
data = extract_report_detail_list(page, in_percent=False)
response = {
'report_detail': data,
'metadata': {"subsidiary": report.subsidiary,
"start_at": report.start_at,
"end_at": report.end_at},
'page': {
'number': page.number,
'total_pages': page.paginator.num_pages,
'count': page.paginator.count,
}
}
return JsonResponse(response, status=200)
return JsonResponse({'error': 'Invalid request method.'}, status=405)
@extend_schema(
parameters=[
OpenApiParameter(
name='start_date',
location=OpenApiParameter.QUERY,
description='Start date (YYYY-mm-DDTHH:MM:SSZ)',
type=OpenApiTypes.DATE,
default='2023-01-02T00:00:00+0700',
),
OpenApiParameter(
name='end_date',
location=OpenApiParameter.QUERY,
description='End date (YYYY-mm-DDTHH:MM:SSZ)',
type=OpenApiTypes.DATE,
default='2024-01-10T00:00:00+0700',
),
OpenApiParameter(
name='daily_report_only',
location=OpenApiParameter.QUERY,
description='Whether to return daily reports only',
type=OpenApiTypes.BOOL,
),
OpenApiParameter(
name='page',
location=OpenApiParameter.QUERY,
description='Page number',
type=OpenApiTypes.INT,
required=False
),
OpenApiParameter(
name='page_size',
location=OpenApiParameter.QUERY,
description='Number of items per page',
type=OpenApiTypes.INT,
required=False
),
],
responses=None, tags=['Accuracy']
)
@action(detail=False, url_path="report_list", methods=["GET"])
def get_report_list(self, request):
if request.method == 'GET':
daily_report_only = request.GET.get('daily_report_only', False)
start_date_str = request.GET.get('start_date', "")
end_date_str = request.GET.get('end_date', "")
page_number = int(request.GET.get('page', 1))
page_size = int(request.GET.get('page_size', 10))
if not start_date_str or not end_date_str:
reports = Report.objects.all()
else:
try:
start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z')
end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z')
except ValueError:
raise InvalidException(excArgs="Date format")
base_query = Q(created_at__range=(start_date, end_date))
if daily_report_only:
base_query &= Q(is_daily_report=True)
reports = Report.objects.filter(base_query).order_by('created_at')
paginator = Paginator(reports, page_size)
page = paginator.get_page(page_number)
data = []
for report in page:
data.append({
"ID": report.id,
"Created Date": report.created_at,
"No. Requests": report.number_request,
"Status": report.status,
"Purchase Date Acc": report.reviewed_accuracy.get("purchase_date", None) if report.reviewed_accuracy else None,
"Retailer Acc": report.feedback_accuracy.get("retailername", None) if report.reviewed_accuracy else None,
"IMEI Acc": report.feedback_accuracy.get("imei_number", None) if report.reviewed_accuracy else None,
"Avg. Accuracy": report.feedback_accuracy.get("avg", None) if report.reviewed_accuracy else None,
"Avg. Client Request Time": report.average_client_time.get("avg", 0) if report.average_client_time else 0,
"Avg. OCR Processing Time": report.average_OCR_time.get("avg", 0) if report.average_OCR_time else 0,
"report_id": report.report_id,
})
response = {
'report_detail': data,
'page': {
'number': page.number,
'total_pages': page.paginator.num_pages,
'count': page.paginator.count,
}
}
return JsonResponse(response, status=200)
return JsonResponse({'error': 'Invalid request method.'}, status=405)
@extend_schema(
parameters=[
OpenApiParameter(
name='start_date',
location=OpenApiParameter.QUERY,
description='Start date (YYYY-mm-DDTHH:MM:SSZ)',
type=OpenApiTypes.DATE,
default='2023-01-02T00:00:00+0700',
),
OpenApiParameter(
name='end_date',
location=OpenApiParameter.QUERY,
description='End date (YYYY-mm-DDTHH:MM:SSZ)',
type=OpenApiTypes.DATE,
default='2024-01-10T00:00:00+0700',
),
OpenApiParameter(
name='subsidiary',
location=OpenApiParameter.QUERY,
description='Subsidiary',
type=OpenApiTypes.STR,
),
OpenApiParameter(
name='page',
location=OpenApiParameter.QUERY,
description='Page number',
type=OpenApiTypes.INT,
required=False
),
OpenApiParameter(
name='page_size',
location=OpenApiParameter.QUERY,
description='Number of items per page',
type=OpenApiTypes.INT,
required=False
),
],
responses=None, tags=['Accuracy']
)
@action(detail=False, url_path="overview", methods=["GET"])
def overview(self, request):
if request.method == 'GET':
subsidiary = request.GET.get('subsidiary', None)
start_date_str = request.GET.get('start_date', "")
end_date_str = request.GET.get('end_date', "")
page_number = int(request.GET.get('page', 1))
page_size = int(request.GET.get('page_size', 10))
base_query = Q()
if start_date_str and end_date_str:
try:
start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z')
end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z')
except ValueError:
raise InvalidException(excArgs="Date format")
base_query &= Q(created_at__range=(start_date, end_date))
if subsidiary:
base_query &= Q(subsidiary=subsidiary)
base_query &= Q(is_daily_report=True)
reports = Report.objects.filter(base_query).order_by('created_at')
paginator = Paginator(reports, page_size)
page = paginator.get_page(page_number)
data = []
this_month_report = MonthReportAccumulate()
for report in page:
res = this_month_report.add(report)
if not(res):
_, _data, total = this_month_report()
data += [total]
data += _data
this_month_report = MonthReportAccumulate()
this_month_report.add(report)
else:
continue
_, _data, total = this_month_report()
data += [total]
data += _data
# Generate xlsx file
# workbook = dict2xlsx(data, _type="report")
# tmp_file = f"/tmp/{str(uuid.uuid4())}.xlsx"
# os.makedirs(os.path.dirname(tmp_file), exist_ok=True)
# workbook.save(tmp_file)
# c_connector.remove_local_file((tmp_file, "fake_request_id"))
response = {
# 'file': load_xlsx_file(),
'overview_data': data,
'page': {
'number': page.number,
'total_pages': page.paginator.num_pages,
'count': page.paginator.count,
}
}
return JsonResponse(response, status=200)
return JsonResponse({'error': 'Invalid request method.'}, status=405)
@extend_schema(
parameters=[],
responses=None, tags=['Accuracy']
)
@action(detail=False, url_path=r"get_report_file/(?P<report_id>[\w\-]+)", methods=["GET"])
def get_report_file(self, request, report_id):
if request.method == 'GET':
# report_id = request.GET.get('report_id', None)
if not report_id:
raise RequiredFieldException(excArgs="report_id")
report_num = Report.objects.filter(report_id=report_id).count()
if report_num == 0:
raise NotFoundException(excArgs=f"report: {report_id}")
report = Report.objects.filter(report_id=report_id).first()
# download from s3 to local
tmp_file = "/tmp/" + "report_" + uuid.uuid4().hex + ".xlsx"
os.makedirs("/tmp", exist_ok=True)
if not report.S3_file_name:
raise NotFoundException(excArgs="S3 file name")
download_from_S3(report.S3_file_name, tmp_file)
file = open(tmp_file, 'rb')
response = FileResponse(file, status=200)
# Set the content type and content disposition headers
response['Content-Type'] = 'application/octet-stream'
response['Content-Disposition'] = 'attachment; filename="{0}"'.format(os.path.basename(tmp_file))
return response
return JsonResponse({'error': 'Invalid request method.'}, status=405)
class RequestViewSet(viewsets.ViewSet):
lookup_field = "username"
@extend_schema(request = {
@extend_schema(
request={
'multipart/form-data': {
'type': 'object',
'properties': {
'reviewed_result': {
'type': 'string',
'default': '''{"request_id": "Sample request_id", "imei_number": ["sample_imei1", "sample_imei2"], "retailername": "Sample Retailer", "purchase_date": "01/01/1970", "sold_to_party": "Sample party"}''',
},
}
},
}, responses=None, tags=['Request']
},
},
responses=None,
tags=['Request']
)
@action(detail=False, url_path=r"request/(?P<request_id>[\w\-]+)", methods=["GET", "POST"])
def get_subscription_request(self, request, request_id=None):
@ -278,7 +651,7 @@ class RequestViewSet(viewsets.ViewSet):
subscription_request = subscription_request.first()
reviewed_result = json.loads(data["reviewed_result"][1:-1])
reviewed_result = json.loads(data["reviewed_result"])
for field in ['retailername', 'sold_to_party', 'purchase_date', 'imei_number']:
if not field in reviewed_result.keys():
raise RequiredFieldException(excArgs=f'reviewed_result.{field}')
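
Note: the endpoints above now parse timezone-aware timestamps (`%z`), which is why the Swagger defaults gained a `+0700` suffix and why naive inputs fall through to `InvalidException`. A minimal sketch of the accepted format, using the plain `datetime` class that `django.utils.timezone.datetime` re-exports (values are illustrative only):

from datetime import datetime

fmt = '%Y-%m-%dT%H:%M:%S%z'
aware = datetime.strptime('2023-01-02T00:00:00+0700', fmt)  # parses; tzinfo is UTC+07:00
try:
    datetime.strptime('2023-01-02T00:00:00', fmt)  # the old naive default no longer matches
except ValueError:
    pass  # the views convert this into InvalidException(excArgs="Date format")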

View File

@ -32,8 +32,10 @@ class CeleryConnector:
'upload_file_to_s3': {'queue': "upload_file_to_s3"},
'upload_feedback_to_s3': {'queue': "upload_feedback_to_s3"},
'upload_obj_to_s3': {'queue': "upload_obj_to_s3"},
'upload_report_to_s3': {'queue': "upload_report_to_s3"},
'remove_local_file': {'queue': "remove_local_file"},
'csv_feedback': {'queue': "csv_feedback"},
'make_a_report': {'queue': "report"},
}
app = Celery(
@ -41,14 +43,18 @@ class CeleryConnector:
broker=settings.BROKER_URL,
broker_transport_options={'confirm_publish': False},
)
def make_a_report(self, args):
return self.send_task('make_a_report', args)
def csv_feedback(self, args):
return self.send_task('csv_feedback', args)
def do_pdf(self, args):
return self.send_task('do_pdf', args)
def upload_file_to_s3(self, args):
return self.send_task('upload_file_to_s3', args)
def upload_feedback_to_s3(self, args):
def upload_file_to_s3(self, args):
return self.send_task('upload_feedback_to_s3', args)
return self.send_task('upload_file_to_s3', args)
def upload_report_to_s3(self, args):
return self.send_task('upload_report_to_s3', args)
def upload_obj_to_s3(self, args):
return self.send_task('upload_obj_to_s3', args)
def remove_local_file(self, args):
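
Note: with the new `make_a_report` route, a report job can be dispatched to the `report` queue through this connector. A rough sketch of the call shape, assuming `send_task` forwards the `args` tuple to the worker task unchanged (the report id and field values below are placeholders):

from fwd_api.celery_worker.client_connector import c_connector

query_set = {  # mirrors the dict built in AccuracyViewSet.make_report
    "start_date_str": "2024-01-01T00:00:00+0700",
    "end_date_str": "2024-01-31T00:00:00+0700",
    "request_id": None,
    "redemption_id": None,
    "is_reviewed": False,
    "include_test": False,
    "subsidiary": "all",
    "is_daily_report": False,
}
c_connector.make_a_report(("report_20240131_placeholder", query_set))  # consumed by make_a_report on the 'report' queue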

View File

@ -9,7 +9,7 @@ from fwd_api.models import SubscriptionRequest, UserProfile
from fwd_api.celery_worker.worker import app
from ..constant.common import FolderFileType, image_extensions
from ..exception.exceptions import FileContentInvalidException
from fwd_api.models import SubscriptionRequestFile, FeedbackRequest
from fwd_api.models import SubscriptionRequestFile, FeedbackRequest, Report
from ..utils import file as FileUtils
from ..utils import process as ProcessUtil
from ..utils import s3 as S3Util
@ -211,6 +211,22 @@ def upload_feedback_to_s3(local_file_path, s3_key, feedback_id):
else:
logger.info(f"S3 is not available, skipping,...")
@app.task(name='upload_report_to_s3')
def upload_report_to_s3(local_file_path, s3_key, report_id):
if s3_client.s3_client is not None:
try:
s3_client.upload_file(local_file_path, s3_key)
report = Report.objects.filter(report_id=report_id)[0]
report.S3_uploaded = True
report.S3_file_name = s3_key
report.save()
except Exception as e:
logger.error(f"Unable to set S3: {e}")
print(f"Unable to set S3: {e}")
return
else:
logger.info(f"S3 is not available, skipping,...")
@app.task(name='remove_local_file')
def remove_local_file(local_file_path, request_id):
print(f"[INFO] Removing local file: {local_file_path}, ...")

View File

@ -0,0 +1,154 @@
import traceback
from fwd_api.models import SubscriptionRequest, Report, ReportFile
from fwd_api.celery_worker.worker import app
from ..utils import s3 as S3Util
from ..utils.accuracy import update_temp_accuracy, IterAvg, calculate_and_save_subcription_file, count_transactions, extract_report_detail_list
from ..utils.file import dict2xlsx, save_workbook_file, save_report_to_S3
from django.utils import timezone
from django.db.models import Q
from celery.utils.log import get_task_logger
from fwd import settings
logger = get_task_logger(__name__)
s3_client = S3Util.MinioS3Client(
endpoint=settings.S3_ENDPOINT,
access_key=settings.S3_ACCESS_KEY,
secret_key=settings.S3_SECRET_KEY,
bucket_name=settings.S3_BUCKET_NAME
)
def mean_list(l):
l = [x for x in l if x is not None]
if len(l) == 0:
return 0
return sum(l)/len(l)
@app.task(name='make_a_report')
def make_a_report(report_id, query_set):
try:
start_date = timezone.datetime.strptime(query_set["start_date_str"], '%Y-%m-%dT%H:%M:%S%z')
end_date = timezone.datetime.strptime(query_set["end_date_str"], '%Y-%m-%dT%H:%M:%S%z')
base_query = Q(created_at__range=(start_date, end_date))
if query_set["request_id"]:
base_query &= Q(request_id=query_set["request_id"])
if query_set["redemption_id"]:
base_query &= Q(redemption_id=query_set["redemption_id"])
base_query &= Q(is_test_request=False)
if isinstance(query_set["include_test"], str):
query_set["include_test"] = True if query_set["include_test"].lower() in ["true", "yes", "1"] else False
if query_set["include_test"]:
# base_query = ~base_query
base_query.children = base_query.children[:-1]
elif isinstance(query_set["include_test"], bool):
if query_set["include_test"]:
base_query = ~base_query
if isinstance(query_set["subsidiary"], str):
if query_set["subsidiary"] and query_set["subsidiary"].lower().replace(" ", "")!="all":
base_query &= Q(redemption_id__startswith=query_set["subsidiary"])
if isinstance(query_set["is_reviewed"], str):
if query_set["is_reviewed"] == "reviewed":
base_query &= Q(is_reviewed=True)
elif query_set["is_reviewed"] == "not reviewed":
base_query &= Q(is_reviewed=False)
# elif query_set["is_reviewed"] == "all":
# pass
errors = []
# Create a placeholder to fill
accuracy = {"feedback" :{"imei_number": IterAvg(),
"purchase_date": IterAvg(),
"retailername": IterAvg(),
"sold_to_party": IterAvg(),},
"reviewed" :{"imei_number": IterAvg(),
"purchase_date": IterAvg(),
"retailername": IterAvg(),
"sold_to_party": IterAvg(),}
} # {"imei": {"acc": 0.1, count: 1}, ...}
time_cost = {"invoice": IterAvg(),
"imei": IterAvg()}
number_images = 0
number_bad_images = 0
# TODO: Multithreading
# Calculate accuracy, processing time, ....Then save.
subscription_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at')
report: Report = \
Report.objects.filter(report_id=report_id).first()
# TODO: number of transaction by doc type
num_request = 0
for request in subscription_requests:
if request.status != 200 or not (request.reviewed_result or request.feedback_result):
# Failed requests or lack of reviewed_result/feedback_result
continue
request_att = calculate_and_save_subcription_file(report, request)
request.feedback_accuracy = {"imei_number" : mean_list(request_att["acc"]["feedback"].get("imei_number", [None])),
"purchase_date" : mean_list(request_att["acc"]["feedback"].get("purchase_date", [None])),
"retailername" : mean_list(request_att["acc"]["feedback"].get("retailername", [None])),
"sold_to_party" : mean_list(request_att["acc"]["feedback"].get("sold_to_party", [None]))}
request.reviewed_accuracy = {"imei_number" : mean_list(request_att["acc"]["reviewed"].get("imei_number", [None])),
"purchase_date" : mean_list(request_att["acc"]["reviewed"].get("purchase_date", [None])),
"retailername" : mean_list(request_att["acc"]["reviewed"].get("retailername", [None])),
"sold_to_party" : mean_list(request_att["acc"]["reviewed"].get("sold_to_party", [None]))}
request.save()
number_images += request_att["total_images"]
number_bad_images += request_att["bad_images"]
update_temp_accuracy(accuracy["feedback"], request_att["acc"]["feedback"], keys=["imei_number", "purchase_date", "retailername", "sold_to_party"])
update_temp_accuracy(accuracy["reviewed"], request_att["acc"]["reviewed"], keys=["imei_number", "purchase_date", "retailername", "sold_to_party"])
time_cost["imei"].add(request_att["time_cost"].get("imei", []))
time_cost["invoice"].add(request_att["time_cost"].get("invoice", []))
errors += request_att["err"]
num_request += 1
transaction_att = count_transactions(start_date, end_date)
# Do saving process
report.number_request = num_request
report.number_images = number_images
report.number_imei = time_cost["imei"].count
report.number_invoice = time_cost["invoice"].count
report.number_bad_images = number_bad_images
# FIXME: refactor this data stream for endurability
report.average_OCR_time = {"invoice": time_cost["invoice"](), "imei": time_cost["imei"](),
"invoice_count": time_cost["invoice"].count, "imei_count": time_cost["imei"].count}
report.average_OCR_time["avg"] = (report.average_OCR_time["invoice"]*report.average_OCR_time["invoice_count"] + report.average_OCR_time["imei"]*report.average_OCR_time["imei_count"])/(report.average_OCR_time["imei_count"] + report.average_OCR_time["invoice_count"])
report.number_imei_transaction = transaction_att.get("imei", 0)
report.number_invoice_transaction = transaction_att.get("invoice", 0)
acumulated_acc = {"feedback": {},
"reviewed": {}}
for acc_type in ["feedback", "reviewed"]:
avg_acc = IterAvg()
for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
acumulated_acc[acc_type][key] = accuracy[acc_type][key]()
acumulated_acc[acc_type][key+"_count"] = accuracy[acc_type][key].count
avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key+"_count"])
acumulated_acc[acc_type]["avg"] = avg_acc()
report.feedback_accuracy = acumulated_acc["feedback"]
report.reviewed_accuracy = acumulated_acc["reviewed"]
report.errors = "|".join(errors)
report.status = "Ready"
report.save()
# Saving a xlsx file
report_files = ReportFile.objects.filter(report=report)
data = extract_report_detail_list(report_files, lower=True)
data_workbook = dict2xlsx(data, _type='report_detail')
local_workbook = save_workbook_file(report.report_id + ".xlsx", report, data_workbook)
s3_key=save_report_to_S3(report.report_id, local_workbook)
except IndexError as e:
print(e)
traceback.print_exc()
print("NotFound request by report id, %d", report_id)
except Exception as e:
print("[ERROR]: an error occured while processing report: ", report_id)
traceback.print_exc()
return 400
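
Note: the accumulation above relies on `IterAvg` from `..utils.accuracy`, which is not part of this diff. Judging from the calls used here (`add`, `add_avg`, `.count`, and calling the instance to read the mean), it behaves like a running weighted average; a hypothetical stand-in, provided only to make the task readable:

class RunningAvg:
    # Hypothetical equivalent of IterAvg: running mean plus a sample count.
    def __init__(self):
        self.total = 0.0
        self.count = 0
    def add(self, values):
        # fed a list of raw values, e.g. per-image time costs
        values = [v for v in values if v is not None]
        self.total += sum(values)
        self.count += len(values)
    def add_avg(self, avg, count):
        # folds in an already-averaged value together with its weight
        if avg is None or count == 0:
            return
        self.total += avg * count
        self.count += count
    def __call__(self):
        return self.total / self.count if self.count else 0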

View File

@ -12,7 +12,7 @@ django.setup()
app: Celery = Celery(
'postman',
broker=settings.BROKER_URL,
include=['fwd_api.celery_worker.process_result_tasks', 'fwd_api.celery_worker.internal_task'],
include=['fwd_api.celery_worker.process_result_tasks', 'fwd_api.celery_worker.internal_task', 'fwd_api.celery_worker.process_report_tasks'],
broker_transport_options={'confirm_publish': False},
)
@ -38,8 +38,10 @@ app.conf.update({
Queue('upload_file_to_s3'),
Queue('upload_feedback_to_s3'),
Queue('upload_obj_to_s3'),
Queue('upload_report_to_s3'),
Queue('remove_local_file'),
Queue('csv_feedback'),
Queue('report'),
],
'task_routes': {
@ -55,8 +57,10 @@ app.conf.update({
'upload_file_to_s3': {'queue': "upload_file_to_s3"},
'upload_feedback_to_s3': {'queue': "upload_feedback_to_s3"},
'upload_obj_to_s3': {'queue': "upload_obj_to_s3"},
'upload_report_to_s3': {'queue': "upload_report_to_s3"},
'remove_local_file': {'queue': "remove_local_file"},
'csv_feedback': {'queue': "csv_feedback"},
'make_a_report': {'queue': "report"},
}
})

View File

@ -0,0 +1,71 @@
# myapp/management/commands/mycustomcommand.py
from django.core.management.base import BaseCommand
from tqdm import tqdm
from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest
from fwd_api.utils.accuracy import predict_result_to_ready
import traceback
import copy
class Command(BaseCommand):
help = 'Refactor database for image level'
def add_arguments(self, parser):
# Add your command-line arguments here
parser.add_argument('test', type=str, help='Value for the argument')
def process_request(self, request):
if len(request.request_id.split(".")[0].split("_")) < 2:
return
images = SubscriptionRequestFile.objects.filter(request=request)
time_cost = {"imei": [], "invoice": [], "all": []}
if request.ai_inference_profile is None:
time_cost["imei"] = [-1 for _ in range(len(images))]
time_cost["invoice"] = [-1]
time_cost["all"] = [-1]
else:
for k, v in request.ai_inference_profile.items():
time_cost[k.split("_")[0]].append(v["inference"][1][0] - v["inference"][0] + (v["postprocess"][1]-v["postprocess"][0]))
for i, image in enumerate(images):
# temp_imei_SAP_20240127223644_a493434edbf84fc08aeb87ef6cdde102_0.jpg
try:
image.index_in_request = int(image.file_name.split(".")[0].split("_")[-1]) if len(image.file_name.split(".")[0].split("_")) > 4 else 0
image.doc_type = image.file_name.split(".")[0].split("_")[1] if len(image.file_name.split(".")[0].split("_")) > 4 else "all"
image.processing_time = time_cost[image.doc_type][image.index_in_request]
if not request.predict_result:
raise KeyError(f"Key predict_result not found in {request.request_id}")
if request.predict_result.get("status", 200) != 200:
raise AttributeError(f"Failed request: {request.request_id}")
_predict_result = copy.deepcopy(predict_result_to_ready(request.predict_result))
_feedback_result = copy.deepcopy(request.feedback_result)
_reviewed_result = copy.deepcopy(request.reviewed_result)
if image.doc_type == "invoice":
_predict_result["imei_number"] = []
if _feedback_result:
_feedback_result["imei_number"] = []
else:
None
if _reviewed_result:
_reviewed_result["imei_number"] = []
else:
None
else:
_predict_result = {"retailername": None, "sold_to_party": None, "purchase_date": [], "imei_number": [_predict_result["imei_number"][image.index_in_request]]}
_feedback_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_feedback_result["imei_number"][image.index_in_request]]} if _feedback_result else None
_reviewed_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_reviewed_result["imei_number"][image.index_in_request]]} if _reviewed_result else None
image.predict_result = _predict_result
image.feedback_result = _feedback_result
image.reviewed_result = _reviewed_result
image.save()
except Exception as e:
self.stdout.write(self.style.ERROR(f"Request: {request.request_id} failed with {e}"))
print(traceback.format_exc())
continue
def handle(self, *args, **options):
test = options['test']
subcription_iter = SubscriptionRequest.objects.all()
for request in tqdm(subcription_iter.iterator()):
self.process_request(request)
self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!'))
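
Note: as a management command, this runs once the module is placed under `fwd_api/management/commands/`; the header comment uses the placeholder name `mycustomcommand.py`. A hedged usage sketch via Django's `call_command` (the module name and argument value are assumptions):

from django.core.management import call_command

# 'mycustomcommand' is the placeholder module name from the header comment;
# the positional 'test' argument is required by add_arguments but only read into options['test'].
call_command('mycustomcommand', 'dry-run')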

View File

@ -0,0 +1,102 @@
# Generated by Django 4.1.3 on 2024-01-25 06:22
from django.db import migrations, models
import django.utils.timezone
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0166_remove_subscriptionrequest_is_bad_image_quality_and_more'),
]
operations = [
migrations.CreateModel(
name='Report',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('report_id', models.CharField(max_length=200)),
('local_file_name', models.CharField(max_length=200)),
('error_status', models.JSONField(null=True)),
('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
('updated_at', models.DateTimeField(auto_now=True)),
('start_at', models.DateTimeField(null=True)),
('end_at', models.DateTimeField(null=True)),
('include_for_test_sample', models.BooleanField(default=False)),
('status', models.CharField(max_length=100)),
('is_daily_report', models.BooleanField(default=False)),
('errors', models.TextField(default='')),
('S3_uploaded', models.BooleanField(default=False)),
('number_request', models.IntegerField(default=0)),
('number_images', models.IntegerField(default=0)),
('number_bad_images', models.IntegerField(default=0)),
('average_client_time_profile', models.JSONField(null=True)),
('average_OCR_time_profile', models.JSONField(null=True)),
('average_OCR_time', models.JSONField(null=True)),
('average_client_time', models.JSONField(null=True)),
('imei_accuracy', models.FloatField(default=-1)),
('purchase_date_accuracy', models.FloatField(default=-1)),
('retailer_name_accuracy', models.FloatField(default=-1)),
('sold_to_party_accuracy', models.FloatField(default=-1)),
],
),
migrations.RemoveField(
model_name='subscriptionrequestfile',
name='accuracy',
),
migrations.AddField(
model_name='subscriptionrequest',
name='imei_accuracy',
field=models.FloatField(default=-1),
),
migrations.AddField(
model_name='subscriptionrequest',
name='purchase_date_accuracy',
field=models.FloatField(default=-1),
),
migrations.AddField(
model_name='subscriptionrequest',
name='retailer_name_accuracy',
field=models.FloatField(default=-1),
),
migrations.AddField(
model_name='subscriptionrequest',
name='sold_to_party_accuracy',
field=models.FloatField(default=-1),
),
migrations.AddField(
model_name='subscriptionrequestfile',
name='counter_measures',
field=models.TextField(blank=True),
),
migrations.AddField(
model_name='subscriptionrequestfile',
name='imei_accuracy',
field=models.FloatField(default=-1),
),
migrations.AddField(
model_name='subscriptionrequestfile',
name='processing_time',
field=models.IntegerField(default=-1),
),
migrations.AddField(
model_name='subscriptionrequestfile',
name='purchase_date_accuracy',
field=models.FloatField(default=-1),
),
migrations.AddField(
model_name='subscriptionrequestfile',
name='reason',
field=models.TextField(blank=True),
),
migrations.AddField(
model_name='subscriptionrequestfile',
name='retailer_name_accuracy',
field=models.FloatField(default=-1),
),
migrations.AddField(
model_name='subscriptionrequestfile',
name='sold_to_party_accuracy',
field=models.FloatField(default=-1),
),
]

View File

@ -0,0 +1,23 @@
# Generated by Django 4.1.3 on 2024-01-25 09:44
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0167_report_remove_subscriptionrequestfile_accuracy_and_more'),
]
operations = [
migrations.AddField(
model_name='report',
name='number_imei_transaction',
field=models.IntegerField(default=0),
),
migrations.AddField(
model_name='report',
name='number_ivoice_transaction',
field=models.IntegerField(default=0),
),
]

View File

@ -0,0 +1,28 @@
# Generated by Django 4.1.3 on 2024-01-25 11:17
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0168_report_number_imei_transaction_and_more'),
]
operations = [
migrations.AddField(
model_name='report',
name='include_reviewed',
field=models.TextField(default=''),
),
migrations.AddField(
model_name='report',
name='include_test',
field=models.CharField(default='', max_length=200),
),
migrations.AddField(
model_name='report',
name='subsidiary',
field=models.TextField(default=''),
),
]

View File

@ -0,0 +1,28 @@
# Generated by Django 4.1.3 on 2024-01-25 11:19
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0169_report_include_reviewed_report_include_test_and_more'),
]
operations = [
migrations.AlterField(
model_name='report',
name='errors',
field=models.TextField(default='', null=True),
),
migrations.AlterField(
model_name='report',
name='include_reviewed',
field=models.TextField(default='', null=True),
),
migrations.AlterField(
model_name='report',
name='subsidiary',
field=models.TextField(default='', null=True),
),
]

View File

@ -0,0 +1,112 @@
# Generated by Django 4.1.3 on 2024-01-28 08:11
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0170_alter_report_errors_alter_report_include_reviewed_and_more'),
]
operations = [
migrations.RenameField(
model_name='report',
old_name='imei_accuracy',
new_name='imei_accuracy_ocr',
),
migrations.RenameField(
model_name='report',
old_name='purchase_date_accuracy',
new_name='imei_accuracy_revised',
),
migrations.RenameField(
model_name='report',
old_name='retailer_name_accuracy',
new_name='purchase_date_accuracy_ocr',
),
migrations.RenameField(
model_name='report',
old_name='sold_to_party_accuracy',
new_name='purchase_date_accuracy_revised',
),
migrations.AddField(
model_name='report',
name='retailer_name_accuracy_ocr',
field=models.FloatField(default=-1),
),
migrations.AddField(
model_name='report',
name='retailer_name_accuracy_revised',
field=models.FloatField(default=-1),
),
migrations.AddField(
model_name='report',
name='sold_to_party_accuracy_ocr',
field=models.FloatField(default=-1),
),
migrations.AddField(
model_name='report',
name='sold_to_party_accuracy_revised',
field=models.FloatField(default=-1),
),
migrations.AddField(
model_name='subscriptionrequestfile',
name='feedback_result',
field=models.JSONField(null=True),
),
migrations.AddField(
model_name='subscriptionrequestfile',
name='predict_result',
field=models.JSONField(null=True),
),
migrations.AddField(
model_name='subscriptionrequestfile',
name='reviewed_result',
field=models.JSONField(null=True),
),
migrations.AlterField(
model_name='subscriptionrequestfile',
name='doc_type',
field=models.CharField(default='', max_length=10),
),
migrations.CreateModel(
name='ReportFile',
fields=[
('id', models.AutoField(primary_key=True, serialize=False)),
('correspond_request_id', models.CharField(max_length=200)),
('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
('updated_at', models.DateTimeField(auto_now=True)),
('S3_uploaded', models.BooleanField(default=False)),
('doc_type', models.CharField(max_length=200)),
('imei_feedback', models.CharField(default=None, max_length=200, null=True)),
('purchase_date_feedback', models.CharField(default=None, max_length=200, null=True)),
('retailer_feedback', models.CharField(default=None, max_length=200, null=True)),
('sold_to_party_feedback', models.CharField(default=None, max_length=200, null=True)),
('imei_ocr', models.CharField(default=None, max_length=200, null=True)),
('purchase_date_ocr', models.CharField(default=None, max_length=200, null=True)),
('retailer_ocr', models.CharField(default=None, max_length=200, null=True)),
('sold_to_party_ocr', models.CharField(default=None, max_length=200, null=True)),
('imei_revised', models.CharField(default=None, max_length=200, null=True)),
('purchase_date_revised', models.CharField(default=None, max_length=200, null=True)),
('retailer_revised', models.CharField(default=None, max_length=200, null=True)),
('sold_to_party_revised', models.CharField(default=None, max_length=200, null=True)),
('imei_acc_feedback', models.FloatField(default=None, null=True)),
('purchase_date_acc_feedback', models.FloatField(default=None, null=True)),
('retailer_acc_feedback', models.FloatField(default=None, null=True)),
('sold_to_party_acc_feedback', models.CharField(default=None, max_length=200, null=True)),
('acc_feedback', models.FloatField(default=None, null=True)),
('imei_acc_revised', models.FloatField(default=None, null=True)),
('purchase_date_acc_revised', models.FloatField(default=None, null=True)),
('retailer_acc_revised', models.FloatField(default=None, null=True)),
('acc_revised', models.FloatField(default=None, null=True)),
('time_cost', models.FloatField(default=0)),
('is_reviewed', models.CharField(default='NA', max_length=5)),
('bad_image_reason', models.TextField(default='')),
('countermeasures', models.TextField(default='')),
('report', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='files', to='fwd_api.report')),
],
),
]

View File

@ -0,0 +1,38 @@
# Generated by Django 4.1.3 on 2024-01-28 09:27
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0171_rename_imei_accuracy_report_imei_accuracy_ocr_and_more'),
]
operations = [
migrations.AlterField(
model_name='subscriptionrequestfile',
name='imei_accuracy',
field=models.FloatField(default=None, null=True),
),
migrations.AlterField(
model_name='subscriptionrequestfile',
name='processing_time',
field=models.FloatField(default=-1),
),
migrations.AlterField(
model_name='subscriptionrequestfile',
name='purchase_date_accuracy',
field=models.FloatField(default=None, null=True),
),
migrations.AlterField(
model_name='subscriptionrequestfile',
name='retailer_name_accuracy',
field=models.FloatField(default=None, null=True),
),
migrations.AlterField(
model_name='subscriptionrequestfile',
name='sold_to_party_accuracy',
field=models.FloatField(default=None, null=True),
),
]

View File

@ -0,0 +1,226 @@
# Generated by Django 4.1.3 on 2024-01-28 18:00
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0172_alter_subscriptionrequestfile_imei_accuracy_and_more'),
]
operations = [
migrations.RenameField(
model_name='reportfile',
old_name='countermeasures',
new_name='counter_measures',
),
migrations.RemoveField(
model_name='report',
name='imei_accuracy_ocr',
),
migrations.RemoveField(
model_name='report',
name='imei_accuracy_revised',
),
migrations.RemoveField(
model_name='report',
name='purchase_date_accuracy_ocr',
),
migrations.RemoveField(
model_name='report',
name='purchase_date_accuracy_revised',
),
migrations.RemoveField(
model_name='report',
name='retailer_name_accuracy_ocr',
),
migrations.RemoveField(
model_name='report',
name='retailer_name_accuracy_revised',
),
migrations.RemoveField(
model_name='report',
name='sold_to_party_accuracy_ocr',
),
migrations.RemoveField(
model_name='report',
name='sold_to_party_accuracy_revised',
),
migrations.RemoveField(
model_name='reportfile',
name='acc_feedback',
),
migrations.RemoveField(
model_name='reportfile',
name='acc_revised',
),
migrations.RemoveField(
model_name='reportfile',
name='imei_acc_feedback',
),
migrations.RemoveField(
model_name='reportfile',
name='imei_acc_revised',
),
migrations.RemoveField(
model_name='reportfile',
name='imei_feedback',
),
migrations.RemoveField(
model_name='reportfile',
name='imei_ocr',
),
migrations.RemoveField(
model_name='reportfile',
name='imei_revised',
),
migrations.RemoveField(
model_name='reportfile',
name='purchase_date_acc_feedback',
),
migrations.RemoveField(
model_name='reportfile',
name='purchase_date_acc_revised',
),
migrations.RemoveField(
model_name='reportfile',
name='purchase_date_feedback',
),
migrations.RemoveField(
model_name='reportfile',
name='purchase_date_ocr',
),
migrations.RemoveField(
model_name='reportfile',
name='purchase_date_revised',
),
migrations.RemoveField(
model_name='reportfile',
name='retailer_acc_feedback',
),
migrations.RemoveField(
model_name='reportfile',
name='retailer_acc_revised',
),
migrations.RemoveField(
model_name='reportfile',
name='retailer_feedback',
),
migrations.RemoveField(
model_name='reportfile',
name='retailer_ocr',
),
migrations.RemoveField(
model_name='reportfile',
name='retailer_revised',
),
migrations.RemoveField(
model_name='reportfile',
name='sold_to_party_acc_feedback',
),
migrations.RemoveField(
model_name='reportfile',
name='sold_to_party_feedback',
),
migrations.RemoveField(
model_name='reportfile',
name='sold_to_party_ocr',
),
migrations.RemoveField(
model_name='reportfile',
name='sold_to_party_revised',
),
migrations.RemoveField(
model_name='subscriptionrequest',
name='imei_accuracy',
),
migrations.RemoveField(
model_name='subscriptionrequest',
name='purchase_date_accuracy',
),
migrations.RemoveField(
model_name='subscriptionrequest',
name='retailer_name_accuracy',
),
migrations.RemoveField(
model_name='subscriptionrequest',
name='sold_to_party_accuracy',
),
migrations.RemoveField(
model_name='subscriptionrequestfile',
name='imei_accuracy',
),
migrations.RemoveField(
model_name='subscriptionrequestfile',
name='purchase_date_accuracy',
),
migrations.RemoveField(
model_name='subscriptionrequestfile',
name='retailer_name_accuracy',
),
migrations.RemoveField(
model_name='subscriptionrequestfile',
name='sold_to_party_accuracy',
),
migrations.AddField(
model_name='report',
name='feedback_accuracy',
field=models.JSONField(null=True),
),
migrations.AddField(
model_name='report',
name='reviewed_accuracy',
field=models.JSONField(null=True),
),
migrations.AddField(
model_name='reportfile',
name='error',
field=models.TextField(default=''),
),
migrations.AddField(
model_name='reportfile',
name='feedback_accuracy',
field=models.JSONField(null=True),
),
migrations.AddField(
model_name='reportfile',
name='feedback_result',
field=models.JSONField(null=True),
),
migrations.AddField(
model_name='reportfile',
name='predict_result',
field=models.JSONField(null=True),
),
migrations.AddField(
model_name='reportfile',
name='reviewed_accuracy',
field=models.JSONField(null=True),
),
migrations.AddField(
model_name='reportfile',
name='reviewed_result',
field=models.JSONField(null=True),
),
migrations.AddField(
model_name='subscriptionrequest',
name='feedback_accuracy',
field=models.JSONField(null=True),
),
migrations.AddField(
model_name='subscriptionrequest',
name='reviewed_accuracy',
field=models.JSONField(null=True),
),
migrations.AddField(
model_name='subscriptionrequestfile',
name='feedback_accuracy',
field=models.JSONField(null=True),
),
migrations.AddField(
model_name='subscriptionrequestfile',
name='reviewed_accuracy',
field=models.JSONField(null=True),
),
]

View File

@ -0,0 +1,28 @@
# Generated by Django 4.1.3 on 2024-01-29 05:59
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0173_rename_countermeasures_reportfile_counter_measures_and_more'),
]
operations = [
migrations.AddField(
model_name='reportfile',
name='acc',
field=models.FloatField(default=0),
),
migrations.AddField(
model_name='reportfile',
name='correspond_redemption_id',
field=models.CharField(default='', max_length=200),
),
migrations.AlterField(
model_name='reportfile',
name='correspond_request_id',
field=models.CharField(default='', max_length=200),
),
]

View File

@ -0,0 +1,28 @@
# Generated by Django 4.1.3 on 2024-01-30 12:29
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0174_reportfile_acc_reportfile_correspond_redemption_id_and_more'),
]
operations = [
migrations.RenameField(
model_name='report',
old_name='number_ivoice_transaction',
new_name='number_imei',
),
migrations.AddField(
model_name='report',
name='number_invoice',
field=models.IntegerField(default=0),
),
migrations.AddField(
model_name='report',
name='number_invoice_transaction',
field=models.IntegerField(default=0),
),
]

View File

@ -0,0 +1,18 @@
# Generated by Django 4.1.3 on 2024-01-31 09:31
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0175_rename_number_ivoice_transaction_report_number_imei_and_more'),
]
operations = [
migrations.AddField(
model_name='report',
name='S3_file_name',
field=models.TextField(default=None, null=True),
),
]

View File

@ -0,0 +1,18 @@
# Generated by Django 4.1.3 on 2024-02-01 03:27
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0176_report_s3_file_name'),
]
operations = [
migrations.AlterField(
model_name='report',
name='subsidiary',
field=models.CharField(default='', max_length=200, null=True),
),
]

View File

@ -13,19 +13,29 @@ class Report(models.Model):
start_at = models.DateTimeField(null=True)
end_at = models.DateTimeField(null=True)
include_for_test_sample = models.BooleanField(default=False)
status = models.CharField(null=True)
status = models.CharField(max_length=100)
is_daily_report = models.BooleanField(default=False)
errors = models.TextField(default="", null=True)
subsidiary = models.CharField(default="", null=True, max_length=200)
include_reviewed = models.TextField(default="", null=True, )
include_test = models.CharField(max_length=200, default="")
# Data
S3_uploaded = models.BooleanField(default=False)
S3_file_name = models.TextField(default=None, null=True)
number_request = models.IntegerField(default=0)
number_images = models.IntegerField(default=0)
number_bad_images = models.IntegerField(default=0)
average_client_time_profile = models.JSONField(default=0) # {"0.1": 100, 0.2: 200, ...}
number_imei = models.IntegerField(default=0)
average_OCR_time_profile = models.JSONField(default=0) # {"0.1": 98, 0.2: 202, ...}
number_invoice = models.IntegerField(default=0)
average_OCR_time = models.JSONField(null=True) # {"invoice": 0.1, "imei": 0.1}
number_imei_transaction = models.IntegerField(default=0)
number_invoice_transaction = models.IntegerField(default=0)
average_client_time_profile = models.JSONField(null=True) # {"0.1": 100, 0.2: 200, ...} | Future feature
average_OCR_time_profile = models.JSONField(null=True) # {"0.1": 98, 0.2: 202, ...} | Future feature
average_OCR_time = models.JSONField(null=True) # {"invoice": 0.1, "imei": 0.1} | Future feature
average_client_time = models.JSONField(null=True) # {"invoice": 0.1, "imei": 0.1}
imei_accuracy = models.FloatField(default=-1)
purchase_date_accuracy = models.FloatField(default=-1)
feedback_accuracy = models.JSONField(null=True)
retailer_name_accuracy = models.FloatField(default=-1)
reviewed_accuracy = models.JSONField(null=True)
sold_to_party_accuracy = models.FloatField(default=-1)

View File

@ -0,0 +1,35 @@
from django.db import models
from django.utils import timezone
from fwd_api.models.Subscription import Subscription
from fwd_api.models.SubscriptionRequest import SubscriptionRequest
from fwd_api.models.Report import Report
class ReportFile(models.Model):
# Metadata
id = models.AutoField(primary_key=True)
correspond_request_id = models.CharField(max_length=200, default="")
correspond_redemption_id = models.CharField(max_length=200, default="")
created_at = models.DateTimeField(default=timezone.now, db_index=True)
updated_at = models.DateTimeField(auto_now=True)
report = models.ForeignKey(Report, related_name="files", on_delete=models.CASCADE)
# Data
S3_uploaded = models.BooleanField(default=False)
doc_type = models.CharField(max_length=200)
predict_result = models.JSONField(null=True)
feedback_result = models.JSONField(null=True)
reviewed_result = models.JSONField(null=True)
feedback_accuracy = models.JSONField(null=True)
reviewed_accuracy = models.JSONField(null=True)
acc = models.FloatField(default=0)
time_cost = models.FloatField(default=0)
is_reviewed = models.CharField(default="NA", max_length=5) # NA, No, Yes
bad_image_reason = models.TextField(default="")
counter_measures = models.TextField(default="")
error = models.TextField(default="")

View File

@ -21,10 +21,9 @@ class SubscriptionRequest(models.Model):
updated_at = models.DateTimeField(auto_now=True)
is_test_request = models.BooleanField(default=False)
S3_uploaded = models.BooleanField(default=False)
imei_accuracy = models.FloatField(default=-1)
purchase_date_accuracy = models.FloatField(default=-1) feedback_accuracy = models.JSONField(null=True)
retailer_name_accuracy = models.FloatField(default=-1) reviewed_accuracy = models.JSONField(null=True)
sold_to_party_accuracy = models.FloatField(default=-1)
ai_inference_profile = models.JSONField(null=True)
preprocessing_time = models.FloatField(default=-1)

View File

@ -20,12 +20,15 @@ class SubscriptionRequestFile(models.Model):
created_at = models.DateTimeField(default=timezone.now, db_index=True)
updated_at = models.DateTimeField(auto_now=True)
is_bad_image_quality = models.BooleanField(default=False)
doc_type = models.CharField(max_length=100, default="") doc_type = models.CharField(max_length=10, default="")
index_in_request = models.IntegerField(default=0) index_in_request = models.IntegerField(default=0) # by doc_type
processing_time = models.IntegerField(default=-1) # in milisecond processing_time = models.FloatField(default=-1) # in milisecond
reason = models.TextField(blank=True)
counter_measures = models.TextField(blank=True)
imei_accuracy = models.FloatField(default=-1)
purchase_date_accuracy = models.FloatField(default=-1) predict_result = models.JSONField(null=True)
retailer_name_accuracy = models.FloatField(default=-1) feedback_result = models.JSONField(null=True)
sold_to_party_accuracy = models.FloatField(default=-1) reviewed_result = models.JSONField(null=True)
feedback_accuracy = models.JSONField(null=True)
reviewed_accuracy = models.JSONField(null=True)

View File

@ -6,4 +6,7 @@ from .OcrTemplateBox import OcrTemplateBox
from .PricingPlan import PricingPlan
from .Subscription import Subscription
from .FeedbackRequest import FeedbackRequest
from .Report import Report
from .ReportFile import ReportFile

View File

@ -0,0 +1,488 @@
import re
from datetime import datetime
import copy
from typing import Any
from .ocr_utils.ocr_metrics import eval_ocr_metric
from .ocr_utils.sbt_report import post_processing_str
from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile, ReportFile
from ..celery_worker.client_connector import c_connector
from django.db.models import Q
BAD_THRESHOLD = 0.75
valid_keys = ["retailername", "sold_to_party", "purchase_date", "imei_number"]
class MonthReportAccumulate:
def __init__(self):
self.month = None
self.total = {
'subs': "+",
'extraction_date': "Subtotal ()",
'total_images': 0,
'images_quality': {
'successful': 0,
'successful_percent': 0,
'bad': 0,
'bad_percent': 0
},
'average_accuracy_rate': {
'imei': IterAvg(),
'purchase_date': IterAvg(),
'retailer_name': IterAvg()
},
'average_processing_time': {
'imei': IterAvg(),
'invoice': IterAvg()
},
'usage': {
'imei':0,
'invoice': 0
}
}
self.data = []
self.data_format = {
'subs': "",
'extraction_date': "",
'num_imei': 0,
'num_invoice': 0,
'total_images': 0,
'images_quality': {
'successful': 0,
'successful_percent': 0,
'bad': 0,
'bad_percent': 0
},
'average_accuracy_rate': {
'imei': 0,
'purchase_date': 0,
'retailer_name': 0
},
'average_processing_time': {
'imei': 0,
'invoice': 0
},
'usage': {
'imei':0,
'invoice': 0
}
},
def accumulate(self, report):
self.total["total_images"] += report.number_images
self.total["images_quality"]["successful"] += report.number_images - report.number_bad_images
self.total["images_quality"]["bad"] += report.number_bad_images
if sum([report.reviewed_accuracy[x] for x in report.reviewed_accuracy.keys() if "_count" not in x]) > 0 :
self.total["average_accuracy_rate"]["imei"].add_avg(report.reviewed_accuracy.get("imei_number", 0), report.reviewed_accuracy.get("imei_number_count", 0))
self.total["average_accuracy_rate"]["purchase_date"].add_avg(report.reviewed_accuracy.get("purchase_date", 0), report.reviewed_accuracy.get("purchase_date_count", 0))
self.total["average_accuracy_rate"]["retailer_name"].add_avg(report.reviewed_accuracy.get("retailername", 0), report.reviewed_accuracy.get("retailername_count", 0))
elif sum([ report.feedback_accuracy[x] for x in report.feedback_accuracy.keys() if "_count" not in x]) > 0:
self.total["average_accuracy_rate"]["imei"].add_avg(report.feedback_accuracy.get("imei_number", 0), report.feedback_accuracy.get("imei_number_count", 0))
self.total["average_accuracy_rate"]["purchase_date"].add_avg(report.feedback_accuracy.get("purchase_date", 0), report.feedback_accuracy.get("purchase_date_count", 0))
self.total["average_accuracy_rate"]["retailer_name"].add_avg(report.feedback_accuracy.get("retailername", 0), report.feedback_accuracy.get("retailername_count", 0))
self.total["average_processing_time"]["imei"].add_avg(report.average_OCR_time.get("imei", 0), report.average_OCR_time.get("imei_count", 0)) if report.average_OCR_time else 0
self.total["average_processing_time"]["invoice"].add_avg(report.average_OCR_time.get("invoice", 0), report.average_OCR_time.get("invoice_count", 0)) if report.average_OCR_time else 0
self.total["usage"]["imei"] += report.number_imei_transaction
self.total["usage"]["invoice"] += report.number_invoice_transaction
def add(self, report):
report_month = report.created_at.month
if self.month is None:
self.month = report_month
self.total["extraction_date"] = f"Subtotal ({self.month})"
elif self.month != report_month:
self.total["images_quality"]["successful_percent"] += self.total["images_quality"]["successful"]/self.total["total_images"]
self.total["images_quality"]["bad_percent"] += self.total["images_quality"]["bad"]/self.total["total_images"]
return False # Reports from a different month, stop accumulating
# accumulate fields
new_data = copy.deepcopy(self.data_format)[0]
new_data["num_imei"] = report.number_imei
new_data["subs"] = report.subsidiary
new_data["extraction_date"] = report.created_at
new_data["num_invoice"] = report.number_invoice
new_data["total_images"] = report.number_images
new_data["images_quality"]["successful"] = report.number_images - report.number_bad_images
new_data["images_quality"]["bad"] = report.number_bad_images
report.reviewed_accuracy = {} if report.reviewed_accuracy is None else report.reviewed_accuracy
report.feedback_accuracy = {} if report.feedback_accuracy is None else report.feedback_accuracy
if sum([ report.reviewed_accuracy[x] for x in report.reviewed_accuracy.keys() if "_count" not in x]):
new_data["average_accuracy_rate"]["imei"] = report.reviewed_accuracy.get("imei_number", None)
new_data["average_accuracy_rate"]["purchase_date"] = report.reviewed_accuracy.get("purchase_date", None)
new_data["average_accuracy_rate"]["retailer_name"] = report.reviewed_accuracy.get("retailername", None)
elif sum([ report.feedback_accuracy[x] for x in report.feedback_accuracy.keys() if "_count" not in x]):
new_data["average_accuracy_rate"]["imei"] = report.feedback_accuracy.get("imei_number", None)
new_data["average_accuracy_rate"]["purchase_date"] = report.feedback_accuracy.get("purchase_date", None)
new_data["average_accuracy_rate"]["retailer_name"] = report.feedback_accuracy.get("retailername", None)
new_data["average_processing_time"]["imei"] = report.average_OCR_time.get("imei", 0) if report.average_OCR_time else 0
new_data["average_processing_time"]["invoice"] = report.average_OCR_time.get("invoice", 0) if report.average_OCR_time else 0
new_data["usage"]["imei"] = report.number_imei_transaction
new_data["usage"]["invoice"] = report.number_invoice_transaction
new_data["images_quality"]["successful_percent"] += new_data["images_quality"]["successful"]/new_data["total_images"] if new_data["total_images"] else 0
new_data["images_quality"]["bad_percent"] += new_data["images_quality"]["bad"]/new_data["total_images"] if new_data["total_images"] else 0
self.data.append(new_data)
self.accumulate(report)
return True
def __call__(self):
self.total["images_quality"]["successful_percent"] += self.total["images_quality"]["successful"]/self.total["total_images"] if self.total["total_images"] else 0
self.total["images_quality"]["bad_percent"] += self.total["images_quality"]["bad"]/self.total["total_images"] if self.total["total_images"] else 0
total = copy.deepcopy(self.total)
total["average_accuracy_rate"]["imei"] = total["average_accuracy_rate"]["imei"]()
total["average_accuracy_rate"]["purchase_date"] = total["average_accuracy_rate"]["purchase_date"]()
total["average_accuracy_rate"]["retailer_name"] = total["average_accuracy_rate"]["retailer_name"]()
total["average_processing_time"]["imei"] = total["average_processing_time"]["imei"]()
total["average_processing_time"]["invoice"] = total["average_processing_time"]["invoice"]()
return self.month, self.data, total
class IterAvg:
def __init__(self, name="default"):
self.name = name
self.avg = 0
self.count = 0
def add(self, values):
"""
Args:
values (list[float]):
"""
values = [x for x in values if x is not None]
if len(values) == 0:
return
self.avg = (self.avg*self.count + sum(values))/(self.count+len(values))
self.count += len(values)
def add_avg(self, avg, count):
if avg is None or count is None or count == 0:
return
self.count += count
self.avg = (self.avg*(self.count-count) + avg*count)/(self.count)
def __call__(self):
return self.avg
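# Illustrative usage (not part of the committed module): IterAvg keeps a running
# mean so per-image scores and pre-aggregated report averages can be merged.
# avg = IterAvg("imei_accuracy")
# avg.add([0.9, 1.0])        # running mean over raw scores -> 0.95
# avg.add_avg(0.8, 2)        # merge a precomputed mean of 2 samples -> 0.875
# avg()                      # 0.875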
def first_of_list(the_list):
if not the_list:
return None
return the_list[0]
def extract_report_detail_list(report_detail_list, lower=False, in_percent=True):
data = []
for report_file in report_detail_list:
data.append({
"Request ID": report_file.correspond_request_id,
"Redemption Number": report_file.correspond_redemption_id,
"Image type": report_file.doc_type,
"IMEI_user submitted": first_of_list(report_file.feedback_result.get("imei_number", [None])),
"IMEI_OCR retrieved": first_of_list(report_file.predict_result.get("imei_number", [None])),
"IMEI1 Accuracy": first_of_list(report_file.feedback_accuracy.get("imei_number", [None])),
"Invoice_Purchase Date_Consumer": report_file.feedback_result.get("purchase_date", None),
"Invoice_Purchase Date_OCR": report_file.predict_result.get("purchase_date", []),
"Invoice_Purchase Date Accuracy": first_of_list(report_file.feedback_accuracy.get("purchase_date", [None])),
"Invoice_Retailer_Consumer": report_file.feedback_result.get("retailername", None),
"Invoice_Retailer_OCR": report_file.predict_result.get("retailername", None),
"Invoice_Retailer Accuracy": first_of_list(report_file.feedback_accuracy.get("retailername", [None])),
"OCR Image Accuracy": report_file.acc,
"OCR Image Speed (seconds)": report_file.time_cost,
"Reviewed?": "No",
"Bad Image Reasons": report_file.bad_image_reason,
"Countermeasures": report_file.counter_measures,
"IMEI_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("imei_number", [None])),
"Purchase Date_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("purchase_date", [None])),
"Retailer_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("retailername", [None])),
})
if lower:
for i, dat in enumerate(data):
keys = list(dat.keys())
for old_key in keys:
data[i][old_key.lower().replace(" ", "_")] = data[i].pop(old_key)
if in_percent:
for i, dat in enumerate(data):
keys = [x for x in list(dat.keys()) if "accuracy" in x.lower()]
for key in keys:
if data[i][key]:
data[i][key] = data[i][key]*100
return data
def count_transactions(start_date, end_date):
base_query = Q(created_at__range=(start_date, end_date))
base_query &= Q(is_test_request=False)
transaction_att = {}
print(f"[DEBUG]: atracting transactions attribute...")
total_transaction_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at')
for request in total_transaction_requests:
if not request.doc_type:
continue
doc_types = request.doc_type.split(",")
for doc_type in doc_types:
if transaction_att.get(doc_type, None) == None:
transaction_att[doc_type] = 1
else:
transaction_att[doc_type] += 1
return transaction_att
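# Minimal sketch of how the returned mapping could feed the Report counters
# (variable names below are illustrative, not from this commit):
# transactions = count_transactions(start_date, end_date)
# number_imei_transaction = transactions.get("imei", 0)
# number_invoice_transaction = transactions.get("invoice", 0)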
def convert_datetime_format(date_string: str, is_gt=False) -> str:
# pattern_date_string = "2023-02-28"
input_format = "%Y-%m-%d"
output_format = "%d/%m/%Y"
# Validate the input date string format
pattern = r"\d{4}-\d{2}-\d{2}"
if re.match(pattern, date_string):
# Convert the date string to a datetime object
date_object = datetime.strptime(date_string, input_format)
# Convert the datetime object to the desired output format
formatted_date = date_object.strftime(output_format)
return formatted_date
return date_string
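# e.g. convert_datetime_format("2023-02-28") -> "28/02/2023";
# strings that do not match YYYY-MM-DD are returned unchanged.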
def predict_result_to_ready(result):
dict_result = {"retailername": "",
"sold_to_party": "",
"purchase_date": [],
"imei_number": [],}
dict_result["retailername"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}])[0].get("value", None)
dict_result["sold_to_party"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}])[1].get("value", None)
dict_result["purchase_date"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}])[2].get("value", [])
dict_result["imei_number"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[3].get("value", [])
return dict_result
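# The indexing above assumes the prediction payload lists the fields in the order
# retailername, sold_to_party, purchase_date, imei_number, roughly like the sketch
# below (field values are made up for illustration):
# {"content": {"document": [{"content": [
#     {"label": "retailername", "value": "Hypothetical Store"},
#     {"label": "sold_to_party", "value": "Hypothetical Buyer"},
#     {"label": "purchase_date", "value": ["28/02/2023"]},
#     {"label": "imei_number", "value": ["123456789012345"]}]}]}}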
def align_fine_result(ready_predict, fine_result):
# print(f"[DEBUG]: fine_result: {fine_result}")
# print(f"[DEBUG]: ready_predict: {ready_predict}")
if fine_result:
if fine_result["purchase_date"] and len(ready_predict["purchase_date"]) == 0:
ready_predict["purchase_date"] = [None]
if fine_result["retailername"] and not ready_predict["retailername"]:
ready_predict["retailername"] = [None]
fine_result["purchase_date"] = [fine_result["purchase_date"] for _ in range(len(ready_predict["purchase_date"]))]
# else:
# fine_result = {}
# for key in ready_predict.keys():
# fine_result[key] = []
# fine_result["purchase_date"] = [None for _ in range(len(ready_predict["purchase_date"]))]
return ready_predict, fine_result
def update_temp_accuracy(accuracy, acc, keys):
for key in keys:
accuracy[key].add(acc[key])
return accuracy
def calculate_accuracy(key_name, inference, target):
"""_summary_
Args:
key_name (string): key to calculate accuracy on, ex: retailername
inference (dict): result from ocr, refined to align with the target down below
target (dict): ground-truth result (feedback or reviewed) to compare against
"""
acc = []
data = []
if not target or not inference:
return acc, data
if not isinstance(inference[key_name], list):
if inference[key_name] is None:
inference[key_name] = []
else:
inference[key_name] = [inference[key_name]]
if not isinstance(target[key_name], list):
if target[key_name] is None:
target[key_name] = []
else:
target[key_name] = [target[key_name]]
for i, v in enumerate(inference[key_name]):
# TODO: target[key_name][i] is None, ""
x = post_processing_str(key_name, inference[key_name][i], is_gt=False)
y = post_processing_str(key_name, target[key_name][i], is_gt=True)
score = eval_ocr_metric(
[x],
[y],
metric=[
"one_minus_ned",
# "line_acc_ignore_case_symbol",
# "line_acc",
# "one_minus_ned_word",
])
acc.append(list(score.values())[0])
data.append([x, y])
return acc, data
def calculate_avg_accuracy(acc, type, keys=[]):
acc_list = []
# print(f"[DEBUG]: type: {type} - acc: {acc}")
for key in keys:
acc_list += acc.get(type, {}).get(key, [])
acc_list = [x for x in acc_list if x is not None]
return sum(acc_list)/len(acc_list) if len(acc_list) > 0 else None
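# Illustration (made-up scores): None entries are dropped before averaging.
# acc = {"feedback": {"imei_number": [1.0, 0.5], "purchase_date": [None]}}
# calculate_avg_accuracy(acc, "feedback", ["imei_number", "purchase_date"])  # -> 0.75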
def calculate_and_save_subcription_file(report, request):
request_att = {"acc": {"feedback": {"imei_number": [],
"purchase_date": [],
"retailername": [],
"sold_to_party": [],
},
"reviewed": {"imei_number": [],
"purchase_date": [],
"retailername": [],
"sold_to_party": [],
}},
"err": [],
"time_cost": {},
"total_images": 0,
"bad_images": 0}
images = SubscriptionRequestFile.objects.filter(request=request)
for image in images:
status, att = calculate_subcription_file(image)
if status != 200:
continue
image.feedback_accuracy = att["acc"]["feedback"]
image.reviewed_accuracy = att["acc"]["reviewed"]
image.is_bad_image_quality = att["is_bad_image"]
image.save()
new_report_file = ReportFile(report=report,
correspond_request_id=request.request_id,
correspond_redemption_id=request.redemption_id,
doc_type=image.doc_type,
predict_result=image.predict_result,
feedback_result=image.feedback_result,
reviewed_result=image.reviewed_result,
feedback_accuracy=att["acc"]["feedback"],
reviewed_accuracy=att["acc"]["reviewed"],
acc=att["avg_acc"],
time_cost=image.processing_time,
bad_image_reason=image.reason,
counter_measures=image.counter_measures,
error="|".join(att["err"])
)
new_report_file.save()
if request_att["time_cost"].get(image.doc_type, None):
request_att["time_cost"][image.doc_type].append(image.processing_time)
else:
request_att["time_cost"][image.doc_type] = [image.processing_time]
try:
request_att["acc"]["feedback"]["imei_number"] += att["acc"]["feedback"]["imei_number"]
request_att["acc"]["feedback"]["purchase_date"] += att["acc"]["feedback"]["purchase_date"]
request_att["acc"]["feedback"]["retailername"] += att["acc"]["feedback"]["retailername"]
request_att["acc"]["feedback"]["sold_to_party"] += att["acc"]["feedback"]["sold_to_party"]
request_att["acc"]["reviewed"]["imei_number"] += att["acc"]["reviewed"]["imei_number"]
request_att["acc"]["reviewed"]["purchase_date"] += att["acc"]["reviewed"]["purchase_date"]
request_att["acc"]["reviewed"]["retailername"] += att["acc"]["reviewed"]["retailername"]
request_att["acc"]["reviewed"]["sold_to_party"] += att["acc"]["reviewed"]["sold_to_party"]
request_att["bad_images"] += int(att["is_bad_image"])
request_att["total_images"] += 1
request_att["err"] += att["err"]
except Exception as e:
print(e)
continue
return request_att
def calculate_subcription_file(subcription_request_file):
att = {"acc": {"feedback": {},
"reviewed": {}},
"err": [],
"is_bad_image": False,
"avg_acc": None}
if not subcription_request_file.predict_result:
return 400, att
inference_result = copy.deepcopy(subcription_request_file.predict_result)
inference_result, feedback_result = align_fine_result(inference_result, copy.deepcopy(subcription_request_file.feedback_result))
inference_result, reviewed_result = align_fine_result(inference_result, copy.deepcopy(subcription_request_file.reviewed_result))
# print(f"[DEBUG]: predict_result: {subcription_request_file.predict_result}")
# print(f"[DEBUG]: inference_result: {inference_result}")
# print(f"[DEBUG]: feedback_result: {feedback_result}")
# print(f"[DEBUG]: reviewed_result: {reviewed_result}")
for key_name in valid_keys:
try:
att["acc"]["feedback"][key_name], _ = calculate_accuracy(key_name, inference_result, feedback_result)
att["acc"]["reviewed"][key_name], _ = calculate_accuracy(key_name, inference_result, reviewed_result)
except Exception as e:
att["err"].append(str(e))
# print(f"[DEBUG]: e: {e} -key_name: {key_name}")
avg_reviewed = calculate_avg_accuracy(att["acc"], "reviewed", ["retailername", "sold_to_party", "purchase_date", "imei_number"])
avg_feedback = calculate_avg_accuracy(att["acc"], "feedback", ["retailername", "sold_to_party", "purchase_date", "imei_number"])
if avg_feedback is not None or avg_reviewed is not None:
avg_acc = max([x for x in [avg_feedback, avg_reviewed] if x is not None])
if avg_acc < BAD_THRESHOLD:
att["is_bad_image"] = True
att["avg_acc"] = avg_acc
return 200, att
def calculate_attributions(request): # for one request, return in order
acc = {"feedback": {},
"reviewed": {}} # {"feedback": {"retailername": [0.1], "sold_to_party":[0.9], "purchase_date":[0.6], "imei_number":[0.8]},
# "reviewed": {"retailername": [0.1], "sold_to_party":[0.9], "purchase_date":[0.6], "imei_number":[0.8]}}
data = {"feedback": {},
"reviewed": {}} # {"feedback": {"retailername": [[ocr, feedback], ...], "sold_to_party":[[ocr, feedback], ...], "purchase_date":[[ocr, feedback], ...], "imei_number":[[ocr, feedback], ...]}}
# {"reviewed": {"retailername": [[ocr, reviewed], ...], "sold_to_party":[[ocr, reviewed], ...], "purchase_date":[[ocr, reviewed], ...], "imei_number":[[ocr, reviewed], ...]}}
time_cost = {} # {"imei": [0.1], "invoice": [0.1]}
image_quality_num = [0, 0] # [good, bad]
image_quality_num[0] = len(request.doc_type.split(","))
error = ""
inference_result = predict_result_to_ready(request.predict_result)
inference_result, reviewed_result = align_fine_result(inference_result, request.reviewed_result)
inference_result, feedback_result = align_fine_result(inference_result, request.feedback_result)
# accuracy calculation
for key_name in valid_keys:
if isinstance(inference_result[key_name], list):
if len(inference_result[key_name]) != len(reviewed_result.get(key_name, [])):
error = f"Request {request.request_id} failed with different {key_name} in predict and reviewed_result"
break
if len(inference_result[key_name]) != len(feedback_result.get(key_name, [])):
error = f"Request {request.request_id} failed with different {key_name} in predict and feedback_result"
break
# calculate accuracy for feedback result
acc["feedback"][key_name], data["feedback"][key_name] = calculate_accuracy(key_name, inference_result, feedback_result)
acc["reviewed"][key_name], data["reviewed"][key_name] = calculate_accuracy(key_name, inference_result, reviewed_result)
else:
inference_result[key_name] = [inference_result[key_name]]
feedback_result[key_name] = [feedback_result[key_name]]
reviewed_result[key_name] = [reviewed_result[key_name]]
acc["feedback"][key_name], data["feedback"][key_name] = calculate_accuracy(key_name, inference_result, feedback_result)
acc["reviewed"][key_name], data["reviewed"][key_name] = calculate_accuracy(key_name, inference_result, reviewed_result)
acc["feedback"]["purchase_date"] = [max(acc["feedback"]["purchase_date"])] if len(acc["feedback"]["purchase_date"]) > 0 else []
acc["reviewed"]["purchase_date"] = [max(acc["reviewed"]["purchase_date"])] if len(acc["reviewed"]["purchase_date"]) > 0 else []
# Count for bad and total images
avg_invoice_feedback = calculate_avg_accuracy(acc, "feedback", ["retailername", "sold_to_party", "purchase_date"])
avg_invoice_reviewed = calculate_avg_accuracy(acc, "reviewed", ["retailername", "sold_to_party", "purchase_date"])
if avg_invoice_feedback is not None or avg_invoice_reviewed is not None:
if max([x for x in [avg_invoice_feedback, avg_invoice_reviewed] if x is not None]) < BAD_THRESHOLD:
image_quality_num[1] += 1
for i, _ in enumerate(acc["feedback"]["imei_number"]):
if acc["feedback"]["imei_number"][i] is not None and acc["reviewed"]["imei_number"][i] is not None:
if max([x for x in [acc["feedback"]["imei_number"][i], acc["reviewed"]["imei_number"][i]] if x is not None]) < BAD_THRESHOLD:
image_quality_num[1] += 1
# time cost and quality calculation
# TODO: to be deprecated, doc_type would be in file level in the future
try:
for doc_type, doc_profile in request.ai_inference_profile.items():
doc_type = doc_type.split("_")[0]
inference_time = doc_profile["inference"][1][0] - doc_profile["inference"][0]
postprocess_time = doc_profile["postprocess"][1] - doc_profile["postprocess"][0]
time_cost.setdefault(doc_type, []).append(inference_time + postprocess_time)
except Exception as e:
error = f"Request id {request.request_id} failed with error: {e}"
return acc, data, time_cost, image_quality_num, error
def shadow_report(report_id, query):
c_connector.make_a_report(
(report_id, query))

View File

@ -6,22 +6,54 @@ import json
from PIL import Image, ExifTags
from django.core.files.uploadedfile import TemporaryUploadedFile
from django.utils import timezone
from fwd import settings
from ..utils import s3 as S3Util
from fwd_api.constant.common import allowed_file_extensions
from fwd_api.exception.exceptions import GeneralException, RequiredFieldException, InvalidException, \
ServiceUnavailableException, FileFormatInvalidException, LimitReachedException, InvalidDecompressedSizeException, RequiredColumnException
from fwd_api.models import SubscriptionRequest, OcrTemplate, FeedbackRequest, SubscriptionRequestFile from fwd_api.models import SubscriptionRequest, OcrTemplate, FeedbackRequest, SubscriptionRequestFile, Report, ReportFile
from fwd_api.utils import process as ProcessUtil
from fwd_api.utils.crypto import image_authenticator
from fwd_api.utils.image import resize
from ..celery_worker.client_connector import c_connector
import imagesize
import csv
from openpyxl import load_workbook
from openpyxl.styles import Font, Border, Side, PatternFill, NamedStyle
s3_client = S3Util.MinioS3Client(
endpoint=settings.S3_ENDPOINT,
access_key=settings.S3_ACCESS_KEY,
secret_key=settings.S3_SECRET_KEY,
bucket_name=settings.S3_BUCKET_NAME
)
def validate_report_list(request):
start_date_str = request.GET.get('start_date')
end_date_str = request.GET.get('end_date')
page_number = int(request.GET.get('page', 0))
page_size = int(request.GET.get('page_size', 10))
report_id = request.GET.get('report_id', None)
validated_data = {}
validated_data["start_date"] = None
validated_data["end_date"] = None
if start_date_str and end_date_str:
try:
validated_data["start_date"] = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z')
validated_data["end_date"] = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z')
except ValueError:
raise InvalidException(excArgs="Date format")
validated_data["report_id"] = report_id
validated_data["page_size"] = page_size
validated_data["page_number"] = page_number
if validated_data["report_id"] is None and validated_data["start_date"] is None:
raise RequiredFieldException(excArgs="report_id, start_date, end_date")
return validated_data
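# A query string this validator accepts could look like the following
# (endpoint path and values are illustrative only):
# ?start_date=2024-01-01T00:00:00+0700&end_date=2024-01-31T23:59:59+0700&page=0&page_size=10
# or, for a single report: ?report_id=<existing id>&start_date=&end_date=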
def validate_feedback_file(csv_file_path):
required_columns = ['redemptionNumber', 'requestId', 'imeiNumber', 'imeiNumber2', 'Purchase Date', 'retailer', 'Sold to party', 'timetakenmilli']
missing_columns = []
@ -57,7 +89,6 @@ def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUES
if total_file_size > settings.MAX_UPLOAD_FILE_SIZE_OF_A_REQUEST:
raise LimitReachedException(excArgs=('Total size of all files', str(settings.MAX_UPLOAD_SIZE_OF_A_FILE / 1024 / 1024), 'MB'))
def validate_csv_feedback(files, max_file_num=1, min_file_num=1, file_field="csv files"):
total_file_size = 0
if len(files) < min_file_num:
@ -159,6 +190,16 @@ def save_feedback_file(file_name: str, rq: FeedbackRequest, uploaded_file: dict)
csvfile.write(file_contents)
return file_path
def save_workbook_file(file_name: str, rp: Report, workbook):
report_id = str(rp.report_id)
folder_path = os.path.join(settings.MEDIA_ROOT, "report", report_id)
os.makedirs(folder_path, exist_ok = True)
file_path = os.path.join(folder_path, file_name)
workbook.save(file_path)
return file_path
def delete_file_with_path(file_path: str) -> bool:
try:
os.remove(file_path)
@ -243,6 +284,19 @@ def save_feedback_to_S3(file_name, id, local_file_path):
print(f"[ERROR]: {e}") print(f"[ERROR]: {e}")
raise ServiceUnavailableException() raise ServiceUnavailableException()
def save_report_to_S3(id, local_file_path):
try:
s3_key = os.path.join("report", local_file_path.split("/")[-2], local_file_path.split("/")[-1])
c_connector.upload_report_to_s3((local_file_path, s3_key, id))
c_connector.remove_local_file((local_file_path, id))
return s3_key
except Exception as e:
print(f"[ERROR]: {e}")
raise ServiceUnavailableException()
def download_from_S3(s3_key, local_file_path):
s3_client.download_file(s3_key, local_file_path)
def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path):
try:
file_path = os.path.join(folder_path, file_name)
@ -340,10 +394,11 @@ def get_value(_dict, keys):
else:
value = value.get(key, {})
if value != 0: if not value:
return value
else:
return "-" return "-"
elif isinstance(value, list):
value = str(value)
return value
def dict2xlsx(input: json, _type='report'):

View File

@ -0,0 +1,385 @@
import re
from pathlib import Path
from difflib import SequenceMatcher
from terminaltables import AsciiTable
from rapidfuzz.distance import Levenshtein
from .wiki_diff import inline_diff
def is_type_list(x, type):
if not isinstance(x, list):
return False
return all(isinstance(item, type) for item in x)
def cal_true_positive_char(pred, gt):
"""Calculate correct character number in prediction.
Args:
pred (str): Prediction text.
gt (str): Ground truth text.
Returns:
true_positive_char_num (int): The true positive number.
"""
all_opt = SequenceMatcher(None, pred, gt)
true_positive_char_num = 0
for opt, _, _, s2, e2 in all_opt.get_opcodes():
if opt == "equal":
true_positive_char_num += e2 - s2
else:
pass
return true_positive_char_num
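# Example (illustrative): with pred="hello" and gt="hallo", SequenceMatcher yields
# equal blocks "h" and "llo", so the function returns 4 true-positive characters.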
def post_processing(text):
"""
- Remove special characters and extra spaces (lower-casing is done by the caller)
"""
text = re.sub(
r"[^aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ0123456789 ]",
" ",
text,
)
text = re.sub(r"\s\s+", " ", text)
text = text.strip()
return text
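# e.g. post_processing("Cửa hàng: FPT-Shop!!") -> "Cửa hàng FPT Shop" (illustrative input)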
def count_matches(pred_texts, gt_texts, use_ignore=True):
"""Count the various match number for metric calculation.
Args:
pred_texts (list[str]): Predicted text string.
gt_texts (list[str]): Ground truth text string.
Returns:
match_res: (dict[str: int]): Match number used for
metric calculation.
"""
match_res = {
"gt_char_num": 0,
"pred_char_num": 0,
"true_positive_char_num": 0,
"gt_word_num": 0,
"match_word_num": 0,
"match_word_ignore_case": 0,
"match_word_ignore_case_symbol": 0,
"match_kie": 0,
"match_kie_ignore_case": 0,
}
# comp = re.compile('[^A-Z^a-z^0-9^\u4e00-\u9fa5]')
# comp = re.compile('[]')
norm_ed_sum = 0.0
gt_texts_for_ned_word = []
pred_texts_for_ned_word = []
for pred_text, gt_text in zip(pred_texts, gt_texts):
if gt_text == pred_text:
match_res["match_word_num"] += 1
match_res["match_kie"] += 1
gt_text_lower = str(gt_text).lower()
pred_text_lower = str(pred_text).lower()
if gt_text_lower == pred_text_lower:
match_res["match_word_ignore_case"] += 1
# gt_text_lower_ignore = comp.sub('', gt_text_lower)
# pred_text_lower_ignore = comp.sub('', pred_text_lower)
if use_ignore:
gt_text_lower_ignore = post_processing(gt_text_lower)
pred_text_lower_ignore = post_processing(pred_text_lower)
else:
gt_text_lower_ignore = gt_text_lower
pred_text_lower_ignore = pred_text_lower
if gt_text_lower_ignore == pred_text_lower_ignore:
match_res["match_kie_ignore_case"] += 1
gt_texts_for_ned_word.append(gt_text_lower_ignore.split(" "))
pred_texts_for_ned_word.append(pred_text_lower_ignore.split(" "))
match_res["gt_word_num"] += 1
norm_ed = Levenshtein.normalized_distance(
pred_text_lower_ignore, gt_text_lower_ignore
)
# if norm_ed > 0.1:
# print(gt_text_lower_ignore, pred_text_lower_ignore, sep='\n')
# print("-"*20)
norm_ed_sum += norm_ed
# number to calculate char level recall & precision
match_res["gt_char_num"] += len(gt_text_lower_ignore)
match_res["pred_char_num"] += len(pred_text_lower_ignore)
true_positive_char_num = cal_true_positive_char(
pred_text_lower_ignore, gt_text_lower_ignore
)
match_res["true_positive_char_num"] += true_positive_char_num
normalized_edit_distance = norm_ed_sum / max(1, len(gt_texts))
match_res["ned"] = normalized_edit_distance
# NED for word-level
norm_ed_word_sum = 0.0
# print(pred_texts_for_ned_word[0])
unique_words = list(
set(
[x for line in pred_texts_for_ned_word for x in line]
+ [x for line in gt_texts_for_ned_word for x in line]
)
)
preds = [
[unique_words.index(w) for w in pred_text_for_ned_word]
for pred_text_for_ned_word in pred_texts_for_ned_word
]
truths = [
[unique_words.index(w) for w in gt_text_for_ned_word]
for gt_text_for_ned_word in gt_texts_for_ned_word
]
for pred_text, gt_text in zip(preds, truths):
norm_ed_word = Levenshtein.normalized_distance(pred_text, gt_text)
# if norm_ed_word < 0.2:
# print(pred_text, gt_text)
norm_ed_word_sum += norm_ed_word
normalized_edit_distance_word = norm_ed_word_sum / max(1, len(gt_texts))
match_res["ned_word"] = normalized_edit_distance_word
return match_res
def eval_ocr_metric(pred_texts, gt_texts, metric="acc"):
"""Evaluate the text recognition performance with metric: word accuracy and
1-N.E.D. See https://rrc.cvc.uab.es/?ch=14&com=tasks for details.
Args:
pred_texts (list[str]): Text strings of prediction.
gt_texts (list[str]): Text strings of ground truth.
metric (str | list[str]): Metric(s) to be evaluated. Options are:
- 'word_acc': Accuracy at word level.
- 'word_acc_ignore_case': Accuracy at word level, ignoring letter
case.
- 'word_acc_ignore_case_symbol': Accuracy at word level, ignoring
letter case and symbol. (Default metric for academic evaluation)
- 'char_recall': Recall at character level, ignoring
letter case and symbol.
- 'char_precision': Precision at character level, ignoring
letter case and symbol.
- 'one_minus_ned': 1 - normalized_edit_distance
In particular, if ``metric == 'acc'``, results on all metrics above
will be reported.
Returns:
dict{str: float}: Result dict for text recognition, keys could be some
of the following: ['word_acc', 'word_acc_ignore_case',
'word_acc_ignore_case_symbol', 'char_recall', 'char_precision',
'1-N.E.D'].
"""
assert isinstance(pred_texts, list)
assert isinstance(gt_texts, list)
assert len(pred_texts) == len(gt_texts)
assert isinstance(metric, str) or is_type_list(metric, str)
if metric == "acc" or metric == ["acc"]:
metric = [
"word_acc",
"word_acc_ignore_case",
"word_acc_ignore_case_symbol",
"char_recall",
"char_precision",
"one_minus_ned",
]
metric = set([metric]) if isinstance(metric, str) else set(metric)
# supported_metrics = set([
# 'word_acc', 'word_acc_ignore_case', 'word_acc_ignore_case_symbol',
# 'char_recall', 'char_precision', 'one_minus_ned', 'one_minust_ned_word'
# ])
# assert metric.issubset(supported_metrics)
match_res = count_matches(pred_texts, gt_texts)
eps = 1e-8
eval_res = {}
if "char_recall" in metric:
char_recall = (
1.0 * match_res["true_positive_char_num"] / (eps + match_res["gt_char_num"])
)
eval_res["char_recall"] = char_recall
if "char_precision" in metric:
char_precision = (
1.0
* match_res["true_positive_char_num"]
/ (eps + match_res["pred_char_num"])
)
eval_res["char_precision"] = char_precision
if "word_acc" in metric:
word_acc = 1.0 * match_res["match_word_num"] / (eps + match_res["gt_word_num"])
eval_res["word_acc"] = word_acc
if "word_acc_ignore_case" in metric:
word_acc_ignore_case = (
1.0 * match_res["match_word_ignore_case"] / (eps + match_res["gt_word_num"])
)
eval_res["word_acc_ignore_case"] = word_acc_ignore_case
if "word_acc_ignore_case_symbol" in metric:
word_acc_ignore_case_symbol = (
1.0
* match_res["match_word_ignore_case_symbol"]
/ (eps + match_res["gt_word_num"])
)
eval_res["word_acc_ignore_case_symbol"] = word_acc_ignore_case_symbol
if "one_minus_ned" in metric:
eval_res["1-N.E.D"] = 1.0 - match_res["ned"]
if "one_minus_ned_word" in metric:
eval_res["1-N.E.D_word"] = 1.0 - match_res["ned_word"]
if "line_acc_ignore_case_symbol" in metric:
line_acc_ignore_case_symbol = (
1.0 * match_res["match_kie_ignore_case"] / (eps + match_res["gt_word_num"])
)
eval_res["line_acc_ignore_case_symbol"] = line_acc_ignore_case_symbol
if "line_acc" in metric:
word_acc_ignore_case_symbol = (
1.0 * match_res["match_kie"] / (eps + match_res["gt_word_num"])
)
eval_res["line_acc"] = word_acc_ignore_case_symbol
for key, value in eval_res.items():
eval_res[key] = float("{:.4f}".format(value))
return eval_res
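# Quick illustration with made-up strings:
# eval_ocr_metric(["SamsungGalaxy"], ["Samsung Galaxy"], metric=["one_minus_ned"])
# -> {"1-N.E.D": 0.9286}  (one edit over 14 characters after normalisation)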
def eval_kie(preds_e2e: dict[str, dict[str, str]], gt_e2e: dict[str, dict[str, str]], labels, skip_labels=[]):
results = {label: 1 for label in labels}
pred_texts_dict = {label: [] for label in labels}
gt_texts_dict = {label: [] for label in labels}
fail_cases = {}
for img_id in gt_e2e.keys():
fail_cases[img_id] = {}
pred_items = preds_e2e.get(img_id, {k: '' for k in gt_e2e[img_id]})
gt_items = gt_e2e[img_id]
for class_name, text_gt in gt_items.items():
if class_name in skip_labels:
continue
# if class_name == 'seller_name_value':
# print(gt_items)
if class_name not in pred_items:
text_pred = ""
else:
text_pred = pred_items[class_name]
if str(text_pred) != str(text_gt):
diff = inline_diff(text_pred, text_gt)
fail_cases[img_id][class_name] = {
'pred': text_pred,
'gt': text_gt,
"diff": diff['res_text'],
"ned": diff["ned"],
"score": eval_ocr_metric([text_pred], [text_gt], metric=[
"one_minus_ned"])["1-N.E.D"],
}
pred_texts_dict[class_name].append(text_pred)
gt_texts_dict[class_name].append(text_gt)
for class_name in labels:
pred_texts = pred_texts_dict[class_name]
gt_texts = gt_texts_dict[class_name]
result = eval_ocr_metric(
pred_texts,
gt_texts,
metric=[
"one_minus_ned",
"line_acc_ignore_case_symbol",
"line_acc",
"one_minus_ned_word",
],
)
results[class_name] = {
"1-ned": result["1-N.E.D"],
"1-ned-word": result["1-N.E.D_word"],
"line_acc": result["line_acc"],
"line_acc_ignore_case_symbol": result["line_acc_ignore_case_symbol"],
"samples": len(pred_texts),
}
# avg results
sum_1_ned = sum(
[
results[class_name]["1-ned"] * results[class_name]["samples"]
for class_name in labels
]
)
sum_1_ned_word = sum(
[
results[class_name]["1-ned-word"] * results[class_name]["samples"]
for class_name in labels
]
)
sum_line_acc = sum(
[
results[class_name]["line_acc"] * results[class_name]["samples"]
for class_name in labels
]
)
sum_line_acc_ignore_case_symbol = sum(
[
results[class_name]["line_acc_ignore_case_symbol"]
* results[class_name]["samples"]
for class_name in labels
]
)
total_samples = sum(
[results[class_name]["samples"] for class_name in labels]
)
results["avg_all"] = {
"1-ned": round(sum_1_ned / total_samples, 4),
"1-ned-word": round(sum_1_ned_word / total_samples, 4),
"line_acc": round(sum_line_acc / total_samples, 4),
"line_acc_ignore_case_symbol": round(
sum_line_acc_ignore_case_symbol / total_samples, 4
),
"samples": total_samples,
}
table_data = [
[
"class_name",
"1-NED",
"1-N.E.D_word",
"line-acc",
"line_acc_ignore_case_symbol",
"#samples",
]
]
for class_name in results.keys():
# if c < p.shape[0]:
table_data.append(
[
class_name,
results[class_name]["1-ned"],
results[class_name]["1-ned-word"],
results[class_name]["line_acc"],
results[class_name]["line_acc_ignore_case_symbol"],
results[class_name]["samples"],
]
)
table = AsciiTable(table_data)
print(table.table)
return results, fail_cases

View File

@ -0,0 +1,432 @@
import os
import re
import ast
import time
import json
import glob
import shutil
import pandas as pd
from tqdm import tqdm
from pathlib import Path
from datetime import datetime
from .ocr_metrics import eval_ocr_metric
import sys
# sys.path.append(os.path.dirname(__file__))
from sdsvkvu.utils.query.sbt_v2 import get_seller, post_process_seller
def read_json(file_path: str):
with open(file_path, 'r') as f:
return json.load(f)
def write_to_json(file_path, content):
with open(file_path, mode='w', encoding='utf8') as f:
json.dump(content, f, ensure_ascii=False)
def convert_datetime_format(date_string: str, is_gt=False) -> str:
# pattern_date_string = "2023-02-28"
output_format = "%Y-%m-%d"
input_format = "%d/%m/%Y"
# Validate the input date string format
pattern = r"\d{2}\/\d{2}\/\d{4}"
if re.match(pattern, date_string):
# Convert the date string to a datetime object
date_object = datetime.strptime(date_string, input_format)
# Convert the datetime object to the desired output format
formatted_date = date_object.strftime(output_format)
return formatted_date
return date_string
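# Inverse of the API-side helper, e.g. convert_datetime_format("28/02/2023") -> "2023-02-28"
# (illustrative value); strings that are not DD/MM/YYYY pass through unchanged.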
def normalise_retailer_name(retailer: str):
input_value = {
"text": retailer,
"id": 0,
"class": "seller",
"bbox": [0, 0, 0, 0],
}
output = get_seller({'seller': [input_value]})
norm_seller_name = post_process_seller(output)
return norm_seller_name
def post_processing_str(class_name: str, s: str, is_gt: bool) -> str:
s = str(s).replace('', ' ').strip()
if s.lower() in ['null', 'nan', "none"]:
return ''
if class_name == "purchase_date" and is_gt == True:
s = convert_datetime_format(s)
if class_name == "retailername":
s = normalise_retailer_name(s)
return s
def convert_groundtruth_from_csv(
csv_path: str,
save_dir: str,
classes: list = ["retailername", "sold_to_party", "purchase_date", "imei_number"]
):
# if isinstance(csv_path_list, str):
# csv_path_list = [csv_path_list]
df = pd.read_csv(csv_path)
total_output = {}
for _, request in df.iterrows():
req_id = request['requestId']
if req_id not in total_output:
total_output[req_id] = {k: None for k in classes}
total_output[req_id]["imei_number"] = []
total_output[req_id]["imei_number"].extend([request["imeiNumber"], request["imeiNumber2"]])
total_output[req_id]["imei_number"] = list(set(total_output[req_id]["imei_number"]))
total_output[req_id]["purchase_date"] = request["Purchase Date"]
total_output[req_id]["retailername"] = request["retailer"]
for req_id, output in total_output.items():
save_path = os.path.join(save_dir, req_id)
os.makedirs(save_path, exist_ok=True)
write_to_json(os.path.join(save_path, f"{req_id}.json"), output)
def convert_predict_from_csv(
csv_path: str,
save_dir: str,
classes: list = ["retailername", "sold_to_party", "purchase_date", "imei_number"]
):
# if isinstance(csv_path_list, str):
# csv_path_list = [csv_path_list]
df = pd.read_csv(csv_path)
for _, request in df.iterrows():
n_pages = request['pages']
req_id = request['request_id']
if not isinstance(request['doc_type'], str) or not isinstance(request['predict_result'], str):
print(f"[WARNING] Skipped request id {req_id}")
continue
doc_type_list = request['doc_type'].split(',')
assert n_pages == len(doc_type_list), \
"No. pages is different no. documents"
json_path = os.path.join(save_dir, req_id)
os.makedirs(json_path, exist_ok=True)
# For user_submitted_results
if "feedback_result" in request:
feedback_data = ast.literal_eval(request['feedback_result'])
fname = f"{req_id}.json"
write_to_json(os.path.join(json_path, fname), feedback_data)
# For predict_results
data = ast.literal_eval(request['predict_result'])['content']['document'][0]['content']
infer_time = float(request['ai_inference_time']) + float(request['preprocessing_time']) + 0.1
n_imei, n_invoice = 0, 0
for doc_type in doc_type_list:
output = {k: None for k in classes}
if not os.path.exists(json_path):
os.makedirs(json_path, exist_ok=True)
if doc_type == "imei":
for info in data:
if info['label'] == "imei_number":
output['imei_number'] = info['value'][n_imei]
output['processing_time'] = infer_time
fname = f"temp_{doc_type}_{req_id}_{n_imei}.json"
write_to_json(os.path.join(json_path, fname), output)
n_imei += 1
break
elif doc_type == "invoice":
for info in data:
if info['label'] == "imei_number":
continue
output[info['label']] = info['value']
output['processing_time'] = infer_time
fname = f"temp_{doc_type}_{req_id}_{n_invoice}.json"
write_to_json(os.path.join(json_path, fname), output)
n_invoice += 1
def gen_req_to_red_dict(csv_path: str):
df = pd.read_csv(csv_path)
df = df.loc[:, ["requestId", "redemptionNumber"]]
req_to_red = {row["requestId"]: row["redemptionNumber"] for _, row in df.iterrows()}
return req_to_red
def gen_req_to_red_dict_2(csv_path: str):
df = pd.read_csv(csv_path)
df = df.loc[:, ["request_id", "redemption_id"]]
req_to_red = {row["request_id"]: row["redemption_id"] for _, row in df.iterrows()}
return req_to_red
def init_csv(
gt_dir: str,
pred_dir: str,
req_to_red: dict,
):
list_request_id = os.listdir(gt_dir)
total = []
for request_id in list_request_id:
gt_path = os.path.join(gt_dir, request_id, request_id+".json")
if not os.path.exists(gt_path):
print(f"[WARNING] Skipped request id {os.path.basename(os.path.dirname(gt_path))}")
continue
gt_data = read_json(gt_path)
json_file_list = glob.glob(os.path.join(pred_dir, request_id, "temp_*.json"))
json_file_list = sorted(json_file_list, key=lambda x: int(x.split(".json")[0].split('_')[-1]))
n_imei, n_invoice = 0, 0
# if len(json_file_list) > 3:
# continue
for json_file in json_file_list:
pred_data = read_json(json_file)
if "imei" in json_file:
pred_value = pred_data['imei_number']
gt_value = gt_data['imei_number'][n_imei]
n_imei += 1
score = eval_ocr_metric(
[post_processing_str("imei_number", pred_value, is_gt=False)],
[post_processing_str("imei_number", gt_value, is_gt=True)],
metric=["one_minus_ned"]
)['1-N.E.D']
total.append({
"requestId": request_id,
"redemptionNumber": req_to_red[request_id],
"userSubmitResults": gt_value,
"OCRResults": pred_value,
"revisedResults_by_SDSRV": "",
"accuracy": score,
"processingTime (by request)": pred_data['processing_time'],
"class_name": "imei_number",
"file_path": json_file
})
elif "invoice" in json_file:
for class_name in ["retailername", "purchase_date"]:
pred_value = pred_data[class_name]
gt_value = gt_data[class_name]
if isinstance(gt_value, list):
gt_value = gt_value[0]
n_invoice += 1
if not isinstance(pred_value, list):
pred_value = [pred_value]
score = 0
for _pred_value in pred_value:
score1 = eval_ocr_metric(
[post_processing_str(class_name, _pred_value, is_gt=False)],
[post_processing_str(class_name, gt_value, is_gt=True)],
metric=["one_minus_ned"]
)['1-N.E.D']
score = max(score, score1)
total.append({
"requestId": request_id,
"redemptionNumber": req_to_red[request_id],
"userSubmitResults": gt_value,
"OCRResults": pred_value[0] if class_name == "retailername" else pred_value,
"revisedResults_by_SDSRV": "",
"accuracy": score,
"processingTime (by request)": pred_data['processing_time'],
"class_name": class_name,
"file_path": json_file
})
return total
def export_report(
init_csv: str,
):
df = pd.read_csv(init_csv)
for index, request in df.iterrows():
file_path = request['file_path']
class_name = request['class_name']
pred_value = request['OCRResults']
revised_value = read_json(file_path)[class_name]
if class_name == "purchase_date":
pred_value = ast.literal_eval(pred_value)
if isinstance(revised_value, list):
if len(revised_value) > 0:
revised_value = revised_value[0]
else:
revised_value = None
if len(pred_value) == 0:
pred_value = [None]
score = 0
for _pred_value in pred_value:
score1 = eval_ocr_metric(
[post_processing_str(class_name, _pred_value, is_gt=False)],
[post_processing_str(class_name, revised_value, is_gt=True)],
metric=["one_minus_ned"]
)['1-N.E.D']
score = max(score, score1)
else:
score = eval_ocr_metric(
[post_processing_str(class_name, pred_value, is_gt=False)],
[post_processing_str(class_name, revised_value, is_gt=True)],
metric=["one_minus_ned"]
)['1-N.E.D']
df.at[index, "revisedResults_by_SDSRV"] = revised_value
df.at[index, "accuracy"] = score
return df
def pick_sample_to_revise(
ocr_accuracy: list,
gt_dir: str,
save_dir: str
):
empty_err_path = os.path.join(save_dir, "empty_results")
other_err_path = os.path.join(save_dir, "diff_results")
os.makedirs(empty_err_path, exist_ok=True)
os.makedirs(other_err_path, exist_ok=True)
for request in ocr_accuracy:
score = request['accuracy']
json_path = request['file_path']
request_id = request['requestId']
img_path_folder = os.path.join(gt_dir, Path(json_path).parts[-2], Path(json_path).parts[-1])
img_path = [ff for ff in glob.glob(img_path_folder.replace(".json", ".*")) if ".json" not in ff]
if len(img_path) == 0:
print(f"[WARNING] Skipped request id {request_id}")
continue
img_path = img_path[0]
# img_path = [ff for ff in glob.glob(json_path.replace(".json", ".*"))][0]
if score == 0:
save_path = os.path.join(empty_err_path, request_id)
elif score < 1:
save_path = os.path.join(other_err_path, request_id)
else:
continue
os.makedirs(save_path, exist_ok=True)
shutil.copy(img_path, save_path)
shutil.copy(json_path, save_path)
def merge_revised_sample(
revised_path_list: list,
save_dir: str
):
if not isinstance(revised_path_list, list):
revised_path_list = [revised_path_list]
for revised_path in revised_path_list:
list_request = [os.path.basename(ff) for ff in os.listdir(revised_path)]
for request in list_request:
file_list = glob.glob(os.path.join(revised_path, request, "*.json*"))
for file_path in file_list:
# shutil.copyfile(file_path, os.path.join(save_path, request))
os.system(f"sudo cp {file_path} {os.path.join(save_dir, request)}")
def calculate_average_by_column(df, column_name):
df = df.groupby(by=["requestId"])
time_list = []
for req, sub_df in df:
if len(sub_df) > 0:
time_list.append(sub_df.iloc[0][column_name])
if len(time_list) > 0:
return sum(time_list)/len(time_list)
return 0
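# Sketch with a made-up frame: only the first row of each requestId is counted.
# df = pd.DataFrame({"requestId": ["a", "a", "b"],
#                    "processingTime (by request)": [2.0, 2.0, 4.0]})
# calculate_average_by_column(df, "processingTime (by request)")  # -> 3.0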
if __name__ == "__main__":
save_path = "/mnt/hdd4T/TannedCung/OCR/Data/SBT_for_acc/15Jan"
save_csv = "logs/eval_20240115"
csv_path = "/mnt/hdd4T/TannedCung/OCR/Data/SBT_for_acc/15Jan.csv"
csv_path_end_user = "logs/eval_20240115/OCR_15Jan2024.csv"
# Step 1: Convert a csv file to get user submitted results for each request
print("[INFO] Starting convert csv from customer to json")
os.system(f"sudo chmod -R 777 {save_path}")
convert_groundtruth_from_csv(csv_path=csv_path_end_user, save_dir=save_path)
print("[INFO] Converted")
# # Step 2: Convert a csv file to get predict OCR results for each image
print("[INFO] Starting convert csv from SDSV to json")
convert_predict_from_csv(csv_path=csv_path, save_dir=save_path)
print("[INFO] Converted")
# # Step 3: Gen initial csv file and calculate OCR result between submitted results and ocr results
print("[INFO] Starting generate csv to get performance")
gt_path = save_path
pred_path = save_path
req_to_red_dict = gen_req_to_red_dict(csv_path_end_user)
init_data = init_csv(gt_dir=gt_path, pred_dir=pred_path, req_to_red=req_to_red_dict)
pd.DataFrame(init_data).to_csv(os.path.join(save_csv, "init1.csv"), index=False)
print("[INFO] Done")
# # Step 4: Split requests whose accuracy is less than 1 to revise
# print("[INFO] Starting split data to review")
# revised_path = os.path.join(save_csv, "revised")
# # shutil.rmtree(revised_path)
# pick_sample_to_revise(ocr_accuracy=init_data, gt_dir=save_path, save_dir=revised_path)
# print("[INFO] Done")
# # Step 5: Merge revised results to gt folder
# print("[INFO] Merging revised data to ground truth folder")
# revised_path = os.path.join(save_csv, "revised")
# revised_path = [f'{revised_path}/empty_results', f'{revised_path}/diff_results']
# merge_revised_sample(revised_path_list=revised_path, save_dir=save_path)
# print("Done")
# # Step 6: Caculate OCR result between ocr results and revised results
# print("[INFO] Exporting OCR report")
# init_csv_path = os.path.join(save_csv, "init1.csv")
# report = export_report(init_csv=init_csv_path)
# error_path = os.path.join(save_csv, "errors")
# pick_sample_to_revise(ocr_accuracy=report[report.accuracy < 0.75].to_dict('records'), gt_dir=save_path, save_dir=error_path)
# n_total_images = len(report)
# n_bad_images = len(report[report.accuracy < 0.75])
# average_acc = report[report.accuracy >= 0.75]['accuracy'].mean()
# print("Total requests:", len(report['requestId'].unique()))
# print("Total images:", n_total_images)
# print("No. imei images:", len(report[report.class_name == "imei_number"]))
# print("No. invoice images:", len(report[report.class_name == "retailername"]))
# print("No. bad quality images:", n_bad_images)
# print("No. valid images:", n_total_images - n_bad_images)
# print("No. per of bad quality images:", 100*n_bad_images/n_total_images)
# print("Average accuracy:", 100*average_acc)
# last_row = n_total_images
# report.at[last_row, "requestId"] = "Total requests:"
# report.at[last_row, "redemptionNumber"] = len(report['requestId'].unique())
# report.at[last_row+1, "requestId"] = "Total images:"
# report.at[last_row+1, "redemptionNumber"] = n_total_images
# report.at[last_row+2, "requestId"] = "No. imei images:"
# report.at[last_row+2, "redemptionNumber"] = len(report[report.class_name == "imei_number"])
# report.at[last_row+3, "requestId"] = "No. invoice images:"
# report.at[last_row+3, "redemptionNumber"] = len(report[report.class_name == "retailername"])
# report.at[last_row+4, "requestId"] = "No. bad quality images:"
# report.at[last_row+4, "redemptionNumber"] = n_bad_images
# report.at[last_row+5, "requestId"] = "No. valid images:"
# report.at[last_row+5, "redemptionNumber"] = n_total_images - n_bad_images
# report.at[last_row+6, "requestId"] = "No. per of bad quality images:"
# report.at[last_row+6, "redemptionNumber"] = 100*n_bad_images/n_total_images
# report.at[last_row+7, "requestId"] = "Average accuracy:"
# report.at[last_row+7, "redemptionNumber"] = 100*average_acc
# report.drop(columns=["file_path", "class_name"]).to_csv(os.path.join(save_csv, f"SBT_report_{time.strftime('%Y%m%d')}.csv"), index=False)
# print("[INFO] Done")

View File

@ -0,0 +1,201 @@
# https://stackoverflow.com/questions/774316/python-difflib-highlighting-differences-inline
import difflib
import unidecode
import os
import glob
import pandas as pd
VOWELS = 'aeouiy' + 'AEOUIY'
CONSONANTS = 'bcdfghjklmnpqrstvxwz' + 'BCDFGHJKLMNPQRSTVXWZ'
# PREDICT_PATH = 'ocr/result'
# GROUNDTRUTH_PATH = '/mnt/hdd2T/AICR/Datasets/wiki/ground_truth'
PREDICT_PATH = 'ocr/result/cinamon'
GROUNDTRUTH_PATH = '/mnt/hdd2T/AICR/Datasets/Backup/1.Hand_writing/Lines/cinnamon_data'
# note that we also use different preprocessing for the cinamon data
# SAVE_PATH = 'wiki_diff'
SAVE_PATH = 'wiki_diff/cinamon'
RES_PATH = f'{SAVE_PATH}/result/'
WRONG_ACCENT_FILE = f'{SAVE_PATH}/wrong_accent.txt'
LOST_ACCENT_FILE = f'{SAVE_PATH}/lost_accent.txt'
TOTAL_WORD = 0
def write_accent_error(path, err):
# path should be wrong_accent_file or lost_accent_file
with open(path, 'a') as f:
f.write(err)
f.write('\n')
def update_ddata_specialchars(ddata_specialchars, correction_key, char_key):
if char_key in ddata_specialchars[correction_key]:
ddata_specialchars[correction_key][char_key] += 1
else:
ddata_specialchars[correction_key][char_key] = 1
def process_replace_tag(matcher, i1, i2, j1, j2, ddata, ddata_specialchars):
a_char = matcher.a[i1:i2]
b_char = matcher.b[j1:j2]
ddata['res_text'] += ' ### {' + a_char + ' -> ' + b_char + '} ### '
ddata['nwrongs'] += 1*len(b_char)
if len(a_char) == 1 and len(b_char) == 1: # single char case
if a_char.lower() == b_char.lower(): # wrong upper/lower case
ddata['UL_single'] += 1
update_ddata_specialchars(ddata_specialchars, 'UL', (a_char, b_char))
else:
ddata['nwrongs_single'] += 1
a_ori = unidecode.unidecode(a_char).lower()
b_ori = unidecode.unidecode(b_char).lower()
if a_ori in VOWELS and b_ori in VOWELS:
if a_ori == b_ori:
err = a_char + ' -> ' + b_char
if b_ori == b_char.lower(): # e.g. Ơ -> O
ddata['nlost_accent'] += 1
# write_accent_error(LOST_ACCENT_FILE, err)
else: # e.g Ơ -> Ớ
ddata['nwrong_accent'] += 1
# write_accent_error(WRONG_ACCENT_FILE, err)
else: # e.g Ă -> Â
ddata['nwrong_vowels'] += 1
else:
if a_ori in CONSONANTS and b_ori in CONSONANTS:
ddata['nwrong_consonants'] += 1
else:
ddata['nwrong_specialchars'] += 1
update_ddata_specialchars(ddata_specialchars, 'wrong', (a_char, b_char))
else:
if a_char.lower() == b_char.lower():
ddata['UL_multiple'] += 1
update_ddata_specialchars(ddata_specialchars, 'UL', (a_char, b_char))
else:
ddata['nwrongs_multiple'] += 1
if len(a_char) > 10 or len(b_char) > 10:
ddata['nlong_sequences'] += 1
# print(a_char)
def process_delete_tag(matcher, i1, i2, ddata, ddata_specialchars):
a_char = matcher.a[i1:i2]
ddata['res_text'] += ' ### {- ' + a_char + '} ### '
ddata['nadds'] += 1*len(a_char)
if len(a_char) == 1:
ddata['nadds_single'] += 1
if a_char.lower() in CONSONANTS + VOWELS:
ddata['nadds_chars'] += 1
else:
if a_char == ' ':
ddata['nadds_space'] += 1
else:
ddata['nadds_specialchars'] += 1
update_ddata_specialchars(ddata_specialchars, 'add', a_char)
else:
ddata['nadds_multiple'] += 1
if len(a_char) > 10:
ddata['nlong_sequences'] += 1
# print(a_char)
def process_insert_tag(matcher, j1, j2, ddata, ddata_specialchars):
b_char = matcher.b[j1:j2]
ddata['nlosts'] += 1*len(b_char)
ddata['res_text'] += ' ### {+ ' + b_char + '} ### '
if len(b_char) == 1:
ddata['nlosts_single'] += 1
if b_char.lower() in CONSONANTS + VOWELS:
ddata['nlosts_chars'] += 1
else:
if b_char == ' ':
ddata['nlosts_space'] += 1
else:
ddata['nlosts_specialchars'] += 1
update_ddata_specialchars(ddata_specialchars, 'lost', b_char)
else:
ddata['nlosts_multiple'] += 1
if len(b_char) > 10:
ddata['nlong_sequences'] += 1
# print(b_char)
def inline_diff(a, b, ddata_specialchars={'lost': {}, 'add': {}, 'wrong': {}, 'UL': {}}):
matcher = difflib.SequenceMatcher(None, a, b)
ddata = {'res_text': ''}
# ddata = ddata | {key: 0 for key in ['nsingle', 'nmultiple']}
ddata = ddata | {key: 0 for key in ['UL_single', 'UL_multiple']}
ddata = ddata | {
key: 0 for key in
['nlosts', 'nlosts_single', 'nlosts_multiple', 'nlosts_chars', 'nlosts_specialchars', 'nlosts_space']}
ddata = ddata | {
key: 0 for key in
['nadds', 'nadds_single', 'nadds_multiple', 'nadds_chars', 'nadds_specialchars', 'nadds_space']}
ddata = ddata | {
key: 0 for key in
['nwrongs', 'nwrongs_single', 'nwrongs_multiple', 'nwrong_accent', 'nlost_accent', 'nwrong_vowels',
'nwrong_consonants', 'nwrong_specialchars']}
ddata['nlong_sequences'] = 0
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
if tag == 'replace': # wrong
process_replace_tag(matcher, i1, i2, j1, j2, ddata, ddata_specialchars)
if tag == 'delete': # the OCR added a char, so the matcher reports "delete"
process_delete_tag(matcher, i1, i2, ddata, ddata_specialchars)
if tag == 'equal':
ddata['res_text'] += matcher.a[i1:i2]
if tag == 'insert': # the OCR lost a char, so the matcher reports "insert"
process_insert_tag(matcher, j1, j2, ddata, ddata_specialchars)
ddata["ned"] = ddata['nwrongs'] + ddata['nadds'] + ddata['nlosts']
return ddata
def process_single_file(file_name, ddata_specialchars):
# read predict file
with open(os.path.join(PREDICT_PATH, file_name), 'r') as f:
predict = f.readlines()[0].strip()
# predict = ''.join(predict)
# predict = predict.replace(' ', '')
# predict = predict.replace('\n', '')
# print(predict)
# read groundtruth file
with open(os.path.join(GROUNDTRUTH_PATH, file_name), 'r') as f:
gt = f.readlines()[0].strip()
# gt = ''.join(gt)
# gt = gt.replace('\n', '')
# get statistical data on the differences between the prediction and the ground truth
ddata = inline_diff(predict, gt, ddata_specialchars)
global TOTAL_WORD
TOTAL_WORD = TOTAL_WORD + len(gt.split())
# write to save_path
res_text = ddata.pop('res_text', None)
save_file = os.path.join(RES_PATH, file_name)
with open(save_file, 'w') as f:
f.write(res_text)
# generate csv file
ddata = {'file_name': save_file} | ddata
return ddata
def main(overwrite=False):
for accent_file in [WRONG_ACCENT_FILE, LOST_ACCENT_FILE]:
if os.path.exists(accent_file):
os.remove(accent_file)
lddata = []
ddata_specialchars = {'lost': {}, 'add': {}, 'wrong': {}, 'UL': {}}
for file_ in glob.glob(f'{PREDICT_PATH}/*.txt'):
file_name = file_.split('/')[-1]
ddata = process_single_file(file_name, ddata_specialchars)
lddata.append(ddata)
if overwrite:
df = pd.DataFrame(lddata)
df.to_csv(f'{SAVE_PATH}/wiki_diff.csv', sep='\t')
df_ = pd.DataFrame(ddata_specialchars)
df_.to_csv(f'{SAVE_PATH}/wiki_diff_specialchars.csv', sep='\t')
print(TOTAL_WORD)
if __name__ == '__main__':
main(overwrite=True)
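For reference, here is a minimal, standalone sketch (with made-up strings, not data from this repository) of the difflib.SequenceMatcher opcodes the script above is built around: the prediction is compared against the ground truth, and inline_diff counts 'replace' opcodes as wrong characters, 'delete' opcodes as characters the OCR added, and 'insert' opcodes as characters the OCR lost.

import difflib

predict = "Hello wrld!"  # hypothetical OCR output
gt = "Hello world"       # hypothetical ground truth

matcher = difflib.SequenceMatcher(None, predict, gt)
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
    print(tag, repr(predict[i1:i2]), "->", repr(gt[j1:j2]))

# Roughly prints:
#   equal  'Hello w' -> 'Hello w'
#   insert ''        -> 'o'    (the OCR lost the 'o', counted in nlosts)
#   equal  'rld'     -> 'rld'
#   delete '!'       -> ''     (the OCR added the '!', counted in nadds)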

View File

@ -36,7 +36,7 @@ requests==2.28.1
ruamel.yaml==0.17.21 ruamel.yaml==0.17.21
ruamel.yaml.clib==0.2.7 ruamel.yaml.clib==0.2.7
sqlparse==0.4.3 sqlparse==0.4.3
tzdata==2022.6 tzdata==2022.7
uritemplate==4.1.1 uritemplate==4.1.1
urllib3==1.26.13 urllib3==1.26.13
uvicorn==0.20.0 uvicorn==0.20.0
@ -51,3 +51,12 @@ imagesize==1.4.1
pdf2image==1.16.3 pdf2image==1.16.3
redis==5.0.1 redis==5.0.1
django-celery-beat==2.5.0 django-celery-beat==2.5.0
terminaltables==3.1.10
rapidfuzz==3.6.1
Unidecode==1.3.8
pandas==2.2.0
openpyxl==3.1.2
# For sdsvkvu compatibility
# torch==1.13.1+cu116
# torchvision==0.14.1+cu116
# --extra-index-url https://download.pytorch.org/whl/cu116

View File

@ -0,0 +1 @@
pg_dump -U sbt -h sbt.cxetpslawu4p.ap-southeast-1.rds.amazonaws.com sbt2 >> sbt2.sql

cope2n-fe/.dockerignore Normal file
View File

@ -0,0 +1 @@
/node_modules

View File

@ -0,0 +1,3 @@
VITE_PORT=8080
VITE_PROXY=https://107.120.133.22/
VITE_KUBEFLOW_HOST=https://107.120.133.22:8085

cope2n-fe/.eslintrc Normal file
View File

@ -0,0 +1,14 @@
{
"env": {
"browser": true,
"es6": true
},
"extends": [
"react-app",
"prettier",
"plugin:@tanstack/eslint-plugin-query/recommended"
],
"ignorePatterns": [
"**/components/react-via/**/*.js"
]
}

cope2n-fe/.gitignore vendored
View File

@ -1,27 +1,33 @@
# Logs # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
logs
*.log # dependencies
/node_modules
/.pnp
.pnp.js
# testing
/coverage
# production
/build
/dist
# misc
.DS_Store
.env.local
.env.development.local
.env.test.local
.env.production.local
npm-debug.log* npm-debug.log*
yarn-debug.log* yarn-debug.log*
yarn-error.log* yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
# rollup-plugin-visualizer
stats.html
# Ignore all the installed packages
node_modules node_modules
dist
dist-ssr
*.local
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?
# linguijs locales # linguijs locales
src/locales/**/*.ts src/locales/**/*.ts

View File

@ -1,20 +0,0 @@
stages:
- check
sonarqube-check:
stage: check
image:
name: sonarsource/sonar-scanner-cli:latest
entrypoint: ['']
variables:
SONAR_USER_HOME: '${CI_PROJECT_DIR}/.sonar' # Defines the location of the analysis task cache
GIT_DEPTH: '0' # Tells git to fetch all the branches of the project, required by the analysis task
# cache:
# key: '${CI_JOB_NAME}'
# paths:
# - .sonar/cache
script:
- sonar-scanner
allow_failure: true
only:
- develop

View File

@ -1,4 +1,11 @@
.*/
./dist/
./data/
3rdparty/
node_modules/ node_modules/
dist/ keys/
logs/ logs/
static/ static/
templates/
src/components/react-via/js/
src/components/react-via/styles/

View File

@ -1,9 +1,29 @@
{ {
"arrowParens": "always",
"bracketSpacing": true,
"embeddedLanguageFormatting": "auto",
"htmlWhitespaceSensitivity": "css",
"insertPragma": false,
"jsxBracketSameLine": false,
"jsxSingleQuote": true,
"printWidth": 80, "printWidth": 80,
"proseWrap": "preserve",
"quoteProps": "as-needed",
"requirePragma": false,
"semi": true, "semi": true,
"singleQuote": true, "singleQuote": true,
"tabWidth": 2, "tabWidth": 2,
"trailingComma": "all", "trailingComma": "all",
"endOfLine": "crlf", "useTabs": false,
"jsxSingleQuote": false "vueIndentScriptAndStyle": false,
"overrides": [
{
"files": ["*.json", "*.yml", "*.yaml", "*.md"],
"options": {
"tabWidth": 2
} }
}
],
"endOfLine": "lf"
}

View File

@ -1,34 +0,0 @@
###################
# BUILD FOR LOCAL DEVELOPMENT
###################
FROM node:16-alpine AS development
WORKDIR /app/
COPY --chown=node:node package*.json ./
RUN npm ci
COPY --chown=node:node . .
USER node
###################
# BUILD FOR PRODUCTION
###################
FROM node:16-alpine AS build
WORKDIR /app/
ENV NODE_ENV production
COPY --chown=node:node package*.json ./
COPY --chown=node:node --from=development /app/node_modules ./node_modules
COPY --chown=node:node . .
RUN npm run build
RUN npm ci --only=production && npm cache clean --force
USER node
###################
# PRODUCTION
###################
FROM nginx:stable-alpine AS nginx
COPY --from=build /app/dist/ /usr/share/nginx/html/
COPY --from=build /app/run.sh /app/
COPY --from=build /app/nginx.conf /configs/
RUN chmod +x /app/run.sh
CMD ["/app/run.sh"]

View File

@ -1,35 +0,0 @@
###################
# BUILD FOR LOCAL DEVELOPMENT
###################
FROM node:16-alpine AS development
WORKDIR /app/
COPY --chown=node:node package*.json ./
RUN npm ci
COPY --chown=node:node . .
USER node
###################
# BUILD FOR PRODUCTION
###################
FROM node:16-alpine AS build
WORKDIR /app/
COPY --chown=node:node package*.json ./
COPY --chown=node:node --from=development /app/node_modules ./node_modules
COPY --chown=node:node . .
RUN npm run build
ENV NODE_ENV production
RUN npm ci --only=production && npm cache clean --force
USER node
###################
# PRODUCTION
###################
FROM nginx:stable-alpine AS nginx
ARG PORT=9999
COPY --from=build /app/dist/ /usr/share/nginx/html/
COPY ./nginx.conf /etc/nginx/conf.d/default.conf
EXPOSE ${PORT}
CMD ["nginx", "-g", "daemon off;" ]

View File

@ -1,27 +0,0 @@
# Variables
IMAGE_NAME="demoap-fe:latest"
CONTAINER_NAME="demoap-fe"
PORT="9999"
HTTP_PROXY="http://42.96.40.255:8002" # http:\/\/0.0.0.0:8002
# Make sure that HTTP_PROXY is not empty
if [ -z "$HTTP_PROXY" ]
then
echo "HTTP_PROXY is empty, you have to specify it in deploy.sh file"
exit 1
fi
# Replace #proxy_server in nginx.conf with HTTP_PROXY using sed command
sed -i "s|#proxy_server|$HTTP_PROXY|g" ./nginx.conf
# Replace #port in nginx.conf with PORT using sed command
sed -i "s|#port|$PORT|g" ./nginx.conf
# Build image
docker build --build-arg PORT=$PORT --pull --rm -f "Dockerfile" -t $IMAGE_NAME "."
# Remove exist container
docker container stop $CONTAINER_NAME
# Run container from new image
docker run --rm -d -p $PORT:$PORT/tcp --name $CONTAINER_NAME $IMAGE_NAME

View File

@ -1,12 +1,27 @@
<!DOCTYPE html> <!DOCTYPE html>
<html lang="en"> <html lang="en">
<head> <head>
<meta charset="UTF-8" /> <meta charset="utf-8" />
<link rel="icon" type="image/svg+xml" href="/favicon.ico" /> <link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <link rel="icon" type="image/png" sizes="96x96" href="/favicon-96x96.png" />
<title>OCR</title> <link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="theme-color" content="#000000" />
<meta name="description" content="A ML-Ops platform" />
<!--
manifest.json provides metadata used when your web app is installed on a
user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
-->
<link rel="manifest" href="/manifest.json" />
<title>SBT</title>
<style>
div#root {
height: 100vh;
}
</style>
</head> </head>
<body> <body>
<noscript>You need to enable JavaScript to run this app.</noscript>
<div id="root"></div> <div id="root"></div>
<script type="module" src="/src/index.tsx"></script> <script type="module" src="/src/index.tsx"></script>
</body> </body>

View File

@ -1,61 +0,0 @@
server {
# listen {{port}};
# listen [::]:{{port}};
server_name localhost;
client_max_body_size 100M;
#access_log /var/log/nginx/host.access.log main;
location ~ ^/api {
proxy_pass {{proxy_server}};
proxy_read_timeout 300;
proxy_connect_timeout 300;
proxy_send_timeout 300;
}
location /static/drf_spectacular_sidecar/ {
alias /backend-static/drf_spectacular_sidecar/;
}
location / {
root /usr/share/nginx/html;
index index.html index.htm;
try_files $uri /index.html;
}
location ~ ^/static/drf_spectacular_sidecar/swagger-ui-dist {
proxy_pass {{proxy_server}};
}
#error_page 404 /404.html;
# redirect server error pages to the static page /50x.html
#
error_page 500 502 503 504 /50x.html;
location = /50x.html {
root /usr/share/nginx/html;
}
# proxy the PHP scripts to Apache listening on 127.0.0.1:80
#
#location ~ \.php$ {
# proxy_pass http://127.0.0.1;
#}
# pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
#
#location ~ \.php$ {
# root html;
# fastcgi_pass 127.0.0.1:9000;
# fastcgi_index index.php;
# fastcgi_param SCRIPT_FILENAME /scripts$fastcgi_script_name;
# include fastcgi_params;
#}
# deny access to .htaccess files, if Apache's document root
# concurs with nginx's one
#
#location ~ /\.ht {
# deny all;
#}
}

File diff suppressed because it is too large.

View File

@ -1,57 +1,82 @@
{ {
"name": "vite-project", "name": "sbt-ui",
"private": true, "version": "0.1.0",
"version": "0.0.0",
"type": "module",
"scripts": { "scripts": {
"start": "npm run extract && npm run compile && vite", "start": "NODE_ENV=development npm run extract && npm run compile && vite --host",
"build": "npm run extract && npm run compile && tsc && vite build", "build": "NODE_ENV=production npm run extract && npm run compile && tsc && vite build",
"serve": "vite preview", "serve": "vite preview",
"extract": "lingui extract --clean", "extract": "lingui extract --clean",
"compile": "lingui compile", "compile": "lingui compile",
"format": "prettier --config ./.prettierrc --write src/**/*.{ts,tsx,js,jsx}" "format": "prettier --config ./.prettierrc --write src/**/*.{ts,tsx,js,jsx}",
"lint": "eslint . --ext .ts,.tsx --fix"
},
"engines": {
"node": ">=16"
},
"browserslist": {
"production": [
">0.2%",
"not dead",
"not op_mini all"
],
"development": [
"last 1 chrome version",
"last 1 firefox version",
"last 1 safari version"
]
}, },
"dependencies": { "dependencies": {
"@ant-design/colors": "^7.0.0", "@ant-design/colors": "^6.0.0",
"@ant-design/icons": "^4.8.0", "@ant-design/icons": "^4.8.0",
"@ant-design/plots": "^1.2.3",
"@ant-design/pro-layout": "^7.10.3",
"@babel/core": "^7.13.10",
"@tanstack/react-query": "^4.20.4", "@tanstack/react-query": "^4.20.4",
"antd": "^5.1.2", "antd": "^5.4.0",
"axios": "^1.2.2", "axios": "^1.2.2",
"konva": "^8.3.14", "chart.js": "^4.4.1",
"history": "^5.3.0",
"lodash-es": "^4.17.21", "lodash-es": "^4.17.21",
"pdfjs-dist": "^3.4.120", "mousetrap": "^1.6.5",
"prop-types": "^15.8.1", "process": "^0.11.10",
"react": "^18.2.0", "react": "^18.2.0",
"react-csv": "^2.2.2", "react-chartjs-2": "^5.2.0",
"react-dom": "^18.2.0", "react-dom": "^18.2.0",
"react-json-view-lite": "^0.9.6",
"react-konva": "^18.2.3",
"react-konva-utils": "^0.3.1",
"react-router-dom": "^6.6.1", "react-router-dom": "^6.6.1",
"styled-components": "^5.3.6", "styled-components": "^5.3.6",
"usehooks-ts": "^2.9.1",
"uuid": "^9.0.0" "uuid": "^9.0.0"
}, },
"devDependencies": { "devDependencies": {
"@babel/plugin-syntax-jsx": "^7.18.6", "@babel/plugin-syntax-jsx": "^7.12.13",
"@babel/plugin-transform-react-jsx-self": "^7.12.13",
"@babel/plugin-transform-react-jsx-source": "^7.12.13",
"@babel/preset-typescript": "^7.18.6", "@babel/preset-typescript": "^7.18.6",
"@babel/runtime": "^7.20.13", "@babel/runtime": "^7.13.10",
"@lingui/cli": "^3.17.0", "@lingui/cli": "^3.7.2",
"@lingui/core": "^3.17.0", "@lingui/core": "^3.7.2",
"@lingui/macro": "^3.17.0", "@lingui/macro": "^3.7.2",
"@lingui/react": "^3.17.0", "@lingui/react": "^3.7.2",
"@tanstack/eslint-plugin-query": "^4.29.4",
"@tanstack/react-query-devtools": "^4.20.4",
"@types/babel-plugin-macros": "^2.8.4",
"@types/lodash-es": "^4.17.6", "@types/lodash-es": "^4.17.6",
"@types/react": "^18.0.26", "@types/node": "^18.19.12",
"@types/react-dom": "^18.0.9", "@types/react": "^18.0.20",
"@types/styled-components": "^5.1.26", "@types/react-dom": "^18.0.10",
"@types/uuid": "^9.0.0", "@types/uuid": "^9.0.1",
"@vitejs/plugin-react": "^3.0.0", "@vitejs/plugin-react": "^3.0.0",
"babel-plugin-macros": "^3.1.0", "babel-plugin-macros": "^3.0.1",
"eslint": "^8.40.0",
"eslint-config-prettier": "^8.8.0",
"eslint-config-react-app": "^7.0.1",
"make-plural": "^7.2.0", "make-plural": "^7.2.0",
"prettier": "^2.8.3", "prettier": "^2.8.1",
"prettier-plugin-organize-imports": "^3.2.2", "prettier-plugin-organize-imports": "^3.2.1",
"typescript": "^4.9.3", "rollup-plugin-visualizer": "^5.9.0",
"vite": "^4.0.0", "sass": "^1.57.1",
"vite-tsconfig-paths": "^4.0.5" "typescript": "^4.9.4",
"vite": "^4.0.3",
"vite-plugin-svgr": "^2.4.0",
"vite-tsconfig-paths": "^4.0.3"
} }
} }

Binary file not shown (before: 3.8 KiB, after: 6.5 KiB).

View File

@ -1,43 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<link rel="icon" href="%PUBLIC_URL%/favicon.ico" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="theme-color" content="#000000" />
<meta
name="description"
content="Web site created using create-react-app"
/>
<link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png" />
<!--
manifest.json provides metadata used when your web app is installed on a
user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
-->
<link rel="manifest" href="%PUBLIC_URL%/manifest.json" />
<!--
Notice the use of %PUBLIC_URL% in the tags above.
It will be replaced with the URL of the `public` folder during the build.
Only files inside the `public` folder can be referenced from the HTML.
Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
work correctly both with client-side routing and a non-root public URL.
Learn how to configure a non-root public URL by running `npm run build`.
-->
<title>React App</title>
</head>
<body>
<noscript>You need to enable JavaScript to run this app.</noscript>
<div id="root"></div>
<!--
This HTML file is a template.
If you open it directly in the browser, you will see an empty page.
You can add webfonts, meta tags, or analytics to this file.
The build step will place the bundled scripts into the <body> tag.
To begin the development, run `npm start` or `yarn start`.
To create a production bundle, use `npm run build` or `yarn build`.
-->
</body>
</html>

Binary file not shown (before: 5.2 KiB).
Binary file not shown (before: 9.4 KiB).

View File

@ -1,21 +1,11 @@
{ {
"short_name": "React App", "short_name": "sbt",
"name": "Create React App Sample", "name": "sbt",
"icons": [ "icons": [
{ {
"src": "favicon.ico", "src": "favicon.ico",
"sizes": "64x64 32x32 24x24 16x16", "sizes": "64x64 32x32 24x24 16x16",
"type": "image/x-icon" "type": "image/x-icon"
},
{
"src": "logo192.png",
"type": "image/png",
"sizes": "192x192"
},
{
"src": "logo512.png",
"type": "image/png",
"sizes": "512x512"
} }
], ],
"start_url": ".", "start_url": ".",

File diff suppressed because one or more lines are too long

View File

@ -1,3 +0,0 @@
# https://www.robotstxt.org/robotstxt.html
User-agent: *
Disallow:

Binary file not shown (after: 6.5 KiB).

View File

@ -0,0 +1,17 @@
server {
root /usr/share/nginx/html;
location / {
# Any route that doesn't exist on the server (e.g. /devices)
try_files $uri $uri/ /index.html;
add_header Cache-Control "no-cache, no-store, must-revalidate";
add_header Pragma "no-cache";
add_header Expires 0;
}
location /assets {
expires 1y;
add_header Cache-Control "public";
access_log off;
}
}

View File

@ -1,24 +0,0 @@
# Run the project COPEN-FE
- **How to clone the project**:
`git clone http://code.sdsrv.vn/c-ope2n/frontend.git`
- **Change to working directory**:
Open the downloaded project via terminal then re-direct to the `frontend` folder and switch to the `develop` branch:
`cd frontend`
`git checkout develop`
- **Install packages**: Install the packages/dependencies needed to run the project (we need to run this manually every time a new dependency is added to the `package.json` file).
`npm i`
- Start project in developer mode:
`npm start`
- **Environment variables** in `.env` file:
- Development port: VITE_PORT
- Backend API: VITE_PROXY

View File

@ -1,5 +0,0 @@
#!/bin/sh
# update port and BD proxy
sed "s#{{proxy_server}}#$VITE_PROXY#g" /configs/nginx.conf > /etc/nginx/conf.d/default.conf
# run up
nginx -g 'daemon off;'

View File

@ -1,4 +0,0 @@
sonar.projectKey=c-ope2n_frontend_AYb9EnnVvcs_Ifu-neN-
sonar.qualitygate.wait=true
# sonar.login=admin
# sonar.password=trongtai37

View File

@ -1,31 +0,0 @@
import { QueryClient, QueryClientProvider } from '@tanstack/react-query';
import { RouterProvider } from 'react-router-dom';
import Internationalization from './components/internaltionalization';
import { createRouter } from './router/createRouter';
import 'antd/dist/reset.css';
import './styles/override.css';
const queryClient = new QueryClient({
defaultOptions: {
queries: {
cacheTime: 30_000,
staleTime: 10_000,
refetchOnWindowFocus: false,
},
},
});
function App() {
const router = createRouter();
return (
<QueryClientProvider client={queryClient}>
<Internationalization>
<RouterProvider router={router} />
</Internationalization>
</QueryClientProvider>
);
}
export default App;

Binary files not shown.

View File

Image file changed (before: 1.2 KiB, after: 1.2 KiB).

View File

@ -1,59 +0,0 @@
import * as React from 'react';
import { Html } from 'react-konva-utils';
export interface ControlsProps {
containerRef: React.MutableRefObject<HTMLDivElement>;
hidden?: boolean;
}
const PADDING_OFFSET = 16;
export const Controls = ({
containerRef,
children,
hidden = false,
}: React.PropsWithChildren<ControlsProps>) => {
const position = useControlsPosition(containerRef);
return hidden ? null : (
<Html
divProps={{
style: {
position: 'fixed',
top: `${position.top + PADDING_OFFSET}px`,
left: `${position.left + PADDING_OFFSET}px`,
},
}}
transform={false}
>
{children}
</Html>
);
};
function useControlsPosition(
containerRef: React.MutableRefObject<HTMLDivElement>,
) {
const [position, setPosition] = React.useState({ top: 0, left: 0 });
const observeRef = React.useRef(
new ResizeObserver((entries) => {
const container = entries[0];
if (container) {
setPosition({
top: container.target.getBoundingClientRect().top ?? 0,
left: container.target.getBoundingClientRect().left ?? 0,
});
}
}),
);
React.useEffect(() => {
if (containerRef.current) {
observeRef.current.observe(containerRef.current);
}
return () => {
observeRef.current.disconnect();
};
}, [containerRef.current]);
return position;
}

View File

@ -1,5 +0,0 @@
import { Circle as KonvaCircle, Rect } from 'react-konva';
import { withTransform } from './shapeFactory';
export const Rectangle = withTransform(Rect);
export const Circle = withTransform(KonvaCircle);

View File

@ -1,95 +0,0 @@
import Konva from 'konva';
import React from 'react';
import { KonvaNodeComponent, Transformer } from 'react-konva';
import { TransformableShapeProps } from '../types';
export function withTransform<
TShapeComponent extends KonvaNodeComponent<Konva.Node>,
>(Shape: TShapeComponent) {
return function ({
isTransformable,
isSelected,
onSelect,
onChange,
title,
isAnchor,
...shapeProps
}: TransformableShapeProps<TShapeComponent>) {
const labelRef = React.useRef<Konva.Label>();
const shapeRef = React.useRef<Konva.Node>();
const tranformRef = React.useRef<Konva.Transformer>();
React.useEffect(() => {
if (isSelected) {
const nodes = [shapeRef.current, labelRef.current].filter(Boolean);
tranformRef.current.nodes(nodes);
tranformRef.current.getLayer().batchDraw();
}
}, [isSelected]);
return (
<>
{/* @ts-ignore */}
<Shape
ref={shapeRef}
{...shapeProps}
strokeScaleEnabled={false}
draggable={isTransformable}
onClick={onSelect}
onTap={onSelect}
onDragEnd={(e) => {
onChange({
...shapeProps,
x: e.target.x(),
y: e.target.y(),
});
}}
onTransform={(event) => {
const textNode = labelRef.current;
if (textNode) {
const stageScale = event.currentTarget.getStage().scale().x;
const absScale = labelRef.current?.getAbsoluteScale() || {
x: 1,
y: 1,
};
textNode.scaleX((textNode.scaleX() / absScale.x) * stageScale);
textNode.scaleY((textNode.scaleY() / absScale.y) * stageScale);
}
}}
onTransformEnd={() => {
const node = shapeRef.current;
const scaleX = node.scaleX();
const scaleY = node.scaleY();
node.scaleX(1);
node.scaleY(1);
onChange({
...shapeProps,
x: node.x(),
y: node.y(),
width: Math.max(5, node.width() * scaleX),
height: Math.max(node.height() * scaleY),
});
}}
fill={isSelected ? shapeProps.fill : null}
/>
{isSelected && (
<Transformer
ref={tranformRef}
ignoreStroke
boundBoxFunc={(oldBox, newBox) => {
if (newBox.width < 5 || newBox.height < 5) {
return oldBox;
}
return newBox;
}}
draggable={isTransformable}
resizeEnabled={isTransformable}
rotateEnabled={isTransformable}
/>
)}
</>
);
};
}

View File

@ -1,107 +0,0 @@
import Konva from 'konva';
import { isNumber } from 'lodash-es';
import React from 'react';
import { KonvaNodeEvents } from 'react-konva';
function standardizeRectConfig(config: Konva.RectConfig): Konva.RectConfig {
if (config.width < 0) {
config.width = Math.abs(config.width);
config.x -= config.width;
}
if (config.height < 0) {
config.height = Math.abs(config.height);
config.y -= config.height;
}
return config;
}
function ensureEvtWithLayer(
evt: any,
): evt is MouseEvent & { layerX: number; layerY: number } {
return (
isNumber(evt.layerX) && isNumber(evt.layerY) && evt instanceof MouseEvent
);
}
export function useDrawStage({
canDraw = true,
onFinish,
}: {
canDraw?: boolean;
onFinish?(config: Konva.RectConfig): void;
}) {
const [isDrawing, setDrawing] = React.useState(false);
const [rectConfig, setRectConfig] = React.useState<Konva.RectConfig>({
x: 0,
y: 0,
width: 0,
height: 0,
stroke: 'red',
fill: 'rgba(255,0,0,0.2)',
strokeWidth: 4,
visible: false,
isAnchor: false,
});
const start: KonvaNodeEvents['onMouseDown'] = React.useCallback(
(event) => {
const evt = event.evt;
if (
canDraw &&
event.target.id() === 'background-image' &&
ensureEvtWithLayer(evt)
) {
const stage = event.currentTarget.getStage();
setDrawing(true);
setRectConfig((prevConfig) => ({
...prevConfig,
x: evt.layerX / stage.scaleX(),
y: evt.layerY / stage.scaleY(),
}));
}
},
[canDraw],
);
const handle: KonvaNodeEvents['onMouseMove'] = React.useCallback(
(event) => {
const evt = event.evt;
if (canDraw && isDrawing && ensureEvtWithLayer(evt)) {
const stage = event.currentTarget.getStage();
setRectConfig((prevConfig) => ({
...prevConfig,
width: evt.layerX / stage.scaleX() - prevConfig.x,
height: evt.layerY / stage.scaleY() - prevConfig.y,
visible: true,
}));
}
},
[canDraw, isDrawing],
);
const finish: KonvaNodeEvents['onMouseUp'] = React.useCallback(() => {
if (canDraw && isDrawing) {
setDrawing(false);
setRectConfig((prevConfig) => ({
...prevConfig,
width: 0,
height: 0,
visible: false,
}));
if (onFinish && rectConfig.width && rectConfig.height) {
onFinish(standardizeRectConfig(rectConfig));
}
}
}, [canDraw, onFinish]);
return {
start,
handle,
finish,
isDrawing,
rect: rectConfig,
};
}

View File

@ -1,4 +0,0 @@
export * from './draw';
export * from './scale';
export * from './store';
export * from './use-load-image';

View File

@ -1,33 +0,0 @@
import React from 'react';
import { StageProps } from 'react-konva';
export const SCALE_FACTOR = 1.1;
export const MIN_SCALE = 1 / 5;
export const MAX_SCALE = 5;
export function useScaleStage(initScale: StageProps['scale'] = { x: 1, y: 1 }) {
const [scale, setScale] = React.useState(initScale);
const scaleIn = React.useCallback(() => {
setScale((prev) => ({
x: Math.min(prev.x * SCALE_FACTOR, MAX_SCALE),
y: Math.min(prev.y * SCALE_FACTOR, MAX_SCALE),
}));
}, []);
const scaleOut = React.useCallback(() => {
setScale((prev) => ({
x: Math.max(prev.x / SCALE_FACTOR, MIN_SCALE),
y: Math.max(prev.y / SCALE_FACTOR, MIN_SCALE),
}));
}, []);
return {
scale,
canZoomIn: scale.x < MAX_SCALE,
canZoomOut: scale.x > MIN_SCALE,
scaleIn,
scaleOut,
scaleTo: setScale,
};
}

View File

@ -1,31 +0,0 @@
import React from 'react';
import { useStoreInstance } from '../store-context';
import {
DefaultShapeData,
ShapeConfig,
StoreSelector,
StoreValue,
} from '../types';
export function useAnnotatorStore<ReturnType>(
selector: StoreSelector<ReturnType> = (internalStore: StoreValue) =>
internalStore as ReturnType,
) {
const store = useStoreInstance();
return React.useSyncExternalStore(store.subscribe, () =>
selector(store.getStore()),
);
}
export function useShapes<TData = DefaultShapeData>() {
// @ts-ignore
return useAnnotatorStore<ShapeConfig<TData>[]>((store) => store.shapes);
}
export function useSelectedShapeId() {
return useAnnotatorStore((store) => store.selectedShapeId);
}
export function useBackground(): string {
return useAnnotatorStore((store) => store.background);
}

View File

@ -1,54 +0,0 @@
import React from 'react';
interface UseLoadImagesOptions {
onSuccess?(image: HTMLImageElement): void;
onError?(): void;
}
const initialLoadState = {
isLoading: true,
isSuccess: false,
isError: false,
};
export function useLoadImage(
src: string,
{ onSuccess, onError }: UseLoadImagesOptions = {},
) {
const imageRef = React.useRef<HTMLImageElement>(new window.Image());
const [loadState, setLoadState] = React.useState(initialLoadState);
React.useEffect(() => {
setLoadState(initialLoadState);
imageRef.current.src = src;
}, [src]);
imageRef.current.addEventListener('load', () => {
setLoadState({
isLoading: false,
isError: false,
isSuccess: true,
});
if (onSuccess) {
onSuccess(imageRef.current);
}
});
imageRef.current.addEventListener('error', () => {
setLoadState({
isLoading: false,
isSuccess: false,
isError: true,
});
if (onError) {
onError();
}
});
return {
...loadState,
image: imageRef.current,
};
}

View File

@ -1,249 +0,0 @@
import {
ExpandOutlined,
ZoomInOutlined,
ZoomOutOutlined,
} from '@ant-design/icons';
import { t } from '@lingui/macro';
import { useLingui } from '@lingui/react';
import { Button, Empty, Grid, Space, Spin, Tooltip } from 'antd';
import Konva from 'konva';
import React from 'react';
import { Image, Layer, Rect, Stage } from 'react-konva';
import { useIsFirstRender } from 'usehooks-ts';
import { v4 as uuidv4 } from 'uuid';
import { Controls } from './controls';
import { Rectangle } from './custom-shapes';
import {
useBackground,
useDrawStage,
useLoadImage,
useScaleStage,
useSelectedShapeId,
useShapes,
} from './hooks';
import { useStoreInstance } from './store-context';
import { ShapeConfig, ShapeTypes, StoreValue } from './types';
interface ImageAnnotatorProps {
background?: string;
shapes?: ShapeConfig[];
initialBackground?: string;
initialShapes?: ShapeConfig[];
containerWidth?: React.CSSProperties['width'];
containerHeight?: React.CSSProperties['height'];
readOnly?: boolean;
fitToView?: boolean;
}
export function ImageAnnotator({
background,
shapes,
initialBackground,
initialShapes,
containerWidth = '100%',
containerHeight = '100%',
fitToView = true,
readOnly = false,
}: ImageAnnotatorProps) {
const { i18n } = useLingui();
const screens = Grid.useBreakpoint();
const containerRef = React.useRef<HTMLDivElement>();
const stageRef = React.useRef<Konva.Stage>();
const isFirstRender = useIsFirstRender();
const storeInstance = useStoreInstance();
const internalShapes = useShapes();
const internalBackground = useBackground();
const selectedShapeId = useSelectedShapeId();
const { scale, scaleIn, scaleOut, scaleTo, canZoomIn, canZoomOut } =
useScaleStage();
const { start, handle, finish, rect } = useDrawStage({
canDraw: !readOnly,
onFinish: (finalRect) => {
storeInstance.addShapes([
{
id: uuidv4(),
type: ShapeTypes.RECTANGLE,
data: {
label: 'default label',
value: '',
},
...finalRect,
},
]);
},
});
const fitImageWidth = React.useCallback(
(_image: HTMLImageElement, isContainerReady = true) => {
const SCROLL_BAR_WIDTH_PX = 15;
const viewWidth =
containerRef.current.clientWidth -
(isContainerReady ? 0 : SCROLL_BAR_WIDTH_PX);
const imageWidth = _image.naturalWidth;
const newScale = viewWidth / imageWidth;
scaleTo({
x: newScale,
y: newScale,
});
},
[scaleTo],
);
const { image, isLoading } = useLoadImage(internalBackground, {
onSuccess(_image) {
if (fitToView) {
fitImageWidth(_image, false);
}
},
});
React.useEffect(() => {
const newStore = getStoreChangeFromDeriveProps({
shapes,
background,
});
const firstStore: Partial<StoreValue> = {};
if (initialBackground && isFirstRender) {
firstStore.background = initialBackground;
}
if (initialShapes && isFirstRender) {
firstStore.shapes = initialShapes;
}
storeInstance.setStore((prev) => ({
...prev,
...firstStore,
...newStore,
}));
}, [shapes, background, isFirstRender]);
return (
<div
ref={containerRef}
style={{
width: containerWidth,
height: containerHeight,
position: 'relative',
overflow: 'auto',
border: '1px dashed #d9d9d9',
borderRadius: '8px',
}}
>
{internalBackground ? (
<Stage
ref={stageRef}
scale={scale}
width={image.naturalWidth * scale.x}
height={image.naturalHeight * scale.y}
onMouseDown={(event) => {
const isClickedOnEmpty = event.target.id() === 'background-image';
if (isClickedOnEmpty) {
storeInstance.setSelectedShape(null);
start(event);
}
}}
onMouseMove={handle}
onMouseUp={finish}
>
<Layer>
<Image
image={image}
width={image.naturalWidth}
height={image.naturalHeight}
id="background-image"
/>
{internalShapes.map((shape) => (
<Rectangle
{...shape}
isSelected={shape.id === selectedShapeId}
isTransformable={!readOnly}
onSelect={() => storeInstance.setSelectedShape(shape.id)}
onChange={(shapeConfig) => {
const newShapes = internalShapes.map((item) =>
item.id === shape.id
? {
...shape,
...shapeConfig,
}
: item,
);
storeInstance.setShapes(newShapes);
}}
draggable={!readOnly}
key={shape.id}
/>
))}
<Rect {...rect} />
<Controls containerRef={containerRef} hidden={!screens.lg}>
<Space>
<Tooltip title={t(i18n)`Zoom out`}>
<Button
shape="circle"
icon={<ZoomOutOutlined />}
onClick={scaleOut}
disabled={!canZoomOut}
/>
</Tooltip>
<Tooltip title={t(i18n)`Zoom in`}>
<Button
shape="circle"
icon={<ZoomInOutlined />}
onClick={scaleIn}
disabled={!canZoomIn}
/>
</Tooltip>
<Tooltip title={t(i18n)`Fit image`}>
<Button
shape="circle"
icon={<ExpandOutlined />}
onClick={() => fitImageWidth(image)}
/>
</Tooltip>
</Space>
</Controls>
</Layer>
</Stage>
) : (
<Empty
image={Empty.PRESENTED_IMAGE_SIMPLE}
description={t(i18n)`No data, please upload an image.`}
style={{
position: 'absolute',
top: '50%',
left: '50%',
transform: 'translate(-50%, -50%)',
}}
/>
)}
{isLoading && (
<Spin
style={{
position: 'absolute',
left: '50%',
top: '50%',
transform: 'translate(-50%, -50%)',
}}
/>
)}
</div>
);
}
function getStoreChangeFromDeriveProps({
background,
shapes,
}: Pick<ImageAnnotatorProps, 'background' | 'shapes'>) {
const newStore: Partial<StoreValue> = {};
if (background !== undefined) {
newStore.background = background;
}
if (shapes !== undefined) {
newStore.shapes = shapes;
}
return newStore;
}

View File

@ -1,23 +0,0 @@
import React from 'react';
import { AnnotatorStore } from './store';
const defaultStore = new AnnotatorStore();
const AnnotatorContext = React.createContext(defaultStore);
export const useStoreInstance = () => React.useContext(AnnotatorContext);
export function AnnotatorProvider({
store,
children,
}: React.PropsWithChildren<{
store?: AnnotatorStore;
}>) {
const storeRef = React.useRef(new AnnotatorStore());
return (
<AnnotatorContext.Provider value={store || storeRef.current}>
{children}
</AnnotatorContext.Provider>
);
}

View File

@ -1,67 +0,0 @@
import { SetState } from '../../models';
import { ShapeConfig, StoreValue, SubcribeHandler } from './types';
export class AnnotatorStore {
private store: StoreValue = {
background: '',
shapes: [],
selectedShapeId: null,
};
private subscribers = new Set<SubcribeHandler>();
private notify() {
this.subscribers.forEach((handler) => handler(this.store));
}
subscribe = (handler: SubcribeHandler) => {
this.subscribers.add(handler);
return () => {
this.subscribers.delete(handler);
};
};
addShapes(shapes: ShapeConfig[]) {
this.store.shapes = this.store.shapes.concat(shapes);
this.notify();
}
removeShapes(shapeIds: ShapeConfig['id'][]) {
this.store.shapes = this.store.shapes.filter(
(shape) => !shapeIds.includes(shape.id),
);
this.notify();
}
setShapes(shapesHandler: SetState<ShapeConfig[]>) {
this.store.shapes =
typeof shapesHandler === 'function'
? shapesHandler(this.store.shapes)
: shapesHandler;
this.notify();
}
setBackground(background: string) {
this.store.background = background;
this.notify();
}
setSelectedShape(shapeId: ShapeConfig['id'] | null) {
this.store.selectedShapeId = shapeId;
this.notify();
}
setStore(storeHandler: SetState<StoreValue>) {
this.store =
typeof storeHandler === 'function'
? storeHandler(this.store)
: storeHandler;
this.notify();
}
getStore(): StoreValue {
return this.store;
}
}

View File

@ -1,60 +0,0 @@
import Konva from 'konva';
import { KonvaNodeComponent } from 'react-konva';
// Shapes
export enum ShapeTypes {
RECTANGLE = 'reactangle',
CIRCLE = 'circle',
}
export interface DefaultShapeData {
label: string;
value: string;
}
export type WithCustomConfig<
TConfig extends Konva.ShapeConfig,
TData = DefaultShapeData,
> = TConfig & {
id: string;
data?: TData;
} & WithTransformProps<TConfig>;
export type RectangleConfig<TData = DefaultShapeData> = {
type: ShapeTypes.RECTANGLE;
} & WithCustomConfig<Konva.RectConfig, TData>;
export type CircleConfig<TData = DefaultShapeData> = {
type: ShapeTypes.CIRCLE;
} & WithCustomConfig<Konva.CircleConfig, TData>;
export type ShapeConfig<TData = DefaultShapeData> =
| RectangleConfig<TData>
| CircleConfig<TData>;
// Store
export type SubcribeHandler = (store: StoreValue) => void;
export type StoreSelector<ReturnType> = (store: StoreValue) => ReturnType;
export interface StoreValue {
background: string;
shapes: ShapeConfig[];
selectedShapeId: ShapeConfig['id'] | null;
}
// Transformable
export type TransformableShapeProps<
TShapeComponent extends KonvaNodeComponent<Konva.Node>,
> = TShapeComponent extends KonvaNodeComponent<Konva.Node, infer Props>
? Props & WithTransformProps<Props>
: never;
export type WithTransformProps<
TShapeConfig extends Konva.ShapeConfig = Konva.ShapeConfig,
> = {
isTransformable?: boolean;
isSelected?: boolean;
onSelect?(): void;
onChange?(shapeConfig: TShapeConfig): void;
title?: string;
isAnchor?: boolean;
};

View File

@ -1,33 +0,0 @@
import { Typography } from "antd";
export const Brand = ({
collapsed,
isBordered = true,
}: {
collapsed: boolean;
isBordered?: boolean;
}) => {
return (
<Typography.Title
ellipsis={{
rows: 1,
}}
style={{
marginTop: 3,
marginLeft: 12,
marginRight: 12,
paddingTop: 18,
paddingBottom: 18,
letterSpacing: 2,
borderBottomLeftRadius: 10,
borderBottomRightRadius: 10,
color: "#fff",
textAlign: "center",
backgroundColor: "rgb(0, 106, 255)",
fontSize: 32,
}}
>
{collapsed ? "O" : "OCR"}
</Typography.Title>
);
};

View File

@ -0,0 +1,2 @@
export { default as MultiTypeChart } from './multitype-chart';
export { default as PieChart } from './pie-chart';

View File

@ -0,0 +1,41 @@
import {
BarController,
BarElement,
CategoryScale,
Chart as ChartJS,
ChartData,
ChartOptions,
ChartTypeRegistry,
Legend,
LinearScale,
LineController,
LineElement,
PointElement,
Tooltip,
} from 'chart.js';
import { Chart } from 'react-chartjs-2';
ChartJS.register(
LinearScale,
CategoryScale,
BarElement,
PointElement,
LineElement,
Legend,
Tooltip,
LineController,
BarController,
);
interface MultiChartProps {
type: keyof ChartTypeRegistry;
data: ChartData;
options?: ChartOptions;
height: number;
}
export default function MultiTypeChart(props: MultiChartProps) {
const { type, data, options, height } = props;
return <Chart type={type} data={data} height={height} options={options} />;
}

View File

@ -0,0 +1,20 @@
import {
ArcElement,
Chart as ChartJS,
ChartData,
ChartOptions,
Legend,
Tooltip,
} from 'chart.js';
import { Pie } from 'react-chartjs-2';
ChartJS.register(ArcElement, Tooltip, Legend);
interface PieChartProps {
data: ChartData<'pie'>;
options?: ChartOptions<'pie'>;
}
export default function PieChart({ data, options }: PieChartProps) {
return <Pie data={data} options={options} />;
}

View File

@ -0,0 +1,16 @@
import { Collapse as AntCollapse, CollapseProps, theme } from 'antd';
import styled from 'styled-components';
function Collapse(props: CollapseProps) {
return <AntCollapse {...props} />;
}
const Panel = styled(AntCollapse.Panel)`
& .sbt-collapse-header:hover {
color: ${theme.defaultConfig.token.colorPrimary};
}
`;
Collapse.Panel = Panel;
export { Collapse };

View File

@ -0,0 +1,13 @@
import React from 'react';
interface DisplayNoneProps {
enabled?: boolean;
children: React.ReactNode;
}
export const DisplayNone: React.FC<DisplayNoneProps> = ({
enabled = false,
children,
}) => {
return <div style={{ display: enabled ? 'none' : '' }}>{children}</div>;
};

View File

@ -1,11 +0,0 @@
import { Typography } from 'antd';
import styled from 'styled-components';
export const EditableCell = styled(Typography.Text)`
margin-top: 0 !important;
margin-bottom: 0 !important;
& > .ant-typography-copy {
margin-left: 8px;
}
`;

View File

@ -1,28 +0,0 @@
import { Typography } from 'antd';
import React from 'react';
export type EllipsisTitleProps = React.ComponentProps<typeof Typography.Title>;
export const EllipsisTitle = (props: EllipsisTitleProps) => {
const [isEllipsis, setEllipsis] = React.useState(false);
return (
<Typography.Title
{...props}
ellipsis={
props.ellipsis
? {
...(typeof props.ellipsis === 'object' ? props.ellipsis : {}),
onEllipsis(value) {
setEllipsis(value);
if (typeof props.ellipsis === 'object') {
props.ellipsis.onEllipsis(value);
}
},
}
: false
}
title={isEllipsis ? String(props.children) : ''}
/>
);
};

View File

@ -1 +0,0 @@
export * from './ellipsis-title';

Some files were not shown because too many files have changed in this diff.