From 7b8cfbb0628dec3283f103dd9ae2ffd0867c26cb Mon Sep 17 00:00:00 2001
From: dx-tan
Date: Wed, 3 Jan 2024 11:07:58 +0700
Subject: [PATCH 01/27] Add: crawl database script

---
 scripts/crawl_database.py | 96 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 scripts/crawl_database.py

diff --git a/scripts/crawl_database.py b/scripts/crawl_database.py
new file mode 100644
index 0000000..7a06cf0
--- /dev/null
+++ b/scripts/crawl_database.py
@@ -0,0 +1,96 @@
+import csv
+import psycopg2
+import boto3
+import os
+from tqdm import tqdm
+
+OUTPUT_NAME = "issue_7"
+
+# Database connection details
+db_host = os.environ.get('DB_HOST', "")
+db_name = os.environ.get('DB_SCHEMA', "")
+db_user = os.environ.get('DB_USER', "")
+db_password = os.environ.get('DB_PASSWORD', "")
+
+# S3 bucket details
+s3_bucket_name = os.environ.get('S3_BUCKET_NAME', "")
+s3_folder_prefix = 'sbt_invoice'
+
+# S3 access credentials
+access_key = os.environ.get('S3_ACCESS_KEY', "")
+secret_key = os.environ.get('S3_SECRET_KEY', "")
+
+# Request IDs for filtering
+request_ids = [
+'SAPe39e970592394b27a17d4a64c39f7ed0',
+'SAP477a02a21faf41ecbd1a0bb21636e644',
+'SAP459d58a7dba84e7195f5ad8f46fc1530',
+'SAPa5aaa0e1ce8c4824a7b0ded2e550caec',
+'SAP492c063db44049c6b1e44f59c531f8d8',
+'SAP3d0bdd5cb4ce4291b0cb77d7de0a48e9',
+'SAP7e2c673e49c441a991661d1227342131',
+'SAPc26974bcac2649b28227981459a427aa',
+'SAP25b12dde6b854c70b512ac79059ac1d4',
+'SAP_20240102194138_bf4a3cc4e0304d0385126b6592c2632d',
+'SAP_20240102214550_8389ec1b84a249738eed9d2152bf0922',
+]
+
+# Connect to the PostgreSQL database
+conn = psycopg2.connect(
+    host=db_host,
+    database=db_name,
+    user=db_user,
+    password=db_password
+)
+
+# Create a cursor
+cursor = conn.cursor()
+
+# Generate the placeholder string for the IN statement
+placeholders = ','.join(['%s'] * len(request_ids))
+
+# Execute the SELECT query with the filter
+query = f"SELECT * FROM fwd_api_subscriptionrequest WHERE request_id IN ({placeholders})"
+cursor.execute(query, request_ids)
+
+# Fetch the filtered data
+data = cursor.fetchall()
+
+# Define the CSV file path
+csv_file_path = f'{OUTPUT_NAME}.csv'
+
+# Write the data to the CSV file
+with open(csv_file_path, 'w', newline='') as csv_file:
+    writer = csv.writer(csv_file)
+    writer.writerow([desc[0] for desc in cursor.description])  # Write column headers
+    writer.writerows(data)  # Write the filtered data rows
+
+# Close the cursor and database connection
+cursor.close()
+conn.close()
+
+# Download folders from S3
+s3_client = boto3.client(
+    's3',
+    aws_access_key_id=access_key,
+    aws_secret_access_key=secret_key
+)
+
+
+for request_id in tqdm(request_ids):
+    folder_key = f"{s3_folder_prefix}/{request_id}/"  # Assuming folder structure like: s3_bucket_name/s3_folder_prefix/request_id/
+    local_folder_path = f"{OUTPUT_NAME}/{request_id}/"  # Path to the local folder to save the downloaded files
+    os.makedirs(OUTPUT_NAME, exist_ok=True)
+    os.makedirs(local_folder_path, exist_ok=True)
+
+
+    # List objects in the S3 folder
+    response = s3_client.list_objects_v2(Bucket=s3_bucket_name, Prefix=folder_key)
+    objects = response.get('Contents', [])
+
+    for s3_object in objects:
+        object_key = s3_object['Key']
+        local_file_path = local_folder_path + object_key.split('/')[-1]  # Extracting the file name from the object key
+
+        # Download the S3 object to the local file
+        s3_client.download_file(s3_bucket_name, object_key, local_file_path)
\ No newline at end of file
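
A minimal way to run this crawl script, assuming the variables it reads are exported in the shell:

    export DB_HOST=... DB_SCHEMA=... DB_USER=... DB_PASSWORD=...
    export S3_BUCKET_NAME=... S3_ACCESS_KEY=... S3_SECRET_KEY=...
    python scripts/crawl_database.py

It writes issue_7.csv to the working directory and mirrors each request's S3 folder under issue_7/<request_id>/.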
From cea59503eb32a513c10977213f39dc76d13a58b6 Mon Sep 17 00:00:00 2001
From: dx-tan
Date: Wed, 3 Jan 2024 14:35:22 +0700
Subject: [PATCH 02/27] Add: sample env

---
 .env_sample | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 .env_sample

diff --git a/.env_sample b/.env_sample
new file mode 100644
index 0000000..0edcda4
--- /dev/null
+++ b/.env_sample
@@ -0,0 +1,41 @@
+MEDIA_ROOT=/app/media
+# DATABASE django setup
+DB_ENGINE=django.db.backends.postgresql_psycopg2
+DB_SCHEMA=sbt_dev
+DB_USER=postgres
+DB_PASSWORD=extraordinary
+DB_HOST=db-sbt
+DB_PUBLIC_PORT=5432
+DB_INTERNAL_PORT=5432
+
+DEBUG=TRUE
+CORS_ALLOWED_ORIGINS=*
+CTEL_KEY=secret
+DB_INTERNAL_KEY=secret
+ALLOWED_HOSTS='*'
+BROKER_URL=amqp://test:test@rabbitmq-manulife-sbt:5672
+BASE_URL=http://be-ctel-sbt:9000
+BASE_UI_URL=http://fe-sbt:9801
+HOST_MEDIA_FOLDER=./media
+GID=1000
+UID=198
+SECRET_KEY=secret
+RABBITMQ_DEFAULT_USER=test
+RABBITMQ_DEFAULT_PASS=test
+BASE_PORT=9000
+S3_ENDPOINT=minio
+S3_ACCESS_KEY=sample-key
+S3_SECRET_KEY=sample-key
+S3_BUCKET_NAME=sample-key
+
+AUTH_TOKEN_LIFE_TIME=168
+IMAGE_TOKEN_LIFE_TIME=168
+INTERNAL_SDS_KEY=sample-key
+FI_USER_NAME=sbt
+FI_PASSWORD=abc
+
+# Front end env variables
+# VITE_PORT=80
+# VITE_PROXY=http://0.0.0.0
+# VITE_API_BASE_URL=http://0.0.0.0:8000
+# PORT=8002
\ No newline at end of file
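
The compose files below interpolate these values from the environment; a sketch of using the sample locally, assuming docker compose picks up a .env file at the repo root:

    cp .env_sample .env
    docker compose -f docker-compose-dev.yml up -d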
From 87c3c8943f462d9fc8f8b46304cfe0caaf44ab5a Mon Sep 17 00:00:00 2001
From: dx-tan
Date: Fri, 5 Jan 2024 14:18:16 +0700
Subject: [PATCH 03/27] Add: API list request

---
 cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py  |   1 +
 .../celery_worker/mock_process_tasks_fi.py    |   1 +
 cope2n-ai-fi/modules/sdsvkvu                  |   2 +-
 cope2n-api/fwd_api/api/accuracy_view.py       | 183 ++++++++++++++++++
 cope2n-api/fwd_api/api/ctel_view.py           |   3 +-
 cope2n-api/fwd_api/api_router.py              |   3 +
 ...ionrequest_client_request_time_and_more.py |  28 +++
 .../fwd_api/models/SubscriptionRequest.py     |   3 +
 deploy_images.sh                              |  23 ++-
 docker-compose-dev.yml                        |  23 +--
 scripts/crawl_database.py                     |  80 ++++++--
 scripts/crawl_database_by_time.py             |  93 +++++++++
 12 files changed, 409 insertions(+), 34 deletions(-)
 create mode 100644 cope2n-api/fwd_api/api/accuracy_view.py
 create mode 100644 cope2n-api/fwd_api/migrations/0164_subscriptionrequest_client_request_time_and_more.py
 create mode 100644 scripts/crawl_database_by_time.py

diff --git a/cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py b/cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py
index 7039c2b..ada90f8 100755
--- a/cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py
+++ b/cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py
@@ -42,6 +42,7 @@ def sbt_predict(image_url, engine) -> None:
     img = cv2.imdecode(arr, -1)
 
     save_dir = "./tmp_results"
+    os.makedirs(save_dir, exist_ok=True)
     # image_path = os.path.join(save_dir, f"{image_url}.jpg")
     tmp_image_path = os.path.join(save_dir, f"{uuid.uuid4()}.jpg")
     cv2.imwrite(tmp_image_path, img)
diff --git a/cope2n-ai-fi/celery_worker/mock_process_tasks_fi.py b/cope2n-ai-fi/celery_worker/mock_process_tasks_fi.py
index ef16d45..b114330 100755
--- a/cope2n-ai-fi/celery_worker/mock_process_tasks_fi.py
+++ b/cope2n-ai-fi/celery_worker/mock_process_tasks_fi.py
@@ -69,6 +69,7 @@ def process_sbt_invoice(rq_id, list_url, metadata):
         c_connector.process_sbt_invoice_result((rq_id, hoadon, metadata))
         return {"rq_id": rq_id}
     except Exception as e:
+        print(f"[ERROR]: Failed to extract invoice: {e}")
         print(e)
         hoadon = {"status": 404, "content": {}}
         c_connector.process_sbt_invoice_result((rq_id, hoadon, metadata))
diff --git a/cope2n-ai-fi/modules/sdsvkvu b/cope2n-ai-fi/modules/sdsvkvu
index 11fb958..6907ea0 160000
--- a/cope2n-ai-fi/modules/sdsvkvu
+++ b/cope2n-ai-fi/modules/sdsvkvu
@@ -1 +1 @@
-Subproject commit 11fb9588df7e6cb03e7a761e3f728f11045bee09
+Subproject commit 6907ea0183b141e3b4f3c21758c9123f1e9b2a27
diff --git a/cope2n-api/fwd_api/api/accuracy_view.py b/cope2n-api/fwd_api/api/accuracy_view.py
new file mode 100644
index 0000000..e482e58
--- /dev/null
+++ b/cope2n-api/fwd_api/api/accuracy_view.py
@@ -0,0 +1,183 @@
+from rest_framework import status, viewsets
+from rest_framework.decorators import action
+from rest_framework.response import Response
+from django.core.paginator import Paginator
+from django.http import JsonResponse
+from datetime import datetime
+from django.utils import timezone
+from django.db.models import Q
+
+from drf_spectacular.utils import extend_schema, OpenApiParameter, OpenApiTypes
+# from drf_spectacular.types import OpenApiString
+from ..models import SubscriptionRequest
+
+
+class AccuracyViewSet(viewsets.ViewSet):
+    lookup_field = "username"
+
+    @extend_schema(
+        parameters=[
+            OpenApiParameter(
+                name='start_date',
+                location=OpenApiParameter.QUERY,
+                description='Start date (YYYY-mm-DDTHH:MM:SS)',
+                type=OpenApiTypes.DATE,
+                default='2023-01-02T00:00:00',
+            ),
+            OpenApiParameter(
+                name='end_date',
+                location=OpenApiParameter.QUERY,
+                description='End date (YYYY-mm-DDTHH:MM:SS)',
+                type=OpenApiTypes.DATE,
+                default='2024-01-10T00:00:00',
+            ),
+            OpenApiParameter(
+                name='include_test',
+                location=OpenApiParameter.QUERY,
+                description='Whether to include test record or not',
+                type=OpenApiTypes.BOOL,
+            ),
+            OpenApiParameter(
+                name='is_reviewed',
+                location=OpenApiParameter.QUERY,
+                description='Which records to be query',
+                type=OpenApiTypes.STR,
+                enum=['reviewed', 'not reviewed', 'all'],
+            ),
+            OpenApiParameter(
+                name='request_id',
+                location=OpenApiParameter.QUERY,
+                description='Specific request id',
+                type=OpenApiTypes.STR,
+            ),
+            OpenApiParameter(
+                name='redemption_id',
+                location=OpenApiParameter.QUERY,
+                description='Specific redemption id',
+                type=OpenApiTypes.STR,
+            ),
+            OpenApiParameter(
+                name='quality',
+                location=OpenApiParameter.QUERY,
+                description='One or more of [bad, good, all]',
+                type=OpenApiTypes.STR,
+                enum=['bad', 'good', 'all'],
+            ),
+            OpenApiParameter(
+                name='page',
+                location=OpenApiParameter.QUERY,
+                description='Page number',
+                type=OpenApiTypes.INT,
+                required=False
+            ),
+            OpenApiParameter(
+                name='page_size',
+                location=OpenApiParameter.QUERY,
+                description='Number of items per page',
+                type=OpenApiTypes.INT,
+                required=False
+            ),
+        ],
+        responses=None, tags=['Accuracy']
+    )
+    @action(detail=False, url_path="request_list", methods=["GET"])
+    def get_subscription_requests(self, request):
+        if request.method == 'GET':
+            start_date_str = request.GET.get('start_date')
+            end_date_str = request.GET.get('end_date')
+            page_number = int(request.GET.get('page', 1))
+            page_size = int(request.GET.get('page_size', 10))
+            request_id = request.GET.get('request_id', None)
+            redemption_id = request.GET.get('redemption_id', None)
+            is_reviewed = request.GET.get('is_reviewed', None)
+            include_test = request.GET.get('include_test', False)
+            quality = request.GET.get('quality', None)
+
+            try:
+                start_date = datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S')
+                end_date = datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S')
+            except ValueError:
+                return JsonResponse({'error': 'Invalid date format. Please use YYYY-MM-DD.'}, status=400)
+
+            base_query = Q(created_at__range=(start_date, end_date))
+            if request_id:
+                base_query &= Q(request_id=request_id)
+            if redemption_id:
+                base_query &= Q(redemption_id=redemption_id)
+            base_query &= Q(is_test_request=False)
+            if isinstance(include_test, str):
+                include_test = True if include_test=="true" else False
+                if include_test:
+                    # base_query = ~base_query
+                    base_query.children = base_query.children[:-1]
+
+            elif isinstance(include_test, bool):
+                if include_test:
+                    base_query = ~base_query
+            if isinstance(is_reviewed, str):
+                if is_reviewed == "reviewed":
+                    base_query &= Q(is_reviewed=True)
+                elif is_reviewed == "not reviewed":
+                    base_query &= Q(is_reviewed=False)
+                elif is_reviewed == "all":
+                    pass
+            if isinstance(quality, str):
+                if quality == "good":
+                    base_query &= Q(is_bad_image_quality=False)
+                elif quality == "bad":
+                    base_query &= Q(is_bad_image_quality=True)
+                elif quality == "all":
+                    pass
+
+            subscription_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at')
+
+            paginator = Paginator(subscription_requests, page_size)
+            page = paginator.get_page(page_number)
+
+            data = []
+            for request in page:
+                imeis = []
+                purchase_date = []
+                retailer = ""
+                try:
+                    if request.reviewed_result is not None:
+                        imeis = request.reviewed_result.get("imei_number", [])
+                        purchase_date = request.reviewed_result.get("purchase_date", [])
+                        retailer = request.reviewed_result.get("retailername", "")
+                    elif request.feedback_result is not None :
+                        imeis = request.feedback_result.get("imei_number", [])
+                        purchase_date = request.feedback_result.get("purchase_date", [])
+                        retailer = request.feedback_result.get("retailername", "")
+                    elif request.predict_result is not None:
+                        if request.predict_result.get("status", 404) == 200:
+                            imeis = request.predict_result.get("content", {}).get("document", [])[0].get("content", [])[3].get("value", [])
+                            purchase_date = request.predict_result.get("content", {}).get("document", [])[0].get("content", [])[2].get("value", [])
+                            retailer = request.predict_result.get("content", {}).get("document", [])[0].get("content", [])[0].get("value", [])
+                except Exception as e:
+                    print(f"[ERROR]: {e}")
+                    print(f"[ERROR]: {request}")
+                data.append({
+                    'RequestID': request.request_id,
+                    'RedemptionID': request.redemption_id,
+                    'IMEIs': imeis,
+                    'Purchase Date': purchase_date,
+                    'Retailer': retailer,
+                    'Client Request Time (ms)': request.client_request_time,
+                    'Server Processing Time (ms)': request.preprocessing_time + request.ai_inference_time,
+                    'Is Reviewed': request.is_reviewed,
+                    'Is Bad Quality': request.is_bad_image_quality,
+                    'created_at': request.created_at.isoformat()
+                })
+
+            response = {
+                'subscription_requests': data,
+                'page': {
+                    'number': page.number,
+                    'total_pages': page.paginator.num_pages,
+                    'count': page.paginator.count,
+                }
+            }
+
+            return JsonResponse(response)
+
+        return JsonResponse({'error': 'Invalid request method.'}, status=405)
\ No newline at end of file
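
A usage sketch for the new listing endpoint; the host, the /api prefix, and the auth header are assumptions based on the router registration below and the backend port 9000 from the env sample:

    curl -G 'http://localhost:9000/api/ctel/request_list' \
         -H 'Authorization: <token>' \
         --data-urlencode 'start_date=2024-01-01T00:00:00' \
         --data-urlencode 'end_date=2024-01-10T00:00:00' \
         --data-urlencode 'page=1' --data-urlencode 'page_size=10'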
diff --git a/cope2n-api/fwd_api/api/ctel_view.py b/cope2n-api/fwd_api/api/ctel_view.py
index 57155e2..6b77471 100755
--- a/cope2n-api/fwd_api/api/ctel_view.py
+++ b/cope2n-api/fwd_api/api/ctel_view.py
@@ -347,8 +347,7 @@ class CtelViewSet(viewsets.ViewSet):
         S3_path = FileUtils.save_to_S3(file_name, subcription_request, file_path)
 
         return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id})
-
-
+    @extend_schema(request=None, responses=None, tags=['Data'])
     @extend_schema(request=None, responses=None, tags=['templates'], methods=['GET'])
     @action(detail=False, url_path=r"media/(?P\w+)/(?P\w+)", methods=["GET"])
diff --git a/cope2n-api/fwd_api/api_router.py b/cope2n-api/fwd_api/api_router.py
index 6743957..9a466dc 100755
--- a/cope2n-api/fwd_api/api_router.py
+++ b/cope2n-api/fwd_api/api_router.py
@@ -2,6 +2,8 @@ from django.conf import settings
 from rest_framework.routers import DefaultRouter, SimpleRouter
 
 from fwd_api.api.ctel_view import CtelViewSet
+from fwd_api.api.accuracy_view import AccuracyViewSet
+
 from fwd_api.api.ctel_user_view import CtelUserViewSet
 from fwd_api.api.ctel_template_view import CtelTemplateViewSet
 
@@ -13,6 +15,7 @@ else:
 
 router.register("ctel", CtelViewSet, basename="CtelAPI")
 router.register("ctel", CtelUserViewSet, basename="CtelUserAPI")
+router.register("ctel", AccuracyViewSet, basename="AccuracyAPI")
 
 app_name = "api"
 urlpatterns = router.urls
diff --git a/cope2n-api/fwd_api/migrations/0164_subscriptionrequest_client_request_time_and_more.py b/cope2n-api/fwd_api/migrations/0164_subscriptionrequest_client_request_time_and_more.py
new file mode 100644
index 0000000..b86ff1e
--- /dev/null
+++ b/cope2n-api/fwd_api/migrations/0164_subscriptionrequest_client_request_time_and_more.py
@@ -0,0 +1,28 @@
+# Generated by Django 4.1.3 on 2024-01-04 08:24
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('fwd_api', '0163_subscriptionrequest_ai_inference_profile'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='subscriptionrequest',
+            name='client_request_time',
+            field=models.FloatField(default=-1),
+        ),
+        migrations.AddField(
+            model_name='subscriptionrequest',
+            name='redemption_id',
+            field=models.CharField(max_length=200, null=True),
+        ),
+        migrations.AddField(
+            model_name='subscriptionrequest',
+            name='reviewed_result',
+            field=models.JSONField(null=True),
+        ),
+    ]
diff --git a/cope2n-api/fwd_api/models/SubscriptionRequest.py b/cope2n-api/fwd_api/models/SubscriptionRequest.py
index a852104..6018274 100755
--- a/cope2n-api/fwd_api/models/SubscriptionRequest.py
+++ b/cope2n-api/fwd_api/models/SubscriptionRequest.py
@@ -10,10 +10,12 @@ class SubscriptionRequest(models.Model):
     pages_left: int = models.IntegerField(default=1)
     doc_type: str = models.CharField(max_length=100)
     request_id = models.CharField(max_length=200)  # Change to request_id
+    redemption_id = models.CharField(max_length=200, null=True)  # Change to request_id
     process_type = models.CharField(max_length=200)  # driver/id/invoice
     provider_code = models.CharField(max_length=200, default="Guest")  # Request source FWD/CTel
     predict_result = models.JSONField(null=True)
     feedback_result = models.JSONField(null=True)
+    reviewed_result = models.JSONField(null=True)
     status = models.IntegerField()  # 1: Processing(Pending) 2: PredictCompleted 3: ReturnCompleted
     subscription = models.ForeignKey(Subscription, on_delete=models.CASCADE)
     created_at = models.DateTimeField(default=timezone.now, db_index=True)
@@ -23,6 +25,7 @@ class SubscriptionRequest(models.Model):
 
     ai_inference_profile = models.JSONField(null=True)
     preprocessing_time = models.FloatField(default=-1)
+    client_request_time = models.FloatField(default=-1)
     ai_inference_start_time = models.FloatField(default=0)
     ai_inference_time = models.FloatField(default=0)
     cpu_percent = models.FloatField(default=-1)
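
Migration 0164 backs the three new model fields (client_request_time, redemption_id, reviewed_result); a sketch of applying it by hand inside the backend container, mirroring the startup command in docker-compose-dev.yml:

    python manage.py makemigrations && python manage.py migrate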
Python: $tag" -echo "[INFO] Updating everything the remote..." -git submodule update --recursive --remote +# echo "[INFO] Updating everything the remote..." +# git submodule update --recursive --remote echo "[INFO] Pushing AI image with tag: $tag..." docker compose -f docker-compose-dev.yml build cope2n-fi-sbt -docker tag sidp/cope2n-ai-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-ai-fi-sbt:${tag} -docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-ai-fi-sbt:${tag} +docker tag sidp/cope2n-ai-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-ai-fi-sbt:${tag} +docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-ai-fi-sbt:${tag} +# docker tag sidp/cope2n-ai-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-ai-fi-sbt:production +# docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-ai-fi-sbt:production echo "[INFO] Pushing BE image with tag: $tag..." docker compose -f docker-compose-dev.yml build be-ctel-sbt -docker tag sidp/cope2n-be-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-be-fi-sbt:${tag} -docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-be-fi-sbt:${tag} +docker tag sidp/cope2n-be-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-be-fi-sbt:${tag} +# docker tag sidp/cope2n-be-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-be-fi-sbt:production +docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-be-fi-sbt:${tag} +# docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-be-fi-sbt:production echo "[INFO] Pushing FE image with tag: $tag..." docker compose -f docker-compose-dev.yml build fe-sbt -docker tag sidp/cope2n-fe-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-fe-fi-sbt:${tag} -docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-fe-fi-sbt:${tag} +docker tag sidp/cope2n-fe-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-fe-fi-sbt:${tag} +# docker tag sidp/cope2n-fe-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-fe-fi-sbt:production +docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-fe-fi-sbt:${tag} +# docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-fe-fi-sbt:production cp ./docker-compose-prod.yml ./docker-compose_${tag}.yml sed -i "s/{{tag}}/$tag/g" ./docker-compose_${tag}.yml diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 5638fee..fa83e77 100755 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -20,8 +20,8 @@ services: environment: - CELERY_BROKER=amqp://${RABBITMQ_DEFAULT_USER}:${RABBITMQ_DEFAULT_PASS}@rabbitmq-sbt:5672 - CUDA_VISIBLE_DEVICES=0 - # volumes: - # - ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only + volumes: + - ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only working_dir: /workspace/cope2n-ai-fi # deploy: # resources: @@ -74,19 +74,19 @@ services: networks: - ctel-sbt volumes: - - ${HOST_MEDIA_FOLDER}:${MEDIA_ROOT} + # - BE_media:${MEDIA_ROOT} - BE_static:/app/static - ./cope2n-api:/app working_dir: /app depends_on: db-sbt: condition: service_started - command: sh -c "chmod -R 777 /app/static; sleep 5; python manage.py collectstatic --no-input && - python manage.py makemigrations && - python manage.py migrate && - python manage.py compilemessages && - gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod - # command: bash -c "tail -f > 
/dev/null" + # command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input && + # python manage.py makemigrations && + # python manage.py migrate && + # python manage.py compilemessages && + # gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod + command: bash -c "tail -f > /dev/null" minio: image: minio/minio @@ -164,7 +164,7 @@ services: rabbitmq-sbt: condition: service_started volumes: - - ${HOST_MEDIA_FOLDER}:${MEDIA_ROOT} + # - BE_media:${MEDIA_ROOT} - ./cope2n-api:/app working_dir: /app @@ -222,4 +222,5 @@ services: volumes: db_data: - BE_static: \ No newline at end of file + BE_static: + BE_media: \ No newline at end of file diff --git a/scripts/crawl_database.py b/scripts/crawl_database.py index 7a06cf0..7cfacd5 100644 --- a/scripts/crawl_database.py +++ b/scripts/crawl_database.py @@ -4,7 +4,11 @@ import boto3 import os from tqdm import tqdm -OUTPUT_NAME = "issue_7" +from dotenv import load_dotenv + +load_dotenv("../.env_prod") + +OUTPUT_NAME = "5Jan" # Database connection details db_host = os.environ.get('DB_HOST', "") @@ -22,17 +26,69 @@ secret_key = os.environ.get('S3_SECRET_KEY', "") # Request IDs for filtering request_ids = [ -'SAPe39e970592394b27a17d4a64c39f7ed0', -'SAP477a02a21faf41ecbd1a0bb21636e644', -'SAP459d58a7dba84e7195f5ad8f46fc1530', -'SAPa5aaa0e1ce8c4824a7b0ded2e550caec', -'SAP492c063db44049c6b1e44f59c531f8d8', -'SAP3d0bdd5cb4ce4291b0cb77d7de0a48e9', -'SAP7e2c673e49c441a991661d1227342131', -'SAPc26974bcac2649b28227981459a427aa', -'SAP25b12dde6b854c70b512ac79059ac1d4', -'SAP_20240102194138_bf4a3cc4e0304d0385126b6592c2632d', -'SAP_20240102214550_8389ec1b84a249738eed9d2152bf0922', +'SAP_20240104082259_85c7f4dd262946d183dbec826fc6709e', +'SAP_20240104082709_c05319c56fd3422dbf133aee33fc3e10', +'SAP_20240104091512_23ae1a81f1314be0a27ebeae0e8fa0d7', +'SAP_20240104091512_23ae1a81f1314be0a27ebeae0e8fa0d7', +'SAP_20240104091816_025c90b9789246ed811772003622fa0d', +'SAP_20240104092541_5c71e535f07c4cc8803b45336ec70f77', +'SAP_20240104100259_5a667d33cb914e7ba5a4447b9e17d649', +'SAP_20240104101145_a7010bac159f47bc95d5866e6c5f5bdf', +'SAP_20240104105702_95252229252b4e238add117919ce882a', +'SAP_20240104112108_34b2cca84a42473ca77bc316e787fe2e', +'SAP_20240104114038_dd57ecf7982c4a5eaf1409f5ef050fab', +'SAP_20240104115942_1b77f411791940a4a85c838c2e9931ad', +'SAP_20240104120746_d63319f4cde343d894f9b89706756a9d', +'SAP_20240104123607_48d25c04fec6411dbf013c6a19054e77', +'SAP_20240104130957_ece21bad331b4f2cad0887693331aa3a', +'SAP_20240104131228_edebee4000ae4bd382feaea5d6c82031', +'SAP_20240104132641_97909efd013f45e89d83d36a5ea35c52', +'SAP_20240104133527_ad55f6ee667643ba8ae65e9ef1c32418', +'SAP_20240104134014_2d2cdbc1b06a44868ce1b32cdb53864f', +'SAP_20240104134425_9b37555ef8094153838e6048f7c63c9b', +'SAP_20240104134457_55a1cf1e371146d995c8849cc0ba7c7b', +'SAP_20240104134609_3f7d308e467d43dbb59a7bcc02e3a7d2', +'SAP_20240104134709_c708daf83f7e4aa69ab9696afe1a9081', +'SAP_20240104135007_44b7a30c5e9c41a0b8065ac4e7000223', +'SAP_20240104141547_7203ddb915274e99a08ae6e54ec49cbd', +'SAP_20240104141559_62fd19a6179248ecb4ff15b33338b294', +'SAP_20240104142352_68699cbe140f4264b858981a3ac67e40', +'SAP_20240104143937_801931cc1f344a4ca8384dfe13d1accc', +'SAP_20240104144730_3180a8919e604e26a188ce051465c392', +'SAP_20240104144933_3380f64019634769befed49e9a671bc6', +'SAP_20240104151239_76ae2f1d02444f7fabbc104eb77fe45f', +'SAP_20240104151243_61775c88685d434d98bb9fc7a9889b8e', 
diff --git a/scripts/crawl_database.py b/scripts/crawl_database.py
index 7a06cf0..7cfacd5 100644
--- a/scripts/crawl_database.py
+++ b/scripts/crawl_database.py
@@ -4,7 +4,11 @@ import boto3
 import os
 from tqdm import tqdm
 
-OUTPUT_NAME = "issue_7"
+from dotenv import load_dotenv
+
+load_dotenv("../.env_prod")
+
+OUTPUT_NAME = "5Jan"
 
 # Database connection details
 db_host = os.environ.get('DB_HOST', "")
@@ -22,17 +26,69 @@ secret_key = os.environ.get('S3_SECRET_KEY', "")
 
 # Request IDs for filtering
 request_ids = [
-'SAPe39e970592394b27a17d4a64c39f7ed0',
-'SAP477a02a21faf41ecbd1a0bb21636e644',
-'SAP459d58a7dba84e7195f5ad8f46fc1530',
-'SAPa5aaa0e1ce8c4824a7b0ded2e550caec',
-'SAP492c063db44049c6b1e44f59c531f8d8',
-'SAP3d0bdd5cb4ce4291b0cb77d7de0a48e9',
-'SAP7e2c673e49c441a991661d1227342131',
-'SAPc26974bcac2649b28227981459a427aa',
-'SAP25b12dde6b854c70b512ac79059ac1d4',
-'SAP_20240102194138_bf4a3cc4e0304d0385126b6592c2632d',
-'SAP_20240102214550_8389ec1b84a249738eed9d2152bf0922',
+'SAP_20240104082259_85c7f4dd262946d183dbec826fc6709e',
+'SAP_20240104082709_c05319c56fd3422dbf133aee33fc3e10',
+'SAP_20240104091512_23ae1a81f1314be0a27ebeae0e8fa0d7',
+'SAP_20240104091512_23ae1a81f1314be0a27ebeae0e8fa0d7',
+'SAP_20240104091816_025c90b9789246ed811772003622fa0d',
+'SAP_20240104092541_5c71e535f07c4cc8803b45336ec70f77',
+'SAP_20240104100259_5a667d33cb914e7ba5a4447b9e17d649',
+'SAP_20240104101145_a7010bac159f47bc95d5866e6c5f5bdf',
+'SAP_20240104105702_95252229252b4e238add117919ce882a',
+'SAP_20240104112108_34b2cca84a42473ca77bc316e787fe2e',
+'SAP_20240104114038_dd57ecf7982c4a5eaf1409f5ef050fab',
+'SAP_20240104115942_1b77f411791940a4a85c838c2e9931ad',
+'SAP_20240104120746_d63319f4cde343d894f9b89706756a9d',
+'SAP_20240104123607_48d25c04fec6411dbf013c6a19054e77',
+'SAP_20240104130957_ece21bad331b4f2cad0887693331aa3a',
+'SAP_20240104131228_edebee4000ae4bd382feaea5d6c82031',
+'SAP_20240104132641_97909efd013f45e89d83d36a5ea35c52',
+'SAP_20240104133527_ad55f6ee667643ba8ae65e9ef1c32418',
+'SAP_20240104134014_2d2cdbc1b06a44868ce1b32cdb53864f',
+'SAP_20240104134425_9b37555ef8094153838e6048f7c63c9b',
+'SAP_20240104134457_55a1cf1e371146d995c8849cc0ba7c7b',
+'SAP_20240104134609_3f7d308e467d43dbb59a7bcc02e3a7d2',
+'SAP_20240104134709_c708daf83f7e4aa69ab9696afe1a9081',
+'SAP_20240104135007_44b7a30c5e9c41a0b8065ac4e7000223',
+'SAP_20240104141547_7203ddb915274e99a08ae6e54ec49cbd',
+'SAP_20240104141559_62fd19a6179248ecb4ff15b33338b294',
+'SAP_20240104142352_68699cbe140f4264b858981a3ac67e40',
+'SAP_20240104143937_801931cc1f344a4ca8384dfe13d1accc',
+'SAP_20240104144730_3180a8919e604e26a188ce051465c392',
+'SAP_20240104144933_3380f64019634769befed49e9a671bc6',
+'SAP_20240104151239_76ae2f1d02444f7fabbc104eb77fe45f',
+'SAP_20240104151243_61775c88685d434d98bb9fc7a9889b8e',
+'SAP_20240104151243_61775c88685d434d98bb9fc7a9889b8e',
+'SAP_20240104151243_61775c88685d434d98bb9fc7a9889b8e',
+'SAP_20240104151638_a08a61448a58459a8f2209f64e54c213',
+'SAP_20240104152030_479259e84c5b449499df2cb1023e91ac',
+'SAP_20240104160108_a03634c80583454494b77efcdecbcc71',
+'SAP_20240104160108_a03634c80583454494b77efcdecbcc71',
+'SAP_20240104160311_e7cb02a11bbd4ea1906b3758e97f33ab',
+'SAP_20240104161305_89c5518563224ab89345439dffd504a5',
+'SAP_20240104161305_89c5518563224ab89345439dffd504a5',
+'SAP_20240104164022_0b94af24db9d4ebe9af2086a4bd3cd7e',
+'SAP_20240104170837_58165ec9f88d4e4aa3095ba3dda201d7',
+'SAP_20240104171740_10279cfebbf344f184bbb429cb9a15ad',
+'SAP_20240104175202_247892a4dc7f40f28eafac9c2ad85971',
+'SAP_20240104180517_8ce7a1981dc743e08e09284fd904d536',
+'SAP_20240104182034_406bac0ab0684727b9efb1bb9b422026',
+'SAP_20240104182426_92a48bb4b85a4c3abb48e0d7cf727777',
+'SAP_20240104183506_aa1fa7d6774a4509a142a6f4a7b5af29',
+'SAP_20240104185716_f9d464e42c314370910913b37133e6c3',
+'SAP_20240104190220_573244d03bb8408dbca422ff60eb527a',
+'SAP_20240104191236_deedcc588b7b4928a950f7dc2ce4230c',
+'SAP_20240104191236_deedcc588b7b4928a950f7dc2ce4230c',
+'SAP_20240104192614_990bf10c38e144a7bf489548d356720e',
+'SAP_20240104192614_990bf10c38e144a7bf489548d356720e',
+'SAP_20240104212143_f8c1b4a6e6e443fcb5e882c7a5b917f3',
+'SAP_20240104212924_ee1998a60d6848af9576292ac383037f',
+'SAP_20240104214418_f8e1abf808c8499097ecddf014d401c7',
+'SAP_20240104214619_8d27c05a9ce74b738b20195cb816bfbf',
+'SAP_20240104215037_477863cdc0aa4d5fa1f05bbb0ae673ed',
+'SAP_20240104221543_37605982df624324ad2594e268054361',
+'SAP_20240104225026_acacd06ea6de4a738bc47683dc53f378',
+'SAP_20240104235743_b48aa3e744ed428795171d84066adefe',
 ]
 
 # Connect to the PostgreSQL database
diff --git a/scripts/crawl_database_by_time.py b/scripts/crawl_database_by_time.py
new file mode 100644
index 0000000..17f6570
--- /dev/null
+++ b/scripts/crawl_database_by_time.py
@@ -0,0 +1,93 @@
+import csv
+import psycopg2
+import boto3
+import os
+from tqdm import tqdm
+from datetime import datetime, timedelta
+from pytz import timezone
+
+from dotenv import load_dotenv
+
+load_dotenv("../.env_prod")
+
+OUTPUT_NAME = "missing_records"
+START_DATE = datetime(2023, 12, 28, tzinfo=timezone('Asia/Ho_Chi_Minh'))
+END_DATE = datetime(2024, 1, 3, tzinfo=timezone('Asia/Ho_Chi_Minh'))
+
+# Database connection details
+db_host = os.environ.get('DB_HOST', "")
+db_name = os.environ.get('DB_SCHEMA', "")
+db_user = os.environ.get('DB_USER', "")
+db_password = os.environ.get('DB_PASSWORD', "")
+
+# S3 bucket details
+s3_bucket_name = os.environ.get('S3_BUCKET_NAME', "")
+s3_folder_prefix = 'sbt_invoice'
+
+# S3 access credentials
+access_key = os.environ.get('S3_ACCESS_KEY', "")
+secret_key = os.environ.get('S3_SECRET_KEY', "")
+
+# Request IDs for filtering
+
+# Connect to the PostgreSQL database
+conn = psycopg2.connect(
+    host=db_host,
+    database=db_name,
+    user=db_user,
+    password=db_password
+)
+
+# Create a cursor
+cursor = conn.cursor()
+
+
+# Execute the SELECT query with the filter
+query = "SELECT * FROM fwd_api_subscriptionrequest WHERE created_at >= %s AND created_at <= %s"
+cursor.execute(query, (START_DATE, END_DATE))
+
+# Fetch the filtered data
+data = cursor.fetchall()
+
+# Define the CSV file path
+csv_file_path = f'{OUTPUT_NAME}.csv'
+
+# Write the data to the CSV file
+with open(csv_file_path, 'w', newline='') as csv_file:
+    writer = csv.writer(csv_file)
+    writer.writerow([desc[0] for desc in cursor.description])  # Write column headers
+    writer.writerows(data)  # Write the filtered data rows
+
+# Close the cursor and database connection
+cursor.close()
+conn.close()
+
+# Download folders from S3
+s3_client = boto3.client(
+    's3',
+    aws_access_key_id=access_key,
+    aws_secret_access_key=secret_key
+)
+
+request_ids = []
+for rq in data:
+    rq_id = rq[3]
+    request_ids.append(rq_id)
+
+for request_id in tqdm(request_ids):
+    folder_key = f"{s3_folder_prefix}/{request_id}/"  # Assuming folder structure like: s3_bucket_name/s3_folder_prefix/request_id/
+    local_folder_path = f"{OUTPUT_NAME}/{request_id}/"  # Path to the local folder to save the downloaded files
+    os.makedirs(OUTPUT_NAME, exist_ok=True)
+    os.makedirs(local_folder_path, exist_ok=True)
+
+
+    # List objects in the S3 folder
+    response = s3_client.list_objects_v2(Bucket=s3_bucket_name, Prefix=folder_key)
+    objects = response.get('Contents', [])
+
+    for s3_object in objects:
+        object_key = s3_object['Key']
+        local_file_path = local_folder_path + object_key.split('/')[-1]  # Extracting the file name from the object key
+
+        # Download the S3 object to the local file
+        s3_client.download_file(s3_bucket_name, object_key, local_file_path)
\ No newline at end of file
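
This variant pulls credentials from ../.env_prod via python-dotenv, so a sketch of running it from inside the scripts folder (the date window is hard-coded in START_DATE/END_DATE at the top of the file):

    cd scripts && python crawl_database_by_time.py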
url_path="images/feedback_file", methods=["POST"]) + def feedback_file(self, request): + files = request.data.getlist('files') + FileUtils.validate_csv_feedback(files) + + user_info = ProcessUtil.get_user(request) + user = user_info.user + sub = user_info.current_sub + + feedback_id = "FB_" + datetime.now().strftime("%Y%m%d%H%M%S") + "_" + uuid.uuid4().hex + + origin_name = "" + file_names = "" + for i, file in enumerate(files): + origin_name += file.name + "," + file_names += f"{feedback_id}_{i}.csv" + origin_name = origin_name[:-1] + + new_request: FeedbackRequest = FeedbackRequest(feedback_id=feedback_id, + origin_name=origin_name, + file_name=file_names, + subscription=sub) + new_request.save() + + for i, file in enumerate(files): + file_name = f"{feedback_id}_{i}.csv" + # Save to local + file_path = FileUtils.save_feedback_file(file_name, new_request, file) + FileUtils.validate_feedback_file(file_path) + # Upload to S3 + S3_path = FileUtils.save_feedback_to_S3(file_name, feedback_id, file_path) + # Process csv file in the background + ProcessUtil.process_feedback(feedback_id, file_path) + + return JsonResponse(status=status.HTTP_200_OK, data={"feedback_id": feedback_id}) + @extend_schema(request=None, responses=None, tags=['Data']) @extend_schema(request=None, responses=None, tags=['templates'], methods=['GET']) @action(detail=False, url_path=r"media/(?P\w+)/(?P\w+)", methods=["GET"]) diff --git a/cope2n-api/fwd_api/celery_worker/client_connector.py b/cope2n-api/fwd_api/celery_worker/client_connector.py index 3ff8f88..16c7dd5 100755 --- a/cope2n-api/fwd_api/celery_worker/client_connector.py +++ b/cope2n-api/fwd_api/celery_worker/client_connector.py @@ -30,8 +30,10 @@ class CeleryConnector: 'process_sbt_invoice': {'queue': "invoice_sbt"}, 'do_pdf': {'queue': "do_pdf"}, 'upload_file_to_s3': {'queue': "upload_file_to_s3"}, + 'upload_feedback_to_s3': {'queue': "upload_feedback_to_s3"}, 'upload_obj_to_s3': {'queue': "upload_obj_to_s3"}, 'remove_local_file': {'queue': "remove_local_file"}, + 'csv_feedback': {'queue': "csv_feedback"}, } app = Celery( @@ -39,10 +41,14 @@ class CeleryConnector: broker=settings.BROKER_URL, broker_transport_options={'confirm_publish': False}, ) + def csv_feedback(self, args): + return self.send_task('csv_feedback', args) def do_pdf(self, args): return self.send_task('do_pdf', args) def upload_file_to_s3(self, args): return self.send_task('upload_file_to_s3', args) + def upload_feedback_to_s3(self, args): + return self.send_task('upload_feedback_to_s3', args) def upload_obj_to_s3(self, args): return self.send_task('upload_obj_to_s3', args) def remove_local_file(self, args): diff --git a/cope2n-api/fwd_api/celery_worker/internal_task.py b/cope2n-api/fwd_api/celery_worker/internal_task.py index 04d875f..5ea175b 100755 --- a/cope2n-api/fwd_api/celery_worker/internal_task.py +++ b/cope2n-api/fwd_api/celery_worker/internal_task.py @@ -9,12 +9,13 @@ from fwd_api.models import SubscriptionRequest, UserProfile from fwd_api.celery_worker.worker import app from ..constant.common import FolderFileType, image_extensions from ..exception.exceptions import FileContentInvalidException -from fwd_api.models import SubscriptionRequestFile +from fwd_api.models import SubscriptionRequestFile, FeedbackRequest from ..utils import file as FileUtils from ..utils import process as ProcessUtil from ..utils import s3 as S3Util from fwd_api.constant.common import ProcessType - +import csv +import json from celery.utils.log import get_task_logger from fwd import settings @@ -59,6 +60,61 @@ 
diff --git a/cope2n-api/fwd_api/celery_worker/client_connector.py b/cope2n-api/fwd_api/celery_worker/client_connector.py
index 3ff8f88..16c7dd5 100755
--- a/cope2n-api/fwd_api/celery_worker/client_connector.py
+++ b/cope2n-api/fwd_api/celery_worker/client_connector.py
@@ -30,8 +30,10 @@ class CeleryConnector:
         'process_sbt_invoice': {'queue': "invoice_sbt"},
         'do_pdf': {'queue': "do_pdf"},
         'upload_file_to_s3': {'queue': "upload_file_to_s3"},
+        'upload_feedback_to_s3': {'queue': "upload_feedback_to_s3"},
         'upload_obj_to_s3': {'queue': "upload_obj_to_s3"},
         'remove_local_file': {'queue': "remove_local_file"},
+        'csv_feedback': {'queue': "csv_feedback"},
     }
     app = Celery(
@@ -39,10 +41,14 @@ class CeleryConnector:
         broker=settings.BROKER_URL,
         broker_transport_options={'confirm_publish': False},
     )
+    def csv_feedback(self, args):
+        return self.send_task('csv_feedback', args)
     def do_pdf(self, args):
         return self.send_task('do_pdf', args)
     def upload_file_to_s3(self, args):
         return self.send_task('upload_file_to_s3', args)
+    def upload_feedback_to_s3(self, args):
+        return self.send_task('upload_feedback_to_s3', args)
     def upload_obj_to_s3(self, args):
         return self.send_task('upload_obj_to_s3', args)
     def remove_local_file(self, args):
diff --git a/cope2n-api/fwd_api/celery_worker/internal_task.py b/cope2n-api/fwd_api/celery_worker/internal_task.py
index 04d875f..5ea175b 100755
--- a/cope2n-api/fwd_api/celery_worker/internal_task.py
+++ b/cope2n-api/fwd_api/celery_worker/internal_task.py
@@ -9,12 +9,13 @@ from fwd_api.models import SubscriptionRequest, UserProfile
 from fwd_api.celery_worker.worker import app
 from ..constant.common import FolderFileType, image_extensions
 from ..exception.exceptions import FileContentInvalidException
-from fwd_api.models import SubscriptionRequestFile
+from fwd_api.models import SubscriptionRequestFile, FeedbackRequest
 from ..utils import file as FileUtils
 from ..utils import process as ProcessUtil
 from ..utils import s3 as S3Util
 from fwd_api.constant.common import ProcessType
-
+import csv
+import json
 
 from celery.utils.log import get_task_logger
 from fwd import settings
@@ -59,6 +60,61 @@ def process_image_file(file_name: str, file_path, request, user) -> list:
         'request_file_id': new_request_file.code
     }]
 
+@app.task(name="csv_feedback")
+def process_csv_feedback(csv_file_path, feedback_id):
+    # load file to RAM
+    status = {}
+    with open(csv_file_path, 'r') as file:
+        reader = csv.DictReader(file)
+        # for rq in rqs
+        for row in reader:
+            # get request_subcription
+            request_id = row.get('requestId')
+            sub_rqs = SubscriptionRequest.objects.filter(request_id=request_id)
+            if len(sub_rqs) != 1:
+                status[request_id] = f"Found {len(sub_rqs)} records of request id {request_id}"
+                continue
+            else:
+                sub_rq = sub_rqs[0]
+            fb = {}
+            # update user result (with validate)
+            redemption_id = row.get('redemptionNumber')
+            imei1 = row.get('imeiNumber')
+            imei2 = row.get('imeiNumber2')
+            purchase_date = row.get('Purchase Date')
+            retailer = row.get('retailer')
+            sold_to_party = row.get('Sold to party')
+            server_time = float(row.get('timetakenmilli'))
+            fb['request_id'] = request_id
+            fb['retailername'] = retailer
+            fb['sold_to_party'] = sold_to_party
+            fb['purchase_date'] = purchase_date
+            fb['imei_number'] = [imei1, imei2]
+            sub_rq.feedback_result = fb
+            sub_rq.client_request_time = server_time
+            # update redemption_id if exist
+            if len(redemption_id) > 0:
+                sub_rq.redemption_id = redemption_id
+            sub_rq.save()
+    # update log into database
+    feedback_rq = FeedbackRequest.objects.filter(feedback_id=feedback_id).first()
+    feedback_rq.error_status = status
+    # save log to local
+    directory_name = os.path.dirname(csv_file_path)
+    file_path = csv_file_path.replace(".csv", "_error.json")
+    with open(file_path, "w") as outfile:
+        json.dump(status, outfile)
+    # save to s3
+    s3_key = os.path.join("feedback", directory_name.split("/")[-1], file_path.split("/")[-1])
+    if s3_client.s3_client is not None:
+        try:
+            # check if saved then delete local
+            s3_client.upload_file(file_path, s3_key)
+            os.remove(file_path)
+        except Exception as e:
+            logger.error(f"Unable to set S3: {e}")
+            print(f"Unable to set S3: {e}")
+    feedback_rq.save()
 
 @app.task(name='do_pdf')
 def process_pdf(rq_id, sub_id, p_type, user_id, files):
@@ -136,6 +192,21 @@ def upload_file_to_s3(local_file_path, s3_key, request_id):
     else:
         logger.info(f"S3 is not available, skipping,...")
 
+@app.task(name='upload_feedback_to_s3')
+def upload_feedback_to_s3(local_file_path, s3_key, feedback_id):
+    if s3_client.s3_client is not None:
+        try:
+            s3_client.upload_file(local_file_path, s3_key)
+            feed_request = FeedbackRequest.objects.filter(feedback_id=feedback_id)[0]
+            feed_request.S3_uploaded = True
+            feed_request.save()
+        except Exception as e:
+            logger.error(f"Unable to set S3: {e}")
+            print(f"Unable to set S3: {e}")
+            return
+    else:
+        logger.info(f"S3 is not available, skipping,...")
+
 @app.task(name='remove_local_file')
 def remove_local_file(local_file_path, request_id):
     print(f"[INFO] Removing local file: {local_file_path}, ...")
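
A minimal CSV the csv_feedback task accepts, using exactly the header names the worker reads (the row values here are made up):

    requestId,redemptionNumber,imeiNumber,imeiNumber2,Purchase Date,retailer,Sold to party,timetakenmilli
    SAP_20240104082259_85c7f4dd262946d183dbec826fc6709e,RD0001,123456789012345,,2024-01-04,Example Store,1000,1500

Note that timetakenmilli is parsed with float() and redemptionNumber with len(), so both columns must be present and non-empty for every row.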
diff --git a/cope2n-api/fwd_api/celery_worker/worker.py b/cope2n-api/fwd_api/celery_worker/worker.py
index ee497cd..a47b5c9 100755
--- a/cope2n-api/fwd_api/celery_worker/worker.py
+++ b/cope2n-api/fwd_api/celery_worker/worker.py
@@ -38,7 +38,7 @@ app.conf.update({
         Queue('upload_file_to_s3'),
         Queue('upload_obj_to_s3'),
         Queue('remove_local_file'),
-
+        Queue('csv_feedback'),
     ],
 
     'task_routes': {
@@ -52,9 +52,10 @@ app.conf.update({
         'process_sbt_invoice': {'queue': "invoice_sbt"},
         'do_pdf': {'queue': "do_pdf"},
         'upload_file_to_s3': {'queue': "upload_file_to_s3"},
-        'upload_obj_to_s3': {'queue': "upload_obj_to_s3"},
-        'upload_file_to_s3': {'queue': "upload_file_to_s3"},
+        'upload_feedback_to_s3': {'queue': "upload_feedback_to_s3"},
+        'upload_obj_to_s3': {'queue': "upload_obj_to_s3"},
         'remove_local_file': {'queue': "remove_local_file"},
+        'csv_feedback': {'queue': "csv_feedback"},
     }
 })
diff --git a/cope2n-api/fwd_api/exception/exceptions.py b/cope2n-api/fwd_api/exception/exceptions.py
index 3292a5e..d584fb2 100755
--- a/cope2n-api/fwd_api/exception/exceptions.py
+++ b/cope2n-api/fwd_api/exception/exceptions.py
@@ -67,6 +67,11 @@ class RequiredFieldException(GeneralException):
     default_detail = 'Field required'
     detail_with_arg = '{} param is required'
 
+class RequiredColumnException(GeneralException):
+    status_code = status.HTTP_400_BAD_REQUEST
+    default_code = 4003
+    default_detail = 'Collumns required'
+    detail_with_arg = '{} collumns are required'
 
 class DuplicateEntityException(GeneralException):
     status_code = status.HTTP_400_BAD_REQUEST
diff --git a/cope2n-api/fwd_api/migrations/0165_feedbackrequest.py b/cope2n-api/fwd_api/migrations/0165_feedbackrequest.py
new file mode 100644
index 0000000..8d18c02
--- /dev/null
+++ b/cope2n-api/fwd_api/migrations/0165_feedbackrequest.py
@@ -0,0 +1,29 @@
+# Generated by Django 4.1.3 on 2024-01-09 10:08
+
+from django.db import migrations, models
+import django.db.models.deletion
+import django.utils.timezone
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('fwd_api', '0164_subscriptionrequest_client_request_time_and_more'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='FeedbackRequest',
+            fields=[
+                ('id', models.AutoField(primary_key=True, serialize=False)),
+                ('feedback_id', models.CharField(max_length=200)),
+                ('file_name', models.CharField(max_length=200)),
+                ('origin_name', models.CharField(max_length=200)),
+                ('error_status', models.JSONField(null=True)),
+                ('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)),
+                ('updated_at', models.DateTimeField(auto_now=True)),
+                ('S3_uploaded', models.BooleanField(default=False)),
+                ('subscription', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='fwd_api.subscription')),
+            ],
+        ),
+    ]
diff --git a/cope2n-api/fwd_api/models/FeedbackRequest.py b/cope2n-api/fwd_api/models/FeedbackRequest.py
new file mode 100644
index 0000000..b4448fa
--- /dev/null
+++ b/cope2n-api/fwd_api/models/FeedbackRequest.py
@@ -0,0 +1,14 @@
+from django.db import models
+from django.utils import timezone
+from fwd_api.models.Subscription import Subscription
+
+class FeedbackRequest(models.Model):
+    id = models.AutoField(primary_key=True)
+    feedback_id = models.CharField(max_length=200)  # Change to request_id
+    file_name = models.CharField(max_length=200)  # Change to request_id
+    origin_name = models.CharField(max_length=200)  # Change to request_id
+    error_status = models.JSONField(null=True)
+    created_at = models.DateTimeField(default=timezone.now, db_index=True)
+    updated_at = models.DateTimeField(auto_now=True)
+    subscription = models.ForeignKey(Subscription, on_delete=models.CASCADE)
+    S3_uploaded = models.BooleanField(default=False)
\ No newline at end of file
diff --git a/cope2n-api/fwd_api/models/__init__.py b/cope2n-api/fwd_api/models/__init__.py
index 0308fc6..3cfcd22 100755
--- a/cope2n-api/fwd_api/models/__init__.py
+++ b/cope2n-api/fwd_api/models/__init__.py
@@ -5,3 +5,5 @@ from .OcrTemplate import OcrTemplate
 from .OcrTemplateBox import OcrTemplateBox
 from .PricingPlan import PricingPlan
 from .Subscription import Subscription
+from .FeedbackRequest import FeedbackRequest
+
diff --git a/cope2n-api/fwd_api/utils/file.py b/cope2n-api/fwd_api/utils/file.py
index f3af27e..0fa22ed 100644
--- a/cope2n-api/fwd_api/utils/file.py
+++ b/cope2n-api/fwd_api/utils/file.py
@@ -10,13 +10,29 @@ from django.core.files.uploadedfile import TemporaryUploadedFile
 from fwd import settings
 from fwd_api.constant.common import allowed_file_extensions
 from fwd_api.exception.exceptions import GeneralException, RequiredFieldException, InvalidException, \
-    ServiceUnavailableException, FileFormatInvalidException, LimitReachedException, InvalidDecompressedSizeException
-from fwd_api.models import SubscriptionRequest, OcrTemplate
+    ServiceUnavailableException, FileFormatInvalidException, LimitReachedException, InvalidDecompressedSizeException, RequiredColumnException
+from fwd_api.models import SubscriptionRequest, OcrTemplate, FeedbackRequest
 from fwd_api.utils import process as ProcessUtil
 from fwd_api.utils.crypto import image_authenticator
 from fwd_api.utils.image import resize
 from ..celery_worker.client_connector import c_connector
 import imagesize
+import csv
+
+def validate_feedback_file(csv_file_path):
+    required_columns = ['redemptionNumber', 'requestId', 'imeiNumber', 'imeiNumber2', 'Purchase Date', 'retailer', 'Sold to party', 'timetakenmilli']
+    missing_columns = []
+
+    with open(csv_file_path, 'r') as file:
+        reader = csv.DictReader(file)
+
+        # Check if all required columns are present
+        for column in required_columns:
+            if column not in reader.fieldnames:
+                missing_columns.append(column)
+
+        if missing_columns:
+            raise RequiredColumnException(excArgs=str(missing_columns))
 
 def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUEST, min_file_num=1, file_field="files"):
     total_file_size = 0
@@ -39,6 +55,26 @@ def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUES
         raise LimitReachedException(excArgs=('Total size of all files', str(settings.MAX_UPLOAD_SIZE_OF_A_FILE / 1024 / 1024), 'MB'))
 
+def validate_csv_feedback(files, max_file_num=1, min_file_num=1, file_field="csv files"):
+    total_file_size = 0
+    if len(files) < min_file_num:
+        raise RequiredFieldException(excArgs=file_field)
+    if len(files) > max_file_num:
+        raise LimitReachedException(excArgs=(f'Number of {file_field}', str(max_file_num), ''))
+
+    for f in files:
+        if not isinstance(f, TemporaryUploadedFile):
+            # print(f'[DEBUG]: {f.name}')
+            raise InvalidException(excArgs="files")
+        extension = f.name.split(".")[-1].lower() in ["csv"]
+        if not extension or "." not in f.name:
+            raise FileFormatInvalidException(excArgs=[".csv"])
+        if f.size > settings.MAX_UPLOAD_SIZE_OF_A_FILE:
+            raise LimitReachedException(excArgs=('A file', str(settings.MAX_UPLOAD_SIZE_OF_A_FILE / 1024 / 1024), 'MB'))
+        total_file_size += f.size
+    if total_file_size > settings.MAX_UPLOAD_FILE_SIZE_OF_A_REQUEST:
+        raise LimitReachedException(excArgs=('Total size of all files', str(settings.MAX_UPLOAD_SIZE_OF_A_FILE / 1024 / 1024), 'MB'))
+
 def get_file(file_path: str):
     try:
         return open(file_path, 'rb')
@@ -105,6 +141,21 @@ def save_json_file(file_name: str, rq: SubscriptionRequest, data: dict):
         json.dump(data, json_file)
     return file_path
 
+def save_feedback_file(file_name: str, rq: FeedbackRequest, uploaded_file: dict):
+    user_id = str(rq.subscription.user.id)
+    feedback_id = str(rq.id)
+
+    folder_path = os.path.join(settings.MEDIA_ROOT, 'users', user_id, "feedbacks", feedback_id, 'requests', feedback_id)
+    os.makedirs(folder_path, exist_ok = True)
+
+    file_path = os.path.join(folder_path, file_name)
+    with uploaded_file.open() as file:
+        # Read the contents of the file
+        file_contents = file.read().decode('utf-8')
+    with open(file_path, 'w', newline='') as csvfile:
+        csvfile.write(file_contents)
+    return file_path
+
 def delete_file_with_path(file_path: str) -> bool:
     try:
         os.remove(file_path)
@@ -166,6 +217,17 @@ def save_to_S3(file_name, rq, local_file_path):
         print(f"[ERROR]: {e}")
         raise ServiceUnavailableException()
 
+def save_feedback_to_S3(file_name, id, local_file_path):
+    try:
+        assert len(local_file_path.split("/")) >= 2, "file_path must have at least feedback_folder and feedback_id"
+        s3_key = os.path.join(local_file_path.split("/")[-2], local_file_path.split("/")[-1], file_name)
+        c_connector.upload_feedback_to_s3((local_file_path, s3_key, id))
+        c_connector.remove_local_file((local_file_path, id))
+        return s3_key
+    except Exception as e:
+        print(f"[ERROR]: {e}")
+        raise ServiceUnavailableException()
+
 def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path):
     try:
         file_path = os.path.join(folder_path, file_name)
diff --git a/cope2n-api/fwd_api/utils/process.py b/cope2n-api/fwd_api/utils/process.py
index 6b6e43b..b5d6c16 100644
--- a/cope2n-api/fwd_api/utils/process.py
+++ b/cope2n-api/fwd_api/utils/process.py
@@ -306,7 +306,6 @@ def token_value(token_type):
         return 5
     return 1  # Basic OCR
 
-
 def send_to_queue2(rq_id, sub_id, file_url, user_id, typez, metadata={}):
     try:
         if typez == ProcessType.ID_CARD.value:
@@ -324,7 +323,6 @@ def send_to_queue2(rq_id, sub_id, file_url, user_id, typez, metadata={}):
         print(e)
         raise BadGatewayException()
 
-
 def build_template_matching_data(template):
     temp_dict = {
@@ -362,6 +360,8 @@ def send_template_queue(rq_id, file_url, template: OcrTemplate, uid):
         print(e)
         raise BadGatewayException()
 
+def process_feedback(feedback_id, local_file_path):
+    c_connector.csv_feedback((local_file_path, feedback_id))
 
 def process_pdf_file(file_name: str, file_obj: TemporaryUploadedFile, request: SubscriptionRequest, user) -> list:
     doc: fitz.Document = fitz.open(stream=file_obj.file.read())
diff --git a/cope2n-api/static/drf_spectacular_sidecar/swagger-ui-dist/swagger-ui-bundle.js.LICENSE.txt b/cope2n-api/static/drf_spectacular_sidecar/swagger-ui-dist/swagger-ui-bundle.js.LICENSE.txt
index d645695..5471dc1 100644
--- a/cope2n-api/static/drf_spectacular_sidecar/swagger-ui-dist/swagger-ui-bundle.js.LICENSE.txt
+++ b/cope2n-api/static/drf_spectacular_sidecar/swagger-ui-dist/swagger-ui-bundle.js.LICENSE.txt
@@ -1,4 +1,5 @@
+
                                  Apache License
                            Version 2.0, January 2004
                         http://www.apache.org/licenses/
diff --git a/deploy_images.sh b/deploy_images.sh
index cb89a0e..11e5360 100755
--- a/deploy_images.sh
+++ b/deploy_images.sh
@@ -2,7 +2,7 @@
 set -e
 
 tag=$1
-is_prod=${$2:-False}
+# is_prod=${$2:-False}
 
 echo "[INFO] Tag received from Python: $tag"
 
diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml
index fa83e77..abcfcaf 100755
--- a/docker-compose-dev.yml
+++ b/docker-compose-dev.yml
@@ -81,12 +81,12 @@ services:
     depends_on:
       db-sbt:
         condition: service_started
-    # command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
-    #             python manage.py makemigrations &&
-    #             python manage.py migrate &&
-    #             python manage.py compilemessages &&
-    #             gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000"  # pre-makemigrations on prod
-    command: bash -c "tail -f > /dev/null"
+    command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
+                python manage.py makemigrations &&
+                python manage.py migrate &&
+                python manage.py compilemessages &&
+                gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000"  # pre-makemigrations on prod
+    # command: bash -c "tail -f > /dev/null"
 
   minio:
     image: minio/minio
@@ -97,6 +97,9 @@ services:
       - MINIO_SECRET_KEY=${S3_SECRET_KEY}
     volumes:
       - ./data/minio_data:/data
+    ports:
+      - 9884:9884
+      - 9885:9885
     networks:
       - ctel-sbt
     restart: always

From 9686791d59fed7bb67c6b08395b3891699e6f3e8 Mon Sep 17 00:00:00 2001
From: dx-tan
Date: Wed, 3 Jan 2024 11:07:58 +0700
Subject: [PATCH 05/27] Add: crawl database script

---
 scripts/crawl_database.py | 96 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 scripts/crawl_database.py

diff --git a/scripts/crawl_database.py b/scripts/crawl_database.py
new file mode 100644
index 0000000..7a06cf0
--- /dev/null
+++ b/scripts/crawl_database.py
@@ -0,0 +1,96 @@
+import csv
+import psycopg2
+import boto3
+import os
+from tqdm import tqdm
+
+OUTPUT_NAME = "issue_7"
+
+# Database connection details
+db_host = os.environ.get('DB_HOST', "")
+db_name = os.environ.get('DB_SCHEMA', "")
+db_user = os.environ.get('DB_USER', "")
+db_password = os.environ.get('DB_PASSWORD', "")
+
+# S3 bucket details
+s3_bucket_name = os.environ.get('S3_BUCKET_NAME', "")
+s3_folder_prefix = 'sbt_invoice'
+
+# S3 access credentials
+access_key = os.environ.get('S3_ACCESS_KEY', "")
+secret_key = os.environ.get('S3_SECRET_KEY', "")
+
+# Request IDs for filtering
+request_ids = [
+'SAPe39e970592394b27a17d4a64c39f7ed0',
+'SAP477a02a21faf41ecbd1a0bb21636e644',
+'SAP459d58a7dba84e7195f5ad8f46fc1530',
+'SAPa5aaa0e1ce8c4824a7b0ded2e550caec',
+'SAP492c063db44049c6b1e44f59c531f8d8',
+'SAP3d0bdd5cb4ce4291b0cb77d7de0a48e9',
+'SAP7e2c673e49c441a991661d1227342131',
+'SAPc26974bcac2649b28227981459a427aa',
+'SAP25b12dde6b854c70b512ac79059ac1d4',
+'SAP_20240102194138_bf4a3cc4e0304d0385126b6592c2632d',
+'SAP_20240102214550_8389ec1b84a249738eed9d2152bf0922',
+]
+
+# Connect to the PostgreSQL database
+conn = psycopg2.connect(
+    host=db_host,
+    database=db_name,
+    user=db_user,
+    password=db_password
+)
+
+# Create a cursor
+cursor = conn.cursor()
+
+# Generate the placeholder string for the IN statement
+placeholders = ','.join(['%s'] * len(request_ids))
+
+# Execute the SELECT query with the filter
+query = f"SELECT * FROM fwd_api_subscriptionrequest WHERE request_id IN ({placeholders})"
+cursor.execute(query, request_ids)
+
+# Fetch the filtered data
+data = cursor.fetchall()
+
+# Define the CSV file path
+csv_file_path = f'{OUTPUT_NAME}.csv'
+
+# Write the data to the CSV file
+with open(csv_file_path, 'w', newline='') as csv_file:
+    writer = csv.writer(csv_file)
+    writer.writerow([desc[0] for desc in cursor.description])  # Write column headers
+    writer.writerows(data)  # Write the filtered data rows
+
+# Close the cursor and database connection
+cursor.close()
+conn.close()
+
+# Download folders from S3
+s3_client = boto3.client(
+    's3',
+    aws_access_key_id=access_key,
+    aws_secret_access_key=secret_key
+)
+
+
+for request_id in tqdm(request_ids):
+    folder_key = f"{s3_folder_prefix}/{request_id}/"  # Assuming folder structure like: s3_bucket_name/s3_folder_prefix/request_id/
+    local_folder_path = f"{OUTPUT_NAME}/{request_id}/"  # Path to the local folder to save the downloaded files
+    os.makedirs(OUTPUT_NAME, exist_ok=True)
+    os.makedirs(local_folder_path, exist_ok=True)
+
+
+    # List objects in the S3 folder
+    response = s3_client.list_objects_v2(Bucket=s3_bucket_name, Prefix=folder_key)
+    objects = response.get('Contents', [])
+
+    for s3_object in objects:
+        object_key = s3_object['Key']
+        local_file_path = local_folder_path + object_key.split('/')[-1]  # Extracting the file name from the object key
+
+        # Download the S3 object to the local file
+        s3_client.download_file(s3_bucket_name, object_key, local_file_path)
\ No newline at end of file

From 3c2714a841f7f2d20558c9bfb767eed6da447f8f Mon Sep 17 00:00:00 2001
From: dx-tan
Date: Wed, 3 Jan 2024 14:35:22 +0700
Subject: [PATCH 06/27] Add: sample env

---
 .env_sample | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 .env_sample

diff --git a/.env_sample b/.env_sample
new file mode 100644
index 0000000..0edcda4
--- /dev/null
+++ b/.env_sample
@@ -0,0 +1,41 @@
+MEDIA_ROOT=/app/media
+# DATABASE django setup
+DB_ENGINE=django.db.backends.postgresql_psycopg2
+DB_SCHEMA=sbt_dev
+DB_USER=postgres
+DB_PASSWORD=extraordinary
+DB_HOST=db-sbt
+DB_PUBLIC_PORT=5432
+DB_INTERNAL_PORT=5432
+
+DEBUG=TRUE
+CORS_ALLOWED_ORIGINS=*
+CTEL_KEY=secret
+DB_INTERNAL_KEY=secret
+ALLOWED_HOSTS='*'
+BROKER_URL=amqp://test:test@rabbitmq-manulife-sbt:5672
+BASE_URL=http://be-ctel-sbt:9000
+BASE_UI_URL=http://fe-sbt:9801
+HOST_MEDIA_FOLDER=./media
+GID=1000
+UID=198
+SECRET_KEY=secret
+RABBITMQ_DEFAULT_USER=test
+RABBITMQ_DEFAULT_PASS=test
+BASE_PORT=9000
+S3_ENDPOINT=minio
+S3_ACCESS_KEY=sample-key
+S3_SECRET_KEY=sample-key
+S3_BUCKET_NAME=sample-key
+
+AUTH_TOKEN_LIFE_TIME=168
+IMAGE_TOKEN_LIFE_TIME=168
+INTERNAL_SDS_KEY=sample-key
+FI_USER_NAME=sbt
+FI_PASSWORD=abc
+
+# Front end env variables
+# VITE_PORT=80
+# VITE_PROXY=http://0.0.0.0
+# VITE_API_BASE_URL=http://0.0.0.0:8000
+# PORT=8002
\ No newline at end of file

From ac654753566c6ad593b07ae0fce52e36e06d3f98 Mon Sep 17 00:00:00 2001
From: dx-tan
Date: Fri, 5 Jan 2024 14:18:16 +0700
Subject: [PATCH 07/27] Add: API list request

---
 cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py  |   1 +
 .../celery_worker/mock_process_tasks_fi.py    |   1 +
 cope2n-ai-fi/modules/sdsvkvu                  |   2 +-
 cope2n-api/fwd_api/api/accuracy_view.py       | 183 ++++++++++++++++++
 cope2n-api/fwd_api/api/ctel_view.py           |   3 +-
 cope2n-api/fwd_api/api_router.py              |   3 +
 ...ionrequest_client_request_time_and_more.py |  28 +++
 .../fwd_api/models/SubscriptionRequest.py     |   3 +
 deploy_images.sh                              |  23 ++-
 docker-compose-dev.yml                        |   9 +-
 scripts/crawl_database.py                     |  80 ++++++--
 scripts/crawl_database_by_time.py             |  93 +++++++++
 12 files changed, 402 insertions(+), 27 deletions(-)
 create mode 100644
 cope2n-api/fwd_api/api/accuracy_view.py
 create mode 100644 cope2n-api/fwd_api/migrations/0164_subscriptionrequest_client_request_time_and_more.py
 create mode 100644 scripts/crawl_database_by_time.py

diff --git a/cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py b/cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py
index 49038d9..c03b041 100755
--- a/cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py
+++ b/cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py
@@ -33,6 +33,7 @@ def sbt_predict(image_url, engine) -> None:
     img = cv2.imdecode(arr, -1)
 
     save_dir = "./tmp_results"
+    os.makedirs(save_dir, exist_ok=True)
     # image_path = os.path.join(save_dir, f"{image_url}.jpg")
     os.makedirs(save_dir, exist_ok = True)
     tmp_image_path = os.path.join(save_dir, f"{uuid.uuid4()}.jpg")
diff --git a/cope2n-ai-fi/celery_worker/mock_process_tasks_fi.py b/cope2n-ai-fi/celery_worker/mock_process_tasks_fi.py
index ef16d45..b114330 100755
--- a/cope2n-ai-fi/celery_worker/mock_process_tasks_fi.py
+++ b/cope2n-ai-fi/celery_worker/mock_process_tasks_fi.py
@@ -69,6 +69,7 @@ def process_sbt_invoice(rq_id, list_url, metadata):
         c_connector.process_sbt_invoice_result((rq_id, hoadon, metadata))
         return {"rq_id": rq_id}
     except Exception as e:
+        print(f"[ERROR]: Failed to extract invoice: {e}")
         print(e)
         hoadon = {"status": 404, "content": {}}
         c_connector.process_sbt_invoice_result((rq_id, hoadon, metadata))
diff --git a/cope2n-ai-fi/modules/sdsvkvu b/cope2n-ai-fi/modules/sdsvkvu
index 11fb958..6907ea0 160000
--- a/cope2n-ai-fi/modules/sdsvkvu
+++ b/cope2n-ai-fi/modules/sdsvkvu
@@ -1 +1 @@
-Subproject commit 11fb9588df7e6cb03e7a761e3f728f11045bee09
+Subproject commit 6907ea0183b141e3b4f3c21758c9123f1e9b2a27
name='quality', + location=OpenApiParameter.QUERY, + description='One of [bad, good, all]', + type=OpenApiTypes.STR, + enum=['bad', 'good', 'all'], + ), + OpenApiParameter( + name='page', + location=OpenApiParameter.QUERY, + description='Page number', + type=OpenApiTypes.INT, + required=False + ), + OpenApiParameter( + name='page_size', + location=OpenApiParameter.QUERY, + description='Number of items per page', + type=OpenApiTypes.INT, + required=False + ), + ], + responses=None, tags=['Accuracy'] + ) + @action(detail=False, url_path="request_list", methods=["GET"]) + def get_subscription_requests(self, request): + if request.method == 'GET': + start_date_str = request.GET.get('start_date') + end_date_str = request.GET.get('end_date') + page_number = int(request.GET.get('page', 1)) + page_size = int(request.GET.get('page_size', 10)) + request_id = request.GET.get('request_id', None) + redemption_id = request.GET.get('redemption_id', None) + is_reviewed = request.GET.get('is_reviewed', None) + include_test = request.GET.get('include_test', False) + quality = request.GET.get('quality', None) + + try: + start_date = datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S') + end_date = datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S') + except ValueError: + return JsonResponse({'error': 'Invalid date format. Please use YYYY-mm-DDTHH:MM:SS.'}, status=400) + + base_query = Q(created_at__range=(start_date, end_date)) + if request_id: + base_query &= Q(request_id=request_id) + if redemption_id: + base_query &= Q(redemption_id=redemption_id) + base_query &= Q(is_test_request=False) + if isinstance(include_test, str): + include_test = True if include_test=="true" else False + if include_test: + # base_query = ~base_query + base_query.children = base_query.children[:-1] + + elif isinstance(include_test, bool): + if include_test: + base_query = ~base_query + if isinstance(is_reviewed, str): + if is_reviewed == "reviewed": + base_query &= Q(is_reviewed=True) + elif is_reviewed == "not reviewed": + base_query &= Q(is_reviewed=False) + elif is_reviewed == "all": + pass + if isinstance(quality, str): + if quality == "good": + base_query &= Q(is_bad_image_quality=False) + elif quality == "bad": + base_query &= Q(is_bad_image_quality=True) + elif quality == "all": + pass + + subscription_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at') + + paginator = Paginator(subscription_requests, page_size) + page = paginator.get_page(page_number) + + data = [] + for request in page: + imeis = [] + purchase_date = [] + retailer = "" + try: + if request.reviewed_result is not None: + imeis = request.reviewed_result.get("imei_number", []) + purchase_date = request.reviewed_result.get("purchase_date", []) + retailer = request.reviewed_result.get("retailername", "") + elif request.feedback_result is not None: + imeis = request.feedback_result.get("imei_number", []) + purchase_date = request.feedback_result.get("purchase_date", []) + retailer = request.feedback_result.get("retailername", "") + elif request.predict_result is not None: + if request.predict_result.get("status", 404) == 200: + imeis = request.predict_result.get("content", {}).get("document", [])[0].get("content", [])[3].get("value", []) + purchase_date = request.predict_result.get("content", {}).get("document", [])[0].get("content", [])[2].get("value", []) + retailer = request.predict_result.get("content", {}).get("document", [])[0].get("content", [])[0].get("value", []) + except Exception as e: + print(f"[ERROR]: {e}") +
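# NOTE (editor): the content[...] indices above assume the SBT prediction layout, i.e. predict_result["content"]["document"][0]["content"] with index 0 = retailername, 2 = purchase_date, 3 = imei_number; responses with any other shape raise and are logged below. +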
print(f"[ERROR]: {request}") + data.append({ + 'RequestID': request.request_id, + 'RedemptionID': request.redemption_id, + 'IMEIs': imeis, + 'Purchase Date': purchase_date, + 'Retailer': retailer, + 'Client Request Time (ms)': request.client_request_time, + 'Server Processing Time (ms)': request.preprocessing_time + request.ai_inference_time, + 'Is Reviewed': request.is_reviewed, + 'Is Bad Quality': request.is_bad_image_quality, + 'created_at': request.created_at.isoformat() + }) + + response = { + 'subscription_requests': data, + 'page': { + 'number': page.number, + 'total_pages': page.paginator.num_pages, + 'count': page.paginator.count, + } + } + + return JsonResponse(response) + + return JsonResponse({'error': 'Invalid request method.'}, status=405) \ No newline at end of file diff --git a/cope2n-api/fwd_api/api/ctel_view.py b/cope2n-api/fwd_api/api/ctel_view.py index 57155e2..6b77471 100755 --- a/cope2n-api/fwd_api/api/ctel_view.py +++ b/cope2n-api/fwd_api/api/ctel_view.py @@ -347,8 +347,7 @@ class CtelViewSet(viewsets.ViewSet): S3_path = FileUtils.save_to_S3(file_name, subcription_request, file_path) return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id}) - - + @extend_schema(request=None, responses=None, tags=['Data']) @extend_schema(request=None, responses=None, tags=['templates'], methods=['GET']) @action(detail=False, url_path=r"media/(?P\w+)/(?P\w+)", methods=["GET"]) diff --git a/cope2n-api/fwd_api/api_router.py b/cope2n-api/fwd_api/api_router.py index 6743957..9a466dc 100755 --- a/cope2n-api/fwd_api/api_router.py +++ b/cope2n-api/fwd_api/api_router.py @@ -2,6 +2,8 @@ from django.conf import settings from rest_framework.routers import DefaultRouter, SimpleRouter from fwd_api.api.ctel_view import CtelViewSet +from fwd_api.api.accuracy_view import AccuracyViewSet + from fwd_api.api.ctel_user_view import CtelUserViewSet from fwd_api.api.ctel_template_view import CtelTemplateViewSet @@ -13,6 +15,7 @@ else: router.register("ctel", CtelViewSet, basename="CtelAPI") router.register("ctel", CtelUserViewSet, basename="CtelUserAPI") +router.register("ctel", AccuracyViewSet, basename="AccuracyAPI") app_name = "api" urlpatterns = router.urls diff --git a/cope2n-api/fwd_api/migrations/0164_subscriptionrequest_client_request_time_and_more.py b/cope2n-api/fwd_api/migrations/0164_subscriptionrequest_client_request_time_and_more.py new file mode 100644 index 0000000..b86ff1e --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0164_subscriptionrequest_client_request_time_and_more.py @@ -0,0 +1,28 @@ +# Generated by Django 4.1.3 on 2024-01-04 08:24 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0163_subscriptionrequest_ai_inference_profile'), + ] + + operations = [ + migrations.AddField( + model_name='subscriptionrequest', + name='client_request_time', + field=models.FloatField(default=-1), + ), + migrations.AddField( + model_name='subscriptionrequest', + name='redemption_id', + field=models.CharField(max_length=200, null=True), + ), + migrations.AddField( + model_name='subscriptionrequest', + name='reviewed_result', + field=models.JSONField(null=True), + ), + ] diff --git a/cope2n-api/fwd_api/models/SubscriptionRequest.py b/cope2n-api/fwd_api/models/SubscriptionRequest.py index a852104..6018274 100755 --- a/cope2n-api/fwd_api/models/SubscriptionRequest.py +++ b/cope2n-api/fwd_api/models/SubscriptionRequest.py @@ -10,10 +10,12 @@ class SubscriptionRequest(models.Model): pages_left: int = 
models.IntegerField(default=1) doc_type: str = models.CharField(max_length=100) request_id = models.CharField(max_length=200) # Change to request_id + redemption_id = models.CharField(max_length=200, null=True) # Change to request_id process_type = models.CharField(max_length=200) # driver/id/invoice provider_code = models.CharField(max_length=200, default="Guest") # Request source FWD/CTel predict_result = models.JSONField(null=True) feedback_result = models.JSONField(null=True) + reviewed_result = models.JSONField(null=True) status = models.IntegerField() # 1: Processing(Pending) 2: PredictCompleted 3: ReturnCompleted subscription = models.ForeignKey(Subscription, on_delete=models.CASCADE) created_at = models.DateTimeField(default=timezone.now, db_index=True) @@ -23,6 +25,7 @@ class SubscriptionRequest(models.Model): ai_inference_profile = models.JSONField(null=True) preprocessing_time = models.FloatField(default=-1) + client_request_time = models.FloatField(default=-1) ai_inference_start_time = models.FloatField(default=0) ai_inference_time = models.FloatField(default=0) cpu_percent = models.FloatField(default=-1) diff --git a/deploy_images.sh b/deploy_images.sh index 75d720d..cb89a0e 100755 --- a/deploy_images.sh +++ b/deploy_images.sh @@ -2,26 +2,33 @@ set -e tag=$1 +is_prod=${$2:-False} echo "[INFO] Tag received from Python: $tag" -echo "[INFO] Updating everything the remote..." -git submodule update --recursive --remote +# echo "[INFO] Updating everything the remote..." +# git submodule update --recursive --remote echo "[INFO] Pushing AI image with tag: $tag..." docker compose -f docker-compose-dev.yml build cope2n-fi-sbt -docker tag sidp/cope2n-ai-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-ai-fi-sbt:${tag} -docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-ai-fi-sbt:${tag} +docker tag sidp/cope2n-ai-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-ai-fi-sbt:${tag} +docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-ai-fi-sbt:${tag} +# docker tag sidp/cope2n-ai-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-ai-fi-sbt:production +# docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-ai-fi-sbt:production echo "[INFO] Pushing BE image with tag: $tag..." docker compose -f docker-compose-dev.yml build be-ctel-sbt -docker tag sidp/cope2n-be-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-be-fi-sbt:${tag} -docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-be-fi-sbt:${tag} +docker tag sidp/cope2n-be-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-be-fi-sbt:${tag} +# docker tag sidp/cope2n-be-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-be-fi-sbt:production +docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-be-fi-sbt:${tag} +# docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-be-fi-sbt:production echo "[INFO] Pushing FE image with tag: $tag..." 
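# NOTE (editor): the pushes in this script assume the Docker client is already authenticated against the private ECR registry, e.g. (AWS CLI v2): aws ecr get-login-password --region ap-southeast-1 | docker login --username AWS --password-stdin 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com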
docker compose -f docker-compose-dev.yml build fe-sbt -docker tag sidp/cope2n-fe-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-fe-fi-sbt:${tag} -docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-fe-fi-sbt:${tag} +docker tag sidp/cope2n-fe-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-fe-fi-sbt:${tag} +# docker tag sidp/cope2n-fe-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-fe-fi-sbt:production +docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-fe-fi-sbt:${tag} +# docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-fe-fi-sbt:production cp ./docker-compose-prod.yml ./docker-compose_${tag}.yml sed -i "s/{{tag}}/$tag/g" ./docker-compose_${tag}.yml diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 5244ab2..5d58c16 100755 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -75,14 +75,14 @@ services: networks: - ctel-sbt volumes: - # - ${HOST_MEDIA_FOLDER}:${MEDIA_ROOT} + # - BE_media:${MEDIA_ROOT} - BE_static:/app/static - ./cope2n-api:/app working_dir: /app depends_on: db-sbt: condition: service_started - command: sh -c "chmod -R 777 /app/static; sleep 5; python manage.py collectstatic --no-input && + command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input && python manage.py makemigrations && python manage.py migrate && python manage.py compilemessages && @@ -165,7 +165,7 @@ services: rabbitmq-sbt: condition: service_started volumes: - # - ${HOST_MEDIA_FOLDER}:${MEDIA_ROOT} + # - BE_media:${MEDIA_ROOT} - ./cope2n-api:/app working_dir: /app @@ -223,4 +223,5 @@ services: volumes: db_data: - BE_static: \ No newline at end of file + BE_static: + BE_media: \ No newline at end of file diff --git a/scripts/crawl_database.py b/scripts/crawl_database.py index 7a06cf0..7cfacd5 100644 --- a/scripts/crawl_database.py +++ b/scripts/crawl_database.py @@ -4,7 +4,11 @@ import boto3 import os from tqdm import tqdm -OUTPUT_NAME = "issue_7" +from dotenv import load_dotenv + +load_dotenv("../.env_prod") + +OUTPUT_NAME = "5Jan" # Database connection details db_host = os.environ.get('DB_HOST', "") @@ -22,17 +26,69 @@ secret_key = os.environ.get('S3_SECRET_KEY', "") # Request IDs for filtering request_ids = [ -'SAPe39e970592394b27a17d4a64c39f7ed0', -'SAP477a02a21faf41ecbd1a0bb21636e644', -'SAP459d58a7dba84e7195f5ad8f46fc1530', -'SAPa5aaa0e1ce8c4824a7b0ded2e550caec', -'SAP492c063db44049c6b1e44f59c531f8d8', -'SAP3d0bdd5cb4ce4291b0cb77d7de0a48e9', -'SAP7e2c673e49c441a991661d1227342131', -'SAPc26974bcac2649b28227981459a427aa', -'SAP25b12dde6b854c70b512ac79059ac1d4', -'SAP_20240102194138_bf4a3cc4e0304d0385126b6592c2632d', -'SAP_20240102214550_8389ec1b84a249738eed9d2152bf0922', +'SAP_20240104082259_85c7f4dd262946d183dbec826fc6709e', +'SAP_20240104082709_c05319c56fd3422dbf133aee33fc3e10', +'SAP_20240104091512_23ae1a81f1314be0a27ebeae0e8fa0d7', +'SAP_20240104091512_23ae1a81f1314be0a27ebeae0e8fa0d7', +'SAP_20240104091816_025c90b9789246ed811772003622fa0d', +'SAP_20240104092541_5c71e535f07c4cc8803b45336ec70f77', +'SAP_20240104100259_5a667d33cb914e7ba5a4447b9e17d649', +'SAP_20240104101145_a7010bac159f47bc95d5866e6c5f5bdf', +'SAP_20240104105702_95252229252b4e238add117919ce882a', +'SAP_20240104112108_34b2cca84a42473ca77bc316e787fe2e', +'SAP_20240104114038_dd57ecf7982c4a5eaf1409f5ef050fab', +'SAP_20240104115942_1b77f411791940a4a85c838c2e9931ad', +'SAP_20240104120746_d63319f4cde343d894f9b89706756a9d', +'SAP_20240104123607_48d25c04fec6411dbf013c6a19054e77', 
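+# NOTE (editor): several request IDs in this list appear more than once (e.g. SAP_20240104151243_61775c88685d434d98bb9fc7a9889b8e occurs three times); duplicates are queried and downloaded again, so consider deduplicating first, e.g. request_ids = list(dict.fromkeys(request_ids)).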
+'SAP_20240104130957_ece21bad331b4f2cad0887693331aa3a', +'SAP_20240104131228_edebee4000ae4bd382feaea5d6c82031', +'SAP_20240104132641_97909efd013f45e89d83d36a5ea35c52', +'SAP_20240104133527_ad55f6ee667643ba8ae65e9ef1c32418', +'SAP_20240104134014_2d2cdbc1b06a44868ce1b32cdb53864f', +'SAP_20240104134425_9b37555ef8094153838e6048f7c63c9b', +'SAP_20240104134457_55a1cf1e371146d995c8849cc0ba7c7b', +'SAP_20240104134609_3f7d308e467d43dbb59a7bcc02e3a7d2', +'SAP_20240104134709_c708daf83f7e4aa69ab9696afe1a9081', +'SAP_20240104135007_44b7a30c5e9c41a0b8065ac4e7000223', +'SAP_20240104141547_7203ddb915274e99a08ae6e54ec49cbd', +'SAP_20240104141559_62fd19a6179248ecb4ff15b33338b294', +'SAP_20240104142352_68699cbe140f4264b858981a3ac67e40', +'SAP_20240104143937_801931cc1f344a4ca8384dfe13d1accc', +'SAP_20240104144730_3180a8919e604e26a188ce051465c392', +'SAP_20240104144933_3380f64019634769befed49e9a671bc6', +'SAP_20240104151239_76ae2f1d02444f7fabbc104eb77fe45f', +'SAP_20240104151243_61775c88685d434d98bb9fc7a9889b8e', +'SAP_20240104151243_61775c88685d434d98bb9fc7a9889b8e', +'SAP_20240104151243_61775c88685d434d98bb9fc7a9889b8e', +'SAP_20240104151638_a08a61448a58459a8f2209f64e54c213', +'SAP_20240104152030_479259e84c5b449499df2cb1023e91ac', +'SAP_20240104160108_a03634c80583454494b77efcdecbcc71', +'SAP_20240104160108_a03634c80583454494b77efcdecbcc71', +'SAP_20240104160311_e7cb02a11bbd4ea1906b3758e97f33ab', +'SAP_20240104161305_89c5518563224ab89345439dffd504a5', +'SAP_20240104161305_89c5518563224ab89345439dffd504a5', +'SAP_20240104164022_0b94af24db9d4ebe9af2086a4bd3cd7e', +'SAP_20240104170837_58165ec9f88d4e4aa3095ba3dda201d7', +'SAP_20240104171740_10279cfebbf344f184bbb429cb9a15ad', +'SAP_20240104175202_247892a4dc7f40f28eafac9c2ad85971', +'SAP_20240104180517_8ce7a1981dc743e08e09284fd904d536', +'SAP_20240104182034_406bac0ab0684727b9efb1bb9b422026', +'SAP_20240104182426_92a48bb4b85a4c3abb48e0d7cf727777', +'SAP_20240104183506_aa1fa7d6774a4509a142a6f4a7b5af29', +'SAP_20240104185716_f9d464e42c314370910913b37133e6c3', +'SAP_20240104190220_573244d03bb8408dbca422ff60eb527a', +'SAP_20240104191236_deedcc588b7b4928a950f7dc2ce4230c', +'SAP_20240104191236_deedcc588b7b4928a950f7dc2ce4230c', +'SAP_20240104192614_990bf10c38e144a7bf489548d356720e', +'SAP_20240104192614_990bf10c38e144a7bf489548d356720e', +'SAP_20240104212143_f8c1b4a6e6e443fcb5e882c7a5b917f3', +'SAP_20240104212924_ee1998a60d6848af9576292ac383037f', +'SAP_20240104214418_f8e1abf808c8499097ecddf014d401c7', +'SAP_20240104214619_8d27c05a9ce74b738b20195cb816bfbf', +'SAP_20240104215037_477863cdc0aa4d5fa1f05bbb0ae673ed', +'SAP_20240104221543_37605982df624324ad2594e268054361', +'SAP_20240104225026_acacd06ea6de4a738bc47683dc53f378', +'SAP_20240104235743_b48aa3e744ed428795171d84066adefe', ] # Connect to the PostgreSQL database diff --git a/scripts/crawl_database_by_time.py b/scripts/crawl_database_by_time.py new file mode 100644 index 0000000..17f6570 --- /dev/null +++ b/scripts/crawl_database_by_time.py @@ -0,0 +1,93 @@ +import csv +import psycopg2 +import boto3 +import os +from tqdm import tqdm +from datetime import datetime, timedelta +from pytz import timezone + +from dotenv import load_dotenv + +load_dotenv("../.env_prod") + +OUTPUT_NAME = "missing_records" +START_DATE = datetime(2023, 12, 28, tzinfo=timezone('Asia/Ho_Chi_Minh')) +END_DATE = datetime(2024, 1, 3, tzinfo=timezone('Asia/Ho_Chi_Minh')) + +# Database connection details +db_host = os.environ.get('DB_HOST', "") +db_name = os.environ.get('DB_SCHEMA', "") +db_user = os.environ.get('DB_USER', "") +db_password = 
os.environ.get('DB_PASSWORD', "") + +# S3 bucket details +s3_bucket_name = os.environ.get('S3_BUCKET_NAME', "") +s3_folder_prefix = 'sbt_invoice' + +# S3 access credentials +access_key = os.environ.get('S3_ACCESS_KEY', "") +secret_key = os.environ.get('S3_SECRET_KEY', "") + +# Request IDs for filtering + +# Connect to the PostgreSQL database +conn = psycopg2.connect( + host=db_host, + database=db_name, + user=db_user, + password=db_password +) + +# Create a cursor +cursor = conn.cursor() + + +# Execute the SELECT query with the filter +query = "SELECT * FROM fwd_api_subscriptionrequest WHERE created_at >= %s AND created_at <= %s" +cursor.execute(query, (START_DATE, END_DATE)) + +# Fetch the filtered data +data = cursor.fetchall() + +# Define the CSV file path +csv_file_path = f'{OUTPUT_NAME}.csv' + +# Write the data to the CSV file +with open(csv_file_path, 'w', newline='') as csv_file: + writer = csv.writer(csv_file) + writer.writerow([desc[0] for desc in cursor.description]) # Write column headers + writer.writerows(data) # Write the filtered data rows + +# Close the cursor and database connection +cursor.close() +conn.close() + +# Download folders from S3 +s3_client = boto3.client( + 's3', + aws_access_key_id=access_key, + aws_secret_access_key=secret_key +) + +request_ids = [] +for rq in data: + rq_id = rq[3] + request_ids.append(rq_id) + +for request_id in tqdm(request_ids): + folder_key = f"{s3_folder_prefix}/{request_id}/" # Assuming folder structure like: s3_bucket_name/s3_folder_prefix/request_id/ + local_folder_path = f"{OUTPUT_NAME}/{request_id}/" # Path to the local folder to save the downloaded files + os.makedirs(OUTPUT_NAME, exist_ok=True) + os.makedirs(local_folder_path, exist_ok=True) + + + # List objects in the S3 folder + response = s3_client.list_objects_v2(Bucket=s3_bucket_name, Prefix=folder_key) + objects = response.get('Contents', []) + + for s3_object in objects: + object_key = s3_object['Key'] + local_file_path = local_folder_path + object_key.split('/')[-1] # Extracting the file name from the object key + + # Download the S3 object to the local file + s3_client.download_file(s3_bucket_name, object_key, local_file_path) \ No newline at end of file From cecee15cd67263d14083f6ebe0aacaa7a33cf323 Mon Sep 17 00:00:00 2001 From: dx-tan Date: Tue, 9 Jan 2024 19:41:17 +0700 Subject: [PATCH 08/27] Feature: feedback csv API --- cope2n-api/fwd_api/api/ctel_view.py | 53 ++++++++++++- .../fwd_api/celery_worker/client_connector.py | 6 ++ .../fwd_api/celery_worker/internal_task.py | 75 ++++++++++++++++++- cope2n-api/fwd_api/celery_worker/worker.py | 7 +- cope2n-api/fwd_api/exception/exceptions.py | 5 ++ .../migrations/0165_feedbackrequest.py | 29 +++++++ cope2n-api/fwd_api/models/FeedbackRequest.py | 14 ++++ cope2n-api/fwd_api/models/__init__.py | 2 + cope2n-api/fwd_api/utils/file.py | 66 +++++++++++++++- cope2n-api/fwd_api/utils/process.py | 4 +- .../swagger-ui-bundle.js.LICENSE.txt | 1 + deploy_images.sh | 2 +- docker-compose-dev.yml | 3 + 13 files changed, 256 insertions(+), 11 deletions(-) create mode 100644 cope2n-api/fwd_api/migrations/0165_feedbackrequest.py create mode 100644 cope2n-api/fwd_api/models/FeedbackRequest.py diff --git a/cope2n-api/fwd_api/api/ctel_view.py b/cope2n-api/fwd_api/api/ctel_view.py index 6b77471..6d4c632 100755 --- a/cope2n-api/fwd_api/api/ctel_view.py +++ b/cope2n-api/fwd_api/api/ctel_view.py @@ -20,7 +20,7 @@ from ..annotation.api import throw_on_failure from ..constant.common import ProcessType, REQUEST_ID, FOLDER_TYPE, EntityStatus, 
pdf_extensions, allowed_file_extensions, image_extensions, standard_ocr_list from ..exception.exceptions import RequiredFieldException, InvalidException, NotFoundException, \ PermissionDeniedException, LockedEntityException, FileContentInvalidException, ServiceTimeoutException -from ..models import SubscriptionRequest, SubscriptionRequestFile, OcrTemplate +from ..models import SubscriptionRequest, SubscriptionRequestFile, OcrTemplate, FeedbackRequest from ..response.ReportSerializer import ReportSerializer from ..utils import file as FileUtils from ..utils import process as ProcessUtil @@ -348,6 +348,57 @@ class CtelViewSet(viewsets.ViewSet): return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id}) + @extend_schema(request={ + 'multipart/form-data': { + 'type': 'object', + 'properties': { + 'files': { + 'type': 'array', + 'items': { + 'type': 'string', + 'format': 'binary' + } + }, + }, + 'required': ['files'] + } + }, responses=None, tags=['OCR']) + @action(detail=False, url_path="images/feedback_file", methods=["POST"]) + def feedback_file(self, request): + files = request.data.getlist('files') + FileUtils.validate_csv_feedback(files) + + user_info = ProcessUtil.get_user(request) + user = user_info.user + sub = user_info.current_sub + + feedback_id = "FB_" + datetime.now().strftime("%Y%m%d%H%M%S") + "_" + uuid.uuid4().hex + + origin_name = "" + file_names = "" + for i, file in enumerate(files): + origin_name += file.name + "," + file_names += f"{feedback_id}_{i}.csv" + origin_name = origin_name[:-1] + + new_request: FeedbackRequest = FeedbackRequest(feedback_id=feedback_id, + origin_name=origin_name, + file_name=file_names, + subscription=sub) + new_request.save() + + for i, file in enumerate(files): + file_name = f"{feedback_id}_{i}.csv" + # Save to local + file_path = FileUtils.save_feedback_file(file_name, new_request, file) + FileUtils.validate_feedback_file(file_path) + # Upload to S3 + S3_path = FileUtils.save_feedback_to_S3(file_name, feedback_id, file_path) + # Process csv file in the background + ProcessUtil.process_feedback(feedback_id, file_path) + + return JsonResponse(status=status.HTTP_200_OK, data={"feedback_id": feedback_id}) + @extend_schema(request=None, responses=None, tags=['Data']) @extend_schema(request=None, responses=None, tags=['templates'], methods=['GET']) @action(detail=False, url_path=r"media/(?P\w+)/(?P\w+)", methods=["GET"]) diff --git a/cope2n-api/fwd_api/celery_worker/client_connector.py b/cope2n-api/fwd_api/celery_worker/client_connector.py index 3ff8f88..16c7dd5 100755 --- a/cope2n-api/fwd_api/celery_worker/client_connector.py +++ b/cope2n-api/fwd_api/celery_worker/client_connector.py @@ -30,8 +30,10 @@ class CeleryConnector: 'process_sbt_invoice': {'queue': "invoice_sbt"}, 'do_pdf': {'queue': "do_pdf"}, 'upload_file_to_s3': {'queue': "upload_file_to_s3"}, + 'upload_feedback_to_s3': {'queue': "upload_feedback_to_s3"}, 'upload_obj_to_s3': {'queue': "upload_obj_to_s3"}, 'remove_local_file': {'queue': "remove_local_file"}, + 'csv_feedback': {'queue': "csv_feedback"}, } app = Celery( @@ -39,10 +41,14 @@ class CeleryConnector: broker=settings.BROKER_URL, broker_transport_options={'confirm_publish': False}, ) + def csv_feedback(self, args): + return self.send_task('csv_feedback', args) def do_pdf(self, args): return self.send_task('do_pdf', args) def upload_file_to_s3(self, args): return self.send_task('upload_file_to_s3', args) + def upload_feedback_to_s3(self, args): + return self.send_task('upload_feedback_to_s3', args) def 
upload_obj_to_s3(self, args): return self.send_task('upload_obj_to_s3', args) def remove_local_file(self, args): diff --git a/cope2n-api/fwd_api/celery_worker/internal_task.py b/cope2n-api/fwd_api/celery_worker/internal_task.py index 04d875f..5ea175b 100755 --- a/cope2n-api/fwd_api/celery_worker/internal_task.py +++ b/cope2n-api/fwd_api/celery_worker/internal_task.py @@ -9,12 +9,13 @@ from fwd_api.models import SubscriptionRequest, UserProfile from fwd_api.celery_worker.worker import app from ..constant.common import FolderFileType, image_extensions from ..exception.exceptions import FileContentInvalidException -from fwd_api.models import SubscriptionRequestFile +from fwd_api.models import SubscriptionRequestFile, FeedbackRequest from ..utils import file as FileUtils from ..utils import process as ProcessUtil from ..utils import s3 as S3Util from fwd_api.constant.common import ProcessType - +import csv +import json from celery.utils.log import get_task_logger from fwd import settings @@ -59,6 +60,61 @@ def process_image_file(file_name: str, file_path, request, user) -> list: 'request_file_id': new_request_file.code }] +@app.task(name="csv_feedback") +def process_csv_feedback(csv_file_path, feedback_id): + # load file to RAM + status = {} + with open(csv_file_path, 'r') as file: + reader = csv.DictReader(file) + # for rq in rqs + for row in reader: + # get request_subcription + request_id = row.get('requestId') + sub_rqs = SubscriptionRequest.objects.filter(request_id=request_id) + if len(sub_rqs) != 1: + status[request_id] = f"Found {len(sub_rqs)} records of request id {request_id}" + continue + else: + sub_rq = sub_rqs[0] + fb = {} + # update user result (with validate) + redemption_id = row.get('redemptionNumber') + imei1 = row.get('imeiNumber') + imei2 = row.get('imeiNumber2') + purchase_date = row.get('Purchase Date') + retailer = row.get('retailer') + sold_to_party = row.get('Sold to party') + server_time = float(row.get('timetakenmilli')) + fb['request_id'] = request_id + fb['retailername'] = retailer + fb['sold_to_party'] = sold_to_party + fb['purchase_date'] = purchase_date + fb['imei_number'] = [imei1, imei2] + sub_rq.feedback_result = fb + sub_rq.client_request_time = server_time + # update redemption_id if exist + if len(redemption_id) > 0: + sub_rq.redemption_id = redemption_id + sub_rq.save() + # update log into database + feedback_rq = FeedbackRequest.objects.filter(feedback_id=feedback_id).first() + feedback_rq.error_status = status + # save log to local + directory_name = os.path.dirname(csv_file_path) + file_path = csv_file_path.replace(".csv", "_error.json") + with open(file_path, "w") as outfile: + json.dump(status, outfile) + # save to s3 + s3_key = os.path.join("feedback", directory_name.split("/")[-1], file_path.split("/")[-1]) + if s3_client.s3_client is not None: + try: + # check if saved then delete local + s3_client.upload_file(file_path, s3_key) + os.remove(file_path) + except Exception as e: + logger.error(f"Unable to set S3: {e}") + print(f"Unable to set S3: {e}") + feedback_rq.save() @app.task(name='do_pdf') def process_pdf(rq_id, sub_id, p_type, user_id, files): @@ -136,6 +192,21 @@ def upload_file_to_s3(local_file_path, s3_key, request_id): else: logger.info(f"S3 is not available, skipping,...") +@app.task(name='upload_feedback_to_s3') +def upload_feedback_to_s3(local_file_path, s3_key, feedback_id): + if s3_client.s3_client is not None: + try: + s3_client.upload_file(local_file_path, s3_key) + feed_request = 
FeedbackRequest.objects.filter(feedback_id=feedback_id)[0] + feed_request.S3_uploaded = True + feed_request.save() + except Exception as e: + logger.error(f"Unable to set S3: {e}") + print(f"Unable to set S3: {e}") + return + else: + logger.info(f"S3 is not available, skipping,...") + @app.task(name='remove_local_file') def remove_local_file(local_file_path, request_id): print(f"[INFO] Removing local file: {local_file_path}, ...") diff --git a/cope2n-api/fwd_api/celery_worker/worker.py b/cope2n-api/fwd_api/celery_worker/worker.py index ee497cd..a47b5c9 100755 --- a/cope2n-api/fwd_api/celery_worker/worker.py +++ b/cope2n-api/fwd_api/celery_worker/worker.py @@ -38,7 +38,7 @@ app.conf.update({ Queue('upload_file_to_s3'), Queue('upload_obj_to_s3'), Queue('remove_local_file'), - + Queue('csv_feedback'), ], 'task_routes': { @@ -52,9 +52,10 @@ app.conf.update({ 'process_sbt_invoice': {'queue': "invoice_sbt"}, 'do_pdf': {'queue': "do_pdf"}, 'upload_file_to_s3': {'queue': "upload_file_to_s3"}, - 'upload_obj_to_s3': {'queue': "upload_obj_to_s3"}, - 'upload_file_to_s3': {'queue': "upload_file_to_s3"}, + 'upload_feedback_to_s3': {'queue': "upload_feedback_to_s3"}, + 'upload_obj_to_s3': {'queue': "upload_obj_to_s3"}, 'remove_local_file': {'queue': "remove_local_file"}, + 'csv_feedback': {'queue': "csv_feedback"}, } }) diff --git a/cope2n-api/fwd_api/exception/exceptions.py b/cope2n-api/fwd_api/exception/exceptions.py index 3292a5e..d584fb2 100755 --- a/cope2n-api/fwd_api/exception/exceptions.py +++ b/cope2n-api/fwd_api/exception/exceptions.py @@ -67,6 +67,11 @@ class RequiredFieldException(GeneralException): default_detail = 'Field required' detail_with_arg = '{} param is required' +class RequiredColumnException(GeneralException): + status_code = status.HTTP_400_BAD_REQUEST + default_code = 4003 + default_detail = 'Columns required' + detail_with_arg = '{} columns are required' class DuplicateEntityException(GeneralException): status_code = status.HTTP_400_BAD_REQUEST diff --git a/cope2n-api/fwd_api/migrations/0165_feedbackrequest.py b/cope2n-api/fwd_api/migrations/0165_feedbackrequest.py new file mode 100644 index 0000000..8d18c02 --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0165_feedbackrequest.py @@ -0,0 +1,29 @@ +# Generated by Django 4.1.3 on 2024-01-09 10:08 + +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0164_subscriptionrequest_client_request_time_and_more'), + ] + + operations = [ + migrations.CreateModel( + name='FeedbackRequest', + fields=[ + ('id', models.AutoField(primary_key=True, serialize=False)), + ('feedback_id', models.CharField(max_length=200)), + ('file_name', models.CharField(max_length=200)), + ('origin_name', models.CharField(max_length=200)), + ('error_status', models.JSONField(null=True)), + ('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('S3_uploaded', models.BooleanField(default=False)), + ('subscription', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='fwd_api.subscription')), + ], + ), + ] diff --git a/cope2n-api/fwd_api/models/FeedbackRequest.py b/cope2n-api/fwd_api/models/FeedbackRequest.py new file mode 100644 index 0000000..b4448fa --- /dev/null +++ b/cope2n-api/fwd_api/models/FeedbackRequest.py @@ -0,0 +1,14 @@ +from django.db import models +from django.utils import timezone +from fwd_api.models.Subscription
import Subscription + +class FeedbackRequest(models.Model): + id = models.AutoField(primary_key=True) + feedback_id = models.CharField(max_length=200) # "FB_<timestamp>_<hex>", assigned by the feedback_file endpoint + file_name = models.CharField(max_length=200) # local CSV file name(s), "<feedback_id>_<i>.csv" + origin_name = models.CharField(max_length=200) # original uploaded file name(s), comma-separated + error_status = models.JSONField(null=True) + created_at = models.DateTimeField(default=timezone.now, db_index=True) + updated_at = models.DateTimeField(auto_now=True) + subscription = models.ForeignKey(Subscription, on_delete=models.CASCADE) + S3_uploaded = models.BooleanField(default=False) \ No newline at end of file diff --git a/cope2n-api/fwd_api/models/__init__.py b/cope2n-api/fwd_api/models/__init__.py index 0308fc6..3cfcd22 100755 --- a/cope2n-api/fwd_api/models/__init__.py +++ b/cope2n-api/fwd_api/models/__init__.py @@ -5,3 +5,5 @@ from .OcrTemplate import OcrTemplate from .OcrTemplateBox import OcrTemplateBox from .PricingPlan import PricingPlan from .Subscription import Subscription +from .FeedbackRequest import FeedbackRequest + diff --git a/cope2n-api/fwd_api/utils/file.py b/cope2n-api/fwd_api/utils/file.py index f3af27e..0fa22ed 100644 --- a/cope2n-api/fwd_api/utils/file.py +++ b/cope2n-api/fwd_api/utils/file.py @@ -10,13 +10,29 @@ from django.core.files.uploadedfile import TemporaryUploadedFile from fwd import settings from fwd_api.constant.common import allowed_file_extensions from fwd_api.exception.exceptions import GeneralException, RequiredFieldException, InvalidException, \ - ServiceUnavailableException, FileFormatInvalidException, LimitReachedException, InvalidDecompressedSizeException -from fwd_api.models import SubscriptionRequest, OcrTemplate + ServiceUnavailableException, FileFormatInvalidException, LimitReachedException, InvalidDecompressedSizeException, RequiredColumnException +from fwd_api.models import SubscriptionRequest, OcrTemplate, FeedbackRequest from fwd_api.utils import process as ProcessUtil from fwd_api.utils.crypto import image_authenticator from fwd_api.utils.image import resize from ..celery_worker.client_connector import c_connector import imagesize +import csv + +def validate_feedback_file(csv_file_path): + required_columns = ['redemptionNumber', 'requestId', 'imeiNumber', 'imeiNumber2', 'Purchase Date', 'retailer', 'Sold to party', 'timetakenmilli'] + missing_columns = [] + + with open(csv_file_path, 'r') as file: + reader = csv.DictReader(file) + + # Check if all required columns are present + for column in required_columns: + if column not in reader.fieldnames: + missing_columns.append(column) + + if missing_columns: + raise RequiredColumnException(excArgs=str(missing_columns)) def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUEST, min_file_num=1, file_field="files"): total_file_size = 0 @@ -39,6 +55,26 @@ def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUES raise LimitReachedException(excArgs=('Total size of all files', str(settings.MAX_UPLOAD_SIZE_OF_A_FILE / 1024 / 1024), 'MB')) +def validate_csv_feedback(files, max_file_num=1, min_file_num=1, file_field="csv files"): + total_file_size = 0 + if len(files) < min_file_num: + raise RequiredFieldException(excArgs=file_field) + if len(files) > max_file_num: + raise LimitReachedException(excArgs=(f'Number of {file_field}', str(max_file_num), '')) + + for f in files: + if not isinstance(f, TemporaryUploadedFile): + # print(f'[DEBUG]: {f.name}') + raise InvalidException(excArgs="files") + extension = f.name.split(".")[-1].lower() in
["csv"] + if not extension or "." not in f.name: + raise FileFormatInvalidException(excArgs=[".csv"]) + if f.size > settings.MAX_UPLOAD_SIZE_OF_A_FILE: + raise LimitReachedException(excArgs=('A file', str(settings.MAX_UPLOAD_SIZE_OF_A_FILE / 1024 / 1024), 'MB')) + total_file_size += f.size + if total_file_size > settings.MAX_UPLOAD_FILE_SIZE_OF_A_REQUEST: + raise LimitReachedException(excArgs=('Total size of all files', str(settings.MAX_UPLOAD_SIZE_OF_A_FILE / 1024 / 1024), 'MB')) + def get_file(file_path: str): try: return open(file_path, 'rb') @@ -105,6 +141,21 @@ def save_json_file(file_name: str, rq: SubscriptionRequest, data: dict): json.dump(data, json_file) return file_path +def save_feedback_file(file_name: str, rq: FeedbackRequest, uploaded_file: dict): + user_id = str(rq.subscription.user.id) + feedback_id = str(rq.id) + + folder_path = os.path.join(settings.MEDIA_ROOT, 'users', user_id, "feedbacks", feedback_id, 'requests', feedback_id) + os.makedirs(folder_path, exist_ok = True) + + file_path = os.path.join(folder_path, file_name) + with uploaded_file.open() as file: + # Read the contents of the file + file_contents = file.read().decode('utf-8') + with open(file_path, 'w', newline='') as csvfile: + csvfile.write(file_contents) + return file_path + def delete_file_with_path(file_path: str) -> bool: try: os.remove(file_path) @@ -166,6 +217,17 @@ def save_to_S3(file_name, rq, local_file_path): print(f"[ERROR]: {e}") raise ServiceUnavailableException() +def save_feedback_to_S3(file_name, id, local_file_path): + try: + assert len(local_file_path.split("/")) >= 2, "file_path must have at least feedback_folder and feedback_id" + s3_key = os.path.join(local_file_path.split("/")[-2], local_file_path.split("/")[-1], file_name) + c_connector.upload_feedback_to_s3((local_file_path, s3_key, id)) + c_connector.remove_local_file((local_file_path, id)) + return s3_key + except Exception as e: + print(f"[ERROR]: {e}") + raise ServiceUnavailableException() + def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path): try: file_path = os.path.join(folder_path, file_name) diff --git a/cope2n-api/fwd_api/utils/process.py b/cope2n-api/fwd_api/utils/process.py index 6b6e43b..b5d6c16 100644 --- a/cope2n-api/fwd_api/utils/process.py +++ b/cope2n-api/fwd_api/utils/process.py @@ -306,7 +306,6 @@ def token_value(token_type): return 5 return 1 # Basic OCR - def send_to_queue2(rq_id, sub_id, file_url, user_id, typez, metadata={}): try: if typez == ProcessType.ID_CARD.value: @@ -324,7 +323,6 @@ def send_to_queue2(rq_id, sub_id, file_url, user_id, typez, metadata={}): print(e) raise BadGatewayException() - def build_template_matching_data(template): temp_dict = { @@ -362,6 +360,8 @@ def send_template_queue(rq_id, file_url, template: OcrTemplate, uid): print(e) raise BadGatewayException() +def process_feedback(feedback_id, local_file_path): + c_connector.csv_feedback((local_file_path, feedback_id)) def process_pdf_file(file_name: str, file_obj: TemporaryUploadedFile, request: SubscriptionRequest, user) -> list: doc: fitz.Document = fitz.open(stream=file_obj.file.read()) diff --git a/cope2n-api/static/drf_spectacular_sidecar/swagger-ui-dist/swagger-ui-bundle.js.LICENSE.txt b/cope2n-api/static/drf_spectacular_sidecar/swagger-ui-dist/swagger-ui-bundle.js.LICENSE.txt index d645695..5471dc1 100644 --- a/cope2n-api/static/drf_spectacular_sidecar/swagger-ui-dist/swagger-ui-bundle.js.LICENSE.txt +++ 
b/cope2n-api/static/drf_spectacular_sidecar/swagger-ui-dist/swagger-ui-bundle.js.LICENSE.txt @@ -1,4 +1,5 @@ + Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ diff --git a/deploy_images.sh b/deploy_images.sh index cb89a0e..11e5360 100755 --- a/deploy_images.sh +++ b/deploy_images.sh @@ -2,7 +2,7 @@ set -e tag=$1 -is_prod=${$2:-False} +# is_prod=${$2:-False} echo "[INFO] Tag received from Python: $tag" diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 5d58c16..580665a 100755 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -98,6 +98,9 @@ services: - MINIO_SECRET_KEY=${S3_SECRET_KEY} volumes: - ./data/minio_data:/data + ports: + - 9884:9884 + - 9885:9885 networks: - ctel-sbt restart: always From d6bccec51deb2c2f805129ebb4279e44d2e6f3c7 Mon Sep 17 00:00:00 2001 From: dx-tan Date: Thu, 11 Jan 2024 17:38:12 +0700 Subject: [PATCH 09/27] Add: redemtion id --- cope2n-api/fwd_api/api/ctel_view.py | 3 ++- .../fwd_api/models/SubscriptionRequest.py | 2 +- deploy_images.sh | 24 +++++++++---------- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/cope2n-api/fwd_api/api/ctel_view.py b/cope2n-api/fwd_api/api/ctel_view.py index 6d4c632..ceeb64e 100755 --- a/cope2n-api/fwd_api/api/ctel_view.py +++ b/cope2n-api/fwd_api/api/ctel_view.py @@ -151,7 +151,8 @@ class CtelViewSet(viewsets.ViewSet): pages_left=total_page, process_type=p_type, status=1, request_id=rq_id, provider_code=provider_code, - subscription=sub) + subscription=sub, + redemption_id=validated_data["redemption_ID"]) new_request.save() count = 0 compact_files = [] diff --git a/cope2n-api/fwd_api/models/SubscriptionRequest.py b/cope2n-api/fwd_api/models/SubscriptionRequest.py index 6018274..3ac977d 100755 --- a/cope2n-api/fwd_api/models/SubscriptionRequest.py +++ b/cope2n-api/fwd_api/models/SubscriptionRequest.py @@ -10,7 +10,7 @@ class SubscriptionRequest(models.Model): pages_left: int = models.IntegerField(default=1) doc_type: str = models.CharField(max_length=100) request_id = models.CharField(max_length=200) # Change to request_id - redemption_id = models.CharField(max_length=200, null=True) # Change to request_id + redemption_id = models.CharField(max_length=200, null=True) process_type = models.CharField(max_length=200) # driver/id/invoice provider_code = models.CharField(max_length=200, default="Guest") # Request source FWD/CTel predict_result = models.JSONField(null=True) diff --git a/deploy_images.sh b/deploy_images.sh index 11e5360..3b57e42 100755 --- a/deploy_images.sh +++ b/deploy_images.sh @@ -11,24 +11,24 @@ echo "[INFO] Tag received from Python: $tag" echo "[INFO] Pushing AI image with tag: $tag..." docker compose -f docker-compose-dev.yml build cope2n-fi-sbt -docker tag sidp/cope2n-ai-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-ai-fi-sbt:${tag} -docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-ai-fi-sbt:${tag} -# docker tag sidp/cope2n-ai-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-ai-fi-sbt:production -# docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-ai-fi-sbt:production +docker tag sidp/cope2n-ai-fi-sbt:latest public.ecr.aws/sdsrv/sidp/cope2n-ai-fi-sbt:${tag} +docker push public.ecr.aws/sdsrv/sidp/cope2n-ai-fi-sbt:${tag} +# docker tag sidp/cope2n-ai-fi-sbt:latest public.ecr.aws/sdsrv/sidp/cope2n-ai-fi-sbt:production +# docker push public.ecr.aws/sdsrv/sidp/cope2n-ai-fi-sbt:production echo "[INFO] Pushing BE image with tag: $tag..." 
docker compose -f docker-compose-dev.yml build be-ctel-sbt -docker tag sidp/cope2n-be-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-be-fi-sbt:${tag} -# docker tag sidp/cope2n-be-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-be-fi-sbt:production -docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-be-fi-sbt:${tag} -# docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-be-fi-sbt:production +docker tag sidp/cope2n-be-fi-sbt:latest public.ecr.aws/sdsrv/sidp/cope2n-be-fi-sbt:${tag} +# docker tag sidp/cope2n-be-fi-sbt:latest public.ecr.aws/sdsrv/sidp/cope2n-be-fi-sbt:production +docker push public.ecr.aws/sdsrv/sidp/cope2n-be-fi-sbt:${tag} +# docker push public.ecr.aws/sdsrv/sidp/cope2n-be-fi-sbt:production echo "[INFO] Pushing FE image with tag: $tag..." docker compose -f docker-compose-dev.yml build fe-sbt -docker tag sidp/cope2n-fe-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-fe-fi-sbt:${tag} -# docker tag sidp/cope2n-fe-fi-sbt:latest 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-fe-fi-sbt:production -docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-fe-fi-sbt:${tag} -# docker push 756281617842.dkr.ecr.ap-southeast-1.amazonaws.com/sidp/cope2n-fe-fi-sbt:production +docker tag sidp/cope2n-fe-fi-sbt:latest public.ecr.aws/sdsrv/sidp/cope2n-fe-fi-sbt:${tag} +# docker tag sidp/cope2n-fe-fi-sbt:latest public.ecr.aws/sdsrv/sidp/cope2n-fe-fi-sbt:production +docker push public.ecr.aws/sdsrv/sidp/cope2n-fe-fi-sbt:${tag} +# docker push public.ecr.aws/sdsrv/sidp/cope2n-fe-fi-sbt:production cp ./docker-compose-prod.yml ./docker-compose_${tag}.yml sed -i "s/{{tag}}/$tag/g" ./docker-compose_${tag}.yml From 4500d7dc1f12fb825373a63510443e2dd0f3987d Mon Sep 17 00:00:00 2001 From: dx-tan Date: Fri, 12 Jan 2024 14:53:50 +0700 Subject: [PATCH 10/27] Add: is_test_request --- cope2n-api/fwd_api/api/ctel_view.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/cope2n-api/fwd_api/api/ctel_view.py b/cope2n-api/fwd_api/api/ctel_view.py index ceeb64e..dd8a5fc 100755 --- a/cope2n-api/fwd_api/api/ctel_view.py +++ b/cope2n-api/fwd_api/api/ctel_view.py @@ -122,6 +122,9 @@ class CtelViewSet(viewsets.ViewSet): 'redemption_ID': { 'type': 'string' }, + 'is_test_request': { + 'type': 'boolean', + }, }, 'required': {'imei_files'} } @@ -144,7 +147,7 @@ class CtelViewSet(viewsets.ViewSet): "invoice": invoice_file_objs } total_page = len(files.keys()) - + is_test_request = validated_data.get("is_test_request", False) rq_id = provider_code + "_" + datetime.now().strftime("%Y%m%d%H%M%S") + "_" + uuid.uuid4().hex p_type = validated_data['type'] new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page, @@ -152,7 +155,8 @@ class CtelViewSet(viewsets.ViewSet): process_type=p_type, status=1, request_id=rq_id, provider_code=provider_code, subscription=sub, - redemption_id=validated_data["redemption_ID"]) + redemption_id=validated_data["redemption_ID"], + is_test_request=is_test_request) new_request.save() count = 0 compact_files = [] @@ -194,6 +198,9 @@ class CtelViewSet(viewsets.ViewSet): 'redemption_ID': { 'type': 'string' }, + 'is_test_request': { + 'type': 'boolean', + }, }, 'required': {'imei_files'} } @@ -216,7 +223,7 @@ class CtelViewSet(viewsets.ViewSet): "invoice": invoice_file_objs } rq_id = provider_code + "_" + datetime.now().strftime("%Y%m%d%H%M%S") + "_" + uuid.uuid4().hex - + is_test_request = 
validated_data.get("is_test_request", False) count = 0 doc_files_with_type = [] for doc_type, doc_files in files.items(): @@ -236,7 +243,9 @@ class CtelViewSet(viewsets.ViewSet): pages_left=total_page, process_type=p_type, status=1, request_id=rq_id, provider_code=provider_code, - subscription=sub) + subscription=sub, + redemption_id=validated_data["redemption_ID"], + is_test_request=is_test_request) new_request.save() # Run file processing in a pool of 2 threads. TODO: Convert to Celery worker when possible From 94be43e798cc1c617b4e09bbff657e0a4b968979 Mon Sep 17 00:00:00 2001 From: dx-tan Date: Wed, 17 Jan 2024 16:28:50 +0700 Subject: [PATCH 11/27] Feature: is_test_request, Fix: csv not backup --- cope2n-api/fwd/settings.py | 7 ++- cope2n-api/fwd_api/api/ctel_user_view.py | 22 ++++++--- cope2n-api/fwd_api/api/ctel_view.py | 18 ++++--- .../fwd_api/celery_worker/internal_task.py | 14 ++++-- cope2n-api/fwd_api/celery_worker/worker.py | 1 + ...onrequest_is_bad_image_quality_and_more.py | 48 +++++++++++++++++++ cope2n-api/fwd_api/models/Report.py | 24 ++++++++++ .../fwd_api/models/SubscriptionRequest.py | 2 - .../fwd_api/models/SubscriptionRequestFile.py | 7 ++- cope2n-api/fwd_api/utils/file.py | 24 +++++++--- cope2n-api/fwd_api/utils/process.py | 20 +++++--- docker-compose-dev.yml | 7 +-- docker-compose-prod.yml | 6 ++- 13 files changed, 158 insertions(+), 42 deletions(-) create mode 100644 cope2n-api/fwd_api/migrations/0166_remove_subscriptionrequest_is_bad_image_quality_and_more.py create mode 100644 cope2n-api/fwd_api/models/Report.py diff --git a/cope2n-api/fwd/settings.py b/cope2n-api/fwd/settings.py index 4b6a13f..e2ddf3a 100755 --- a/cope2n-api/fwd/settings.py +++ b/cope2n-api/fwd/settings.py @@ -36,8 +36,11 @@ BASE_URL = env.str("BASE_URL", "") BASE_UI_URL = env.str("BASE_UI_URL", "") AUTH_TOKEN_LIFE_TIME = env.int("AUTH_TOKEN_LIFE_TIME", 0) IMAGE_TOKEN_LIFE_TIME = env.int("IMAGE_TOKEN_LIFE_TIME", 0) -FI_USER_NAME = env.str("FI_USER_NAME", "secret_username") -FI_PASSWORD = env.str("FI_PASSWORD", 'admin')# SECURITY WARNING: don't run with debug turned on in production! +ADMIN_USER_NAME = env.str("ADMIN_USER_NAME", "") +ADMIN_PASSWORD = env.str("ADMIN_PASSWORD", '')# SECURITY WARNING: don't run with debug turned on in production! +STANDARD_USER_NAME = env.str("STANDARD_USER_NAME", "") +STANDARD_PASSWORD = env.str("STANDARD_PASSWORD", '')# SECURITY WARNING: don't run with debug turned on in production! 
+ # Application definition S3_ENDPOINT = env.str("S3_ENDPOINT", "") S3_ACCESS_KEY = env.str("S3_ACCESS_KEY", "") diff --git a/cope2n-api/fwd_api/api/ctel_user_view.py b/cope2n-api/fwd_api/api/ctel_user_view.py index 64dcb78..4114b55 100755 --- a/cope2n-api/fwd_api/api/ctel_user_view.py +++ b/cope2n-api/fwd_api/api/ctel_user_view.py @@ -48,15 +48,23 @@ class CtelUserViewSet(viewsets.ViewSet): print(serializer.is_valid(raise_exception=True)) data = serializer.validated_data - if data['username'] != settings.FI_USER_NAME or data['password'] != settings.FI_PASSWORD: + token_limit = 999999 + if data['username'] == settings.ADMIN_USER_NAME: + if data['password'] != settings.ADMIN_PASSWORD: + raise NotAuthenticatedException() + elif data['username'] == settings.STANDARD_USER_NAME: + if data['password'] != settings.STANDARD_PASSWORD: + raise NotAuthenticatedException() + token_limit = 1000 + else: raise NotAuthenticatedException() - users = UserProfile.objects.filter(sync_id=settings.FI_USER_NAME) + users = UserProfile.objects.filter(sync_id=data['username']) if len(users) > 1: raise InvalidException(excArgs=USER_MESSAGE) if len(users) == 0: - user = UserProfile(sync_id=settings.FI_USER_NAME, status=EntityStatus.ACTIVE.value) + user = UserProfile(sync_id=data['username'], status=EntityStatus.ACTIVE.value) user.save() else: user = users[0] @@ -69,7 +77,7 @@ class CtelUserViewSet(viewsets.ViewSet): if len(plans) > 1: raise TrialOneException(excArgs=PLAN_MESSAGE) if len(plans) == 0: - plan = PricingPlan(code=p_code, duration=365, token_limitations=999999) + plan = PricingPlan(code=p_code, duration=365, token_limitations=token_limit) plan.save() else: plan: PricingPlan = plans[0] @@ -84,9 +92,9 @@ class CtelUserViewSet(viewsets.ViewSet): else: sub = subs[0] return Response(status=status.HTTP_200_OK, data={ - 'user_id': 'SBT', - 'user_name': settings.FI_USER_NAME, - 'token': sds_authenticator.generate_token(user_id=settings.FI_USER_NAME, internal_id=user.id, status=EntityStatus.ACTIVE.value, sub_id=sub.id) + 'user_id': user.id, + 'user_name': data['username'], + 'token': sds_authenticator.generate_token(user_id=data['username'], internal_id=user.id, status=EntityStatus.ACTIVE.value, sub_id=sub.id) }) diff --git a/cope2n-api/fwd_api/api/ctel_view.py b/cope2n-api/fwd_api/api/ctel_view.py index dd8a5fc..57abc5a 100755 --- a/cope2n-api/fwd_api/api/ctel_view.py +++ b/cope2n-api/fwd_api/api/ctel_view.py @@ -70,7 +70,6 @@ class CtelViewSet(viewsets.ViewSet): new_request: SubscriptionRequest = SubscriptionRequest( pages=total_page, pages_left=total_page, - doc_type="all", process_type=p_type, status=1, request_id=rq_id, provider_code=provider_code, subscription=sub, @@ -91,7 +90,7 @@ class CtelViewSet(viewsets.ViewSet): if file_extension in pdf_extensions: c_connector.do_pdf((rq_id, sub.id, p_type, user.id, files)) elif file_extension in image_extensions: - b_url = ProcessUtil.process_image_file(file_name, file_obj, new_request, user) + b_url = ProcessUtil.process_image_file(file_name, file_obj, new_request, user, "all", 0) j_time = time.time() print(f"[INFO]: Duration of Pre-processing: {j_time - s_time}s") print(f"[INFO]: b_url: {b_url}") @@ -171,9 +170,10 @@ class CtelViewSet(viewsets.ViewSet): FileUtils.save_to_S3(_name, new_request, file_path) count += 1 this_file = { - "file_name": _name, - "file_path": file_path, - "file_type": doc_type + "index_in_request": i, + "file_name": _name, + "file_path": file_path, + "file_type": doc_type } compact_files.append(this_file) c_connector.do_pdf((rq_id, sub.id, 
p_type, user.id, compact_files)) @@ -254,9 +254,11 @@ class CtelViewSet(viewsets.ViewSet): def process_file(data): idx, doc_type, doc_file, tmp_file_name = data doc_file.seek(0) + index_in_request = int(tmp_file_name.split(".")[0].split("_")[-1]) file_path = FileUtils.resize_and_save_file(tmp_file_name, new_request, doc_file, 100) FileUtils.save_to_S3(tmp_file_name, new_request, file_path) return { + "index_in_request": index_in_request, "idx": idx, "file_name": tmp_file_name, "file_path": file_path, @@ -275,7 +277,7 @@ class CtelViewSet(viewsets.ViewSet): waiting_time = current_time - start_time if waiting_time > time_limit: break - time.sleep(0.2) + time.sleep(0.1) report_filter = SubscriptionRequest.objects.filter(request_id=rq_id) if report_filter.count() != 1: raise InvalidException(excArgs='requestId') @@ -401,9 +403,10 @@ class CtelViewSet(viewsets.ViewSet): file_name = f"{feedback_id}_{i}.csv" # Save to local file_path = FileUtils.save_feedback_file(file_name, new_request, file) - FileUtils.validate_feedback_file(file_path) # Upload to S3 S3_path = FileUtils.save_feedback_to_S3(file_name, feedback_id, file_path) + # validate + FileUtils.validate_feedback_file(file_path) # Process csv file in the background ProcessUtil.process_feedback(feedback_id, file_path) @@ -449,6 +452,7 @@ class CtelViewSet(viewsets.ViewSet): if user.id != user_data['internal_id'] or user.status != EntityStatus.ACTIVE.value: raise PermissionDeniedException() + print(f"[DEBUG]: rq: {rq}, file_name: {file_name}") file_data = SubscriptionRequestFile.objects.filter(request=rq, file_name=file_name)[0] except IndexError: raise NotFoundException(excArgs='file') diff --git a/cope2n-api/fwd_api/celery_worker/internal_task.py b/cope2n-api/fwd_api/celery_worker/internal_task.py index 5ea175b..4443ad6 100755 --- a/cope2n-api/fwd_api/celery_worker/internal_task.py +++ b/cope2n-api/fwd_api/celery_worker/internal_task.py @@ -30,13 +30,16 @@ s3_client = S3Util.MinioS3Client( bucket_name=settings.S3_BUCKET_NAME ) -def process_pdf_file(file_name: str, file_path: str, request, user) -> list: +def process_pdf_file(file_name: str, file_path: str, request, user, doc_type: str, index_in_request: int) -> list: try: # Origin file + code = f'FIL{uuid.uuid4().hex}' new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path, request=request, file_name=file_name, - code=f'FIL{uuid.uuid4().hex}') + code=code, + doc_type=doc_type, + index_in_request=index_in_request) new_request_file.save() # Sub-file return ProcessUtil.pdf_to_images_urls(FileUtils.get_file(file_path), request, user) @@ -120,7 +123,8 @@ def process_csv_feedback(csv_file_path, feedback_id): def process_pdf(rq_id, sub_id, p_type, user_id, files): """ files: [{ - "idx": int + "index_in_request": int, + "idx": int, "file_name": "", "file_path": "", # local path to file "file_type": "" @@ -135,7 +139,7 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files): idx, file = data extension = file["file_name"].split(".")[-1].lower() if extension == "pdf": - _b_urls = process_pdf_file(file["file_name"], file["file_path"], new_request, user) + _b_urls = process_pdf_file(file["file_name"], file["file_path"], new_request, user, file["file_type"], file["index_in_request"]) if _b_urls is None: new_request.status = 400 new_request.save() @@ -145,7 +149,7 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files): _b_urls[j]["page_number"] = idx return idx, _b_urls[0] elif extension in image_extensions: - this_url = 
ProcessUtil.process_image_local_file(file["file_name"], file["file_path"], new_request, user)[0] + this_url = ProcessUtil.process_image_local_file(file["file_name"], file["file_path"], new_request, user, file["file_type"], file["index_in_request"])[0] this_url["page_number"] = idx if file["file_type"]: this_url["doc_type"] = file["file_type"] diff --git a/cope2n-api/fwd_api/celery_worker/worker.py b/cope2n-api/fwd_api/celery_worker/worker.py index a47b5c9..a056266 100755 --- a/cope2n-api/fwd_api/celery_worker/worker.py +++ b/cope2n-api/fwd_api/celery_worker/worker.py @@ -36,6 +36,7 @@ app.conf.update({ Queue('invoice_sbt_rs'), Queue('do_pdf'), Queue('upload_file_to_s3'), + Queue('upload_feedback_to_s3'), Queue('upload_obj_to_s3'), Queue('remove_local_file'), Queue('csv_feedback'), diff --git a/cope2n-api/fwd_api/migrations/0166_remove_subscriptionrequest_is_bad_image_quality_and_more.py b/cope2n-api/fwd_api/migrations/0166_remove_subscriptionrequest_is_bad_image_quality_and_more.py new file mode 100644 index 0000000..41c1500 --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0166_remove_subscriptionrequest_is_bad_image_quality_and_more.py @@ -0,0 +1,48 @@ +# Generated by Django 4.1.3 on 2024-01-17 03:47 + +from django.db import migrations, models +import django.utils.timezone + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0165_feedbackrequest'), + ] + + operations = [ + migrations.RemoveField( + model_name='subscriptionrequest', + name='is_bad_image_quality', + ), + migrations.AddField( + model_name='subscriptionrequestfile', + name='accuracy', + field=models.JSONField(null=True), + ), + migrations.AddField( + model_name='subscriptionrequestfile', + name='doc_type', + field=models.CharField(default='', max_length=100), + ), + migrations.AddField( + model_name='subscriptionrequestfile', + name='index_in_request', + field=models.IntegerField(default=0), + ), + migrations.AddField( + model_name='subscriptionrequestfile', + name='is_bad_image_quality', + field=models.BooleanField(default=False), + ), + migrations.AddField( + model_name='subscriptionrequestfile', + name='origin_name', + field=models.CharField(default='', max_length=300), + ), + migrations.AlterField( + model_name='subscriptionrequestfile', + name='created_at', + field=models.DateTimeField(db_index=True, default=django.utils.timezone.now), + ), + ] diff --git a/cope2n-api/fwd_api/models/Report.py b/cope2n-api/fwd_api/models/Report.py new file mode 100644 index 0000000..11dc820 --- /dev/null +++ b/cope2n-api/fwd_api/models/Report.py @@ -0,0 +1,24 @@ +from django.db import models +from django.utils import timezone +from fwd_api.models.Subscription import Subscription + +class Report(models.Model): + id = models.AutoField(primary_key=True) + report_id = models.CharField(max_length=200) # Change to request_id + local_file_name = models.CharField(max_length=200) # Change to request_id + origin_name = models.CharField(max_length=200) # Change to request_id + error_status = models.JSONField(null=True) + created_at = models.DateTimeField(default=timezone.now, db_index=True) + updated_at = models.DateTimeField(auto_now=True) + subscription = models.ForeignKey(Subscription, on_delete=models.CASCADE) + S3_uploaded = models.BooleanField(default=False) + start_at = models.DateTimeField(null=True) + end_at = models.DateTimeField(null=True) + include_for_test_sample = models.BooleanField(default=False) + status = models.CharField(null=True) + number_request = models.IntegerField(default=0) + number_images = 
models.IntegerField(default=0) + number_bad_images = models.IntegerField(default=0) + average_client_time = models.JSONField(default=0) # {"0.1": 100, 0.2: 200, ...} + average_OCR_time = models.JSONField(default=0) # {"0.1": 98, 0.2: 202, ...} + accuracy = models.JSONField(null=True) diff --git a/cope2n-api/fwd_api/models/SubscriptionRequest.py b/cope2n-api/fwd_api/models/SubscriptionRequest.py index 3ac977d..62d70ec 100755 --- a/cope2n-api/fwd_api/models/SubscriptionRequest.py +++ b/cope2n-api/fwd_api/models/SubscriptionRequest.py @@ -3,7 +3,6 @@ from django.utils import timezone from fwd_api.models.Subscription import Subscription - class SubscriptionRequest(models.Model): id = models.AutoField(primary_key=True) pages: int = models.IntegerField() @@ -34,4 +33,3 @@ class SubscriptionRequest(models.Model): total_memory = models.FloatField(default=-1) gpu_stats = models.CharField(max_length=100, null=True) is_reviewed = models.BooleanField(default=False) - is_bad_image_quality = models.BooleanField(default=False) diff --git a/cope2n-api/fwd_api/models/SubscriptionRequestFile.py b/cope2n-api/fwd_api/models/SubscriptionRequestFile.py index ef3c788..508553b 100755 --- a/cope2n-api/fwd_api/models/SubscriptionRequestFile.py +++ b/cope2n-api/fwd_api/models/SubscriptionRequestFile.py @@ -12,9 +12,14 @@ class SubscriptionRequestFile(models.Model): return f"FIL{uuid.uuid4().hex}" code = models.CharField(max_length=300, default=gen_random_code) + origin_name = models.CharField(max_length=300, default="") file_name = models.CharField(max_length=300, default=None) file_path = EncryptedCharField(max_length=500, default=None) file_category = models.CharField(max_length=200, default=FileCategory.Origin.value) request = models.ForeignKey(SubscriptionRequest, related_name="files", on_delete=models.CASCADE) - created_at = models.DateTimeField(default=timezone.now) + created_at = models.DateTimeField(default=timezone.now, db_index=True) updated_at = models.DateTimeField(auto_now=True) + accuracy = models.JSONField(null=True) + is_bad_image_quality = models.BooleanField(default=False) + doc_type = models.CharField(max_length=100, default="") + index_in_request = models.IntegerField(default=0) diff --git a/cope2n-api/fwd_api/utils/file.py b/cope2n-api/fwd_api/utils/file.py index 0fa22ed..5e95ff8 100644 --- a/cope2n-api/fwd_api/utils/file.py +++ b/cope2n-api/fwd_api/utils/file.py @@ -11,7 +11,7 @@ from fwd import settings from fwd_api.constant.common import allowed_file_extensions from fwd_api.exception.exceptions import GeneralException, RequiredFieldException, InvalidException, \ ServiceUnavailableException, FileFormatInvalidException, LimitReachedException, InvalidDecompressedSizeException, RequiredColumnException -from fwd_api.models import SubscriptionRequest, OcrTemplate, FeedbackRequest +from fwd_api.models import SubscriptionRequest, OcrTemplate, FeedbackRequest, SubscriptionRequestFile from fwd_api.utils import process as ProcessUtil from fwd_api.utils.crypto import image_authenticator from fwd_api.utils.image import resize @@ -145,7 +145,7 @@ def save_feedback_file(file_name: str, rq: FeedbackRequest, uploaded_file: dict) user_id = str(rq.subscription.user.id) feedback_id = str(rq.id) - folder_path = os.path.join(settings.MEDIA_ROOT, 'users', user_id, "feedbacks", feedback_id, 'requests', feedback_id) + folder_path = os.path.join(settings.MEDIA_ROOT, 'users', user_id, "feedbacks", feedback_id) os.makedirs(folder_path, exist_ok = True) file_path = os.path.join(folder_path, file_name) @@ -177,7 +177,7 @@ 
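# A short worked example of the simplified feedback layout above, together with
# the save_feedback_to_S3() key change a little further below (all concrete
# values hypothetical):
import os

MEDIA_ROOT = "/app/media"
user_id, feedback_id, file_name = "42", "7", "feedback_7_0.csv"

folder_path = os.path.join(MEDIA_ROOT, "users", user_id, "feedbacks", feedback_id)
local_file_path = os.path.join(folder_path, file_name)
# local:  /app/media/users/42/feedbacks/7/feedback_7_0.csv  (the duplicated
#         .../requests/<feedback_id> suffix of the old layout is gone)
s3_key = os.path.join("feedback", local_file_path.split("/")[-2], file_name)
# s3_key: feedback/7/feedback_7_0.csv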
def save_template_file(file_name: str, rq: OcrTemplate, file: TemporaryUploadedF print(e) raise ServiceUnavailableException() -def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path): +def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path: str): try: file_path = os.path.join(folder_path, file_name) extension = file_name.split(".")[-1] @@ -193,10 +193,19 @@ def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, fo return file_path -def resize_and_save_file(file_name: str, rq: SubscriptionRequest, file: TemporaryUploadedFile, quality): +def resize_and_save_file(file_name: str, rq: SubscriptionRequest, file: TemporaryUploadedFile, quality: int): try: folder_path = get_folder_path(rq) pathlib.Path(folder_path).mkdir(exist_ok=True, parents=True) + # request_file: SubscriptionRequestFile = SubscriptionRequestFile( + # file_name = file_name, + # file_path = os.path.join(folder_path, file_name), + # doc_type = doc_type, + # origin_name = file.name, + # request = rq, + # index_in_request= index_in_request + # ) + # request_file.save() return save_file_with_path(file_name, file, quality, folder_path) except InvalidDecompressedSizeException as e: raise e @@ -219,8 +228,11 @@ def save_to_S3(file_name, rq, local_file_path): def save_feedback_to_S3(file_name, id, local_file_path): try: - assert len(local_file_path.split("/")) >= 2, "file_path must have at least feedback_folder and feedback_id" - s3_key = os.path.join(local_file_path.split("/")[-2], local_file_path.split("/")[-1], file_name) + # print(f"[DEBUG]: Uploading feedback to S3 with local path {local_file_path}, id: {id}, file_name: {file_name}") + assert len(local_file_path.split("/")) >= 3, "file_path must have at least feedback_folder and feedback_id" + # s3_key = os.path.join(local_file_path.split("/")[-3], local_file_path.split("/")[-2], file_name) + s3_key = os.path.join("feedback", local_file_path.split("/")[-2], file_name) + # print(f"[DEBUG]: Uploading feedback to S3 with s3_key {s3_key}") c_connector.upload_feedback_to_s3((local_file_path, s3_key, id)) c_connector.remove_local_file((local_file_path, id)) return s3_key diff --git a/cope2n-api/fwd_api/utils/process.py b/cope2n-api/fwd_api/utils/process.py index b5d6c16..28bf0b7 100644 --- a/cope2n-api/fwd_api/utils/process.py +++ b/cope2n-api/fwd_api/utils/process.py @@ -363,7 +363,7 @@ def send_template_queue(rq_id, file_url, template: OcrTemplate, uid): def process_feedback(feedback_id, local_file_path): c_connector.csv_feedback((local_file_path, feedback_id)) -def process_pdf_file(file_name: str, file_obj: TemporaryUploadedFile, request: SubscriptionRequest, user) -> list: +def process_pdf_file(file_name: str, file_obj: TemporaryUploadedFile, request: SubscriptionRequest, user, doc_type: str, index_in_request: int) -> list: doc: fitz.Document = fitz.open(stream=file_obj.file.read()) if doc.page_count > settings.MAX_PAGES_OF_PDF_FILE: raise LimitReachedException(excArgs=('Number of pages', str(settings.MAX_PAGES_OF_PDF_FILE), 'pages')) @@ -372,16 +372,18 @@ def process_pdf_file(file_name: str, file_obj: TemporaryUploadedFile, request: S # Origin file file_obj.seek(0) file_path = FileUtils.resize_and_save_file(file_name, request, file_obj, 100) + code = f'FIL{uuid.uuid4().hex}' new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path, request=request, file_name=file_name, - code=f'FIL{uuid.uuid4().hex}') + code=code, + doc_type=doc_type, + 
index_in_request=index_in_request) new_request_file.save() # Sub-file return pdf_to_images_urls(doc, request, user) - -def process_image_file(file_name: str, file_obj: TemporaryUploadedFile, request: SubscriptionRequest, user) -> list: +def process_image_file(file_name: str, file_obj: TemporaryUploadedFile, request: SubscriptionRequest, user, doc_type: str, index_in_request: int) -> list: if file_obj.size > settings.SIZE_TO_COMPRESS: quality = 95 else: @@ -390,7 +392,9 @@ def process_image_file(file_name: str, file_obj: TemporaryUploadedFile, request: new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path, request=request, file_name=file_name, - code=f'FIL{uuid.uuid4().hex}') + code=f'FIL{uuid.uuid4().hex}', + doc_type=doc_type, + index_in_request=index_in_request) new_request_file.save() return [{ 'file_url': FileUtils.build_url(FolderFileType.REQUESTS.value, request.request_id, user.id, file_name), @@ -398,11 +402,13 @@ def process_image_file(file_name: str, file_obj: TemporaryUploadedFile, request: 'request_file_id': new_request_file.code }] -def process_image_local_file(file_name: str, file_path: str, request: SubscriptionRequest, user) -> list: +def process_image_local_file(file_name: str, file_path: str, request: SubscriptionRequest, user, doc_type: str, index_in_request: int) -> list: new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path, request=request, file_name=file_name, - code=f'FIL{uuid.uuid4().hex}') + code=f'FIL{uuid.uuid4().hex}', + doc_type=doc_type, + index_in_request=index_in_request) new_request_file.save() return [{ 'file_url': FileUtils.build_url(FolderFileType.REQUESTS.value, request.request_id, user.id, file_name), diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 580665a..f50412c 100755 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -63,9 +63,10 @@ services: - AUTH_TOKEN_LIFE_TIME=${AUTH_TOKEN_LIFE_TIME} - IMAGE_TOKEN_LIFE_TIME=${IMAGE_TOKEN_LIFE_TIME} - INTERNAL_SDS_KEY=${INTERNAL_SDS_KEY} - - FI_USER_NAME=${FI_USER_NAME} - - FI_PASSWORD=${FI_PASSWORD} - - S3_ENDPOINT=${S3_ENDPOINT} + - ADMIN_USER_NAME=${ADMIN_USER_NAME} + - ADMIN_PASSWORD=${ADMIN_PASSWORD} + - STANDARD_USER_NAME=${STANDARD_USER_NAME} + - STANDARD_PASSWORD=${STANDARD_PASSWORD} - S3_ACCESS_KEY=${S3_ACCESS_KEY} - S3_SECRET_KEY=${S3_SECRET_KEY} - S3_BUCKET_NAME=${S3_BUCKET_NAME} diff --git a/docker-compose-prod.yml b/docker-compose-prod.yml index dc5c1b8..be4c4b2 100644 --- a/docker-compose-prod.yml +++ b/docker-compose-prod.yml @@ -44,8 +44,10 @@ services: - AUTH_TOKEN_LIFE_TIME=${AUTH_TOKEN_LIFE_TIME} - IMAGE_TOKEN_LIFE_TIME=${IMAGE_TOKEN_LIFE_TIME} - INTERNAL_SDS_KEY=${INTERNAL_SDS_KEY} - - FI_USER_NAME=${FI_USER_NAME} - - FI_PASSWORD=${FI_PASSWORD} + - ADMIN_USER_NAME=${ADMIN_USER_NAME} + - ADMIN_PASSWORD=${ADMIN_PASSWORD} + - STANDARD_USER_NAME=${STANDARD_USER_NAME} + - STANDARD_PASSWORD=${STANDARD_PASSWORD} - S3_ENDPOINT=${S3_ENDPOINT} - S3_ACCESS_KEY=${S3_ACCESS_KEY} - S3_SECRET_KEY=${S3_SECRET_KEY} From 04781ea7a444985864d9407d1c108f10a1e87b6b Mon Sep 17 00:00:00 2001 From: dx-tan Date: Wed, 17 Jan 2024 16:44:04 +0700 Subject: [PATCH 12/27] Fix: bad image to file level --- cope2n-api/fwd_api/api/ctel_view.py | 2 +- cope2n-api/fwd_api/utils/health.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cope2n-api/fwd_api/api/ctel_view.py b/cope2n-api/fwd_api/api/ctel_view.py index 57abc5a..91816fe 100755 --- a/cope2n-api/fwd_api/api/ctel_view.py +++ 
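# The compose changes above split the old FI_USER_NAME / FI_PASSWORD pair into
# admin and standard credentials. A minimal sketch of the consuming side,
# assuming plain environment lookups (defaults hypothetical):
import os

ADMIN_USER_NAME = os.environ.get("ADMIN_USER_NAME", "")
ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD", "")
STANDARD_USER_NAME = os.environ.get("STANDARD_USER_NAME", "")
STANDARD_PASSWORD = os.environ.get("STANDARD_PASSWORD", "")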
b/cope2n-api/fwd_api/api/ctel_view.py @@ -452,7 +452,7 @@ class CtelViewSet(viewsets.ViewSet): if user.id != user_data['internal_id'] or user.status != EntityStatus.ACTIVE.value: raise PermissionDeniedException() - print(f"[DEBUG]: rq: {rq}, file_name: {file_name}") + # print(f"[DEBUG]: rq: {rq}, file_name: {file_name}") file_data = SubscriptionRequestFile.objects.filter(request=rq, file_name=file_name)[0] except IndexError: raise NotFoundException(excArgs='file') diff --git a/cope2n-api/fwd_api/utils/health.py b/cope2n-api/fwd_api/utils/health.py index 65a7cf4..1883ba5 100644 --- a/cope2n-api/fwd_api/utils/health.py +++ b/cope2n-api/fwd_api/utils/health.py @@ -23,7 +23,7 @@ def get_latest_requests(limit=50): "total_memory": request.total_memory, "gpu_stats": request.gpu_stats, "is_reviewed": request.is_reviewed, - "is_bad_image_quality": request.is_bad_image_quality, + # "is_bad_image_quality": request.is_bad_image_quality, }) return requests_dict From 874dffedc9f8a4a629ae1b0b1fc37fb00cce2c2d Mon Sep 17 00:00:00 2001 From: dx-tan Date: Tue, 23 Jan 2024 14:16:22 +0700 Subject: [PATCH 13/27] Fix: unknown bool param of type str --- cope2n-ai-fi/.dockerignore | 3 ++- cope2n-api/fwd_api/utils/file.py | 18 ++++++++++++++++++ cope2n-api/fwd_api/utils/process.py | 17 ++++++++++++++++- docker-compose-prod.yml | 1 - tests/utils.py | 2 +- 5 files changed, 37 insertions(+), 4 deletions(-) diff --git a/cope2n-ai-fi/.dockerignore b/cope2n-ai-fi/.dockerignore index b2ac9a7..4936ddc 100755 --- a/cope2n-ai-fi/.dockerignore +++ b/cope2n-ai-fi/.dockerignore @@ -5,4 +5,5 @@ packages/ __pycache__ DataBase/image_temp/ DataBase/json_temp/ -DataBase/template.db \ No newline at end of file +DataBase/template.db +key_value_understanding-20231024-125646_manulife2/ \ No newline at end of file diff --git a/cope2n-api/fwd_api/utils/file.py b/cope2n-api/fwd_api/utils/file.py index 5e95ff8..d3ae556 100644 --- a/cope2n-api/fwd_api/utils/file.py +++ b/cope2n-api/fwd_api/utils/file.py @@ -326,3 +326,21 @@ def build_url(folder: str, data_id: str, user_id: int, file_name: str = None) -> def build_media_url_v2(media_id: str, user_id: int, sub_id: int, u_sync_id: str) -> str: token = image_authenticator.generate_img_token_v2(user_id, sub_id, u_sync_id) return f'{settings.BASE_URL}/api/ctel/v2/media/request/{media_id}/?token={token}' + +def json2xlsx(input: json): + """_summary_ + + Args: + input (json): + : [{ + Subs: Jan, # Subtotal name + Metadata: {num_imei: 1, + ...: ...} + Data: [{num_imei: 1, + ...: ...}] + }] + OR + + """ + # return xlsx + pass \ No newline at end of file diff --git a/cope2n-api/fwd_api/utils/process.py b/cope2n-api/fwd_api/utils/process.py index 28bf0b7..c89b42f 100644 --- a/cope2n-api/fwd_api/utils/process.py +++ b/cope2n-api/fwd_api/utils/process.py @@ -104,7 +104,9 @@ def validate_ocr_request_and_get(request, subscription): FileUtils.validate_list_file(list_file) validated_data['file'] = list_file[0] - validated_data['is_test_request'] = request.data.get('is_test_request', False) + # validated_data['is_test_request'] = bool(request.data.get('is_test_request', False)) + validated_data['is_test_request'] = string_to_boolean(request.data.get('is_test_request', "false")) + # print(f"[DEBUG]: is_test_request: ", validated_data['is_test_request']) return validated_data @@ -139,9 +141,22 @@ def sbt_validate_ocr_request_and_get(request, subscription): validated_data['imei_file'] = imei_files validated_data['invoice_file'] = invoice_file validated_data['redemption_ID'] = redemption_ID + 
validated_data['is_test_request'] = string_to_boolean(request.data.get('is_test_request', "false")) + # print(f"[DEBUG]: is_test_request: ", validated_data['is_test_request']) return validated_data +def string_to_boolean(value): + true_strings = ['true', 'yes', '1', 'on'] + false_strings = ['false', 'no', '0', 'off'] + + if isinstance(value, str): + lower_value = value.lower() + if lower_value in true_strings: + return True + else: + return False + def sbt_validate_feedback(request): validated_data = {} diff --git a/docker-compose-prod.yml b/docker-compose-prod.yml index be4c4b2..cbc1e68 100644 --- a/docker-compose-prod.yml +++ b/docker-compose-prod.yml @@ -139,7 +139,6 @@ services: condition: service_started volumes: - BE_media:${MEDIA_ROOT} - working_dir: /app command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5" diff --git a/tests/utils.py b/tests/utils.py index 2994ef9..0dc3518 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -34,7 +34,7 @@ def make_sbt_request(host, token, invoice_files=None, imei_files=None, ensure_su files.append(('imei_files', (file, open(file, "rb"), 'application/octet-stream'))) num_files = len(files) files.append(('processType', '12')) - files.append(('is_test_request', 'true')) + files.append(('is_test_request', 'True')) start_time = time.time() end_of_upload_time = 0 From bf8742fa4b1c29a28896324d667e86056e3a1ddc Mon Sep 17 00:00:00 2001 From: dx-tan Date: Tue, 23 Jan 2024 15:14:40 +0700 Subject: [PATCH 14/27] Feature: placeholder for xlsx function --- cope2n-api/fwd_api/models/Report.py | 8 +++++--- cope2n-api/fwd_api/models/SubscriptionRequestFile.py | 3 +++ cope2n-api/fwd_api/utils/file.py | 5 +++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/cope2n-api/fwd_api/models/Report.py b/cope2n-api/fwd_api/models/Report.py index 11dc820..b5bfa8d 100644 --- a/cope2n-api/fwd_api/models/Report.py +++ b/cope2n-api/fwd_api/models/Report.py @@ -19,6 +19,8 @@ class Report(models.Model): number_request = models.IntegerField(default=0) number_images = models.IntegerField(default=0) number_bad_images = models.IntegerField(default=0) - average_client_time = models.JSONField(default=0) # {"0.1": 100, 0.2: 200, ...} - average_OCR_time = models.JSONField(default=0) # {"0.1": 98, 0.2: 202, ...} - accuracy = models.JSONField(null=True) + average_client_time_profile = models.JSONField(default=0) # {"0.1": 100, 0.2: 200, ...} + average_OCR_time_profile = models.JSONField(default=0) # {"0.1": 98, 0.2: 202, ...} + average_OCR_time = models.FloatField(default=0) # {"0.1": 98, 0.2: 202, ...} + average_client_time = models.FloatField(default=0) # {"0.1": 98, 0.2: 202, ...} + accuracy = models.JSONField(null=True) # {"imei": {}, "purchase_date": {}, "retailer_name": {}} \ No newline at end of file diff --git a/cope2n-api/fwd_api/models/SubscriptionRequestFile.py b/cope2n-api/fwd_api/models/SubscriptionRequestFile.py index 508553b..bcb1d36 100755 --- a/cope2n-api/fwd_api/models/SubscriptionRequestFile.py +++ b/cope2n-api/fwd_api/models/SubscriptionRequestFile.py @@ -23,3 +23,6 @@ class SubscriptionRequestFile(models.Model): is_bad_image_quality = models.BooleanField(default=False) doc_type = models.CharField(max_length=100, default="") index_in_request = models.IntegerField(default=0) + processing_time = models.IntegerField(default=-1) # in milisecond + reason = models.TextField(blank=True) + counter_measures = models.TextField(blank=True) \ No newline at end of file diff --git a/cope2n-api/fwd_api/utils/file.py b/cope2n-api/fwd_api/utils/file.py 
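# Quick, hedged usage checks for string_to_boolean() above (import path assumed
# from the file being patched). Note that any string outside true_strings falls
# through to False, so false_strings is effectively documentation, and a
# non-str value returns None implicitly.
from fwd_api.utils.process import string_to_boolean

assert string_to_boolean("True") is True      # tests/utils.py now sends 'True'
assert string_to_boolean("1") is True
assert string_to_boolean("off") is False
assert string_to_boolean("anything-else") is False
assert string_to_boolean(False) is None       # no str branch taken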
index d3ae556..92142da 100644 --- a/cope2n-api/fwd_api/utils/file.py +++ b/cope2n-api/fwd_api/utils/file.py @@ -340,7 +340,8 @@ def json2xlsx(input: json): ...: ...}] }] OR - + input (json): + : [] + Return xlsx (object) """ - # return xlsx pass \ No newline at end of file From 965fa8e9cad5df0403d523c8debd2de08ec23d68 Mon Sep 17 00:00:00 2001 From: dx-tan Date: Wed, 24 Jan 2024 14:14:33 +0700 Subject: [PATCH 15/27] Update: table design --- .gitmodules | 3 +++ cope2n-api/fwd_api/models/Report.py | 17 +++++++++++------ .../fwd_api/models/SubscriptionRequest.py | 4 ++++ .../fwd_api/models/SubscriptionRequestFile.py | 7 +++++-- cope2n-api/fwd_api/utils/sdsvkvu | 1 + 5 files changed, 24 insertions(+), 8 deletions(-) create mode 160000 cope2n-api/fwd_api/utils/sdsvkvu diff --git a/.gitmodules b/.gitmodules index 3d4203a..1198d96 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "cope2n-ai-fi/modules/sdsvkvu"] path = cope2n-ai-fi/modules/sdsvkvu url = https://code.sdsdev.co.kr/SDSRV-IDP/sdsvkvu.git +[submodule "cope2n-api/fwd_api/utils/sdsvkvu"] + path = cope2n-api/fwd_api/utils/sdsvkvu + url = https://code.sdsdev.co.kr/SDSRV-IDP/sdsvkvu diff --git a/cope2n-api/fwd_api/models/Report.py b/cope2n-api/fwd_api/models/Report.py index b5bfa8d..ef03c59 100644 --- a/cope2n-api/fwd_api/models/Report.py +++ b/cope2n-api/fwd_api/models/Report.py @@ -3,24 +3,29 @@ from django.utils import timezone from fwd_api.models.Subscription import Subscription class Report(models.Model): + # Metadata id = models.AutoField(primary_key=True) report_id = models.CharField(max_length=200) # Change to request_id local_file_name = models.CharField(max_length=200) # Change to request_id - origin_name = models.CharField(max_length=200) # Change to request_id error_status = models.JSONField(null=True) created_at = models.DateTimeField(default=timezone.now, db_index=True) updated_at = models.DateTimeField(auto_now=True) - subscription = models.ForeignKey(Subscription, on_delete=models.CASCADE) - S3_uploaded = models.BooleanField(default=False) start_at = models.DateTimeField(null=True) end_at = models.DateTimeField(null=True) include_for_test_sample = models.BooleanField(default=False) status = models.CharField(null=True) + is_daily_report = models.BooleanField(default=False) + + # Data + S3_uploaded = models.BooleanField(default=False) number_request = models.IntegerField(default=0) number_images = models.IntegerField(default=0) number_bad_images = models.IntegerField(default=0) average_client_time_profile = models.JSONField(default=0) # {"0.1": 100, 0.2: 200, ...} average_OCR_time_profile = models.JSONField(default=0) # {"0.1": 98, 0.2: 202, ...} - average_OCR_time = models.FloatField(default=0) # {"0.1": 98, 0.2: 202, ...} - average_client_time = models.FloatField(default=0) # {"0.1": 98, 0.2: 202, ...} - accuracy = models.JSONField(null=True) # {"imei": {}, "purchase_date": {}, "retailer_name": {}} \ No newline at end of file + average_OCR_time = models.JSONField(null=True) # {"invoice": 0.1, "imei": 0.1} + average_client_time = models.JSONField(null=True) # {"invoice": 0.1, "imei": 0.1} + imei_accuracy = models.FloatField(default=-1) + purchase_date_accuracy = models.FloatField(default=-1) + retailer_name_accuracy = models.FloatField(default=-1) + sold_to_party_accuracy = models.FloatField(default=-1) \ No newline at end of file diff --git a/cope2n-api/fwd_api/models/SubscriptionRequest.py b/cope2n-api/fwd_api/models/SubscriptionRequest.py index 62d70ec..3839c3a 100755 --- 
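# A worked example for the redesigned Report profile fields above: the
# *_profile JSON maps a latency bucket to a request count, e.g.
# {"0.1": 100, "0.2": 200}. One way (hypothetical helper, not project code)
# to collapse such a profile into a single mean:
def mean_from_profile(profile: dict) -> float:
    total = sum(profile.values())
    if total == 0:
        return 0.0
    return sum(float(bucket) * count for bucket, count in profile.items()) / total

assert abs(mean_from_profile({"0.1": 100, "0.2": 200}) - 0.1667) < 1e-3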
a/cope2n-api/fwd_api/models/SubscriptionRequest.py +++ b/cope2n-api/fwd_api/models/SubscriptionRequest.py @@ -21,6 +21,10 @@ class SubscriptionRequest(models.Model): updated_at = models.DateTimeField(auto_now=True) is_test_request = models.BooleanField(default=False) S3_uploaded = models.BooleanField(default=False) + imei_accuracy = models.FloatField(default=-1) + purchase_date_accuracy = models.FloatField(default=-1) + retailer_name_accuracy = models.FloatField(default=-1) + sold_to_party_accuracy = models.FloatField(default=-1) ai_inference_profile = models.JSONField(null=True) preprocessing_time = models.FloatField(default=-1) diff --git a/cope2n-api/fwd_api/models/SubscriptionRequestFile.py b/cope2n-api/fwd_api/models/SubscriptionRequestFile.py index bcb1d36..6293421 100755 --- a/cope2n-api/fwd_api/models/SubscriptionRequestFile.py +++ b/cope2n-api/fwd_api/models/SubscriptionRequestFile.py @@ -19,10 +19,13 @@ class SubscriptionRequestFile(models.Model): request = models.ForeignKey(SubscriptionRequest, related_name="files", on_delete=models.CASCADE) created_at = models.DateTimeField(default=timezone.now, db_index=True) updated_at = models.DateTimeField(auto_now=True) - accuracy = models.JSONField(null=True) is_bad_image_quality = models.BooleanField(default=False) doc_type = models.CharField(max_length=100, default="") index_in_request = models.IntegerField(default=0) processing_time = models.IntegerField(default=-1) # in milisecond reason = models.TextField(blank=True) - counter_measures = models.TextField(blank=True) \ No newline at end of file + counter_measures = models.TextField(blank=True) + imei_accuracy = models.FloatField(default=-1) + purchase_date_accuracy = models.FloatField(default=-1) + retailer_name_accuracy = models.FloatField(default=-1) + sold_to_party_accuracy = models.FloatField(default=-1) \ No newline at end of file diff --git a/cope2n-api/fwd_api/utils/sdsvkvu b/cope2n-api/fwd_api/utils/sdsvkvu new file mode 160000 index 0000000..b6d4fab --- /dev/null +++ b/cope2n-api/fwd_api/utils/sdsvkvu @@ -0,0 +1 @@ +Subproject commit b6d4fab46f7f8689dd6b050cfbff2faa6a6f3fec From 4ff48110e1fcc1d886099585cef8352a0ed0cd46 Mon Sep 17 00:00:00 2001 From: daovietanh99 Date: Fri, 26 Jan 2024 16:37:12 +0700 Subject: [PATCH 16/27] add get and update request api --- cope2n-api/fwd_api/api/accuracy_view.py | 86 ++++++++++++++++++++++++- cope2n-api/fwd_api/api_router.py | 3 +- 2 files changed, 86 insertions(+), 3 deletions(-) diff --git a/cope2n-api/fwd_api/api/accuracy_view.py b/cope2n-api/fwd_api/api/accuracy_view.py index e482e58..3ed49f2 100644 --- a/cope2n-api/fwd_api/api/accuracy_view.py +++ b/cope2n-api/fwd_api/api/accuracy_view.py @@ -165,7 +165,7 @@ class AccuracyViewSet(viewsets.ViewSet): 'Client Request Time (ms)': request.client_request_time, 'Server Processing Time (ms)': request.preprocessing_time + request.ai_inference_time, 'Is Reviewed': request.is_reviewed, - 'Is Bad Quality': request.is_bad_image_quality, + # 'Is Bad Quality': request.is_bad_image_quality, 'created_at': request.created_at.isoformat() }) @@ -180,4 +180,86 @@ class AccuracyViewSet(viewsets.ViewSet): return JsonResponse(response) - return JsonResponse({'error': 'Invalid request method.'}, status=405) \ No newline at end of file + return JsonResponse({'error': 'Invalid request method.'}, status=405) + + +class RequestViewSet(viewsets.ViewSet): + lookup_field = "username" + + @extend_schema(request = { + 'multipart/form-data': { + 'type': 'object', + 'properties': { + 'reviewed_result': { + 'type': 
'string', + }, + } + }, + }, responses=None, tags=['Request'] + ) + @action(detail=False, url_path=r"request/(?P<request_id>[\w\-]+)", methods=["GET", "POST"]) + def get_subscription_request(self, request, request_id=None): + if request.method == 'GET': + base_query = Q(request_id=request_id) + + subscription_request = SubscriptionRequest.objects.filter(base_query).first() + + data = [] + + imeis = [] + purchase_date = [] + retailer = "" + try: + if subscription_request.reviewed_result is not None: + imeis = subscription_request.reviewed_result.get("imei_number", []) + purchase_date = subscription_request.reviewed_result.get("purchase_date", []) + retailer = subscription_request.reviewed_result.get("retailername", "") + elif subscription_request.feedback_result is not None : + imeis = subscription_request.feedback_result.get("imei_number", []) + purchase_date = subscription_request.feedback_result.get("purchase_date", []) + retailer = subscription_request.feedback_result.get("retailername", "") + elif subscription_request.predict_result is not None: + if subscription_request.predict_result.get("status", 404) == 200: + imeis = subscription_request.predict_result.get("content", {}).get("document", [])[0].get("content", [])[3].get("value", []) + purchase_date = subscription_request.predict_result.get("content", {}).get("document", [])[0].get("content", [])[2].get("value", []) + retailer = subscription_request.predict_result.get("content", {}).get("document", [])[0].get("content", [])[0].get("value", []) + except Exception as e: + print(f"[ERROR]: {e}") + print(f"[ERROR]: {subscription_request}") + data.append({ + 'RequestID': subscription_request.request_id, + 'RedemptionID': subscription_request.redemption_id, + 'IMEIs': imeis, + 'Purchase Date': purchase_date, + 'Retailer': retailer, + 'Reviewed result': subscription_request.reviewed_result, + 'Feedback result': subscription_request.feedback_result, + 'Client Request Time (ms)': subscription_request.client_request_time, + 'Server Processing Time (ms)': subscription_request.preprocessing_time + subscription_request.ai_inference_time, + 'Is Reviewed': subscription_request.is_reviewed, + # 'Is Bad Quality': subscription_request.is_bad_image_quality, + 'created_at': subscription_request.created_at.isoformat() + }) + + response = { + 'subscription_requests': data + } + + return JsonResponse(response) + + elif request.method == 'POST': + data = request.data + + base_query = Q(request_id=request_id) + + subscription_request = SubscriptionRequest.objects.filter(base_query).first() + + try: + subscription_request.reviewed_result = data['reviewed_result'] + except Exception as e: + print(f"[ERROR]: {e}") + print(f"[ERROR]: {subscription_request}") + + return JsonResponse({'message': 'success.'}, status=200) + else: + return JsonResponse({'error': 'Invalid request method.'}, status=405) \ No newline at end of file diff --git a/cope2n-api/fwd_api/api_router.py b/cope2n-api/fwd_api/api_router.py index 9a466dc..322cc20 100755 --- a/cope2n-api/fwd_api/api_router.py +++ b/cope2n-api/fwd_api/api_router.py @@ -2,7 +2,7 @@ from django.conf import settings from rest_framework.routers import DefaultRouter, SimpleRouter from fwd_api.api.ctel_view import CtelViewSet -from fwd_api.api.accuracy_view import AccuracyViewSet +from fwd_api.api.accuracy_view import AccuracyViewSet, RequestViewSet from fwd_api.api.ctel_user_view import CtelUserViewSet @@ -16,6 +16,7 @@ else: router.register("ctel", CtelViewSet, basename="CtelAPI") router.register("ctel", CtelUserViewSet, 
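# A hedged sketch of exercising the endpoint above once the router entry below
# is in place; host, port, token, and the request id are all hypothetical:
import requests

BASE = "http://localhost:9000/api/ctel"
HEADERS = {"Authorization": "Bearer <token>"}

# GET the stored / extracted fields for one request
r = requests.get(f"{BASE}/request/SAP0123456789abcdef/", headers=HEADERS)
print(r.json()["subscription_requests"])

# POST a review payload (form-encoded, as the multipart schema suggests)
r = requests.post(
    f"{BASE}/request/SAP0123456789abcdef/",
    data={"reviewed_result": '{"retailername": "X", "sold_to_party": "Y", '
                             '"purchase_date": ["2024-01-02"], "imei_number": ["123"]}'},
    headers=HEADERS,
)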
basename="CtelUserAPI") router.register("ctel", AccuracyViewSet, basename="AccuracyAPI") +router.register("ctel", RequestViewSet, basename="RequestAPI") app_name = "api" urlpatterns = router.urls From e4c439c7cd9daa6327cc40366a67cc3159671f75 Mon Sep 17 00:00:00 2001 From: daovietanh99 Date: Mon, 29 Jan 2024 17:43:10 +0700 Subject: [PATCH 17/27] api update --- cope2n-api/fwd_api/api/accuracy_view.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/cope2n-api/fwd_api/api/accuracy_view.py b/cope2n-api/fwd_api/api/accuracy_view.py index 3ed49f2..abeb682 100644 --- a/cope2n-api/fwd_api/api/accuracy_view.py +++ b/cope2n-api/fwd_api/api/accuracy_view.py @@ -10,6 +10,9 @@ from django.db.models import Q from drf_spectacular.utils import extend_schema, OpenApiParameter, OpenApiTypes # from drf_spectacular.types import OpenApiString from ..models import SubscriptionRequest +from ..exception.exceptions import RequiredFieldException + +import json class AccuracyViewSet(viewsets.ViewSet): @@ -238,7 +241,8 @@ class RequestViewSet(viewsets.ViewSet): 'Server Processing Time (ms)': subscription_request.preprocessing_time + subscription_request.ai_inference_time, 'Is Reviewed': subscription_request.is_reviewed, # 'Is Bad Quality': subscription_request.is_bad_image_quality, - 'created_at': subscription_request.created_at.isoformat() + 'created_at': subscription_request.created_at.isoformat(), + 'updated_at': subscription_request.updated_at.isoformat() }) response = { @@ -254,11 +258,14 @@ class RequestViewSet(viewsets.ViewSet): subscription_request = SubscriptionRequest.objects.filter(base_query).first() - try: - subscription_request.reviewed_result = data['reviewed_result'] - except Exception as e: - print(f"[ERROR]: {e}") - print(f"[ERROR]: {subscription_request}") + reviewed_result = json.loads(data["reviewed_result"][1:-1]) + for field in ['retailername', 'sold_to_party', 'purchase_date', 'imei_number']: + if not field in reviewed_result.keys(): + raise RequiredFieldException(excArgs=f'reviewed_result.{field}') + subscription_request.reviewed_result = reviewed_result + subscription_request.reviewed_result['request_id'] = request_id + subscription_request.is_reviewed = True + subscription_request.save() return JsonResponse({'message': 'success.'}, status=200) else: From cd88f3b95950baa4815af27e914397f72f85c7a9 Mon Sep 17 00:00:00 2001 From: daovietanh99 Date: Tue, 30 Jan 2024 16:33:42 +0700 Subject: [PATCH 18/27] convert dict to xlsx --- cope2n-api/fwd_api/utils/file.py | 120 ++++++++++++++++++++++++++----- cope2n-api/report.xlsx | Bin 0 -> 6546 bytes 2 files changed, 103 insertions(+), 17 deletions(-) create mode 100644 cope2n-api/report.xlsx diff --git a/cope2n-api/fwd_api/utils/file.py b/cope2n-api/fwd_api/utils/file.py index 92142da..44a2e9b 100644 --- a/cope2n-api/fwd_api/utils/file.py +++ b/cope2n-api/fwd_api/utils/file.py @@ -19,6 +19,9 @@ from ..celery_worker.client_connector import c_connector import imagesize import csv +from openpyxl import load_workbook +from openpyxl.styles import Font, Border, Side, PatternFill, NamedStyle + def validate_feedback_file(csv_file_path): required_columns = ['redemptionNumber', 'requestId', 'imeiNumber', 'imeiNumber2', 'Purchase Date', 'retailer', 'Sold to party', 'timetakenmilli'] missing_columns = [] @@ -327,21 +330,104 @@ def build_media_url_v2(media_id: str, user_id: int, sub_id: int, u_sync_id: str) token = image_authenticator.generate_img_token_v2(user_id, sub_id, u_sync_id) return 
f'{settings.BASE_URL}/api/ctel/v2/media/request/{media_id}/?token={token}' -def json2xlsx(input: json): - """_summary_ - Args: - input (json): - : [{ - Subs: Jan, # Subtotal name - Metadata: {num_imei: 1, - ...: ...} - Data: [{num_imei: 1, - ...: ...}] - }] - OR - input (json): - : [] - Return xlsx (object) - """ - pass \ No newline at end of file +def get_value(_dict, keys): + keys = keys.split('.') + value = _dict + for key in keys: + if not key in value.keys(): + return "-" + else: + value = value.get(key, {}) + + if value != 0: + return value + else: + return "-" + + +def dict2xlsx(input: json): + red = "FF0000" + black = "000000" + green = "E2EFDA" + yellow = "FFF2CC" + gray = "D0CECE" + font_black = Font(name="Calibri", size=11, color=black) + font_black_bold = Font(name="Calibri", size=11, color=black, bold=True) + font_red = Font(name="Calibri", size=11, color=red) + thin = Side(border_style="thin", color=black) + border = Border(left=thin, right=thin, top=thin, bottom=thin) + fill_green = PatternFill(start_color=green, end_color=green, fill_type = "solid") + fill_yellow = PatternFill(start_color=yellow, end_color=yellow, fill_type = "solid") + fill_gray = PatternFill(start_color=gray, end_color=gray, fill_type = "solid") + normal_cell = NamedStyle(name="normal_cell", font=font_black, border=border) + normal_cell_red = NamedStyle(name="normal_cell_red", font=font_red, border=border) + + wb = load_workbook(filename = 'report.xlsx') + ws = wb['Sheet1'] + + mapping = { + 'A': 'subs', + 'B': 'extraction_date', + 'C': 'num_imei', + 'D': 'num_invoice', + 'E': 'total_images', + 'F': 'images_quality.successful', + 'G': 'images_quality.successful_percent', + 'H': 'images_quality.bad', + 'I': 'images_quality.bad_percent', + 'J': 'average_accuracy_rate.imei', + 'K': 'average_accuracy_rate.purchase_date', + 'L': 'average_accuracy_rate.retailer_name', + 'M': 'average_processing_time.imei', + 'N': 'average_processing_time.invoice', + 'O': 'usage.imei', + 'P': 'usage.invoice', + } + + start_index = 5 + + for subtotal in input: + ws['A' + str(start_index)] = subtotal['subs'] + ws['A' + str(start_index)].font = font_black + ws['A' + str(start_index)].border = border + ws['A' + str(start_index)].fill = fill_gray + + ws['B' + str(start_index)] = subtotal['extraction_date'] + ws['B' + str(start_index)].font = font_black_bold + ws['B' + str(start_index)].border = border + ws['B' + str(start_index)].fill = fill_green + + ws['C' + str(start_index)].border = border + ws['D' + str(start_index)].border = border + + for key in ['E', 'F', 'G', 'H', 'I']: + ws[key + str(start_index)] = get_value(subtotal, mapping[key]) + ws[key + str(start_index)].font = font_black + ws[key + str(start_index)].border = border + ws[key + str(start_index)].fill = fill_yellow + + for key in ['J', 'K', 'L', 'M', 'N']: + ws[key + str(start_index)] = get_value(subtotal, mapping[key]) + ws[key + str(start_index)].font = font_black + ws[key + str(start_index)].border = border + ws[key + str(start_index)].fill = fill_gray + + start_index += 1 + + for record in subtotal['data']: + for key in mapping.keys(): + value = get_value(record, mapping[key]) + ws[key + str(start_index)] = value + print(type(value)) + if 'average_accuracy_rate' in mapping[key] and type(value) in [int, float] and value < 95: + ws[key + str(start_index)].style = normal_cell_red + elif 'average_processing_time' in mapping[key] and type(value) in [int, float] and value > 2.0: + ws[key + str(start_index)].style = normal_cell_red + elif 'bad_percent' in mapping[key] and 
type(value) in [int, float] and value > 10: + ws[key + str(start_index)].style = normal_cell_red + else : + ws[key + str(start_index)].style = normal_cell + start_index += 1 + + return wb diff --git a/cope2n-api/report.xlsx b/cope2n-api/report.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..fc5b37d989c90238850317ab48fca2472ee7f76f GIT binary patch literal 6546 zcmaJ_1z1$=wx+v7Qc!7-5Cw*m?j8mtBxHsdnxVTvx}-ZqS{f9kLty9{S{f-4Bn2+` zpL_Ip?{V$t*)!ifd#(BQyVkejRaZhqBSXT%!a^EW5POF7o8aDjH*$nn!??L_@8$7B zil~o>LifGHV?AB#$)&+HAHs&y<-Pq<2Chs5QhBJHy*&f5^ok2HV-$Q2;Q~@=^CV{9 z!!zWx8`;|^HjxRoz$;d3UkP8)T)V0X>k^53U$nUO1cqLaRn1>#FCA< zYK~Xg>bIC^9bvI}ilnZDiDgz2(6Ds#+QKMEH|zff4E41%X!XL*&Wg*; z#`+(eG$o)NJVebK%aNsbUVQdW`(w_e9szLB6_FLe6?^+4n=mB-AB2~ zqMxsaD-BR8zh_4AcE{|yt^251w&jPD%@5U&N{P~Ch^CtZ(q{c_ z#Wf1XU$4u*MxfU}YQ@z@Hs%a8p;Qe_!|f6Z3O=L9d!FVHK|8O? zy6$0cP*KJG7y-DsA?a>Ve+GJ~&Wur1@`0DHg=Sy0?x#X@XzgebH^Q|yPH0(^w{i(< zM1=N)%~#x_K$%@?aPok03GCaWKX!k?lEJW6lB=O_zWHt4$8pMB!&nA4Bid|b!4>m`j>&-8h*z)3Dnd&5O^S-7hc=Gs2)P4m zhL9l2gcT>0)om*OOo0Nein-S6hqIuLCn||x9Bq)vczZk5Q$42*kb>seB{!c!9M$I0 zNkbQ%gR$8sg>6jf;H)b~UBPkm%lPPzGE$4Z zsEM<<<;Qah*1N3ei4&K?W)hWQ6Wq5kGce1cb%Kn9WQ+Q@vqkz(%s5#MfJQN?%^`{BJZ5a$7tYp@^hz(lId@Q@^#Q; zmoV0HP>Q%K&!x&G6KR1w!?PW(YlqK)!3j?iZCiOf6Ub}FQGE88X^V-g$)l*Kky&*n z_!}f)*AKUS+NRx|*x)C??(eS{YE|{p0B5S47Uz*K_`OF-%$ll9kw5JvHZF zavbfbb@T-iL21VMj*;T4d->w!$8ZBn(+1=4*nmYgEqTX&r$Zmj{jt;|?obF-{xQYl zso`?;gpr9B!i1^;g0sEroyDs*`3*ZI4Ep24fpwPK7{ijym|(ccNmE#Vi!uCvVhrZw zZViFmM%S+HNOU@Y%zLp?+-FfzG6b7La)P%XYNQPkex+SE!f$=6=@ZS{3u4d8husCPA5@TdTwqQ^D731kEjKz6&TtQ%$kvxm|O(#g((&oH-|{_yKmLw%kkKe z7c=k89HKOYqyzx+LWRm@w!u5%!HJL3XuU5)le>*kX?M9yni9X!>PN<1~_>u9;KD$IEewUu6pxg^c5E~8wKy!$~#478j4W z1Vzdx1r}8;sM?w|HJBrL_P#o{pjvvko@{R)X*VH;{23M2CeE@?Y4e>01p7o+nhd-< z08_?1f`TDT`LWG*E;)V$GR9BltGJzbx``!%V@&{D9L^MF8qgG?vLr%OIqZ=v?uaEP z4#PorCY>E3Tg$|%WM)pVC13Fudax$1bg!_?X$`=_ zBBja(RDBmP5i<2WIdzM?jj4PjZ-r(Mtxvk)smnSbKuWIY1+DL^3J`JfrB6XU(El-H zG!QddT<#zV`^^V=6(fN4UZI1s$ZL0sXD@8J9$h*OX^OP-s=!%0?mxT;`X&JmhACto zImL8604@lsl`0*MZyPW}p;=~7*31nCGjDQJV%WjVyYQy}y`%@aeH9lI(!`$p%~z7Ofxv%j24Yz^&Z`XtrEPYQR_eK9U86I@`bGahNR z6NJq0Z?gvn8IV!G(K#t^}eT!OV;Mym@{W)$GOBRvU; z_LTi*%ez!!FOIgeC2v}Vwn#Ec1!#XBMS0&zcn>iN7wKgvilRh5H?+GQ$nFk*R{Y#PvKD_cK#;I=Rs z&N{FI??&he-uyzM=56gTkfCn49v70kkLk~uukb>0(EOzmBbLa}I{dZDqXCC_>AR{v zMisj?HkGkG+pYXXF`zJ;f@8(ns3R{CE6D+Opbll4`RpAz({(`o%t#8h1|MYo^kW(* zoqramp25UIN{chi>Iv3E1on83$kTez@`Z_oELl%k>xoU6UX)AcYjMlB_kB)8*SL=D zT&Jhz53hTcTYh$UW&Ev58#fgQO!K!4LHVn5fmuKxPB8ADA3sachi9{n`2eDuWRc^R zQgoNccxT$wligu?&Z|_xtO&YD7Hb1SdL%im@X<$q`NhNO&eSwzx)%Uw=AIbm{mr(i zBA(&;6s90qsd-+#cOeWh=Zt!U-}g@vhFIPu*^`~I<`C0;83K&7L|;+wuzD)KGpXfG~bGz$sL^G?cSSskfj*5fsp@XUT5WX|A10uGn|GF|8V$= z?(x8aM3&mX6(4hduvUY#Rv%Zd zbM;kn91=X!M|vy511&v{8H-~~NL_CQsZ-8YM_O6b35ip?ze3++>s>~1wR~t0=|B|r#2HRn zKh?Tuxjlnh_&GBah3FHsj{O8C_*^D919+qiWK$AjcbP;jj6or3=G0(J4UQF%eL=CV zu}O{M6j5XGI!PbBx=HUFP+J-=iJWwNVml8&+GLh9-E=3XgK+r}U)qeSVnxrk$3>|y zhfC+#8-k8{>luP9J3SyiR6VVc}3 z0Q|#VM{6lBdbuc{!Z9LIiIlHO56;rRn(HzRzmLz2-Ned9v;+DIGvC+J7|cYQ%*ws< zQcJVl8#{wpx;Q3hGACDz2=T*R8+X>Z-m7q_MGjW!ew0HP?j5cUv02rH%gjPmk2aZ( z=!(`T9!e@sqU+S-I;@TOH7oeQ+2h+Y9&l0~-#8JD0uCQs2pB0b`islR^P zThCsN6A4-}K|PTi+-1N$ycq)0gU53R)WW=Y zSQzWfeztPu{!Qe<4jnv+(nQ8=HWwaEpE~`YZ zSCj)p}DOyi{a)(T$&&q4~9%vG<;u<3;_VMWL-_U9BT$D~ER z^I+eVqau~lfvutRufbzRigR;CeAJzh(q6>c!ZJbl5>FdRf z0^oE5mQz56e)TC1%M+LbWlFqK)+((I_htz>^F2qM53@S2?tGkqM8NCbuhG+15GHm^ z+EOEWvwE_OW!Ng`(jlSDQFWAEDf4j@4_h?j6-gysY{U0Yf7Ji5pW>6KG9Hala~-+F zki+f0L&qH{UR_8N_om(bG7$k6XuzGE1 zt95P+tNUS!kttc4#WxQ&OIU@!E}Yc1Uv+2m>CbV4(iJ5i9SI3e{c{+^(?t(OV6ng9 
zKw&2jQ|&eF?A4<3)maf@b$ZhExxJK@>NE|M`7+2mZZN3-tju{gp{b^9-)5cSbTemDJ2?m31roijJ zPJzTXZUNhwsyo`*!?;cD93i(|dX#3rZTk(ea>DUok#xg9r#k*#9eL3M5XyLoz?5#l z>~xj2)YASoBja$Al%25f3L<&zOhan{x%PA2fI3-FFY2c+0sXvE>EGL1K<`lZN3z6^4!|<1y;I90K<_m0O4Hs`(;g@k zn}2R`%4(g)Y!R>e^Jv`s4p-?ML2Tb7>psvGUpD?5LN8Qxz=Xp7`1Kmz7?LV zQ1a>nek<%EH&P{K5ByIOV6`14x#pXtD5UmG_#91 z*`*i#Ht60SSl2&#e$d>yC^2H20(|45*}sxIM8CVp#NPhaLz%HEcI`ZPA^R8Ppw2o4 z;VRACCl+J1q@ymI!?vKL##lBZMNPD8ALJ)XBH7k0lcpDirGX<|bdp+wE^oTY*;{c~ zUd_8~*7$PhYjt*(ug>Uh4ii7ntHL}d&IpYq>wG|Jy)(?k)WXOZewUKYA<0HtmI6<$ z2~U5yTLat8`30{rse4&TNb7z7SND{B(erQ}n$)g?6S6RQhO{2yREYPG#r&Y zlN>PpR5EU``|sr8Ey1BLWoPRI zv2`-ia<_-Tp5H?EK?|=@fh}8^rb`VZp!;zA z!sw}C-egNu9FD?z&cjnE2b40q8fO#I;w#gnO_!+Vy>zeR2Z18Lm6u~n>-HGoCW^`88TJGdEz^j9Y4EN<{FTW#@sfpz>d?h}XUO$1 z8q`ce#4`eSZ6|PgePILjz_c`~0|fr;MrzdJAzp;>vel#M8rPaE?tFEeC@BKz0Qf$6 ztT8n-`!>42$3%HRZama`QybKkkWt8xezD)b)4#Xu_kYT7T=<`gzw@ED?C>w4y7{aB z&K3V@=XZ|mmOcF?5;uqO#?J3t>YvKLlOMNH_Dh`6ek%V-s{Lu;_u1%{Bl#r=jGqR6 zF(!Yi4*XVaZ-=K}a)I;jjsKrf>Q5`b7l+%*{7Y7E(#_55!~U$(e_HrGcik3|U*bvf z|CE$J)qg+2w+Zf-(B16k-xcX!>F!VM-$Uy^9iV;Fg#15;UR??O23RB{oSVz<#=$@C H+^+rysSZ9~ literal 0 HcmV?d00001 From 75819d97807f76b21e1e5f37c77f2a07c0219cf3 Mon Sep 17 00:00:00 2001 From: daovietanh99 Date: Tue, 30 Jan 2024 16:34:30 +0700 Subject: [PATCH 19/27] convert dict to xlxs --- cope2n-api/fwd_api/utils/file.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cope2n-api/fwd_api/utils/file.py b/cope2n-api/fwd_api/utils/file.py index 44a2e9b..266a038 100644 --- a/cope2n-api/fwd_api/utils/file.py +++ b/cope2n-api/fwd_api/utils/file.py @@ -419,7 +419,6 @@ def dict2xlsx(input: json): for key in mapping.keys(): value = get_value(record, mapping[key]) ws[key + str(start_index)] = value - print(type(value)) if 'average_accuracy_rate' in mapping[key] and type(value) in [int, float] and value < 95: ws[key + str(start_index)].style = normal_cell_red elif 'average_processing_time' in mapping[key] and type(value) in [int, float] and value > 2.0: From 34cd161c41f4135181212b3bd9608e8454b62ba1 Mon Sep 17 00:00:00 2001 From: daovietanh99 Date: Tue, 30 Jan 2024 17:38:05 +0700 Subject: [PATCH 20/27] convert dict to xlsx --- cope2n-api/fwd_api/utils/file.py | 132 ++++++++++++++++++------------- cope2n-api/report_detail.xlsx | Bin 0 -> 6182 bytes 2 files changed, 77 insertions(+), 55 deletions(-) create mode 100644 cope2n-api/report_detail.xlsx diff --git a/cope2n-api/fwd_api/utils/file.py b/cope2n-api/fwd_api/utils/file.py index 266a038..17d22ee 100644 --- a/cope2n-api/fwd_api/utils/file.py +++ b/cope2n-api/fwd_api/utils/file.py @@ -346,7 +346,7 @@ def get_value(_dict, keys): return "-" -def dict2xlsx(input: json): +def dict2xlsx(input: json, _type='report'): red = "FF0000" black = "000000" green = "E2EFDA" @@ -366,67 +366,89 @@ def dict2xlsx(input: json): wb = load_workbook(filename = 'report.xlsx') ws = wb['Sheet1'] - mapping = { - 'A': 'subs', - 'B': 'extraction_date', - 'C': 'num_imei', - 'D': 'num_invoice', - 'E': 'total_images', - 'F': 'images_quality.successful', - 'G': 'images_quality.successful_percent', - 'H': 'images_quality.bad', - 'I': 'images_quality.bad_percent', - 'J': 'average_accuracy_rate.imei', - 'K': 'average_accuracy_rate.purchase_date', - 'L': 'average_accuracy_rate.retailer_name', - 'M': 'average_processing_time.imei', - 'N': 'average_processing_time.invoice', - 'O': 'usage.imei', - 'P': 'usage.invoice', - } + if _type == 'report': + wb = load_workbook(filename = 'report.xlsx') + ws = wb['Sheet1'] + mapping = { + 'A': 'subs', + 'B': 'extraction_date', + 'C': 'num_imei', + 
'D': 'num_invoice', + 'E': 'total_images', + 'F': 'images_quality.successful', + 'G': 'images_quality.successful_percent', + 'H': 'images_quality.bad', + 'I': 'images_quality.bad_percent', + 'J': 'average_accuracy_rate.imei', + 'K': 'average_accuracy_rate.purchase_date', + 'L': 'average_accuracy_rate.retailer_name', + 'M': 'average_processing_time.imei', + 'N': 'average_processing_time.invoice', + 'O': 'usage.imei', + 'P': 'usage.invoice', + } + start_index = 5 - start_index = 5 + elif _type == 'report_detail': + wb = load_workbook(filename = 'report_detail.xlsx') + ws = wb['Sheet1'] + mapping = { + 'A': 'request_id', + 'B': 'redemption_number', + 'C': 'image_type', + 'D': 'imei_user_submitted', + 'E': "imei_ocr_retrieved", + 'F': "imei1_accuracy", + 'G': "purchase_date_user_submitted", + 'H': "purchase_date_ocr_retrieved", + 'I': "purchase_date_accuracy", + 'J': "retailer_user_submitted", + 'K': "retailer_ocr_retrieved", + 'L': "retailer_accuracy", + 'M': "average_accuracy", + 'N': "ocr_processing_time", + 'O': "is_reviewed", + 'P': "bad_image_reasons", + 'R': "countermeasures", + } + start_index = 4 for subtotal in input: - ws['A' + str(start_index)] = subtotal['subs'] - ws['A' + str(start_index)].font = font_black - ws['A' + str(start_index)].border = border - ws['A' + str(start_index)].fill = fill_gray - - ws['B' + str(start_index)] = subtotal['extraction_date'] - ws['B' + str(start_index)].font = font_black_bold - ws['B' + str(start_index)].border = border - ws['B' + str(start_index)].fill = fill_green - - ws['C' + str(start_index)].border = border - ws['D' + str(start_index)].border = border - - for key in ['E', 'F', 'G', 'H', 'I']: - ws[key + str(start_index)] = get_value(subtotal, mapping[key]) - ws[key + str(start_index)].font = font_black + for key_index, key in enumerate(mapping.keys()): + value = get_value(subtotal, mapping[key]) + ws[key + str(start_index)] = value ws[key + str(start_index)].border = border - ws[key + str(start_index)].fill = fill_yellow - for key in ['J', 'K', 'L', 'M', 'N']: - ws[key + str(start_index)] = get_value(subtotal, mapping[key]) - ws[key + str(start_index)].font = font_black - ws[key + str(start_index)].border = border - ws[key + str(start_index)].fill = fill_gray + if _type == 'report': + ws[key + str(start_index)].font = font_black_bold + if key_index == 0 or (key_index >= 9 and key_index <= 15): + ws[key + str(start_index)].fill = fill_gray + elif key_index >= 4 and key_index <= 8: + ws[key + str(start_index)].fill = fill_yellow + elif _type == 'report_detail': + if 'accuracy' in mapping[key] and type(value) in [int, float] and value < 95: + ws[key + str(start_index)].style = normal_cell_red + elif 'time' in mapping[key] and type(value) in [int, float] and value > 2.0: + ws[key + str(start_index)].style = normal_cell_red + else: + ws[key + str(start_index)].style = normal_cell start_index += 1 - for record in subtotal['data']: - for key in mapping.keys(): - value = get_value(record, mapping[key]) - ws[key + str(start_index)] = value - if 'average_accuracy_rate' in mapping[key] and type(value) in [int, float] and value < 95: - ws[key + str(start_index)].style = normal_cell_red - elif 'average_processing_time' in mapping[key] and type(value) in [int, float] and value > 2.0: - ws[key + str(start_index)].style = normal_cell_red - elif 'bad_percent' in mapping[key] and type(value) in [int, float] and value > 10: - ws[key + str(start_index)].style = normal_cell_red - else : - ws[key + str(start_index)].style = normal_cell - start_index += 1 + if 
'data' in subtotal.keys(): + for record in subtotal['data']: + for key in mapping.keys(): + value = get_value(record, mapping[key]) + ws[key + str(start_index)] = value + if 'average_accuracy_rate' in mapping[key] and type(value) in [int, float] and value < 95: + ws[key + str(start_index)].style = normal_cell_red + elif 'average_processing_time' in mapping[key] and type(value) in [int, float] and value > 2.0: + ws[key + str(start_index)].style = normal_cell_red + elif 'bad_percent' in mapping[key] and type(value) in [int, float] and value > 10: + ws[key + str(start_index)].style = normal_cell_red + else : + ws[key + str(start_index)].style = normal_cell + + start_index += 1 return wb diff --git a/cope2n-api/report_detail.xlsx b/cope2n-api/report_detail.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..3df13d8d91d576538b183207c12c63de1f61f725 GIT binary patch literal 6182 zcmaJ_1z3}7`v+lC0;54elx{|McM79Hgh`HOV<6qqjdY5LfYKpHaDbqsbdHjckQ5{Z z{_LD{ee(JJ&$-|0diFlMc0cdl-`zhQ4J>SO3|w4Xj0$~eV~lG;aCL71vvGp+@}So@ z$ph+Ge87ku|L8)Mvc!E9Ckpp3o?h)5=g2I21;ifdY0jFX@iu-`ADkTpwc z`y)C>C0g0F%#<@W{wK9>Q9jYpw>vG4M*WPR4b_>5t$NBvJDd*gCA(nO+yPBK)NMC@ zB)(`_OxqM;J!|b7=4@_(b!okJ7+HEaXFqoqXBcr5mR?o*O*z7A`Oyt!v7tqy=ZV}$ zY|6X>%_I*76!i=WZfn1D;`KW5)y8ETH>+NVbkBXOUmKf&L`XJ8z;b2CR>LYT;9_ra zl@l(SleN}+?Z;Y&SnXvnbTn{qZOcL$7p`7g;s(am{-1!kef5hcPP~6%FH9}QmelN+Bmc6R)i-9^n zC5~1m@!If&JODCH_+e)+4eMyTR(ea=SWj!YcfV&>mJT#~UNznxBL!tTip#`_#SRurU!dPVWcBY0x;8{wq$zf8lf;p*X#67ZeECa>R)} zXQMXfa3>ARC_WKm7Fg1qnXF9d7){=_m-hT}j>M*zeOiH7Sd3u4InF2`}5uFbg6 zpzpa{m!wV;PuO(lCdz7KAj2-HJVfKoJzJ`(P1_$X-ov)}(3YdMmKwftV73x)vNeeG=_+{&%{D(o3gi5<2&XI zpI1B8bA^YMRNPMx>9>1CwiVW!11L9|wunpL^9!^G_r#lY7XuvXhD&&-ytHz$WZ;EtLpz;%%4ayl@^HLHcZm{JJEUrxYY!WoXI!7~)$E zQMj|^h>P7AbL4hldoq%QG70qQAslUqH7)a)qtc&R$7c5+H?&Ts&JHgKYT{KUs*Wi1 zgl8dTL8Q;$^xHSoweuaH_2 z&Dz1zi;m_{627y&0P!4_+Vw&DilQWXrAD&L5B8FKBPhY*Hs);WybC51u~EQza{PNG zh50V5R20FR!x=TFEjB>v*tvwQTxH}KFWP7NP@MWlm>3u?SpODVWPkaLhrNxn4ey^T zA3CyTpP9R&?vX#;IpPE!ak|eas*#sur7698RTtyYBGWec64MFOIXzJ3miA(0>4+e7 z!;g$ea;G;K{4|1TNdr< z@b5M$TR{ZXjjV0kaWXI10S~-fhH>t`t5Y&~d;c&#{TUFUcsUb%D}h;GM7|O)?^EsR zMgh$Awvm}gszH`z5iC}AdAmsV&0+sTs8yq7bYjRnyS^%{*JD2byfcz{z#CyhQ*=lr zFfsTBkTNueBu=UBBRbi>+?>B?Q(bk{xJiGw-}jXj?PIvgIb)1hIcWm--+YYlFCT+@ zcstp^(cZOXG8CUJO71^jDH|{^FaH#eqR=7Jrk=OFELZ)R;n_kEX8A|ukILBkRks5_^o3dKraJ+~eT3$oZHwoUnvtaLq*IS%APohLiy6`axB@tN^ zGE|!{Q9$TE8i0^iV90$^CJ`<)yfs1VNDO$*Ck9>m16~?MHQ2&gv?Fxy7?a)Updrgw z<6fI@eC$OH{&z#|dKPU450Ph{nq3*5^!5t0LOQq|P>ws=mp`94EYIkgXijTQ>{$4u z)8-j|dv~dAPt(=})@6wWZSTU6SkTkmT~zkLqo%2WU?#Pdv6K8`+9HIASdY|F(?gCA zq7#-NF|nu2MTvg)o&`ZKlq?0Ms(3v>A4%lG6Ct8JoCvDdx(*q@*J;F9DtJSAyfF(N zy?UR$nbRkLF3{BKbQW%Fs8Hz!y)~F$vCCKK*f>XEN$vip#r2J1a|IWEj-vCT!%-2- zkwm?o)RYR#`Y&1&Hnu)mZfcHoYsYBQ3xh+Y)SoPh9Su5cklwFa9xP~P z9xMbi4(c{r-EiOqk5Cb1t3YYV5};N zZ0oP(&`!_wG57uZZ|NuNzCVjA_dYF_elN{)O}(gX85tOE5G!$((n9s*G`ODA9Qb|a zhsye8)wF#-*ZX=nCUT-BX80I`#ZV>Jbh9;o0~D!Q(;#EX02<>yV!sbT5Jp^-zoThu zB+WrKBUQj4Mu75gFkG>Aq^l|4U3kFVwz@%Jt|jV-(p|`ckvT{AAgWs|Gr3Y*YS~CmG$qZMFHP4|8QSv!-n?6ptMNZhZUB*nWiQ0rX_)w>1HVbWc7y5yZWoN6%Y+DBMs zkF0WZ2!nl@raTTAd;tn%nl9DC;=LB?n+!-X?p#W%_lkM#-!CxOVbJQ&qkqr~R7?UPIXJsEUP zXw7T|PhHiV8Jd5~3Dm!O6}Y{PjR&0f&yPQg&l@9i__R3CABk*Vijbat+$U06n4+OR zVs-;=ZTa(HH+hvyxWjm?S=F zt%{trij@B@BVK~vPGqVT`C9tpje|dClgJp8C!cw#M#hy&m*44a-_KR@xh`#0VCS4BMIua} zSka)&D6#Xxu_l%zuxI$FH8%t-As;2V9=6p~a0$TKzL` z198%|-KN7O0UzkL$n(Vv_IcIC75^dAe-?Qb}ns*Z?@`)|0x;B0)mjMrD#&G8OSUtA|1c`BJZ)wIG+i6Cx3+&$V z90_7_CuN`g3T5IWxB?& z^Gp(-aN>*`k8t$sgK<*w^$0AtgI~!9a(J_=hjUnJZ`jG#h$i%(&It`rbwy>IDN+lCE)g$>g*x2QCB!A>P0 
zbb4TTJww#jyLKuBL?*~xr!ot1J=80awbEpq-pr6x7M+r9a4QfCw*Fx(QmgloSp|D~HlE>fT^uh!Surg% z5}@#~c>QefCncBRexZ)PL|YFp6&Qg+Q0}CP?B?P%Hw#*%EwV7XDU>Qk-_GHd*1#Ko zhg}D9;I3CTlx+uYq^kA@64ioSLl#E9_!ad_QDWORXCf8cPa%ae^XEMoKX-9MpH+>6 zt_m78bD+$El&01UmqQL}#9|^3{!%xVB}2waKg;;^sO4pUlVnL2t7O_Gpzjg~ot-YB zIwHo{7#M1|{w+I^|J||JLtr-6dLA&Si`}1%OKz&xgAbrvPj`Msvw5Y(VHK2a2?{5A zE+|ms=%!@}fm2c^X*TDV2r;Jt#S+ur^k?gRsH0< zpH~1kAo(zEJ0t?Ujj{4toD--9+Ne(D)|D~9*CNnfVmnwkS>aYJiTdzXcT#1LumNLP z9Km8 zS9bX4sh{M^2G*`tIxtr^IIoo}%m&>##(~XULqO=x@t%JK802uf)!NK2tDDrkVe<{A z(dNx!X!#n_0#mN>q^i+(TTJSxynE63*&JpaNxdcsp8Rs~GJ?lC>3Mqpa6aRJF$>&q zoD}SFBb0K=yq#jD{;8cg=b&NGUQUu=YB~AVYXfqt4_-QaWTN37%MIfcwL}{?I6oqd zF-7q|uvfI!ZBZ6{dUf>CtHrMhJgh&~$^*atCHjU45Iu*etT~c2j= z_KOi62_FbEhr4r^MqLqJv;9=+gz<}8w{^7_!GP>3?>BA! zm(zM_eUr~5P7P%P0%3PWUwt{&>_MSq+mV*Z4ZosYiUR5KHFy{%|%RRiA zL&mt)PQyvk(*xkeNy5~hgoA9^#}Vr*6QTZh(hLDgZu7{1%B*mC0p|;&W}+v`I{5tR%gY%Ekm6|^7@}WUwx$DXvJrJHtVrg z8_2o(Dl*EfyS{d?5KFPn+UtN(l=wb-q!P>IWTCTqfSfA18Ax(*rlCpT56dS#Hj_Y+ zc=gtDvJ}khE`Ni1>_fcQ*e0h0z}%=9^HW}Zarb84inGuGNo8NqqkZhyWXI`u9f-f&MJ;$flh z?PdcvM?=@5-~XT;M2_4y;%doyv(QFe+t!v)lpJZNhiZINQ?F$PyzsRkAnm{-jZ7E% z=8LRsTv;hnLNURsv>gl*<;Gd@*eb&H4dfoN5U&f-5c-~2LtB|A%OA=)?DP{B$4?CM zr$e#uIg1;(_m3So9jNnb@YigRfl5t=cT*quFWmd^lSp0I(GP}f{q~HfT@Chbg8&SG zd=yL;NU*tGR!3%6=E$4{gRPvT zZw$f8X6WSCxmBz^tMa%H2G(DmkXayoWJ_*FwfZz&5;&13oi2|8(_%38^xmb9yv>Zr zM_cJfLY$Aym4&(hXe(jfAjkN{$X=%;(TwbW%Qb%Xcg5=*9h$NHB{Wx$`e**~_j9iE zHfTogm&jcOT`Ca{b w2%{6+FR3BDrv7)j`(69GYyGzYHm Date: Wed, 31 Jan 2024 09:00:04 +0700 Subject: [PATCH 21/27] update dict2xlsx --- cope2n-api/fwd_api/utils/file.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cope2n-api/fwd_api/utils/file.py b/cope2n-api/fwd_api/utils/file.py index 17d22ee..04eb5ce 100644 --- a/cope2n-api/fwd_api/utils/file.py +++ b/cope2n-api/fwd_api/utils/file.py @@ -363,9 +363,6 @@ def dict2xlsx(input: json, _type='report'): normal_cell = NamedStyle(name="normal_cell", font=font_black, border=border) normal_cell_red = NamedStyle(name="normal_cell_red", font=font_red, border=border) - wb = load_workbook(filename = 'report.xlsx') - ws = wb['Sheet1'] - if _type == 'report': wb = load_workbook(filename = 'report.xlsx') ws = wb['Sheet1'] @@ -409,7 +406,7 @@ def dict2xlsx(input: json, _type='report'): 'N': "ocr_processing_time", 'O': "is_reviewed", 'P': "bad_image_reasons", - 'R': "countermeasures", + 'Q': "countermeasures", } start_index = 4 From d17b1c741be02c315323c4c341ed4100bdd2ecec Mon Sep 17 00:00:00 2001 From: daovietanh99 Date: Wed, 31 Jan 2024 09:03:12 +0700 Subject: [PATCH 22/27] update report_detail.xlsx --- cope2n-api/report_detail.xlsx | Bin 6182 -> 6209 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/cope2n-api/report_detail.xlsx b/cope2n-api/report_detail.xlsx index 3df13d8d91d576538b183207c12c63de1f61f725..aa552cc235a564a0164573d7f340e3791b9ebb5c 100644 GIT binary patch delta 2094 zcmZ8ic{CJi8y`%TD>25*h(ScwF}53HK5ItF&@>@qxl%}3LP(UkW4pGx#@J2SB3Uwa zxzJ}H>JwZFKN4$hP40tA1km+IGQ$f3?pG+=zbF%HFIwF!&=YB1uE^D6T;n^u z(|hTh`vZTizbvR0T4>uaiix-_@UpzC;@0|DG9WUq4u4?;?p?YbGXC8mAKm`>bBaS4 zOKeOb$%zD7dt_-)lUSC@7r zD7rB=n`XAJD!P~}2trY;5oGK1WUt8+M4nj-ksY<@p1ZZ&00FCLwz`7EQ06w5hI{&t z7+u;j7~-sv$4Pmk!RaJN!=}S>-0L_p0p2Odxf8PCQ!FB}qT9~b|F=(El+>VziwU`y zU9*}@VQ0CCmnFjrS075*CH_W&`eCyrcQ1*QYxz}K9;17~d$REPi#$boDSFop>y?S} zhZKd8vuXWV$LN|W#Wu3PqhG3Wq~(4a&P=9oEA*r$K#IheU%gdrhAY~Pj*bkfUYR`= zs}#!;;=Q+C9{%lZ39HE*S=LvT^4FY)&(Sdai5F-bhDlJmee;p8>(OW*S=v%%+)Tiz zQ}$`Z>IE2ZV=h$rEhCzXnfaug2+9G-- zEhPH7O&Xr)5!LcC49iu)bA7ZeviptCA=+VMn>?e`Cb3VZ5SqM@pe(_`+~aIYk`kRz 
z;kc-cc_X55hk_*vZsnhS)NxXWk~#e*)pU8DiFDpJQ186bxj8ZM=}{nR!5>A0qi!yL zceb`nACOKjAZzc)IQ5=QuQp8AzWQ}Xx>==XizqOk73n3xpX_Q3!Z^$Cn z8A@bf_0Nk32IXLLA9U%@nhd@gdsfn7IJ;5?m=8zszi*DOF#Fbz3GwMDNmv<+O;Eo) zY<_1}&fKS?F+sc@lJ>^c^F1r{ZI!5Jr-roD*yhr3<2i(e>DycP z{MTM-#j$^x(Pc>Lf@5$N^raZDnIy@MTR`}Q@kzekz>`wx&btj!EyUtKhAWNAcPFJP z-9^Ba@G=SPa?Qa@Gs-mfcEI_boT8~Q1_ZNwjkiURG(>znGk|wu0YBhW$R)g|ExQ6D)|I&si4I=nWC&R=M&BL(Mdo=K zBU@>^%BZD3jVnP;y8~gAtFv3f%#jI)V#7nDR?pklYe;;k7ISZ%n3iJ1y!XCN{XAds zrOkk;WDaIjXsTGLedkyYq&bns7~r(^UHu*+7tWH8Ur;I>dc_Y203d|`fS<-#b%rqM zV0=;FAS4m0V=md|=6ENoTa-|@P_BA$fw%BlDZAB$%cOpbRv)&6GT1C#U9(U9z*PgP z#p1+8SR_`Ex*It+-TTuU@>MkoO4Un1K%ZpN^HqH&e$C-|^;3waYFkL}8Hrc65y68h z@s(5P;tT<*seXXtd^~|Khy^ND-GBXrQ~VAv;6WU17nZ-D=kO{#*g5fOah_H!~1FM2wvnJeTcj`E|Qh-V!`#vW?8P)+!pkkj5CUMQ_^~hZ6&&bpuYFeuvu3A=4Apl;YTd z{=YIW_1O;ki7ZZ77CdG1k3D_8GYxK7JS{;=D5oiWot6FeMP}f}z6@(mt}S?Q0RUs% z|EGdI;NVbRib@f0&@~RnEOQ{X9kmwdpy)gu&w&~+Gnn1yYn#@P^kfT{oWHgVGv?|2 z+l^BzJ%)6)3hmmMjI1g$QBcS6n%h5N)S0*}hP4G7FjP}Z7A|MDJ^XAds;+R?-xn9r$r5lZl@XSCtl)Ov z%V{d0&d!6{xQrQpXB({DSEv|nVupuR51kj%LMlItmg>%~9x1y+$HWh`5&ujstymD= z?+}LogX1}S&7khJ>1U&Ef(w5bH3xF?V5ts6Fb8-Hs(*maP{RX!2-Q2Xw{-3IO#iRv zDJ-bo!Icb5lBsLC;YD*pdSkS8x<*cVaE9w_QMS=0iOG7kFp?`h&#<?;q!Lp7Va5^PcDPp68qCUjz5$j6>)7OVhIs=5-5tdTlRAE*hd1 z$E$f$Be)}BxU25^9In2`o1-T!%u-azIaqbM854WN2Z#TGHdp@K;>HfW4^1(xG`9dIU?}wN0@l-Db}g z(y8xV=4JBm&Bfymr^8&@gm2f}6h7I{Zw6$HRmHQX729Qi9dT>nR;YPikh7UE~77V^<|a1Gn)%p^K@ZpCbqJ6 z+MqgS&ZzcyuL|<*I%*A-)4Ux1whO8_lSsjXV>3naQ)(&ocv=DBN^>=ei5xQK?6AMl zU7WU*FDB%DtO4uyCDxZLpX5fPZl|oYlUzt7wg5zwTyU4U4iK-=gw zk4}5&$|4sd&PK!}<+atZeH?!@Z7+9|MA-7EcUERHcez^vjNBZ5QajC)il1Qp9o4O9 z?R7r%#NLNyKbX}LJQ{Xu$tr9*L)U~jY*6ic^I)X=7p4x!_KBh?Y~)pv{Mr-=9${jf z79B5aUtLN zfjU?v;RK*wWx_CMM)6b3x&Z`-;1l(>|6SPaZVC=|ygrj_AzX+%=kVp8l74; zK+vX?$tVK)Zz+~mqiQFzdHv3VxdC@=5|uLa-RN0N!MBFH-L<*}S<1R;Yr%ura4VZM3F1jc`J)(VZ2R)gkEw`k-ZjjxlEHYiK39 z$`TDX9FH6i(r{0{mEPTVL!pPM6u=meqy-8^jkvzJE=op)b+l|Z7)p~OxXR}hcLmkLH#D5;({QZB2D)?6~tbP zXP2!@A|2W(9tJD^n-=Btbx)`5z0~YEcA@3UV~NbZj_hmmlqT-$nz(SsPxp^$Hntv= zDT%F^jiW3pyz}jFFAo;$F%W2eIg_V=H4wQt%n|WBJOk&QHY+ik|elLm?2J z(0>#Mhb8v_M;{Z!+RuoRj{w{`!ENA7%C^>l9ND_ zPV%eQELn-}(KR_o_;V4Kc+GpHt|h2cylevhDgC0}xzg0PcvdrmPi|cC5(#NhKkZ9l z;mI=>N<+w%R(|^l<2xe^_Vh70>E>Pz11Y1;N#kFJy&3v%RocAX9=$z&LUl#>r!qW6 zE!;T)1OmCfF3-Q3kt2!C|J8~Qr4DTD$3>~b8%V~HHt;M?Zv$&_M8x|3Wd81Ja1BS? 
zxFSh!6}p53xkwy%N?Ls9PuBnc1$zDA-vs|#oD7|wd_40aZ TJ_g*4l>${|P@Atx{p9@v;s2M3 From 70358eef67fa4baaba4b87b1a81f9326492d946b Mon Sep 17 00:00:00 2001 From: daovietanh99 Date: Wed, 31 Jan 2024 09:50:58 +0700 Subject: [PATCH 23/27] update file utils and report_detail.xlsx --- cope2n-api/fwd_api/utils/file.py | 21 ++++++++++++--------- cope2n-api/report_detail.xlsx | Bin 6209 -> 6335 bytes 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/cope2n-api/fwd_api/utils/file.py b/cope2n-api/fwd_api/utils/file.py index 04eb5ce..994b9dc 100644 --- a/cope2n-api/fwd_api/utils/file.py +++ b/cope2n-api/fwd_api/utils/file.py @@ -396,17 +396,20 @@ def dict2xlsx(input: json, _type='report'): 'D': 'imei_user_submitted', 'E': "imei_ocr_retrieved", 'F': "imei1_accuracy", - 'G': "purchase_date_user_submitted", - 'H': "purchase_date_ocr_retrieved", - 'I': "purchase_date_accuracy", - 'J': "retailer_user_submitted", - 'K': "retailer_ocr_retrieved", - 'L': "retailer_accuracy", - 'M': "average_accuracy", - 'N': "ocr_processing_time", + 'G': "invoice_purchase_date_consumer", + 'H': "invoice_purchase_date_ocr", + 'I': "invoice_purchase_date_accuracy", + 'J': "invoice_retailer_consumer", + 'K': "invoice_retailer_ocr", + 'L': "invoice_retailer_accuracy", + 'M': "ocr_image_accuracy", + 'N': "ocr_image_speed", 'O': "is_reviewed", 'P': "bad_image_reasons", 'Q': "countermeasures", + 'R': 'imei_revised_accuracy', + 'S': 'purchase_date_revised_accuracy', + 'T': 'retailer_revised_accuracy', } start_index = 4 @@ -425,7 +428,7 @@ def dict2xlsx(input: json, _type='report'): elif _type == 'report_detail': if 'accuracy' in mapping[key] and type(value) in [int, float] and value < 95: ws[key + str(start_index)].style = normal_cell_red - elif 'time' in mapping[key] and type(value) in [int, float] and value > 2.0: + elif 'speed' in mapping[key] and type(value) in [int, float] and value > 2.0: ws[key + str(start_index)].style = normal_cell_red else: ws[key + str(start_index)].style = normal_cell diff --git a/cope2n-api/report_detail.xlsx b/cope2n-api/report_detail.xlsx index aa552cc235a564a0164573d7f340e3791b9ebb5c..6850d8eaf52e793f77fcda5caa9e394cdfbbd189 100644 GIT binary patch delta 3258 zcmZuz2T&8*5)A=Ec>$BqJ4hgi@F>!YAkrj;fS?o&A|2@nVkm~tMF{W`DI!D!Q92kp zLTG{@QUa(DY4Yez9-=7illR~M)ef z#i(#w(6l!$VTQFZ!lhe^1Fpj5&Q5=SS~#z}eklB+ zT;=bF@P(}M=3c7}!NK{zt#*oeNpIk{#4+;pQ8cri=)_pc<#P@8HI5*ELU2atDix%|ErR=XWc&Q<)S1!O6 z**`}3xpGXsMB;i>H(|&d*8(r;>%HVj^*e2SmKMPF_FR61g#GMwLxkR>OSNU6&T37Q z&GWp)%P)$tC8L(w`X{q@Qj+8PUwf~bEc|D|RD_QnwVu`}Ep?_e%Bin=;=Ad$P|4(bT;}&gwzgsJ@=jreLgdk*db-PxotKW@C z;xx%c8sy|0GFtqaYrmK)=8aM90TVaA*sl@I#9r#H`Sr%+J3uS%%>Mj=UUametWq@B zW2r~ECjAFs3YP2a+uGdO4HT}SKC$$NkmM}*Df-fYv-vCSowQK35K|H&l9;(Sj!pc4 zne1)vC{U#I#Bb^Dczly3E2O62ISba@2j6Rp4bmO-`aB5koj&CPZW`e*bfgcqKg-f_ zAw52SlNFB|hAU$SdYqAH$lW}JM=3dmS=;dY+8%p22f7E{QF8DD4)1G0E``v|)*pHb zFaIbU{!@9is6t6~uudWMTlV(HTc&%L#I9JoCvTS8*9KC`othC~U$H9>7KAu~547<= zvle^m%p>%{@5JJi&wh!_F)cS~?&&rNtitYZg!f_ArZ=i8n-`DXb{zpMk*sVdm64|> zm;r!Kf`DWDxG-7&sZ8Pz4mn(l9qM+0CZuJv@`+}@7b{o*=jwxY%iVIlL(~)wzfYK0*7!z0dQ*fnflG@`j4}WC7)3fVbvJ@TvnEF%Nh0~NE_QE+`6{b+^GZXii&C9l zWUh~QRruwh41qgo>4IQW>+OkVhj3Uv;0W?+F=ZN*x_pmmiBYB$zB8D$g5s!@T}!uo|h=zj0&FnoG_cjk~bCQ(nD&Na{=qdXc}jm+=9I z>$5V}qFE8jep4cx>8`FIF?%mj^`DKDhWem0-KSSDPH(jxQ6=FPzt_Y(zK5FlkWNtW z4`8B&O=}+>WqGyS7M*+_l92ha$)Y7xbaG)QViowgX=f@VB>aQ0-JzwZ+5Cf6pN2F8 z3VT6#46gWt?L|x;yQw<1p&>G{LDb}a#UNRQX5pE;FX7uOgSM!!05xRBglMdy)^d|8 z4fltQH5U`H{IhvdeobtfB3sWMERm)7FtIqo?u0RT09udf`XaJb{y-pC>TvB;_hKSd zHL2F9WyHXtWXfAY!CqW7Eu#X-Stz|$0HR&See*w7*NGq+?aniNO&7*n0ukbS*Jy=M 
z9N`{hqEg*I26pWlLUeu#S}Co6p4a4ypE}&Nu{Lq^lCe}tXy>4G>i~)_oR8j!#&EHH zPxNJnAWF73>^q?%@oqZznquRxpfX!^bPwBw11_AYv^Tv z#o=};gwrA;YJsH`19TwWQXrX_nBktXt&rH~k9gdD9ph&+Czr>xa)y-TpO=oak`~Ad zE8Cre(Mf3Utks4sz*#ys%Ie!5_f*GFcb{?G?dv-sdevyZ!XA~Z8{4vK%A=d6^L2lz zRp($k=C{ZY%D>y6cS866C<_4a2J%n)`=|Tq+X0S+SbI_)htM>r7lk~Uu&Bb}R2< z*(p&5OQBePQIH@%G}Ur;igl2`D{83oPKBf z6>T+mbJx(}vUY?OQcAt8qb^SM$NEkXuUETLMG6;_jdrMhWmdwBTCiS~ax+OKOtRi9 zas6>dRam0qdmvuHC~EAJeX>j~f*LRJy>t1dnhgXO>(_RYf_t*3eFWOeo|@cPRORdv zy9RhofEH3X(6EHokuRWI8HKH|l{2;{fW^-*jvh8fYs4B&Tq1wZ)hhfWY-et9h?66* zVVAm`+*!Ju+tB z57`z-X=G=6YP=@cy>x;->*bKw z-;flgPs<(MV^W>bCHqj{yX-w%?Opl|kd*VdW@F33>l8=q+i>_g-trJ>)74e}=Y+lI zF!YoIh4m-&aY7nT*C}ovpW$=T)DF!gJpr*&d3qW6cY(Yh!8;#0V_;uax;)!tbP6*D}>|xof$#VV!VpTVTEI?mKR!B!90(*oRz|wp%I)`^NWQ$HU86 z(BnLHR?|#VgV(-TkJ&sZ$@3+NK0ovx|N8D}gYeQezrF;_@6DQ+ePFJ2P{X!GeX zAe2v^9nPn3_xAuor1GhVGpt|hQGoFT{wo_q2tPuC;q&X;;rZ`5Lt%pusVDh}zWiV| zh9e_UL_9wq(=%cgzr^2neN17 C0?fJq delta 3132 zcmZ8j2RIzc7G5<(+tn6XHbj?Y^|DuQ5ms0w2+>_VR&TLakPxD+PSof`*$_mJM2#M8 z=cjnAFXHJ`F2b@|@n}n1RKtVwPFag$3gNcYufH)o^ z-k*ph79+kKhmzV{B6gAdE*VEoP+S#O`HD1VHMAa~M>*2$Ju6kNM}r4fC>wH-El4uf z_G-U&lX(|*Q!$&Wk1%=m1t2)>9JA*|q`jS}S7<{kYg`wNriKZl5l)T8Kf61=8+e&X*;_@r;D zGlQJmwhT;#WE-!)IoEY$?Px^l^Txopc0AiioPO-=;4}B{H%_)&bazUsN#%vvIZ2kTLG4gtYCS>f-%YZ_7qmFLzpSnmTBy2< z2^O3d9F9v{jl~=EC1J4>6r7ks7o#rwwt$kBJM%i}`}3hUhV`3miJe7IV_@1$O(zIL zoYS9VDJJ*OI*RhOjB(XE2I74l6#WW$!9df`<|sb-S=*YPB)`G2wb9=WK~l4?gtNwIVlBa)OPHh8Y~@tls-br8ruMP zjWwT}ZM5gC!6S8Q>lLlI;2&Y%1URjIXr7#vzhUioO`p})+@^_w-yqW%4fRsPjrBC< zprwY;4mI`Se_~pqzA>XEEpPB->F-DN%BCk)>B3b-j!fnN)0TW$V@jfd;Iv3kj)YHB~(VpIpnkE4NKCuD-fB7Z( zUyh0Waa9mgcNghZAOmdKuF91eaJoW$HfVLn{yAFgjaHVH7{VwBtIT3VYQt^KcXWVH z1cc?)qYS1YuH^^*v!`bHh>qpuB(tCaGVAHi!>mxZ&o}Kry}ne(#|u9e2I|ArOvCy6 zcRE8lI?tLr*^!2Ck4gm~3Z9keEA-W)tSWQ6OB|r*pDJG#N zq13$YPj5L~dn|s&Ov~KO-37jR6P2Z<+9|3|-wCGdReyH~X^9f~vH~bI-jz^kh9e|% zq}O%|xd}SeCUTtG{liVBgR=obL%(mGvtgwl8-7zW>Z|G*-GP}FMJh5KY0eKSm%{SY zS}`l(oA$X!+YL+%f}(BKAVygYL#wI&5n4s7Bl*eWy@c7&ycyrr(0dBa)Lg^|dI_eG zZmQ#8|3kMDAlr^~$CZ)qZc*Xv6F^Jlgp!q4yNTG93_F&JL~!A5HoI}m{ZM8Poy=?J zR=`Rzk6P{Poi3363{?ImNs&yFjI+XP0Sq@8ADzf*%1FlbPEo-UL(V@C?}d&zxb9D- zCt`_t`jexXiWo=tU4;y%s@fGPR7d!%^D&V!?j6o)*Ree zgHXKh5PIsPru+e3FWjwJBiuNvw`!u6JZH=^HDS93VA8|2K!Gny+hr0i4{dVC3o-P$ zopkiQq+j;3c0D$p@trkHP?mW{BrjEOB6d>zBb*IWP#A~s*7rd=L{%itGwor$LvCc% z)JleVipkB4iz-aj+%#02re+!rx0B-eJ58(ZZHlW80eOP4IvA>>{M#?Oq$IHE3q#2& z+v}qu7C+>LyKTF_&CM;o@Rr>`%VHq1UfZV@y4tB@9H|8f;%89v!P}`gg;eq1_s=+5 z1pALLl?>X8Zc{IMQ&Y_ z!Zs5cC)4T!_{g?EMg-`f>56Cm8j_BnHi75piF%oN(Nz1%$1~Ws9Vo?)HhFlKgK@Hb zvV7^E2!Z3Z^p4$d4tD&vt*NFvFu2ON-!t@HpIFq&)g|eHYtjsl^|U&-9=j~XU+eNC zg&54vUm5h4Vo$X=ZwPP2lsuWLQLH?lXRooR7dDvF%`bj>LwO+0wPH4qT+q|8MRtjf zz?7M)RPbSax@0YRvDQ7dzK~za{5WcgGf2myKGgNIiWhP3+Gii@ zixqN0KrQ%MO%hCB(da6YN_T;OfBt9@rdAyFwn2YNbC{+cK#=nh<**biu1xI=Cq!ZJ z_q7t5AKt0{jx;2jDU*>%x2dA}wj@Y_rm8Iqt=q7wyUA!2F{6E+EIl9i1@unU(#D%BaS^=n_+9@M9-5Z-h52f|l+exd`V@q&c!w%=fW4q~ZY4S&{c%vmpt%i*zJb;^X z+66CHDDW>ooGmakY-+NFMpstz?5}d3e1(oZxRi9S)M>-vLQ*{O|4C}h0LN${d?NBB z9LHWUjQXw&yBno6#FrlFCFbvKgOx}IzuO(xao|NPkbymi^I^3`$~?k)zImn<3=$5?ckK4Ybs zr7yvQS>qBKm=Tw7z^q7V0#r2l`PXCR!D7(M(S?8g&%IbK4*jQWVq?L4*DpE0S_vvB+*9mG8gw7BQ}ST30uknBrj*WT=@^xSCyLp From c6884c81a8e42115a2f08733e4f1389633b4231e Mon Sep 17 00:00:00 2001 From: daovietanh99 Date: Wed, 31 Jan 2024 09:51:32 +0700 Subject: [PATCH 24/27] update file utils --- cope2n-api/fwd_api/utils/file.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cope2n-api/fwd_api/utils/file.py b/cope2n-api/fwd_api/utils/file.py index 994b9dc..bd0b4c8 100644 --- a/cope2n-api/fwd_api/utils/file.py +++ 
b/cope2n-api/fwd_api/utils/file.py @@ -423,6 +423,8 @@ def dict2xlsx(input: json, _type='report'): ws[key + str(start_index)].font = font_black_bold if key_index == 0 or (key_index >= 9 and key_index <= 15): ws[key + str(start_index)].fill = fill_gray + elif key_index == 1: + ws[key + str(start_index)].fill = fill_green elif key_index >= 4 and key_index <= 8: ws[key + str(start_index)].fill = fill_yellow elif _type == 'report_detail': From dd206c4a3ca51725a5e20be55dc1a7e531a08e0c Mon Sep 17 00:00:00 2001 From: dx-tan Date: Wed, 31 Jan 2024 10:00:18 +0700 Subject: [PATCH 25/27] Merged from vietanh99, Add APIs --- cope2n-api/Dockerfile | 30 + cope2n-api/Dockerfile.base | 17 + cope2n-api/fwd_api/api/accuracy_view.py | 519 +++++++++++++++--- .../fwd_api/celery_worker/client_connector.py | 5 +- .../celery_worker/process_report_tasks.py | 149 +++++ cope2n-api/fwd_api/celery_worker/worker.py | 4 +- .../commands/migrate-database-010224.py | 71 +++ ...bscriptionrequestfile_accuracy_and_more.py | 102 ++++ ...report_number_imei_transaction_and_more.py | 23 + ...e_reviewed_report_include_test_and_more.py | 28 + ..._alter_report_include_reviewed_and_more.py | 28 + ...uracy_report_imei_accuracy_ocr_and_more.py | 112 ++++ ...ptionrequestfile_imei_accuracy_and_more.py | 38 ++ ...es_reportfile_counter_measures_and_more.py | 226 ++++++++ ...tfile_correspond_redemption_id_and_more.py | 28 + ...transaction_report_number_imei_and_more.py | 28 + cope2n-api/fwd_api/models/Report.py | 25 +- cope2n-api/fwd_api/models/ReportFile.py | 35 ++ .../fwd_api/models/SubscriptionRequest.py | 7 +- .../fwd_api/models/SubscriptionRequestFile.py | 17 +- cope2n-api/fwd_api/models/__init__.py | 3 + cope2n-api/fwd_api/utils/accuracy.py | 417 ++++++++++++++ cope2n-api/fwd_api/utils/file.py | 27 +- .../fwd_api/utils/ocr_utils/__init__.py | 0 .../fwd_api/utils/ocr_utils/ocr_metrics.py | 385 +++++++++++++ .../fwd_api/utils/ocr_utils/sbt_report.py | 432 +++++++++++++++ .../fwd_api/utils/ocr_utils/wiki_diff.py | 201 +++++++ cope2n-api/requirements.txt | 13 +- cope2n-api/scripts/database_cloning.sh | 1 + cope2n-api/token.txt | 1 + docker-compose-dev.yml | 14 +- scripts/crawl_database_by_time.py | 52 +- scripts/database_cloning.sh | 1 + 33 files changed, 2899 insertions(+), 140 deletions(-) create mode 100644 cope2n-api/Dockerfile.base create mode 100644 cope2n-api/fwd_api/celery_worker/process_report_tasks.py create mode 100644 cope2n-api/fwd_api/management/commands/migrate-database-010224.py create mode 100644 cope2n-api/fwd_api/migrations/0167_report_remove_subscriptionrequestfile_accuracy_and_more.py create mode 100644 cope2n-api/fwd_api/migrations/0168_report_number_imei_transaction_and_more.py create mode 100644 cope2n-api/fwd_api/migrations/0169_report_include_reviewed_report_include_test_and_more.py create mode 100644 cope2n-api/fwd_api/migrations/0170_alter_report_errors_alter_report_include_reviewed_and_more.py create mode 100644 cope2n-api/fwd_api/migrations/0171_rename_imei_accuracy_report_imei_accuracy_ocr_and_more.py create mode 100644 cope2n-api/fwd_api/migrations/0172_alter_subscriptionrequestfile_imei_accuracy_and_more.py create mode 100644 cope2n-api/fwd_api/migrations/0173_rename_countermeasures_reportfile_counter_measures_and_more.py create mode 100644 cope2n-api/fwd_api/migrations/0174_reportfile_acc_reportfile_correspond_redemption_id_and_more.py create mode 100644 cope2n-api/fwd_api/migrations/0175_rename_number_ivoice_transaction_report_number_imei_and_more.py create mode 100644 
cope2n-api/fwd_api/models/ReportFile.py create mode 100644 cope2n-api/fwd_api/utils/accuracy.py create mode 100644 cope2n-api/fwd_api/utils/ocr_utils/__init__.py create mode 100644 cope2n-api/fwd_api/utils/ocr_utils/ocr_metrics.py create mode 100644 cope2n-api/fwd_api/utils/ocr_utils/sbt_report.py create mode 100644 cope2n-api/fwd_api/utils/ocr_utils/wiki_diff.py create mode 100644 cope2n-api/scripts/database_cloning.sh create mode 100644 cope2n-api/token.txt create mode 100644 scripts/database_cloning.sh diff --git a/cope2n-api/Dockerfile b/cope2n-api/Dockerfile index 18d6e3f..c841ccb 100755 --- a/cope2n-api/Dockerfile +++ b/cope2n-api/Dockerfile @@ -8,10 +8,17 @@ RUN groupadd --gid ${GID} ${USERNAME} \ && apt-get install -y sudo bash gettext poppler-utils \ && echo ${USERNAME} ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/${USERNAME} \ && chmod 0440 /etc/sudoers.d/${USERNAME} +RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y RUN yes | apt install postgresql gcc musl-dev RUN pip install --upgrade pip RUN pip install uvicorn gunicorn Celery +# For integration with sdsvkvu +RUN pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116 +RUN pip install -U openmim==0.3.7 --no-cache-dir +RUN mim install mmcv-full==1.7.2 +# End integration with sdsvkvu + USER ${UID} ADD --chown=${UID}:${GID} fwd /app COPY --chown=${UID}:${GID} requirements.txt /app @@ -21,4 +28,27 @@ RUN pip install -r requirements.txt --no-cache-dir COPY --chown=${UID}:${GID} . /app +RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsv_dewarp && pip3 install -v -e . --no-cache-dir +RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsvtd && pip3 install -v -e . --no-cache-dir +RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsvtr && pip3 install -v -e . --no-cache-dir +RUN cd /app/fwd_api/utils/sdsvkvu && pip3 install -v -e . --no-cache-dir + +# For integration with sdsvkvu +RUN python -m pip install paddlepaddle-gpu==2.4.2.post116 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html --no-cache-dir + ENV TZ="Asia/Ho_Chi_Minh" + + +# FROM cope2n-api-base AS builder +# ARG UID=1000 +# ARG GID=1000 +# ARG USERNAME=container-user + +# # Create a new user +# RUN groupadd --gid ${GID} ${USERNAME} \ +# && useradd --uid ${UID} --gid ${GID} -m ${USERNAME} \ +# && echo ${USERNAME} ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/${USERNAME} \ +# && chmod 0440 /etc/sudoers.d/${USERNAME} + +# WORKDIR /app +# COPY --chown=${UID}:${GID} . /app diff --git a/cope2n-api/Dockerfile.base b/cope2n-api/Dockerfile.base new file mode 100644 index 0000000..c13dc27 --- /dev/null +++ b/cope2n-api/Dockerfile.base @@ -0,0 +1,17 @@ +FROM python:3.9.17-buster + +RUN apt-get update \ + && apt-get install -y sudo bash gettext poppler-utils postgresql gcc musl-dev + +COPY requirements.txt /tmp +COPY ./fwd_api/utils/sdsvkvu /app/fwd_api/utils/sdsvkvu + +RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsv_dewarp && pip3 install -v -e . --no-cache-dir +RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsvtd && pip3 install -v -e . --no-cache-dir +RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsvtr && pip3 install -v -e . --no-cache-dir +RUN cd /app/fwd_api/utils/sdsvkvu && pip3 install -v -e . 
--no-cache-dir + +RUN pip install --upgrade pip && pip install uvicorn gunicorn Celery +RUN pip install -r /tmp/requirements.txt --no-cache-dir + +ENV TZ="Asia/Ho_Chi_Minh" \ No newline at end of file diff --git a/cope2n-api/fwd_api/api/accuracy_view.py b/cope2n-api/fwd_api/api/accuracy_view.py index abeb682..2159ad0 100644 --- a/cope2n-api/fwd_api/api/accuracy_view.py +++ b/cope2n-api/fwd_api/api/accuracy_view.py @@ -3,88 +3,87 @@ from rest_framework.decorators import action from rest_framework.response import Response from django.core.paginator import Paginator from django.http import JsonResponse -from datetime import datetime from django.utils import timezone from django.db.models import Q +import uuid from drf_spectacular.utils import extend_schema, OpenApiParameter, OpenApiTypes # from drf_spectacular.types import OpenApiString -from ..models import SubscriptionRequest -from ..exception.exceptions import RequiredFieldException - import json +from ..exception.exceptions import InvalidException, RequiredFieldException +from ..models import SubscriptionRequest, Report, ReportFile +from ..utils.accuracy import shadow_report, MonthReportAccumulate +from ..utils.file import validate_report_list +from ..utils.process import string_to_boolean +def first_of_list(the_list): + if not the_list: + return None + return the_list[0] class AccuracyViewSet(viewsets.ViewSet): lookup_field = "username" @extend_schema( - parameters=[ - OpenApiParameter( - name='start_date', - location=OpenApiParameter.QUERY, - description='Start date (YYYY-mm-DDTHH:MM:SS)', - type=OpenApiTypes.DATE, - default='2023-01-02T00:00:00', - ), - OpenApiParameter( - name='end_date', - location=OpenApiParameter.QUERY, - description='End date (YYYY-mm-DDTHH:MM:SS)', - type=OpenApiTypes.DATE, - default='2024-01-10T00:00:00', - ), - OpenApiParameter( - name='include_test', - location=OpenApiParameter.QUERY, - description='Whether to include test record or not', - type=OpenApiTypes.BOOL, - ), - OpenApiParameter( - name='is_reviewed', - location=OpenApiParameter.QUERY, - description='Which records to be query', - type=OpenApiTypes.STR, - enum=['reviewed', 'not reviewed', 'all'], - ), - OpenApiParameter( - name='request_id', - location=OpenApiParameter.QUERY, - description='Specific request id', - type=OpenApiTypes.STR, - ), - OpenApiParameter( - name='redemption_id', - location=OpenApiParameter.QUERY, - description='Specific redemption id', - type=OpenApiTypes.STR, - ), - OpenApiParameter( - name='quality', - location=OpenApiParameter.QUERY, - description='One or more of [bad, good, all]', - type=OpenApiTypes.STR, - enum=['bad', 'good', 'all'], - ), - OpenApiParameter( - name='page', - location=OpenApiParameter.QUERY, - description='Page number', - type=OpenApiTypes.INT, - required=False - ), - OpenApiParameter( - name='page_size', - location=OpenApiParameter.QUERY, - description='Number of items per page', - type=OpenApiTypes.INT, - required=False - ), - ], - responses=None, tags=['Accuracy'] + parameters=[ + OpenApiParameter( + name='start_date', + location=OpenApiParameter.QUERY, + description='Start date (YYYY-mm-DDTHH:MM:SSZ)', + type=OpenApiTypes.DATE, + default='2023-01-02T00:00:00+0700', + ), + OpenApiParameter( + name='end_date', + location=OpenApiParameter.QUERY, + description='End date (YYYY-mm-DDTHH:MM:SSZ)', + type=OpenApiTypes.DATE, + default='2024-01-10T00:00:00+0700', + ), + OpenApiParameter( + name='include_test', + location=OpenApiParameter.QUERY, + description='Whether to include test record or not', + 
type=OpenApiTypes.BOOL, + ), + OpenApiParameter( + name='is_reviewed', + location=OpenApiParameter.QUERY, + description='Which records to query', + type=OpenApiTypes.STR, + enum=['reviewed', 'not reviewed', 'all'], + ), + OpenApiParameter( + name='request_id', + location=OpenApiParameter.QUERY, + description='Specific request id', + type=OpenApiTypes.STR, + ), + OpenApiParameter( + name='redemption_id', + location=OpenApiParameter.QUERY, + description='Specific redemption id', + type=OpenApiTypes.STR, + ), + OpenApiParameter( + name='page', + location=OpenApiParameter.QUERY, + description='Page number', + type=OpenApiTypes.INT, + required=False + ), + OpenApiParameter( + name='page_size', + location=OpenApiParameter.QUERY, + description='Number of items per page', + type=OpenApiTypes.INT, + required=False + ), + ], + responses=None, tags=['Accuracy'] ) @action(detail=False, url_path="request_list", methods=["GET"]) - def get_subscription_requests(self, request): + def get_request_list(self, request): if request.method == 'GET': start_date_str = request.GET.get('start_date') end_date_str = request.GET.get('end_date') @@ -94,14 +93,13 @@ class AccuracyViewSet(viewsets.ViewSet): redemption_id = request.GET.get('redemption_id', None) is_reviewed = request.GET.get('is_reviewed', None) include_test = request.GET.get('include_test', False) - quality = request.GET.get('quality', None) try: - start_date = datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S') - end_date = datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S') + start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z') + end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z') except ValueError: - return JsonResponse({'error': 'Invalid date format. Please use YYYY-MM-DD.'}, status=400) - + raise InvalidException(excArgs="Date format") + base_query = Q(created_at__range=(start_date, end_date)) if request_id: base_query &= Q(request_id=request_id) @@ -124,19 +122,12 @@ class AccuracyViewSet(viewsets.ViewSet): base_query &= Q(is_reviewed=False) elif is_reviewed == "all": pass - if isinstance(quality, str): - if quality == "good": - base_query &= Q(is_bad_image_quality=False) - elif quality == "bad": - base_query &= Q(is_bad_image_quality=True) - elif quality == "all": - pass subscription_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at') paginator = Paginator(subscription_requests, page_size) page = paginator.get_page(page_number) - + data = [] for request in page: imeis = [] @@ -184,7 +175,369 @@ class AccuracyViewSet(viewsets.ViewSet): return JsonResponse(response) return JsonResponse({'error': 'Invalid request method.'}, status=405)
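A side note on the strptime change above: the old format string produced naive datetimes, which cannot be compared against Django's timezone-aware created_at values; adding %z (together with the +0700 defaults) makes the parsed bounds offset-aware. A quick illustration with throwaway values:

    from datetime import datetime

    naive = datetime.strptime("2024-01-10T00:00:00", "%Y-%m-%dT%H:%M:%S")
    aware = datetime.strptime("2024-01-10T00:00:00+0700", "%Y-%m-%dT%H:%M:%S%z")

    print(naive.tzinfo)  # None
    print(aware.tzinfo)  # UTC+07:00
    # naive < aware raises TypeError: can't compare offset-naive and
    # offset-aware datetimes
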
- + + @extend_schema( + parameters=[ + OpenApiParameter( + name='is_daily_report', + location=OpenApiParameter.QUERY, + description='Whether to mark the generated report as a daily report', + type=OpenApiTypes.BOOL, + ), + OpenApiParameter( + name='start_date', + location=OpenApiParameter.QUERY, + description='Start date (YYYY-mm-DDTHH:MM:SSZ)', + type=OpenApiTypes.DATE, + default='2023-01-02T00:00:00+0700', + ), + OpenApiParameter( + name='end_date', + location=OpenApiParameter.QUERY, + description='End date (YYYY-mm-DDTHH:MM:SSZ)', + type=OpenApiTypes.DATE, + default='2024-01-10T00:00:00+0700', + ), + OpenApiParameter( + name='include_test', + location=OpenApiParameter.QUERY, + description='Whether to include test record or not', + type=OpenApiTypes.BOOL, + ), + OpenApiParameter( + name='is_reviewed', + location=OpenApiParameter.QUERY, + description='Which records to query', + type=OpenApiTypes.STR, + enum=['reviewed', 'not reviewed', 'all'], + ), + OpenApiParameter( + name='request_id', + location=OpenApiParameter.QUERY, + description='Specific request id', + type=OpenApiTypes.STR, + ), + OpenApiParameter( + name='redemption_id', + location=OpenApiParameter.QUERY, + description='Specific redemption id', + type=OpenApiTypes.STR, + ), + OpenApiParameter( + name='subsidiary', + location=OpenApiParameter.QUERY, + description='Subsidiary', + type=OpenApiTypes.STR, + ), + ], + responses=None, tags=['Accuracy'] + ) + @action(detail=False, url_path="make_report", methods=["GET"]) + def make_report(self, request): + if request.method == 'GET': + start_date_str = request.GET.get('start_date') + end_date_str = request.GET.get('end_date') + request_id = request.GET.get('request_id', None) + redemption_id = request.GET.get('redemption_id', None) + is_reviewed = string_to_boolean(request.GET.get('is_reviewed', "false")) + include_test = string_to_boolean(request.GET.get('include_test', "false")) + subsidiary = request.GET.get("subsidiary", "all") + is_daily_report = string_to_boolean(request.GET.get('is_daily_report', "false")) + + try: + start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z') + end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z') + except ValueError: + raise InvalidException(excArgs="Date format") + + query_set = {"start_date_str": start_date_str, + "end_date_str": end_date_str, + "request_id": request_id, + "redemption_id": redemption_id, + "is_reviewed": is_reviewed, + "include_test": include_test, + "subsidiary": subsidiary, + "is_daily_report": is_daily_report, + } + + report_id = "report" + "_" + timezone.datetime.now().strftime("%Y%m%d%H%M%S%z") + "_" + uuid.uuid4().hex + new_report: Report = Report( + report_id=report_id, + is_daily_report=is_daily_report, + subsidiary=subsidiary.lower().replace(" ", ""), + include_test=include_test, + include_reviewed=is_reviewed, + start_at=start_date, + end_at=end_date, + ) + new_report.save() + # Background job to calculate accuracy + shadow_report(report_id, query_set) + + return JsonResponse(status=status.HTTP_200_OK, data={"report_id": report_id})
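make_report returns immediately with a report_id while the heavy aggregation runs in a Celery worker (shadow_report dispatches the make_a_report task added later in this patch). A hypothetical client flow; the host and route prefix are assumptions, since api_router.py is not shown in this hunk:

    import requests

    BASE = "http://be-ctel-sbt:9000/api/ctel/accuracy"  # assumed prefix

    resp = requests.get(f"{BASE}/make_report", params={
        "start_date": "2024-01-01T00:00:00+0700",
        "end_date": "2024-01-31T23:59:59+0700",
        "subsidiary": "all",
    })
    report_id = resp.json()["report_id"]
    # the rows appear once the worker finishes; poll report_detail_list
    # (or report_list) with this id until they do
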
+ + @extend_schema( + parameters=[ + OpenApiParameter( + name='report_id', + location=OpenApiParameter.QUERY, + description='Specific report id', + type=OpenApiTypes.STR, + ), + OpenApiParameter( + name='page', + location=OpenApiParameter.QUERY, + description='Page number', + type=OpenApiTypes.INT, + required=False + ), + OpenApiParameter( + name='page_size', + location=OpenApiParameter.QUERY, + description='Number of items per page', + type=OpenApiTypes.INT, + required=False + ), + ], + responses=None, tags=['Accuracy'] + ) + @action(detail=False, url_path="report_detail_list", methods=["GET"]) + def get_report_detail_list(self, request): + if request.method == 'GET': + report_id = request.GET.get('report_id', None) + page_number = int(request.GET.get('page', 1)) + page_size = int(request.GET.get('page_size', 10)) + + report = Report.objects.filter(report_id=report_id).first() + report_files = ReportFile.objects.filter(report=report) + + paginator = Paginator(report_files, page_size) + page = paginator.get_page(page_number) + + data = [] + for report_file in page: + data.append({ + "Request ID": report_file.correspond_request_id, + "Redemption Number": report_file.correspond_redemption_id, + "Image type": report_file.doc_type, + "IMEI_user submitted": first_of_list(report_file.feedback_result.get("imei_number", [None])), + "IMEI_OCR retrieved": first_of_list(report_file.predict_result.get("imei_number", [None])), + "IMEI1 Accuracy": first_of_list(report_file.feedback_accuracy.get("imei_number", [None])), + "Invoice_Purchase Date_Consumer": report_file.feedback_result.get("purchase_date", None), + "Invoice_Purchase Date_OCR": report_file.predict_result.get("purchase_date", []), + "Invoice_Purchase Date Accuracy": first_of_list(report_file.feedback_accuracy.get("purchase_date", [None])), + "Invoice_Retailer_Consumer": report_file.feedback_result.get("retailername", None), + "Invoice_Retailer_OCR": report_file.predict_result.get("retailername", None), + "Invoice_Retailer Accuracy": first_of_list(report_file.feedback_accuracy.get("retailername", [None])), + "OCR Image Accuracy": report_file.acc, + "OCR Image Speed (seconds)": report_file.time_cost, + "Reviewed?": "No", + "Bad Image Reasons": report_file.bad_image_reason, + "Countermeasures": report_file.counter_measures, + "IMEI_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("imei_number", [None])), + "Purchase Date_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("purchase_date", [None])), + "Retailer_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("retailername", [None])), + }) + + response = { + 'report_detail': data, + 'page': { + 'number': page.number, + 'total_pages': page.paginator.num_pages, + 'count': page.paginator.count, + } + } + return JsonResponse(response, status=200) + + return JsonResponse({'error': 'Invalid request method.'}, status=405)
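All four endpoints in this view share the same envelope: a data list plus a page object carrying number, total_pages and count. Draining a report through that envelope looks roughly like this (same assumed base URL as in the sketch above):

    import requests

    def fetch_all_detail_rows(report_id, base):
        rows, page = [], 1
        while True:
            body = requests.get(f"{base}/report_detail_list", params={
                "report_id": report_id, "page": page, "page_size": 100,
            }).json()
            rows.extend(body["report_detail"])
            if page >= body["page"]["total_pages"]:
                return rows
            page += 1
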
+ + @extend_schema( + parameters=[ + OpenApiParameter( + name='start_date', + location=OpenApiParameter.QUERY, + description='Start date (YYYY-mm-DDTHH:MM:SSZ)', + type=OpenApiTypes.DATE, + default='2023-01-02T00:00:00+0700', + ), + OpenApiParameter( + name='end_date', + location=OpenApiParameter.QUERY, + description='End date (YYYY-mm-DDTHH:MM:SSZ)', + type=OpenApiTypes.DATE, + default='2024-01-10T00:00:00+0700', + ), + OpenApiParameter( + name='daily_report_only', + location=OpenApiParameter.QUERY, + description='Return daily reports only', + type=OpenApiTypes.BOOL, + ), + OpenApiParameter( + name='page', + location=OpenApiParameter.QUERY, + description='Page number', + type=OpenApiTypes.INT, + required=False + ), + OpenApiParameter( + name='page_size', + location=OpenApiParameter.QUERY, + description='Number of items per page', + type=OpenApiTypes.INT, + required=False + ), + ], + responses=None, tags=['Accuracy'] + ) + @action(detail=False, url_path="report_list", methods=["GET"]) + def get_report_list(self, request): + if request.method == 'GET': + daily_report_only = request.GET.get('daily_report_only', False) + start_date_str = request.GET.get('start_date', "") + end_date_str = request.GET.get('end_date', "") + page_number = int(request.GET.get('page', 1)) + page_size = int(request.GET.get('page_size', 10)) + + if not start_date_str or not end_date_str: + reports = Report.objects.all() + else: + try: + start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z') + end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z') + except ValueError: + raise InvalidException(excArgs="Date format") + base_query = Q(created_at__range=(start_date, end_date)) + if daily_report_only: + base_query &= Q(is_daily_report=True) + reports = Report.objects.filter(base_query).order_by('created_at') + + + paginator = Paginator(reports, page_size) + page = paginator.get_page(page_number) + + data = [] + for report in page: + data.append({ + "ID": report.id, + "Created Date": report.created_at, + "No. Requests": report.number_request, + "Status": report.status, + "Purchase Date Acc": report.reviewed_accuracy.get("purchase_date", None) if report.reviewed_accuracy else None, + "Retailer Acc": report.feedback_accuracy.get("retailername", None) if report.feedback_accuracy else None, + "IMEI Acc": report.feedback_accuracy.get("imei_number", None) if report.feedback_accuracy else None, + "Avg. Accuracy": report.feedback_accuracy.get("avg", None) if report.feedback_accuracy else None, + "Avg. Client Request Time": report.average_client_time.get("avg", 0) if report.average_client_time else 0, + "Avg. OCR Processing Time": report.average_OCR_time.get("avg", 0) if report.average_OCR_time else 0, + "report_id": report.report_id, + }) + + response = { + 'report_detail': data, + 'page': { + 'number': page.number, + 'total_pages': page.paginator.num_pages, + 'count': page.paginator.count, + } + } + return JsonResponse(response, status=200) + + return JsonResponse({'error': 'Invalid request method.'}, status=405)
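The overview endpoint below leans on MonthReportAccumulate from utils/accuracy.py, which is not shown in this hunk. From the call site its contract appears to be: add(report) returns falsy when the report no longer belongs to the current month, and calling the instance yields (month, rows, total). A minimal sketch of that contract, assumed rather than copied from the real class:

    class MonthAccumulator:  # illustrative stand-in for MonthReportAccumulate
        def __init__(self):
            self.month = None
            self.rows = []

        def add(self, report):
            month = report.created_at.strftime("%Y-%m")
            if self.month is None:
                self.month = month
            if month != self.month:
                return False  # caller flushes this month and starts a new one
            self.rows.append({"ID": report.id, "Created Date": report.created_at})
            return True

        def __call__(self):
            total = {"Month": self.month, "Num Reports": len(self.rows)}
            return self.month, self.rows, total
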
+ + @extend_schema( + parameters=[ + OpenApiParameter( + name='start_date', + location=OpenApiParameter.QUERY, + description='Start date (YYYY-mm-DDTHH:MM:SSZ)', + type=OpenApiTypes.DATE, + default='2023-01-02T00:00:00+0700', + ), + OpenApiParameter( + name='end_date', + location=OpenApiParameter.QUERY, + description='End date (YYYY-mm-DDTHH:MM:SSZ)', + type=OpenApiTypes.DATE, + default='2024-01-10T00:00:00+0700', + ), + OpenApiParameter( + name='subsidiary', + location=OpenApiParameter.QUERY, + description='Subsidiary', + type=OpenApiTypes.STR, + ), + OpenApiParameter( + name='page', + location=OpenApiParameter.QUERY, + description='Page number', + type=OpenApiTypes.INT, + required=False + ), + OpenApiParameter( + name='page_size', + location=OpenApiParameter.QUERY, + description='Number of items per page', + type=OpenApiTypes.INT, + required=False + ), + ], + responses=None, tags=['Accuracy'] + ) + @action(detail=False, url_path="overview", methods=["GET"]) + def overview(self, request): + if request.method == 'GET': + subsidiary = request.GET.get('subsidiary', None) + start_date_str = request.GET.get('start_date', "") + end_date_str = request.GET.get('end_date', "") + page_number = int(request.GET.get('page', 1)) + page_size = int(request.GET.get('page_size', 10)) + + + if not start_date_str or not end_date_str: + reports = Report.objects.all() + else: + try: + start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z') + end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z') + except ValueError: + raise InvalidException(excArgs="Date format") + base_query = Q(created_at__range=(start_date, end_date)) + if subsidiary: + base_query &= Q(subsidiary=subsidiary) + base_query &= Q(is_daily_report=True) + reports = Report.objects.filter(base_query).order_by('created_at') + + paginator = Paginator(reports, page_size) + page = paginator.get_page(page_number) + + data = [] + this_month_report = MonthReportAccumulate() + for report in page: + res = this_month_report.add(report) + if not res: + # month rolled over: flush the finished month, then start a new accumulator + _, _data, total = this_month_report() + data += [total] + data += _data + this_month_report = MonthReportAccumulate() + this_month_report.add(report) + _, _data, total = this_month_report() + data += [total] + data += _data + + response = { + 'overview_data': data, + 'page': { + 'number': page.number, + 'total_pages': page.paginator.num_pages, + 'count': page.paginator.count, + } + } + return JsonResponse(response, status=200) + + return JsonResponse({'error': 'Invalid request method.'}, status=405) + class RequestViewSet(viewsets.ViewSet): lookup_field = "username" @@ -269,4 +622,4 @@ class RequestViewSet(viewsets.ViewSet): return JsonResponse({'message': 'success.'}, status=200) else: - return JsonResponse({'error': 'Invalid request method.'}, status=405) \ No newline at end of file + return JsonResponse({'error': 'Invalid request method.'}, status=405) diff --git a/cope2n-api/fwd_api/celery_worker/client_connector.py b/cope2n-api/fwd_api/celery_worker/client_connector.py index 16c7dd5..5e0d59c 100755 --- a/cope2n-api/fwd_api/celery_worker/client_connector.py +++ b/cope2n-api/fwd_api/celery_worker/client_connector.py @@ -34,13 +34,16 @@ class CeleryConnector: 'upload_obj_to_s3': {'queue': "upload_obj_to_s3"}, 'remove_local_file': {'queue': "remove_local_file"}, 'csv_feedback': {'queue': "csv_feedback"}, + 'make_a_report': {'queue': "report"}, } app = Celery( 'postman', broker=settings.BROKER_URL, broker_transport_options={'confirm_publish': False}, - ) + ) + def make_a_report(self, args): + return self.send_task('make_a_report', args) def csv_feedback(self, args): return self.send_task('csv_feedback', args) def do_pdf(self, args): diff --git a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py new file mode 100644 index 0000000..c5b2a86 --- /dev/null +++ b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py @@ -0,0 +1,149 @@ +import time +import uuid +import os +import base64 +import traceback +from multiprocessing.pool import ThreadPool + +from fwd_api.models import SubscriptionRequest, UserProfile +from fwd_api.celery_worker.worker import app +from ..constant.common import FolderFileType, image_extensions +from ..exception.exceptions import FileContentInvalidException +from fwd_api.models import SubscriptionRequestFile, FeedbackRequest, Report +from ..utils import file as FileUtils +from ..utils import process as ProcessUtil +from ..utils import s3 as S3Util +from ..utils.accuracy import update_temp_accuracy, IterAvg, calculate_and_save_subcription_file +from fwd_api.constant.common import ProcessType +from django.utils import timezone +from django.db.models import Q +import csv +import json + +from celery.utils.log import get_task_logger +from fwd import settings + + +logger = get_task_logger(__name__) + +s3_client = S3Util.MinioS3Client( + endpoint=settings.S3_ENDPOINT, + access_key=settings.S3_ACCESS_KEY, + secret_key=settings.S3_SECRET_KEY, + bucket_name=settings.S3_BUCKET_NAME +) + +def mean_list(l): + l = [x for x in l if x is not None] + if len(l) == 0: + return 0 + return sum(l)/len(l) + +@app.task(name='make_a_report') +def make_a_report(report_id, query_set): + try: + start_date = timezone.datetime.strptime(query_set["start_date_str"], '%Y-%m-%dT%H:%M:%S%z') + end_date = timezone.datetime.strptime(query_set["end_date_str"], '%Y-%m-%dT%H:%M:%S%z') + base_query = Q(created_at__range=(start_date, end_date)) + if query_set["request_id"]: + base_query &= Q(request_id=query_set["request_id"]) + if query_set["redemption_id"]: + base_query &= Q(redemption_id=query_set["redemption_id"]) + base_query &= Q(is_test_request=False)
+ if isinstance(query_set["include_test"], str): + query_set["include_test"] = True if query_set["include_test"].lower() in ["true", "yes", "1"] else False + if query_set["include_test"]: + # drop the is_test_request filter appended above so test records are included + base_query.children = base_query.children[:-1] + + elif isinstance(query_set["include_test"], bool): + if query_set["include_test"]: + base_query.children = base_query.children[:-1] + if isinstance(query_set["subsidiary"], str): + if query_set["subsidiary"] and query_set["subsidiary"].lower().replace(" ", "")!="all": + base_query &= Q(redemption_id__startswith=query_set["subsidiary"]) + if isinstance(query_set["is_reviewed"], str): + if query_set["is_reviewed"] == "reviewed": + base_query &= Q(is_reviewed=True) + elif query_set["is_reviewed"] == "not reviewed": + base_query &= Q(is_reviewed=False) + # elif query_set["is_reviewed"] == "all": + # pass + + errors = [] + # Create a placeholder to fill + accuracy = {"feedback" :{"imei_number": IterAvg(), + "purchase_date": IterAvg(), + "retailername": IterAvg(), + "sold_to_party": IterAvg(),}, + "reviewed" :{"imei_number": IterAvg(), + "purchase_date": IterAvg(), + "retailername": IterAvg(), + "sold_to_party": IterAvg(),} + } # {"imei": {"acc": 0.1, count: 1}, ...} + time_cost = {"invoice": IterAvg(), + "imei": IterAvg()} + number_images = 0 + number_bad_images = 0 + # TODO: Multithreading + # Calculate accuracy, processing time, ....Then save. + subscription_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at') + report: Report = \ + Report.objects.filter(report_id=report_id).first() + # TODO: number of transaction by doc type + num_request = 0 + for request in subscription_requests: + if request.status != 200 or not (request.reviewed_result or request.feedback_result): + # Failed requests or lack of reviewed_result/feedback_result + continue + request_att = calculate_and_save_subcription_file(report, request) + + request.feedback_accuracy = {"imei_number" : mean_list(request_att["acc"]["feedback"].get("imei_number", [None])), + "purchase_date" : mean_list(request_att["acc"]["feedback"].get("purchase_date", [None])), + "retailername" : mean_list(request_att["acc"]["feedback"].get("retailername", [None])), + "sold_to_party" : mean_list(request_att["acc"]["feedback"].get("sold_to_party", [None]))} + request.reviewed_accuracy = {"imei_number" : mean_list(request_att["acc"]["reviewed"].get("imei_number", [None])), + "purchase_date" : mean_list(request_att["acc"]["reviewed"].get("purchase_date", [None])), + "retailername" : mean_list(request_att["acc"]["reviewed"].get("retailername", [None])), + "sold_to_party" : mean_list(request_att["acc"]["reviewed"].get("sold_to_party", [None]))} + request.save() + number_images += request_att["total_images"] + number_bad_images += request_att["bad_images"] + update_temp_accuracy(accuracy["feedback"], request_att["acc"]["feedback"], keys=["imei_number", "purchase_date", "retailername", "sold_to_party"]) + update_temp_accuracy(accuracy["reviewed"], request_att["acc"]["reviewed"], keys=["imei_number", "purchase_date", "retailername", "sold_to_party"]) + + time_cost["imei"].add(request_att["time_cost"].get("imei", [])) + time_cost["invoice"].add(request_att["time_cost"].get("invoice", [])) + + errors += request_att["err"] + num_request += 1 + # Do saving process + report.number_request = num_request + report.number_images = number_images + report.number_imei = time_cost["imei"].count + report.number_invoice = time_cost["invoice"].count + report.number_bad_images = number_bad_images + report.average_OCR_time = {"invoice": time_cost["invoice"](), "imei": time_cost["imei"](), + "invoice_count": time_cost["invoice"].count, "imei_count": time_cost["imei"].count} + + acumulated_acc = 
{"feedback": {}, + "reviewed": {}} + + for acc_type in ["feedback", "reviewed"]: + for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]: + acumulated_acc[acc_type][key] = accuracy[acc_type][key]() + acumulated_acc[acc_type][key+"_count"] = accuracy[acc_type][key].count + + report.feedback_accuracy = acumulated_acc["feedback"] + report.reviewed_accuracy = acumulated_acc["reviewed"] + + report.errors = "|".join(errors) + report.save() + except IndexError as e: + print(e) + traceback.print_exc() + print("NotFound request by report id, %d", report_id) + except Exception as e: + print("[ERROR]: an error occured while processing report: ", report_id) + traceback.print_exc() + return 400 \ No newline at end of file diff --git a/cope2n-api/fwd_api/celery_worker/worker.py b/cope2n-api/fwd_api/celery_worker/worker.py index a056266..31a3262 100755 --- a/cope2n-api/fwd_api/celery_worker/worker.py +++ b/cope2n-api/fwd_api/celery_worker/worker.py @@ -12,7 +12,7 @@ django.setup() app: Celery = Celery( 'postman', broker=settings.BROKER_URL, - include=['fwd_api.celery_worker.process_result_tasks', 'fwd_api.celery_worker.internal_task'], + include=['fwd_api.celery_worker.process_result_tasks', 'fwd_api.celery_worker.internal_task', 'fwd_api.celery_worker.process_report_tasks'], broker_transport_options={'confirm_publish': False}, ) @@ -40,6 +40,7 @@ app.conf.update({ Queue('upload_obj_to_s3'), Queue('remove_local_file'), Queue('csv_feedback'), + Queue('report'), ], 'task_routes': { @@ -57,6 +58,7 @@ app.conf.update({ 'upload_obj_to_s3': {'queue': "upload_obj_to_s3"}, 'remove_local_file': {'queue': "remove_local_file"}, 'csv_feedback': {'queue': "csv_feedback"}, + 'make_a_report': {'queue': "report"}, } }) diff --git a/cope2n-api/fwd_api/management/commands/migrate-database-010224.py b/cope2n-api/fwd_api/management/commands/migrate-database-010224.py new file mode 100644 index 0000000..bc81388 --- /dev/null +++ b/cope2n-api/fwd_api/management/commands/migrate-database-010224.py @@ -0,0 +1,71 @@ +# myapp/management/commands/mycustomcommand.py +from django.core.management.base import BaseCommand +from tqdm import tqdm +from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest +from fwd_api.utils.accuracy import predict_result_to_ready +import traceback +import copy + +class Command(BaseCommand): + help = 'Refactor database for image level' + + def add_arguments(self, parser): + # Add your command-line arguments here + parser.add_argument('test', type=str, help='Value for the argument') + + + def process_request(self, request): + if len(request.request_id.split(".")[0].split("_")) < 2: + return + images = SubscriptionRequestFile.objects.filter(request=request) + time_cost = {"imei": [], "invoice": [], "all": []} + if request.ai_inference_profile is None: + time_cost["imei"] = [-1 for _ in range(len(images))] + time_cost["invoice"] = [-1] + time_cost["all"] = [-1] + else: + for k, v in request.ai_inference_profile.items(): + time_cost[k.split("_")[0]].append(v["inference"][1][0] - v["inference"][0] + (v["postprocess"][1]-v["postprocess"][0])) + for i, image in enumerate(images): + # temp_imei_SAP_20240127223644_a493434edbf84fc08aeb87ef6cdde102_0.jpg + try: + image.index_in_request = int(image.file_name.split(".")[0].split("_")[-1]) if len(image.file_name.split(".")[0].split("_")) > 4 else 0 + image.doc_type = image.file_name.split(".")[0].split("_")[1] if len(image.file_name.split(".")[0].split("_")) > 4 else "all" + image.processing_time = 
diff --git a/cope2n-api/fwd_api/management/commands/migrate-database-010224.py b/cope2n-api/fwd_api/management/commands/migrate-database-010224.py new file mode 100644 index 0000000..bc81388 --- /dev/null +++ b/cope2n-api/fwd_api/management/commands/migrate-database-010224.py @@ -0,0 +1,71 @@ +# fwd_api/management/commands/migrate-database-010224.py +from django.core.management.base import BaseCommand +from tqdm import tqdm +from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest +from fwd_api.utils.accuracy import predict_result_to_ready +import traceback +import copy + +class Command(BaseCommand): + help = 'Refactor database for image level' + + def add_arguments(self, parser): + # Add your command-line arguments here + parser.add_argument('test', type=str, help='Value for the argument') + + + def process_request(self, request): + if len(request.request_id.split(".")[0].split("_")) < 2: + return + images = SubscriptionRequestFile.objects.filter(request=request) + time_cost = {"imei": [], "invoice": [], "all": []} + if request.ai_inference_profile is None: + time_cost["imei"] = [-1 for _ in range(len(images))] + time_cost["invoice"] = [-1] + time_cost["all"] = [-1] + else: + for k, v in request.ai_inference_profile.items(): + time_cost[k.split("_")[0]].append(v["inference"][1][0] - v["inference"][0] + (v["postprocess"][1]-v["postprocess"][0])) + for i, image in enumerate(images): + # e.g. temp_imei_SAP_20240127223644_a493434edbf84fc08aeb87ef6cdde102_0.jpg + try: + image.index_in_request = int(image.file_name.split(".")[0].split("_")[-1]) if len(image.file_name.split(".")[0].split("_")) > 4 else 0 + image.doc_type = image.file_name.split(".")[0].split("_")[1] if len(image.file_name.split(".")[0].split("_")) > 4 else "all" + image.processing_time = time_cost[image.doc_type][image.index_in_request] + if not request.predict_result: + raise KeyError(f"Key predict_result not found in {request.request_id}") + if request.predict_result.get("status", 200) != 200: + raise AttributeError(f"Failed request: {request.request_id}") + _predict_result = copy.deepcopy(predict_result_to_ready(request.predict_result)) + _feedback_result = copy.deepcopy(request.feedback_result) + _reviewed_result = copy.deepcopy(request.reviewed_result) + + if image.doc_type == "invoice": + _predict_result["imei_number"] = [] + if _feedback_result: + _feedback_result["imei_number"] = [] + if _reviewed_result: + _reviewed_result["imei_number"] = [] + else: + _predict_result = {"retailername": None, "sold_to_party": None, "purchase_date": [], "imei_number": [_predict_result["imei_number"][image.index_in_request]]} + _feedback_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_feedback_result["imei_number"][image.index_in_request]]} if _feedback_result else None + _reviewed_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_reviewed_result["imei_number"][image.index_in_request]]} if _reviewed_result else None + image.predict_result = _predict_result + image.feedback_result = _feedback_result + image.reviewed_result = _reviewed_result + image.save() + except Exception as e: + self.stdout.write(self.style.ERROR(f"Request: {request.request_id} failed with {e}")) + print(traceback.format_exc()) + continue + + def handle(self, *args, **options): + test = options['test'] + subscription_iter = SubscriptionRequest.objects.all() + for request in tqdm(subscription_iter.iterator()): + self.process_request(request) + self.stdout.write(self.style.SUCCESS('Image-level database migration executed successfully!')) diff --git a/cope2n-api/fwd_api/migrations/0167_report_remove_subscriptionrequestfile_accuracy_and_more.py b/cope2n-api/fwd_api/migrations/0167_report_remove_subscriptionrequestfile_accuracy_and_more.py new file mode 100644 index 0000000..bb1a36f --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0167_report_remove_subscriptionrequestfile_accuracy_and_more.py @@ -0,0 +1,102 @@ +# Generated by Django 4.1.3 on 2024-01-25 06:22 + +from django.db import migrations, models +import django.utils.timezone + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0166_remove_subscriptionrequest_is_bad_image_quality_and_more'), + ] + + operations = [ + migrations.CreateModel( + name='Report', + fields=[ + ('id', models.AutoField(primary_key=True, serialize=False)), + ('report_id', models.CharField(max_length=200)), + ('local_file_name', models.CharField(max_length=200)), + ('error_status', models.JSONField(null=True)), + ('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('start_at', models.DateTimeField(null=True)), + ('end_at', models.DateTimeField(null=True)), + ('include_for_test_sample', models.BooleanField(default=False)), + ('status', models.CharField(max_length=100)), + ('is_daily_report', models.BooleanField(default=False)), + ('errors', models.TextField(default='')), + ('S3_uploaded', models.BooleanField(default=False)), + ('number_request', models.IntegerField(default=0)), + ('number_images', models.IntegerField(default=0)), + ('number_bad_images', models.IntegerField(default=0)), + ('average_client_time_profile', models.JSONField(null=True)), + 
('average_OCR_time_profile', models.JSONField(null=True)), + ('average_OCR_time', models.JSONField(null=True)), + ('average_client_time', models.JSONField(null=True)), + ('imei_accuracy', models.FloatField(default=-1)), + ('purchase_date_accuracy', models.FloatField(default=-1)), + ('retailer_name_accuracy', models.FloatField(default=-1)), + ('sold_to_party_accuracy', models.FloatField(default=-1)), + ], + ), + migrations.RemoveField( + model_name='subscriptionrequestfile', + name='accuracy', + ), + migrations.AddField( + model_name='subscriptionrequest', + name='imei_accuracy', + field=models.FloatField(default=-1), + ), + migrations.AddField( + model_name='subscriptionrequest', + name='purchase_date_accuracy', + field=models.FloatField(default=-1), + ), + migrations.AddField( + model_name='subscriptionrequest', + name='retailer_name_accuracy', + field=models.FloatField(default=-1), + ), + migrations.AddField( + model_name='subscriptionrequest', + name='sold_to_party_accuracy', + field=models.FloatField(default=-1), + ), + migrations.AddField( + model_name='subscriptionrequestfile', + name='counter_measures', + field=models.TextField(blank=True), + ), + migrations.AddField( + model_name='subscriptionrequestfile', + name='imei_accuracy', + field=models.FloatField(default=-1), + ), + migrations.AddField( + model_name='subscriptionrequestfile', + name='processing_time', + field=models.IntegerField(default=-1), + ), + migrations.AddField( + model_name='subscriptionrequestfile', + name='purchase_date_accuracy', + field=models.FloatField(default=-1), + ), + migrations.AddField( + model_name='subscriptionrequestfile', + name='reason', + field=models.TextField(blank=True), + ), + migrations.AddField( + model_name='subscriptionrequestfile', + name='retailer_name_accuracy', + field=models.FloatField(default=-1), + ), + migrations.AddField( + model_name='subscriptionrequestfile', + name='sold_to_party_accuracy', + field=models.FloatField(default=-1), + ), + ] diff --git a/cope2n-api/fwd_api/migrations/0168_report_number_imei_transaction_and_more.py b/cope2n-api/fwd_api/migrations/0168_report_number_imei_transaction_and_more.py new file mode 100644 index 0000000..5b38f2c --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0168_report_number_imei_transaction_and_more.py @@ -0,0 +1,23 @@ +# Generated by Django 4.1.3 on 2024-01-25 09:44 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0167_report_remove_subscriptionrequestfile_accuracy_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='report', + name='number_imei_transaction', + field=models.IntegerField(default=0), + ), + migrations.AddField( + model_name='report', + name='number_ivoice_transaction', + field=models.IntegerField(default=0), + ), + ] diff --git a/cope2n-api/fwd_api/migrations/0169_report_include_reviewed_report_include_test_and_more.py b/cope2n-api/fwd_api/migrations/0169_report_include_reviewed_report_include_test_and_more.py new file mode 100644 index 0000000..8586cd6 --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0169_report_include_reviewed_report_include_test_and_more.py @@ -0,0 +1,28 @@ +# Generated by Django 4.1.3 on 2024-01-25 11:17 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0168_report_number_imei_transaction_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='report', + name='include_reviewed', + field=models.TextField(default=''), 
+ ), + migrations.AddField( + model_name='report', + name='include_test', + field=models.CharField(default='', max_length=200), + ), + migrations.AddField( + model_name='report', + name='subsidiary', + field=models.TextField(default=''), + ), + ] diff --git a/cope2n-api/fwd_api/migrations/0170_alter_report_errors_alter_report_include_reviewed_and_more.py b/cope2n-api/fwd_api/migrations/0170_alter_report_errors_alter_report_include_reviewed_and_more.py new file mode 100644 index 0000000..1bb2793 --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0170_alter_report_errors_alter_report_include_reviewed_and_more.py @@ -0,0 +1,28 @@ +# Generated by Django 4.1.3 on 2024-01-25 11:19 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0169_report_include_reviewed_report_include_test_and_more'), + ] + + operations = [ + migrations.AlterField( + model_name='report', + name='errors', + field=models.TextField(default='', null=True), + ), + migrations.AlterField( + model_name='report', + name='include_reviewed', + field=models.TextField(default='', null=True), + ), + migrations.AlterField( + model_name='report', + name='subsidiary', + field=models.TextField(default='', null=True), + ), + ] diff --git a/cope2n-api/fwd_api/migrations/0171_rename_imei_accuracy_report_imei_accuracy_ocr_and_more.py b/cope2n-api/fwd_api/migrations/0171_rename_imei_accuracy_report_imei_accuracy_ocr_and_more.py new file mode 100644 index 0000000..fb95803 --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0171_rename_imei_accuracy_report_imei_accuracy_ocr_and_more.py @@ -0,0 +1,112 @@ +# Generated by Django 4.1.3 on 2024-01-28 08:11 + +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0170_alter_report_errors_alter_report_include_reviewed_and_more'), + ] + + operations = [ + migrations.RenameField( + model_name='report', + old_name='imei_accuracy', + new_name='imei_accuracy_ocr', + ), + migrations.RenameField( + model_name='report', + old_name='purchase_date_accuracy', + new_name='imei_accuracy_revised', + ), + migrations.RenameField( + model_name='report', + old_name='retailer_name_accuracy', + new_name='purchase_date_accuracy_ocr', + ), + migrations.RenameField( + model_name='report', + old_name='sold_to_party_accuracy', + new_name='purchase_date_accuracy_revised', + ), + migrations.AddField( + model_name='report', + name='retailer_name_accuracy_ocr', + field=models.FloatField(default=-1), + ), + migrations.AddField( + model_name='report', + name='retailer_name_accuracy_revised', + field=models.FloatField(default=-1), + ), + migrations.AddField( + model_name='report', + name='sold_to_party_accuracy_ocr', + field=models.FloatField(default=-1), + ), + migrations.AddField( + model_name='report', + name='sold_to_party_accuracy_revised', + field=models.FloatField(default=-1), + ), + migrations.AddField( + model_name='subscriptionrequestfile', + name='feedback_result', + field=models.JSONField(null=True), + ), + migrations.AddField( + model_name='subscriptionrequestfile', + name='predict_result', + field=models.JSONField(null=True), + ), + migrations.AddField( + model_name='subscriptionrequestfile', + name='reviewed_result', + field=models.JSONField(null=True), + ), + migrations.AlterField( + model_name='subscriptionrequestfile', + name='doc_type', + field=models.CharField(default='', max_length=10), + ), + migrations.CreateModel( 
+ name='ReportFile', + fields=[ + ('id', models.AutoField(primary_key=True, serialize=False)), + ('correspond_request_id', models.CharField(max_length=200)), + ('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('S3_uploaded', models.BooleanField(default=False)), + ('doc_type', models.CharField(max_length=200)), + ('imei_feedback', models.CharField(default=None, max_length=200, null=True)), + ('purchase_date_feedback', models.CharField(default=None, max_length=200, null=True)), + ('retailer_feedback', models.CharField(default=None, max_length=200, null=True)), + ('sold_to_party_feedback', models.CharField(default=None, max_length=200, null=True)), + ('imei_ocr', models.CharField(default=None, max_length=200, null=True)), + ('purchase_date_ocr', models.CharField(default=None, max_length=200, null=True)), + ('retailer_ocr', models.CharField(default=None, max_length=200, null=True)), + ('sold_to_party_ocr', models.CharField(default=None, max_length=200, null=True)), + ('imei_revised', models.CharField(default=None, max_length=200, null=True)), + ('purchase_date_revised', models.CharField(default=None, max_length=200, null=True)), + ('retailer_revised', models.CharField(default=None, max_length=200, null=True)), + ('sold_to_party_revised', models.CharField(default=None, max_length=200, null=True)), + ('imei_acc_feedback', models.FloatField(default=None, null=True)), + ('purchase_date_acc_feedback', models.FloatField(default=None, null=True)), + ('retailer_acc_feedback', models.FloatField(default=None, null=True)), + ('sold_to_party_acc_feedback', models.CharField(default=None, max_length=200, null=True)), + ('acc_feedback', models.FloatField(default=None, null=True)), + ('imei_acc_revised', models.FloatField(default=None, null=True)), + ('purchase_date_acc_revised', models.FloatField(default=None, null=True)), + ('retailer_acc_revised', models.FloatField(default=None, null=True)), + ('acc_revised', models.FloatField(default=None, null=True)), + ('time_cost', models.FloatField(default=0)), + ('is_reviewed', models.CharField(default='NA', max_length=5)), + ('bad_image_reason', models.TextField(default='')), + ('countermeasures', models.TextField(default='')), + ('report', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='files', to='fwd_api.report')), + ], + ), + ] diff --git a/cope2n-api/fwd_api/migrations/0172_alter_subscriptionrequestfile_imei_accuracy_and_more.py b/cope2n-api/fwd_api/migrations/0172_alter_subscriptionrequestfile_imei_accuracy_and_more.py new file mode 100644 index 0000000..504bb65 --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0172_alter_subscriptionrequestfile_imei_accuracy_and_more.py @@ -0,0 +1,38 @@ +# Generated by Django 4.1.3 on 2024-01-28 09:27 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0171_rename_imei_accuracy_report_imei_accuracy_ocr_and_more'), + ] + + operations = [ + migrations.AlterField( + model_name='subscriptionrequestfile', + name='imei_accuracy', + field=models.FloatField(default=None, null=True), + ), + migrations.AlterField( + model_name='subscriptionrequestfile', + name='processing_time', + field=models.FloatField(default=-1), + ), + migrations.AlterField( + model_name='subscriptionrequestfile', + name='purchase_date_accuracy', + field=models.FloatField(default=None, null=True), + ), + migrations.AlterField( + model_name='subscriptionrequestfile', + 
name='retailer_name_accuracy', + field=models.FloatField(default=None, null=True), + ), + migrations.AlterField( + model_name='subscriptionrequestfile', + name='sold_to_party_accuracy', + field=models.FloatField(default=None, null=True), + ), + ] diff --git a/cope2n-api/fwd_api/migrations/0173_rename_countermeasures_reportfile_counter_measures_and_more.py b/cope2n-api/fwd_api/migrations/0173_rename_countermeasures_reportfile_counter_measures_and_more.py new file mode 100644 index 0000000..e40c9d4 --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0173_rename_countermeasures_reportfile_counter_measures_and_more.py @@ -0,0 +1,226 @@ +# Generated by Django 4.1.3 on 2024-01-28 18:00 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0172_alter_subscriptionrequestfile_imei_accuracy_and_more'), + ] + + operations = [ + migrations.RenameField( + model_name='reportfile', + old_name='countermeasures', + new_name='counter_measures', + ), + migrations.RemoveField( + model_name='report', + name='imei_accuracy_ocr', + ), + migrations.RemoveField( + model_name='report', + name='imei_accuracy_revised', + ), + migrations.RemoveField( + model_name='report', + name='purchase_date_accuracy_ocr', + ), + migrations.RemoveField( + model_name='report', + name='purchase_date_accuracy_revised', + ), + migrations.RemoveField( + model_name='report', + name='retailer_name_accuracy_ocr', + ), + migrations.RemoveField( + model_name='report', + name='retailer_name_accuracy_revised', + ), + migrations.RemoveField( + model_name='report', + name='sold_to_party_accuracy_ocr', + ), + migrations.RemoveField( + model_name='report', + name='sold_to_party_accuracy_revised', + ), + migrations.RemoveField( + model_name='reportfile', + name='acc_feedback', + ), + migrations.RemoveField( + model_name='reportfile', + name='acc_revised', + ), + migrations.RemoveField( + model_name='reportfile', + name='imei_acc_feedback', + ), + migrations.RemoveField( + model_name='reportfile', + name='imei_acc_revised', + ), + migrations.RemoveField( + model_name='reportfile', + name='imei_feedback', + ), + migrations.RemoveField( + model_name='reportfile', + name='imei_ocr', + ), + migrations.RemoveField( + model_name='reportfile', + name='imei_revised', + ), + migrations.RemoveField( + model_name='reportfile', + name='purchase_date_acc_feedback', + ), + migrations.RemoveField( + model_name='reportfile', + name='purchase_date_acc_revised', + ), + migrations.RemoveField( + model_name='reportfile', + name='purchase_date_feedback', + ), + migrations.RemoveField( + model_name='reportfile', + name='purchase_date_ocr', + ), + migrations.RemoveField( + model_name='reportfile', + name='purchase_date_revised', + ), + migrations.RemoveField( + model_name='reportfile', + name='retailer_acc_feedback', + ), + migrations.RemoveField( + model_name='reportfile', + name='retailer_acc_revised', + ), + migrations.RemoveField( + model_name='reportfile', + name='retailer_feedback', + ), + migrations.RemoveField( + model_name='reportfile', + name='retailer_ocr', + ), + migrations.RemoveField( + model_name='reportfile', + name='retailer_revised', + ), + migrations.RemoveField( + model_name='reportfile', + name='sold_to_party_acc_feedback', + ), + migrations.RemoveField( + model_name='reportfile', + name='sold_to_party_feedback', + ), + migrations.RemoveField( + model_name='reportfile', + name='sold_to_party_ocr', + ), + migrations.RemoveField( + model_name='reportfile', + 
name='sold_to_party_revised', + ), + migrations.RemoveField( + model_name='subscriptionrequest', + name='imei_accuracy', + ), + migrations.RemoveField( + model_name='subscriptionrequest', + name='purchase_date_accuracy', + ), + migrations.RemoveField( + model_name='subscriptionrequest', + name='retailer_name_accuracy', + ), + migrations.RemoveField( + model_name='subscriptionrequest', + name='sold_to_party_accuracy', + ), + migrations.RemoveField( + model_name='subscriptionrequestfile', + name='imei_accuracy', + ), + migrations.RemoveField( + model_name='subscriptionrequestfile', + name='purchase_date_accuracy', + ), + migrations.RemoveField( + model_name='subscriptionrequestfile', + name='retailer_name_accuracy', + ), + migrations.RemoveField( + model_name='subscriptionrequestfile', + name='sold_to_party_accuracy', + ), + migrations.AddField( + model_name='report', + name='feedback_accuracy', + field=models.JSONField(null=True), + ), + migrations.AddField( + model_name='report', + name='reviewed_accuracy', + field=models.JSONField(null=True), + ), + migrations.AddField( + model_name='reportfile', + name='error', + field=models.TextField(default=''), + ), + migrations.AddField( + model_name='reportfile', + name='feedback_accuracy', + field=models.JSONField(null=True), + ), + migrations.AddField( + model_name='reportfile', + name='feedback_result', + field=models.JSONField(null=True), + ), + migrations.AddField( + model_name='reportfile', + name='predict_result', + field=models.JSONField(null=True), + ), + migrations.AddField( + model_name='reportfile', + name='reviewed_accuracy', + field=models.JSONField(null=True), + ), + migrations.AddField( + model_name='reportfile', + name='reviewed_result', + field=models.JSONField(null=True), + ), + migrations.AddField( + model_name='subscriptionrequest', + name='feedback_accuracy', + field=models.JSONField(null=True), + ), + migrations.AddField( + model_name='subscriptionrequest', + name='reviewed_accuracy', + field=models.JSONField(null=True), + ), + migrations.AddField( + model_name='subscriptionrequestfile', + name='feedback_accuracy', + field=models.JSONField(null=True), + ), + migrations.AddField( + model_name='subscriptionrequestfile', + name='reviewed_accuracy', + field=models.JSONField(null=True), + ), + ] diff --git a/cope2n-api/fwd_api/migrations/0174_reportfile_acc_reportfile_correspond_redemption_id_and_more.py b/cope2n-api/fwd_api/migrations/0174_reportfile_acc_reportfile_correspond_redemption_id_and_more.py new file mode 100644 index 0000000..d7c8142 --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0174_reportfile_acc_reportfile_correspond_redemption_id_and_more.py @@ -0,0 +1,28 @@ +# Generated by Django 4.1.3 on 2024-01-29 05:59 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0173_rename_countermeasures_reportfile_counter_measures_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='reportfile', + name='acc', + field=models.FloatField(default=0), + ), + migrations.AddField( + model_name='reportfile', + name='correspond_redemption_id', + field=models.CharField(default='', max_length=200), + ), + migrations.AlterField( + model_name='reportfile', + name='correspond_request_id', + field=models.CharField(default='', max_length=200), + ), + ] diff --git a/cope2n-api/fwd_api/migrations/0175_rename_number_ivoice_transaction_report_number_imei_and_more.py 
b/cope2n-api/fwd_api/migrations/0175_rename_number_ivoice_transaction_report_number_imei_and_more.py new file mode 100644 index 0000000..ae622bb --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0175_rename_number_ivoice_transaction_report_number_imei_and_more.py @@ -0,0 +1,28 @@ +# Generated by Django 4.1.3 on 2024-01-30 12:29 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0174_reportfile_acc_reportfile_correspond_redemption_id_and_more'), + ] + + operations = [ + migrations.RenameField( + model_name='report', + old_name='number_ivoice_transaction', + new_name='number_imei', + ), + migrations.AddField( + model_name='report', + name='number_invoice', + field=models.IntegerField(default=0), + ), + migrations.AddField( + model_name='report', + name='number_invoice_transaction', + field=models.IntegerField(default=0), + ), + ] diff --git a/cope2n-api/fwd_api/models/Report.py b/cope2n-api/fwd_api/models/Report.py index ef03c59..340b305 100644 --- a/cope2n-api/fwd_api/models/Report.py +++ b/cope2n-api/fwd_api/models/Report.py @@ -13,19 +13,28 @@ class Report(models.Model): start_at = models.DateTimeField(null=True) end_at = models.DateTimeField(null=True) include_for_test_sample = models.BooleanField(default=False) - status = models.CharField(null=True) + status = models.CharField(max_length=100) is_daily_report = models.BooleanField(default=False) + errors = models.TextField(default="", null=True) + subsidiary = models.TextField(default="", null=True) + include_reviewed = models.TextField(default="", null=True) + include_test = models.CharField(max_length=200, default="") # Data S3_uploaded = models.BooleanField(default=False) number_request = models.IntegerField(default=0) number_images = models.IntegerField(default=0) number_bad_images = models.IntegerField(default=0) - average_client_time_profile = models.JSONField(default=0) # {"0.1": 100, 0.2: 200, ...} - average_OCR_time_profile = models.JSONField(default=0) # {"0.1": 98, 0.2: 202, ...} - average_OCR_time = models.JSONField(null=True) # {"invoice": 0.1, "imei": 0.1} + number_imei = models.IntegerField(default=0) + number_invoice = models.IntegerField(default=0) + + number_imei_transaction = models.IntegerField(default=0) + number_invoice_transaction = models.IntegerField(default=0) + + average_client_time_profile = models.JSONField(null=True) # {"0.1": 100, 0.2: 200, ...} | Future feature + average_OCR_time_profile = models.JSONField(null=True) # {"0.1": 98, 0.2: 202, ...} | Future feature + average_OCR_time = models.JSONField(null=True) # {"invoice": 0.1, "imei": 0.1} | Future feature average_client_time = models.JSONField(null=True) # {"invoice": 0.1, "imei": 0.1} - imei_accuracy = models.FloatField(default=-1) - purchase_date_accuracy = models.FloatField(default=-1) - retailer_name_accuracy = models.FloatField(default=-1) - sold_to_party_accuracy = models.FloatField(default=-1) \ No newline at end of file + + feedback_accuracy = models.JSONField(null=True) + reviewed_accuracy = models.JSONField(null=True) \ No newline at end of file diff --git a/cope2n-api/fwd_api/models/ReportFile.py b/cope2n-api/fwd_api/models/ReportFile.py new file mode 100644 index 0000000..f5ccaab --- /dev/null +++ b/cope2n-api/fwd_api/models/ReportFile.py @@ -0,0 +1,35 @@ +from django.db import models +from django.utils import timezone +from fwd_api.models.Subscription import Subscription +from fwd_api.models.SubscriptionRequest import SubscriptionRequest +from fwd_api.models.Report 
import Report + +class ReportFile(models.Model): + # Metadata + id = models.AutoField(primary_key=True) + correspond_request_id = models.CharField(max_length=200, default="") + correspond_redemption_id = models.CharField(max_length=200, default="") + created_at = models.DateTimeField(default=timezone.now, db_index=True) + updated_at = models.DateTimeField(auto_now=True) + report = models.ForeignKey(Report, related_name="files", on_delete=models.CASCADE) + + # Data + S3_uploaded = models.BooleanField(default=False) + doc_type = models.CharField(max_length=200) + + predict_result = models.JSONField(null=True) + feedback_result = models.JSONField(null=True) + reviewed_result = models.JSONField(null=True) + + feedback_accuracy = models.JSONField(null=True) + reviewed_accuracy = models.JSONField(null=True) + acc = models.FloatField(default=0) + + time_cost = models.FloatField(default=0) + is_reviewed = models.CharField(default="NA", max_length=5) # NA, No, Yes + bad_image_reason = models.TextField(default="") + counter_measures = models.TextField(default="") + error = models.TextField(default="") + + + diff --git a/cope2n-api/fwd_api/models/SubscriptionRequest.py b/cope2n-api/fwd_api/models/SubscriptionRequest.py index 3839c3a..9ca9ac2 100755 --- a/cope2n-api/fwd_api/models/SubscriptionRequest.py +++ b/cope2n-api/fwd_api/models/SubscriptionRequest.py @@ -21,10 +21,9 @@ class SubscriptionRequest(models.Model): updated_at = models.DateTimeField(auto_now=True) is_test_request = models.BooleanField(default=False) S3_uploaded = models.BooleanField(default=False) - imei_accuracy = models.FloatField(default=-1) - purchase_date_accuracy = models.FloatField(default=-1) - retailer_name_accuracy = models.FloatField(default=-1) - sold_to_party_accuracy = models.FloatField(default=-1) + + feedback_accuracy = models.JSONField(null=True) + reviewed_accuracy = models.JSONField(null=True) ai_inference_profile = models.JSONField(null=True) preprocessing_time = models.FloatField(default=-1) diff --git a/cope2n-api/fwd_api/models/SubscriptionRequestFile.py b/cope2n-api/fwd_api/models/SubscriptionRequestFile.py index 6293421..93d62f5 100755 --- a/cope2n-api/fwd_api/models/SubscriptionRequestFile.py +++ b/cope2n-api/fwd_api/models/SubscriptionRequestFile.py @@ -20,12 +20,15 @@ class SubscriptionRequestFile(models.Model): created_at = models.DateTimeField(default=timezone.now, db_index=True) updated_at = models.DateTimeField(auto_now=True) is_bad_image_quality = models.BooleanField(default=False) - doc_type = models.CharField(max_length=100, default="") - index_in_request = models.IntegerField(default=0) - processing_time = models.IntegerField(default=-1) # in milisecond + doc_type = models.CharField(max_length=10, default="") + index_in_request = models.IntegerField(default=0) # by doc_type + processing_time = models.FloatField(default=-1) # in milliseconds + reason = models.TextField(blank=True) counter_measures = models.TextField(blank=True) - imei_accuracy = models.FloatField(default=-1) - purchase_date_accuracy = models.FloatField(default=-1) - retailer_name_accuracy = models.FloatField(default=-1) - sold_to_party_accuracy = models.FloatField(default=-1) \ No newline at end of file + + predict_result = models.JSONField(null=True) + feedback_result = models.JSONField(null=True) + reviewed_result = models.JSONField(null=True) + + feedback_accuracy = models.JSONField(null=True) + reviewed_accuracy = models.JSONField(null=True) \ No newline at end of file diff --git a/cope2n-api/fwd_api/models/__init__.py 
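# The three *_result JSON columns above are meant to share one normalized shape
# once predict_result_to_ready() (added to fwd_api/utils/accuracy.py later in
# this patch) has run. A minimal sketch with illustrative values only:
predict_result = {
    "retailername": "ABC Mart",          # hypothetical retailer
    "sold_to_party": None,
    "purchase_date": ["28/02/2023"],     # list: one value per detection
    "imei_number": ["356789104235996"],  # hypothetical 15-digit IMEI
}
# feedback_result and reviewed_result carry the same keys, which is what lets
# calculate_accuracy() compare the three sources field by field.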
b/cope2n-api/fwd_api/models/__init__.py index 3cfcd22..47f23f0 100755 --- a/cope2n-api/fwd_api/models/__init__.py +++ b/cope2n-api/fwd_api/models/__init__.py @@ -6,4 +6,7 @@ from .OcrTemplateBox import OcrTemplateBox from .PricingPlan import PricingPlan from .Subscription import Subscription from .FeedbackRequest import FeedbackRequest +from .Report import Report +from .ReportFile import ReportFile + diff --git a/cope2n-api/fwd_api/utils/accuracy.py b/cope2n-api/fwd_api/utils/accuracy.py new file mode 100644 index 0000000..56152f6 --- /dev/null +++ b/cope2n-api/fwd_api/utils/accuracy.py @@ -0,0 +1,417 @@ +import re +from datetime import datetime + +import copy +from .ocr_utils.ocr_metrics import eval_ocr_metric +from .ocr_utils.sbt_report import post_processing_str +from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile, ReportFile +from ..celery_worker.client_connector import c_connector + +BAD_THRESHOLD = 0.75 + +valid_keys = ["retailername", "sold_to_party", "purchase_date", "imei_number"] + +class MonthReportAccumulate: + def __init__(self): + self.month = None + self.total = { + 'subs': "+", + 'extraction_date': "Subtotal ()", + 'total_images': 0, + 'images_quality': { + 'successful': 0, + 'successful_percent': 0, + 'bad': 0, + 'bad_percent': 0 + }, + 'average_accuracy_rate': { + 'imei': IterAvg(), + 'purchase_date': IterAvg(), + 'retailer_name': IterAvg() + }, + 'average_processing_time': { + 'imei': IterAvg(), + 'invoice': IterAvg() + }, + 'usage': { + 'imei':0, + 'invoice': 0 + } + } + self.data = [] + self.data_format = { + 'num_imei': 0, + 'num_invoice': 0, + 'total_images': 0, + 'images_quality': { + 'successful': 0, + 'successful_percent': 0, + 'bad': 0, + 'bad_percent': 0 + }, + 'average_accuracy_rate': { + 'imei': 0, + 'purchase_date': 0, + 'retailer_name': 0 + }, + 'average_processing_time': { + 'imei': 0, + 'invoice': 0 + }, + 'usage': { + 'imei':0, + 'invoice': 0 + } + }, + + def accumulate(self, report): + self.total["total_images"] += report.number_images + self.total["images_quality"]["successful"] += report.number_images - report.number_bad_images + self.total["images_quality"]["bad"] += report.number_bad_images + + if sum([report.reviewed_accuracy[x] for x in report.reviewed_accuracy.keys() if "_count" not in x]) > 0 : + self.total["average_accuracy_rate"]["imei"].add_avg(report.reviewed_accuracy.get("imei_number", 0), report.reviewed_accuracy.get("imei_number_count", 0)) + self.total["average_accuracy_rate"]["purchase_date"].add_avg(report.reviewed_accuracy.get("purchase_date", 0), report.reviewed_accuracy.get("purchase_date_count", 0)) + self.total["average_accuracy_rate"]["retailer_name"].add_avg(report.reviewed_accuracy.get("retailername", 0), report.reviewed_accuracy.get("retailername_count", 0)) + elif sum([ report.feedback_accuracy[x] for x in report.feedback_accuracy.keys() if "_count" not in x]) > 0: + self.total["average_accuracy_rate"]["imei"].add_avg(report.feedback_accuracy.get("imei_number", 0), report.feedback_accuracy.get("imei_number_count", 0)) + self.total["average_accuracy_rate"]["purchase_date"].add_avg(report.feedback_accuracy.get("purchase_date", 0), report.feedback_accuracy.get("purchase_date_count", 0)) + self.total["average_accuracy_rate"]["retailer_name"].add_avg(report.feedback_accuracy.get("retailername", 0), report.feedback_accuracy.get("retailername_count", 0)) + + self.total["average_processing_time"]["imei"].add_avg(report.average_OCR_time.get("imei", 0), report.average_OCR_time.get("imei_count", 0)) + 
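# IterAvg (defined just below) keeps a running weighted mean so accumulate()
# can fold in one report at a time without storing every score. A worked
# example of add_avg() with made-up numbers:
avg = IterAvg()
avg.add_avg(0.8, 10)  # batch mean 0.8 over 10 scores -> running avg 0.8
avg.add_avg(0.9, 30)  # (0.8*10 + 0.9*30) / 40 -> running avg 0.875
assert abs(avg() - 0.875) < 1e-9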
self.total["average_processing_time"]["invoice"].add_avg(report.average_OCR_time.get("invoice", 0), report.average_OCR_time.get("invoice_count", 0)) + self.total["usage"]["imei"] += report.number_imei_transaction + self.total["usage"]["invoice"] += report.number_invoice_transaction + + def add(self, report): + report_month = report.created_at.month + + if self.month is None: + self.month = report_month + self.total["extraction_date"] = f"Subtotal ({self.month})" + elif self.month != report_month: + self.total["images_quality"]["successful_percent"] += self.total["images_quality"]["successful"]/self.total["total_images"] + self.total["images_quality"]["bad_percent"] += self.total["images_quality"]["bad"]/self.total["total_images"] + return False # Reports from a different month, stop accumulating + # accumulate fields + new_data = copy.deepcopy(self.data_format)[0] + new_data["num_imei"] = report.number_imei + new_data["num_invoice"] = report.number_invoice + new_data["total_images"] = report.number_images + new_data["images_quality"]["successful"] = report.number_images - report.number_bad_images + new_data["images_quality"]["bad"] = report.number_bad_images + + if sum([ report.reviewed_accuracy[x] for x in report.reviewed_accuracy.keys() if "_count" not in x]): + new_data["average_accuracy_rate"]["imei"] = report.reviewed_accuracy.get("imei_number", None) + new_data["average_accuracy_rate"]["purchase_date"] = report.reviewed_accuracy.get("purchase_date", None) + new_data["average_accuracy_rate"]["retailer_name"] = report.reviewed_accuracy.get("retailername", None) + elif sum([ report.feedback_accuracy[x] for x in report.feedback_accuracy.keys() if "_count" not in x]): + new_data["average_accuracy_rate"]["imei"] = report.feedback_accuracy.get("imei_number", None) + new_data["average_accuracy_rate"]["purchase_date"] = report.feedback_accuracy.get("purchase_date", None) + new_data["average_accuracy_rate"]["retailer_name"] = report.feedback_accuracy.get("retailername", None) + new_data["average_processing_time"]["imei"] = report.average_OCR_time.get("imei", 0) + new_data["average_processing_time"]["invoice"] = report.average_OCR_time.get("invoice", 0) + new_data["usage"]["imei"] = report.number_imei_transaction + new_data["usage"]["invoice"] = report.number_invoice_transaction + + new_data["images_quality"]["successful_percent"] += new_data["images_quality"]["successful"]/new_data["total_images"] + new_data["images_quality"]["bad_percent"] += new_data["images_quality"]["bad"]/new_data["total_images"] + self.data.append(new_data) + self.accumulate(report) + return True + + def __call__(self): + self.total["images_quality"]["successful_percent"] += self.total["images_quality"]["successful"]/self.total["total_images"] + self.total["images_quality"]["bad_percent"] += self.total["images_quality"]["bad"]/self.total["total_images"] + total = copy.deepcopy(self.total) + total["average_accuracy_rate"]["imei"] = total["average_accuracy_rate"]["imei"]() + total["average_accuracy_rate"]["purchase_date"] = total["average_accuracy_rate"]["purchase_date"]() + total["average_accuracy_rate"]["retailer_name"] = total["average_accuracy_rate"]["retailer_name"]() + total["average_processing_time"]["imei"] = total["average_processing_time"]["imei"]() + total["average_processing_time"]["invoice"] = total["average_processing_time"]["invoice"]() + return self.month, self.data, total + +class IterAvg: + def __init__(self, name="default"): + self.name = name + self.avg = 0 + self.count = 0 + + def add(self, values): + 
""" + Args: + values (list[float]): + """ + values = [x for x in values if x is not None] + if len(values) == 0: + return + self.avg = (self.avg*self.count + sum(values))/(self.count+len(values)) + self.count += len(values) + + def add_avg(self, avg, count): + if avg is None or count is None or count == 0: + return + self.count += count + self.avg = (self.avg*(self.count-count) + avg*count)/(self.count) + + def __call__(self): + return self.avg + +def convert_datetime_format(date_string: str, is_gt=False) -> str: + # pattern_date_string = "2023-02-28" + input_format = "%Y-%m-%d" + output_format = "%d/%m/%Y" + # Validate the input date string format + pattern = r"\d{4}-\d{2}-\d{2}" + if re.match(pattern, date_string): + # Convert the date string to a datetime object + date_object = datetime.strptime(date_string, input_format) + # Convert the datetime object to the desired output format + formatted_date = date_object.strftime(output_format) + return formatted_date + return date_string + +def predict_result_to_ready(result): + dict_result = {"retailername": "", + "sold_to_party": "", + "purchase_date": [], + "imei_number": [],} + dict_result["retailername"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}])[0].get("value", None) + dict_result["sold_to_party"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}])[1].get("value", None) + dict_result["purchase_date"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}])[2].get("value", []) + dict_result["imei_number"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[3].get("value", []) + return dict_result + +def align_fine_result(ready_predict, fine_result): + # print(f"[DEBUG]: fine_result: {fine_result}") + # print(f"[DEBUG]: ready_predict: {ready_predict}") + if fine_result: + if fine_result["purchase_date"] and len(ready_predict["purchase_date"]) == 0: + ready_predict["purchase_date"] = [None] + if fine_result["retailername"] and not ready_predict["retailername"]: + ready_predict["retailername"] = [None] + fine_result["purchase_date"] = [fine_result["purchase_date"] for _ in range(len(ready_predict["purchase_date"]))] + # else: + # fine_result = {} + # for key in ready_predict.keys(): + # fine_result[key] = [] + # fine_result["purchase_date"] = [None for _ in range(len(ready_predict["purchase_date"]))] + return ready_predict, fine_result + +def update_temp_accuracy(accuracy, acc, keys): + for key in keys: + accuracy[key].add(acc[key]) + return accuracy +def calculate_accuracy(key_name, inference, target): + """_summary_ + + Args: + key_name (string): key to calculate accuracy on, ex: retailername + inference (dict): result from ocr, refined to align with the target down below + target (dict): result of type + """ + acc = [] + data = [] + + if not target or not inference: + return acc, data + if not isinstance(inference[key_name], list): + if inference[key_name] is None: + inference[key_name] = [] + else: + inference[key_name] = [inference[key_name]] + if not isinstance(target[key_name], list): + if target[key_name] is None: + target[key_name] = [] + else: + target[key_name] = [target[key_name]] + for i, v in enumerate(inference[key_name]): + # TODO: target[key_name][i] is None, "" + x = post_processing_str(key_name, inference[key_name][i], is_gt=False) + y = post_processing_str(key_name, target[key_name][i], is_gt=True) + + score = eval_ocr_metric( + [x], + [y], + metric=[ + "one_minus_ned", + # "line_acc_ignore_case_symbol", + # 
"line_acc", + # "one_minus_ned_word", + ]) + acc.append(list(score.values())[0]) + data.append([x, y]) + return acc, data + +def calculate_avg_accuracy(acc, type, keys=[]): + acc_list = [] + # print(f"[DEBUG]: type: {type} - acc: {acc}") + for key in keys: + acc_list += acc.get(type, {}).get(key, []) + + acc_list = [x for x in acc_list if x is not None] + return sum(acc_list)/len(acc_list) if len(acc_list) > 0 else None + + +def calculate_and_save_subcription_file(report, request): + request_att = {"acc": {"feedback": {"imei_number": [], + "purchase_date": [], + "retailername": [], + "sold_to_party": [], + }, + "reviewed": {"imei_number": [], + "purchase_date": [], + "retailername": [], + "sold_to_party": [], + }}, + "err": [], + "time_cost": {}, + "total_images": 0, + "bad_images": 0} + images = SubscriptionRequestFile.objects.filter(request=request) + for image in images: + status, att = calculate_subcription_file(image) + if status != 200: + continue + image.feedback_accuracy = att["acc"]["feedback"] + image.reviewed_accuracy = att["acc"]["reviewed"] + image.is_bad_image_quality = att["is_bad_image"] + image.save() + new_report_file = ReportFile(report=report, + correspond_request_id=request.request_id, + correspond_redemption_id=request.redemption_id, + doc_type=image.doc_type, + predict_result=image.predict_result, + feedback_result=image.feedback_result, + reviewed_result=image.reviewed_result, + feedback_accuracy=att["acc"]["feedback"], + reviewed_accuracy=att["acc"]["reviewed"], + acc=att["avg_acc"], + time_cost=image.processing_time, + bad_image_reason=image.reason, + counter_measures=image.counter_measures, + error="|".join(att["err"]) + ) + new_report_file.save() + if request_att["time_cost"].get(image.doc_type, None): + request_att["time_cost"][image.doc_type].append(image.processing_time) + else: + request_att["time_cost"][image.doc_type] = [image.processing_time] + try: + request_att["acc"]["feedback"]["imei_number"] += att["acc"]["feedback"]["imei_number"] + request_att["acc"]["feedback"]["purchase_date"] += att["acc"]["feedback"]["purchase_date"] + request_att["acc"]["feedback"]["retailername"] += att["acc"]["feedback"]["retailername"] + request_att["acc"]["feedback"]["sold_to_party"] += att["acc"]["feedback"]["sold_to_party"] + + request_att["acc"]["reviewed"]["imei_number"] += att["acc"]["reviewed"]["imei_number"] + request_att["acc"]["reviewed"]["purchase_date"] += att["acc"]["reviewed"]["purchase_date"] + request_att["acc"]["reviewed"]["retailername"] += att["acc"]["reviewed"]["retailername"] + request_att["acc"]["reviewed"]["sold_to_party"] += att["acc"]["reviewed"]["sold_to_party"] + + request_att["bad_images"] += int(att["is_bad_image"]) + request_att["total_images"] += 1 + request_att["err"] += att["err"] + except Exception as e: + print(e) + continue + + return request_att + + +def calculate_subcription_file(subcription_request_file): + att = {"acc": {"feedback": {}, + "reviewed": {}}, + "err": [], + "is_bad_image": False, + "avg_acc": None} + if not subcription_request_file.predict_result: + return 400, att + + inference_result = copy.deepcopy(subcription_request_file.predict_result) + inference_result, feedback_result = align_fine_result(inference_result, copy.deepcopy(subcription_request_file.feedback_result)) + inference_result, reviewed_result = align_fine_result(inference_result, copy.deepcopy(subcription_request_file.reviewed_result)) + # print(f"[DEBUG]: predict_result: {subcription_request_file.predict_result}") + # print(f"[DEBUG]: inference_result: 
{inference_result}") + # print(f"[DEBUG]: feedback_result: {feedback_result}") + # print(f"[DEBUG]: reviewed_result: {reviewed_result}") + + for key_name in valid_keys: + try: + att["acc"]["feedback"][key_name], _ = calculate_accuracy(key_name, inference_result, feedback_result) + att["acc"]["reviewed"][key_name], _ = calculate_accuracy(key_name, inference_result, reviewed_result) + except Exception as e: + att["err"].append(str(e)) + # print(f"[DEBUG]: e: {e} -key_name: {key_name}") + avg_reviewed = calculate_avg_accuracy(att["acc"], "reviewed", ["retailername", "sold_to_party", "purchase_date", "imei_number"]) + avg_feedback = calculate_avg_accuracy(att["acc"], "feedback", ["retailername", "sold_to_party", "purchase_date", "imei_number"]) + if avg_feedback is not None or avg_reviewed is not None: + avg_acc = max([x for x in [avg_feedback, avg_reviewed] if x is not None]) + if avg_acc < BAD_THRESHOLD: + att["is_bad_image"] = True + att["avg_acc"] = avg_acc + return 200, att + +def calculate_attributions(request): # for one request, return in order + acc = {"feedback": {}, + "reviewed": {}} # {"feedback": {"retailername": [0.1], "sold_to_party":[0.9], "purchase_date":[0.6], "imei_number":[0.8]}, + # "reviewed": {"retailername": [0.1], "sold_to_party":[0.9], "purchase_date":[0.6], "imei_number":[0.8]}} + data = {"feedback": {}, + "reviewed": {}} # {"feedback": {"retailername": [[ocr, feedback], ...], "sold_to_party":[[ocr, feedback], ...], "purchase_date":[[ocr, feedback], ...], "imei_number":[[ocr, feedback], ...]}} + # {"reviewed": {"retailername": [[ocr, reviewed], ...], "sold_to_party":[[ocr, reviewed], ...], "purchase_date":[[ocr, reviewed], ...], "imei_number":[[ocr, reviewed], ...]}} + time_cost = {} # {"imei": [0.1], "invoice": [0.1]} + image_quality_num = [0, 0] # [good, bad] + image_quality_num[0] = len(request.doc_type.split(",")) + error = "" + + inference_result = predict_result_to_ready(request.predict_result) + inference_result, reviewed_result = align_fine_result(inference_result, request.reviewed_result) # align_fine_result returns a (predict, target) pair + inference_result, feedback_result = align_fine_result(inference_result, request.feedback_result) + + # accuracy calculation + for key_name in valid_keys: + if isinstance(inference_result[key_name], list): + if len(inference_result[key_name]) != len(reviewed_result.get(key_name, [])): + error = f"Request {request.request_id} failed with different {key_name} in predict and reviewed_result" + break + if len(inference_result[key_name]) != len(feedback_result.get(key_name, [])): + error = f"Request {request.request_id} failed with different {key_name} in predict and feedback_result" + break + # calculate accuracy for feedback result + acc["feedback"][key_name], data["feedback"][key_name] = calculate_accuracy(key_name, inference_result, feedback_result) + acc["reviewed"][key_name], data["reviewed"][key_name] = calculate_accuracy(key_name, inference_result, reviewed_result) + else: + inference_result[key_name] = [inference_result[key_name]] + feedback_result[key_name] = [feedback_result[key_name]] + reviewed_result[key_name] = [reviewed_result[key_name]] + + acc["feedback"][key_name], data["feedback"][key_name] = calculate_accuracy(key_name, inference_result, feedback_result) + acc["reviewed"][key_name], data["reviewed"][key_name] = calculate_accuracy(key_name, inference_result, reviewed_result) + + acc["feedback"]["purchase_date"] = [max(acc["feedback"]["purchase_date"])] if len(acc["feedback"]["purchase_date"]) > 0 else [] + acc["reviewed"]["purchase_date"] = [max(acc["reviewed"]["purchase_date"])] if 
len(acc["reviewed"]["purchase_date"]) > 0 else [] + # Count for bad and total images + avg_invoice_feedback = calculate_avg_accuracy(acc, "feedback", ["retailername", "sold_to_party", "purchase_date"]) + avg_invoice_reviewed = calculate_avg_accuracy(acc, "reviewed", ["retailername", "sold_to_party", "purchase_date"]) + if avg_invoice_feedback is not None or avg_invoice_reviewed is not None: + if max([x for x in [avg_invoice_feedback, avg_invoice_reviewed] if x is not None]) < BAD_THRESHOLD: + image_quality_num[1] += 1 + for i, _ in enumerate(acc["feedback"]["imei_number"]): + if acc["feedback"]["imei_number"][i] is not None and acc["reviewed"]["imei_number"][i] is not None: + if max([x for x in [acc["feedback"]["imei_number"][i], acc["reviewed"]["imei_number"][i]] if x is not None]) < BAD_THRESHOLD: + image_quality_num[1] += 1 + # time cost and quality calculation + # TODO: to be deprecated, doc_type would be in file level in the future + try: + for doc_type, doc_profile in request.ai_inference_profile.items(): + doc_type = doc_type.split("_")[0] + inference_time = doc_profile["inference"][1][0] - doc_profile["inference"][0] + postprocess_time = doc_profile["postprocess"][1] - doc_profile["postprocess"][0] + time_cost[doc_type].append(inference_time + postprocess_time) + except Exception as e: + error = f"Request id {request.request_id} failed with error: {e}" + + return acc, data, time_cost, image_quality_num, error + +def shadow_report(report_id, query): + c_connector.make_a_report( + (report_id, query)) \ No newline at end of file diff --git a/cope2n-api/fwd_api/utils/file.py b/cope2n-api/fwd_api/utils/file.py index bd0b4c8..a4d364c 100644 --- a/cope2n-api/fwd_api/utils/file.py +++ b/cope2n-api/fwd_api/utils/file.py @@ -6,6 +6,7 @@ import json from PIL import Image, ExifTags from django.core.files.uploadedfile import TemporaryUploadedFile +from django.utils import timezone from fwd import settings from fwd_api.constant.common import allowed_file_extensions @@ -18,10 +19,33 @@ from fwd_api.utils.image import resize from ..celery_worker.client_connector import c_connector import imagesize import csv - from openpyxl import load_workbook from openpyxl.styles import Font, Border, Side, PatternFill, NamedStyle +def validate_report_list(request): + start_date_str = request.GET.get('start_date') + end_date_str = request.GET.get('end_date') + page_number = int(request.GET.get('page', 0)) + page_size = int(request.GET.get('page_size', 10)) + report_id = request.GET.get('report_id', None) + + validated_data = {} + validated_data["start_date"] = None + validated_data["end_date"] = None + + if len(start_date_str) > 0 and len(end_date_str) > 0: + try: + validated_data["start_date"] = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z') + validated_data["end_date"] = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z') + except ValueError: + raise InvalidException(excArgs="Date format") + validated_data["report_id"] = report_id + validated_data["page_size"] = page_size + validated_data["page_number"] = page_number + if validated_data["report_id"] is None and validated_data["start_date"] is None: + raise RequiredFieldException(excArgs="report_id, start_date, end_date") + return validated_data + def validate_feedback_file(csv_file_path): required_columns = ['redemptionNumber', 'requestId', 'imeiNumber', 'imeiNumber2', 'Purchase Date', 'retailer', 'Sold to party', 'timetakenmilli'] missing_columns = [] @@ -57,7 +81,6 @@ def validate_list_file(files, 
max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUES if total_file_size > settings.MAX_UPLOAD_FILE_SIZE_OF_A_REQUEST: raise LimitReachedException(excArgs=('Total size of all files', str(settings.MAX_UPLOAD_SIZE_OF_A_FILE / 1024 / 1024), 'MB')) - def validate_csv_feedback(files, max_file_num=1, min_file_num=1, file_field="csv files"): total_file_size = 0 if len(files) < min_file_num: diff --git a/cope2n-api/fwd_api/utils/ocr_utils/__init__.py b/cope2n-api/fwd_api/utils/ocr_utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cope2n-api/fwd_api/utils/ocr_utils/ocr_metrics.py b/cope2n-api/fwd_api/utils/ocr_utils/ocr_metrics.py new file mode 100644 index 0000000..71cd84d --- /dev/null +++ b/cope2n-api/fwd_api/utils/ocr_utils/ocr_metrics.py @@ -0,0 +1,385 @@ +import re +from pathlib import Path +from difflib import SequenceMatcher +from terminaltables import AsciiTable +from rapidfuzz.distance import Levenshtein + +from .wiki_diff import inline_diff + + +def is_type_list(x, type): + + if not isinstance(x, list): + return False + + return all(isinstance(item, type) for item in x) + + +def cal_true_positive_char(pred, gt): + """Calculate correct character number in prediction. + Args: + pred (str): Prediction text. + gt (str): Ground truth text. + Returns: + true_positive_char_num (int): The true positive number. + """ + + all_opt = SequenceMatcher(None, pred, gt) + true_positive_char_num = 0 + for opt, _, _, s2, e2 in all_opt.get_opcodes(): + if opt == "equal": + true_positive_char_num += e2 - s2 + else: + pass + return true_positive_char_num + + +def post_processing(text): + """ + - Remove special characters and extra spaces + lower case + """ + + text = re.sub( + r"[^aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ0123456789 ]", + " ", + text, + ) + text = re.sub(r"\s\s+", " ", text) + text = text.strip() + + return text + + +def count_matches(pred_texts, gt_texts, use_ignore=True): + """Count the various match number for metric calculation. + Args: + pred_texts (list[str]): Predicted text string. + gt_texts (list[str]): Ground truth text string. + Returns: + match_res: (dict[str: int]): Match number used for + metric calculation. 
+ """ + match_res = { + "gt_char_num": 0, + "pred_char_num": 0, + "true_positive_char_num": 0, + "gt_word_num": 0, + "match_word_num": 0, + "match_word_ignore_case": 0, + "match_word_ignore_case_symbol": 0, + "match_kie": 0, + "match_kie_ignore_case": 0, + } + # comp = re.compile('[^A-Z^a-z^0-9^\u4e00-\u9fa5]') + # comp = re.compile('[]') + norm_ed_sum = 0.0 + + gt_texts_for_ned_word = [] + pred_texts_for_ned_word = [] + for pred_text, gt_text in zip(pred_texts, gt_texts): + if gt_text == pred_text: + match_res["match_word_num"] += 1 + match_res["match_kie"] += 1 + gt_text_lower = str(gt_text).lower() + pred_text_lower = str(pred_text).lower() + + if gt_text_lower == pred_text_lower: + match_res["match_word_ignore_case"] += 1 + + # gt_text_lower_ignore = comp.sub('', gt_text_lower) + # pred_text_lower_ignore = comp.sub('', pred_text_lower) + if use_ignore: + gt_text_lower_ignore = post_processing(gt_text_lower) + pred_text_lower_ignore = post_processing(pred_text_lower) + else: + gt_text_lower_ignore = gt_text_lower + pred_text_lower_ignore = pred_text_lower + + if gt_text_lower_ignore == pred_text_lower_ignore: + match_res["match_kie_ignore_case"] += 1 + + gt_texts_for_ned_word.append(gt_text_lower_ignore.split(" ")) + pred_texts_for_ned_word.append(pred_text_lower_ignore.split(" ")) + + match_res["gt_word_num"] += 1 + + norm_ed = Levenshtein.normalized_distance( + pred_text_lower_ignore, gt_text_lower_ignore + ) + # if norm_ed > 0.1: + # print(gt_text_lower_ignore, pred_text_lower_ignore, sep='\n') + # print("-"*20) + norm_ed_sum += norm_ed + + # number to calculate char level recall & precision + match_res["gt_char_num"] += len(gt_text_lower_ignore) + match_res["pred_char_num"] += len(pred_text_lower_ignore) + true_positive_char_num = cal_true_positive_char( + pred_text_lower_ignore, gt_text_lower_ignore + ) + match_res["true_positive_char_num"] += true_positive_char_num + + normalized_edit_distance = norm_ed_sum / max(1, len(gt_texts)) + match_res["ned"] = normalized_edit_distance + + # NED for word-level + norm_ed_word_sum = 0.0 + # print(pred_texts_for_ned_word[0]) + unique_words = list( + set( + [x for line in pred_texts_for_ned_word for x in line] + + [x for line in gt_texts_for_ned_word for x in line] + ) + ) + preds = [ + [unique_words.index(w) for w in pred_text_for_ned_word] + for pred_text_for_ned_word in pred_texts_for_ned_word + ] + truths = [ + [unique_words.index(w) for w in gt_text_for_ned_word] + for gt_text_for_ned_word in gt_texts_for_ned_word + ] + for pred_text, gt_text in zip(preds, truths): + norm_ed_word = Levenshtein.normalized_distance(pred_text, gt_text) + # if norm_ed_word < 0.2: + # print(pred_text, gt_text) + norm_ed_word_sum += norm_ed_word + + normalized_edit_distance_word = norm_ed_word_sum / max(1, len(gt_texts)) + match_res["ned_word"] = normalized_edit_distance_word + + return match_res + + +def eval_ocr_metric(pred_texts, gt_texts, metric="acc"): + """Evaluate the text recognition performance with metric: word accuracy and + 1-N.E.D. See https://rrc.cvc.uab.es/?ch=14&com=tasks for details. + Args: + pred_texts (list[str]): Text strings of prediction. + gt_texts (list[str]): Text strings of ground truth. + metric (str | list[str]): Metric(s) to be evaluated. Options are: + - 'word_acc': Accuracy at word level. + - 'word_acc_ignore_case': Accuracy at word level, ignoring letter + case. + - 'word_acc_ignore_case_symbol': Accuracy at word level, ignoring + letter case and symbol. 
(Default metric for academic evaluation) + - 'char_recall': Recall at character level, ignoring + letter case and symbol. + - 'char_precision': Precision at character level, ignoring + letter case and symbol. + - 'one_minus_ned': 1 - normalized_edit_distance + In particular, if ``metric == 'acc'``, results on all metrics above + will be reported. + Returns: + dict{str: float}: Result dict for text recognition, keys could be some + of the following: ['word_acc', 'word_acc_ignore_case', + 'word_acc_ignore_case_symbol', 'char_recall', 'char_precision', + '1-N.E.D']. + """ + assert isinstance(pred_texts, list) + assert isinstance(gt_texts, list) + assert len(pred_texts) == len(gt_texts) + + assert isinstance(metric, str) or is_type_list(metric, str) + if metric == "acc" or metric == ["acc"]: + metric = [ + "word_acc", + "word_acc_ignore_case", + "word_acc_ignore_case_symbol", + "char_recall", + "char_precision", + "one_minus_ned", + ] + metric = set([metric]) if isinstance(metric, str) else set(metric) + + # supported_metrics = set([ + # 'word_acc', 'word_acc_ignore_case', 'word_acc_ignore_case_symbol', + # 'char_recall', 'char_precision', 'one_minus_ned', 'one_minust_ned_word' + # ]) + # assert metric.issubset(supported_metrics) + + match_res = count_matches(pred_texts, gt_texts) + eps = 1e-8 + eval_res = {} + + if "char_recall" in metric: + char_recall = ( + 1.0 * match_res["true_positive_char_num"] / (eps + match_res["gt_char_num"]) + ) + eval_res["char_recall"] = char_recall + + if "char_precision" in metric: + char_precision = ( + 1.0 + * match_res["true_positive_char_num"] + / (eps + match_res["pred_char_num"]) + ) + eval_res["char_precision"] = char_precision + + if "word_acc" in metric: + word_acc = 1.0 * match_res["match_word_num"] / (eps + match_res["gt_word_num"]) + eval_res["word_acc"] = word_acc + + if "word_acc_ignore_case" in metric: + word_acc_ignore_case = ( + 1.0 * match_res["match_word_ignore_case"] / (eps + match_res["gt_word_num"]) + ) + eval_res["word_acc_ignore_case"] = word_acc_ignore_case + + if "word_acc_ignore_case_symbol" in metric: + word_acc_ignore_case_symbol = ( + 1.0 + * match_res["match_word_ignore_case_symbol"] + / (eps + match_res["gt_word_num"]) + ) + eval_res["word_acc_ignore_case_symbol"] = word_acc_ignore_case_symbol + + if "one_minus_ned" in metric: + + eval_res["1-N.E.D"] = 1.0 - match_res["ned"] + + if "one_minus_ned_word" in metric: + + eval_res["1-N.E.D_word"] = 1.0 - match_res["ned_word"] + + if "line_acc_ignore_case_symbol" in metric: + line_acc_ignore_case_symbol = ( + 1.0 * match_res["match_kie_ignore_case"] / (eps + match_res["gt_word_num"]) + ) + eval_res["line_acc_ignore_case_symbol"] = line_acc_ignore_case_symbol + + if "line_acc" in metric: + word_acc_ignore_case_symbol = ( + 1.0 * match_res["match_kie"] / (eps + match_res["gt_word_num"]) + ) + eval_res["line_acc"] = word_acc_ignore_case_symbol + + for key, value in eval_res.items(): + eval_res[key] = float("{:.4f}".format(value)) + + return eval_res + + +def eval_kie(preds_e2e: dict[str, dict[str, str]], gt_e2e: dict[str, dict[str, str]], labels, skip_labels=[]): + + results = {label: 1 for label in labels} + pred_texts_dict = {label: [] for label in labels} + gt_texts_dict = {label: [] for label in labels} + fail_cases = {} + for img_id in gt_e2e.keys(): + fail_cases[img_id] = {} + pred_items = preds_e2e.get(img_id, {k: '' for k in gt_e2e[img_id]}) + gt_items = gt_e2e[img_id] + + for class_name, text_gt in gt_items.items(): + if class_name in skip_labels: + continue + # if class_name == 
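# A minimal usage sketch of eval_ocr_metric() as defined above (illustrative
# strings): count_matches() lower-cases and strips symbols before comparing,
# so a case-only difference still scores a perfect 1-N.E.D.
scores = eval_ocr_metric(["SAMSUNG STORE"], ["Samsung Store"], metric=["one_minus_ned"])
assert scores == {"1-N.E.D": 1.0}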
'seller_name_value': + # print(gt_items) + if class_name not in pred_items: + text_pred = "" + else: + text_pred = pred_items[class_name] + + if str(text_pred) != str(text_gt): + diff = inline_diff(text_pred, text_gt) + fail_cases[img_id][class_name] = { + 'pred': text_pred, + 'gt': text_gt, + "diff": diff['res_text'], + "ned": diff["ned"], + "score": eval_ocr_metric([text_pred], [text_gt], metric=[ + "one_minus_ned"])["1-N.E.D"], + } + + pred_texts_dict[class_name].append(text_pred) + gt_texts_dict[class_name].append(text_gt) + + for class_name in labels: + pred_texts = pred_texts_dict[class_name] + gt_texts = gt_texts_dict[class_name] + result = eval_ocr_metric( + pred_texts, + gt_texts, + metric=[ + "one_minus_ned", + "line_acc_ignore_case_symbol", + "line_acc", + "one_minus_ned_word", + ], + ) + results[class_name] = { + "1-ned": result["1-N.E.D"], + "1-ned-word": result["1-N.E.D_word"], + "line_acc": result["line_acc"], + "line_acc_ignore_case_symbol": result["line_acc_ignore_case_symbol"], + "samples": len(pred_texts), + } + + # avg reusults + sum_1_ned = sum( + [ + results[class_name]["1-ned"] * results[class_name]["samples"] + for class_name in labels + ] + ) + sum_1_ned_word = sum( + [ + results[class_name]["1-ned-word"] * results[class_name]["samples"] + for class_name in labels + ] + ) + + sum_line_acc = sum( + [ + results[class_name]["line_acc"] * results[class_name]["samples"] + for class_name in labels + ] + ) + sum_line_acc_ignore_case_symbol = sum( + [ + results[class_name]["line_acc_ignore_case_symbol"] + * results[class_name]["samples"] + for class_name in labels + ] + ) + + total_samples = sum( + [results[class_name]["samples"] for class_name in labels] + ) + results["avg_all"] = { + "1-ned": round(sum_1_ned / total_samples, 4), + "1-ned-word": round(sum_1_ned_word / total_samples, 4), + "line_acc": round(sum_line_acc / total_samples, 4), + "line_acc_ignore_case_symbol": round( + sum_line_acc_ignore_case_symbol / total_samples, 4 + ), + "samples": total_samples, + } + + table_data = [ + [ + "class_name", + "1-NED", + "1-N.E.D_word", + "line-acc", + "line_acc_ignore_case_symbol", + "#samples", + ] + ] + for class_name in results.keys(): + # if c < p.shape[0]: + table_data.append( + [ + class_name, + results[class_name]["1-ned"], + results[class_name]["1-ned-word"], + results[class_name]["line_acc"], + results[class_name]["line_acc_ignore_case_symbol"], + results[class_name]["samples"], + ] + ) + + table = AsciiTable(table_data) + print(table.table) + return results, fail_cases diff --git a/cope2n-api/fwd_api/utils/ocr_utils/sbt_report.py b/cope2n-api/fwd_api/utils/ocr_utils/sbt_report.py new file mode 100644 index 0000000..04e02be --- /dev/null +++ b/cope2n-api/fwd_api/utils/ocr_utils/sbt_report.py @@ -0,0 +1,432 @@ +import os +import re +import ast +import time +import json +import glob +import shutil +import pandas as pd +from tqdm import tqdm +from pathlib import Path +from datetime import datetime +from .ocr_metrics import eval_ocr_metric + +import sys +# sys.path.append(os.path.dirname(__file__)) +from sdsvkvu.utils.query.sbt_v2 import get_seller, post_process_seller + + +def read_json(file_path: str): + with open(file_path, 'r') as f: + return json.load(f) + +def write_to_json(file_path, content): + with open(file_path, mode='w', encoding='utf8') as f: + json.dump(content, f, ensure_ascii=False) + + +def convert_datetime_format(date_string: str, is_gt=False) -> str: + # pattern_date_string = "2023-02-28" + output_format = "%Y-%m-%d" + input_format = "%d/%m/%Y" + # 
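# eval_kie() above consumes per-image dicts keyed by field label and returns
# both per-class metrics and an inline diff of every mismatch. A minimal
# sketch (hypothetical image id and labels):
preds = {"img_001": {"retailername": "ABC Mart", "purchase_date": "2023-02-28"}}
gts = {"img_001": {"retailername": "ABC Mart", "purchase_date": "2023-02-28"}}
results, fail_cases = eval_kie(preds, gts, labels=["retailername", "purchase_date"])
# results["avg_all"]["1-ned"] == 1.0 for this perfect match; fail_cases would
# collect {pred, gt, diff, ned, score} for any field that differed.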
Validate the input date string format + pattern = r"\d{2}\/\d{2}\/\d{4}" + if re.match(pattern, date_string): + # Convert the date string to a datetime object + date_object = datetime.strptime(date_string, input_format) + # Convert the datetime object to the desired output format + formatted_date = date_object.strftime(output_format) + return formatted_date + return date_string + + +def normalise_retailer_name(retailer: str): + input_value = { + "text": retailer, + "id": 0, + "class": "seller", + "bbox": [0, 0, 0, 0], + } + output = get_seller({'seller': [input_value]}) + + norm_seller_name = post_process_seller(output) + return norm_seller_name + + +def post_processing_str(class_name: str, s: str, is_gt: bool) -> str: + s = str(s).replace('✪', ' ').strip() + if s.lower() in ['null', 'nan', "none"]: + return '' + if class_name == "purchase_date" and is_gt == True: + s = convert_datetime_format(s) + if class_name == "retailername": + s = normalise_retailer_name(s) + return s + + +def convert_groundtruth_from_csv( + csv_path: str, + save_dir: str, + classes: list = ["retailername", "sold_to_party", "purchase_date", "imei_number"] +): + # if isinstance(csv_path_list, str): + # csv_path_list = [csv_path_list] + + df = pd.read_csv(csv_path) + + total_output = {} + for _, request in df.iterrows(): + req_id = request['requestId'] + + if req_id not in total_output: + total_output[req_id] = {k: None for k in classes} + total_output[req_id]["imei_number"] = [] + + total_output[req_id]["imei_number"].extend([request["imeiNumber"], request["imeiNumber2"]]) + total_output[req_id]["imei_number"] = list(set(total_output[req_id]["imei_number"])) + + total_output[req_id]["purchase_date"] = request["Purchase Date"] + total_output[req_id]["retailername"] = request["retailer"] + + for req_id, output in total_output.items(): + save_path = os.path.join(save_dir, req_id) + os.makedirs(save_path, exist_ok=True) + write_to_json(os.path.join(save_path, f"{req_id}.json"), output) + + +def convert_predict_from_csv( + csv_path: str, + save_dir: str, + classes: list = ["retailername", "sold_to_party", "purchase_date", "imei_number"] +): + # if isinstance(csv_path_list, str): + # csv_path_list = [csv_path_list] + + df = pd.read_csv(csv_path) + + for _, request in df.iterrows(): + n_pages = request['pages'] + req_id = request['request_id'] + if not isinstance(request['doc_type'], str) or not isinstance(request['predict_result'], str): + print(f"[WARNING] Skipped request id {req_id}") + continue + + doc_type_list = request['doc_type'].split(',') + assert n_pages == len(doc_type_list), \ + "No. pages is different no. 
documents" + + json_path = os.path.join(save_dir, req_id) + os.makedirs(json_path, exist_ok=True) + + # For user_submitted_results + if "feedback_result" in request: + feedback_data = ast.literal_eval(request['feedback_result']) + fname = f"{req_id}.json" + write_to_json(os.path.join(json_path, fname), feedback_data) + + # For predict_results + data = ast.literal_eval(request['predict_result'])['content']['document'][0]['content'] + infer_time = float(request['ai_inference_time']) + float(request['preprocessing_time']) + 0.1 + + n_imei, n_invoice = 0, 0 + for doc_type in doc_type_list: + output = {k: None for k in classes} + if not os.path.exists(json_path): + os.makedirs(json_path, exist_ok=True) + + if doc_type == "imei": + for info in data: + if info['label'] == "imei_number": + output['imei_number'] = info['value'][n_imei] + output['processing_time'] = infer_time + fname = f"temp_{doc_type}_{req_id}_{n_imei}.json" + write_to_json(os.path.join(json_path, fname), output) + n_imei += 1 + break + elif doc_type == "invoice": + for info in data: + if info['label'] == "imei_number": + continue + output[info['label']] = info['value'] + output['processing_time'] = infer_time + fname = f"temp_{doc_type}_{req_id}_{n_invoice}.json" + write_to_json(os.path.join(json_path, fname), output) + n_invoice += 1 + + +def gen_req_to_red_dict(csv_path: str): + df = pd.read_csv(csv_path) + df = df.loc[:, ["requestId", "redemptionNumber"]] + req_to_red = {row["requestId"]: row["redemptionNumber"] for _, row in df.iterrows()} + return req_to_red + + +def gen_req_to_red_dict_2(csv_path: str): + df = pd.read_csv(csv_path) + df = df.loc[:, ["request_id", "redemption_id"]] + req_to_red = {row["request_id"]: row["redemption_id"] for _, row in df.iterrows()} + return req_to_red + + +def init_csv( + gt_dir: str, + pred_dir: str, + req_to_red: dict, +): + list_request_id = os.listdir(gt_dir) + total = [] + for request_id in list_request_id: + gt_path = os.path.join(gt_dir, request_id, request_id+".json") + if not os.path.exists(gt_path): + print(f"[WARNING] Skipped request id {os.path.basename(os.path.dirname(gt_path))}") + continue + gt_data = read_json(gt_path) + json_file_list = glob.glob(os.path.join(pred_dir, request_id, "temp_*.json")) + json_file_list = sorted(json_file_list, key=lambda x: int(x.split(".json")[0].split('_')[-1])) + n_imei, n_invoice = 0, 0 + # if len(json_file_list) > 3: + # continue + + for json_file in json_file_list: + pred_data = read_json(json_file) + if "imei" in json_file: + pred_value = pred_data['imei_number'] + gt_value = gt_data['imei_number'][n_imei] + n_imei += 1 + score = eval_ocr_metric( + [post_processing_str("imei_number", pred_value, is_gt=False)], + [post_processing_str("imei_number", gt_value, is_gt=True)], + metric=["one_minus_ned"] + )['1-N.E.D'] + + total.append({ + "requestId": request_id, + "redemptionNumber": req_to_red[request_id], + "userSubmitResults": gt_value, + "OCRResults": pred_value, + "revisedResults_by_SDSRV": "", + "accuracy": score, + "processingTime (by request)": pred_data['processing_time'], + "class_name": "imei_number", + "file_path": json_file + }) + + elif "invoice" in json_file: + for class_name in ["retailername", "purchase_date"]: + pred_value = pred_data[class_name] + gt_value = gt_data[class_name] + if isinstance(gt_value, list): + gt_value = gt_value[0] + n_invoice += 1 + + if not isinstance(pred_value, list): + pred_value = [pred_value] + + score = 0 + for _pred_value in pred_value: + score1 = eval_ocr_metric( + 
[post_processing_str(class_name, _pred_value, is_gt=False)], + [post_processing_str(class_name, gt_value, is_gt=True)], + metric=["one_minus_ned"] + )['1-N.E.D'] + score = max(score, score1) + + total.append({ + "requestId": request_id, + "redemptionNumber": req_to_red[request_id], + "userSubmitResults": gt_value, + "OCRResults": pred_value[0] if class_name == "retailername" else pred_value, + "revisedResults_by_SDSRV": "", + "accuracy": score, + "processingTime (by request)": pred_data['processing_time'], + "class_name": class_name, + "file_path": json_file + }) + + return total + + +def export_report( + init_csv: str, +): + df = pd.read_csv(init_csv) + for index, request in df.iterrows(): + file_path = request['file_path'] + class_name = request['class_name'] + pred_value = request['OCRResults'] + revised_value = read_json(file_path)[class_name] + if class_name == "purchase_date": + pred_value = ast.literal_eval(pred_value) + if isinstance(revised_value, list): + if len(revised_value) > 0: + revised_value = revised_value[0] + else: + revised_value = None + + if len(pred_value) == 0: + pred_value = [None] + + score = 0 + for _pred_value in pred_value: + score1 = eval_ocr_metric( + [post_processing_str(class_name, _pred_value, is_gt=False)], + [post_processing_str(class_name, revised_value, is_gt=True)], + metric=["one_minus_ned"] + )['1-N.E.D'] + score = max(score, score1) + else: + score = eval_ocr_metric( + [post_processing_str(class_name, pred_value, is_gt=False)], + [post_processing_str(class_name, revised_value, is_gt=True)], + metric=["one_minus_ned"] + )['1-N.E.D'] + + + df.at[index, "revisedResults_by_SDSRV"] = revised_value + df.at[index, "accuracy"] = score + + return df + + +def pick_sample_to_revise( + ocr_accuracy: list, + gt_dir: str, + save_dir: str +): + empty_err_path = os.path.join(save_dir, "empty_results") + other_err_path = os.path.join(save_dir, "diff_results") + os.makedirs(empty_err_path, exist_ok=True) + os.makedirs(other_err_path, exist_ok=True) + for request in ocr_accuracy: + score = request['accuracy'] + json_path = request['file_path'] + request_id = request['requestId'] + + img_path_folder = os.path.join(gt_dir, Path(json_path).parts[-2], Path(json_path).parts[-1]) + img_path = [ff for ff in glob.glob(img_path_folder.replace(".json", ".*")) if ".json" not in ff] + + if len(img_path) == 0: + print(f"[WARNING] Skipped request id {request_id}") + continue + img_path = img_path[0] + # img_path = [ff for ff in glob.glob(json_path.replace(".json", ".*"))][0] + + if score == 0: + save_path = os.path.join(empty_err_path, request_id) + elif score < 1: + save_path = os.path.join(other_err_path, request_id) + else: + continue + os.makedirs(save_path, exist_ok=True) + shutil.copy(img_path, save_path) + shutil.copy(json_path, save_path) + +def merge_revised_sample( + revised_path_list: list, + save_dir: str +): + if not isinstance(revised_path_list, list): + revised_path_list = [revised_path_list] + + for revised_path in revised_path_list: + list_request = [os.path.basename(ff) for ff in os.listdir(revised_path)] + for request in list_request: + file_list = glob.glob(os.path.join(revised_path, request, "*.json*")) + for file_path in file_list: + # shutil.copyfile(file_path, os.path.join(save_path, request)) + os.system(f"sudo cp {file_path} {os.path.join(save_dir, request)}") + +def calculate_average_by_column(df, column_name): + df = df.groupby(by=["requestId"]) + time_list = [] + for req, sub_df in df: + if len(sub_df) > 0: + 
time_list.append(sub_df.iloc[0][column_name]) + if len(time_list) > 0: + return sum(time_list)/len(time_list) + return 0 + + +if __name__ == "__main__": + save_path = "/mnt/hdd4T/TannedCung/OCR/Data/SBT_for_acc/15Jan" + save_csv = "logs/eval_20240115" + csv_path = "/mnt/hdd4T/TannedCung/OCR/Data/SBT_for_acc/15Jan.csv" + csv_path_end_user = "logs/eval_20240115/OCR_15Jan2024.csv" + + # Step 1: Convert a csv file to get user submitted results for each request + print("[INFO] Starting convert csv from customer to json") + os.system(f"sudo chmod -R 777 {save_path}") + convert_groundtruth_from_csv(csv_path=csv_path_end_user, save_dir=save_path) + print("[INFO] Converted") + + # # Step 2: Convert a csv file to get predict OCR results for each image + print("[INFO] Starting convert csv from SDSV to json") + convert_predict_from_csv(csv_path=csv_path, save_dir=save_path) + print("[INFO] Converted") + + # # Step 3: Gen initial csv file and calculate OCR result between submitted results and ocr results + print("[INFO] Starting generate csv to get performance") + gt_path = save_path + pred_path = save_path + req_to_red_dict = gen_req_to_red_dict(csv_path_end_user) + init_data = init_csv(gt_dir=gt_path, pred_dir=pred_path, req_to_red=req_to_red_dict) + pd.DataFrame(init_data).to_csv(os.path.join(save_csv, "init1.csv"), index=False) + print("[INFO] Done") + + # # Step 4: Split requests whose accuracy is less than 1 to revise + # print("[INFO] Starting split data to review") + # revised_path = os.path.join(save_csv, "revised") + # # shutil.rmtree(revised_path) + # pick_sample_to_revise(ocr_accuracy=init_data, gt_dir=save_path, save_dir=revised_path) + # print("[INFO] Done") + + # # Step 5: Merge revised results to gt folder + # print("[INFO] Merging revised data to ground truth folder") + # revised_path = os.path.join(save_csv, "revised") + # revised_path = [f'{revised_path}/empty_results', f'{revised_path}/diff_results'] + # merge_revised_sample(revised_path_list=revised_path, save_dir=save_path) + # print("Done") + + # # Step 6: Caculate OCR result between ocr results and revised results + # print("[INFO] Exporting OCR report") + # init_csv_path = os.path.join(save_csv, "init1.csv") + # report = export_report(init_csv=init_csv_path) + # error_path = os.path.join(save_csv, "errors") + # pick_sample_to_revise(ocr_accuracy=report[report.accuracy < 0.75].to_dict('records'), gt_dir=save_path, save_dir=error_path) + + # n_total_images = len(report) + # n_bad_images = len(report[report.accuracy < 0.75]) + # average_acc = report[report.accuracy >= 0.75]['accuracy'].mean() + + # print("Total requests:", len(report['requestId'].unique())) + # print("Total images:", n_total_images) + # print("No. imei images:", len(report[report.class_name == "imei_number"])) + # print("No. invoice images:", len(report[report.class_name == "retailername"])) + # print("No. bad quality images:", n_bad_images) + # print("No. valid images:", n_total_images - n_bad_images) + # print("No. per of bad quality images:", 100*n_bad_images/n_total_images) + # print("Average accuracy:", 100*average_acc) + + # last_row = n_total_images + # report.at[last_row, "requestId"] = "Total requests:" + # report.at[last_row, "redemptionNumber"] = len(report['requestId'].unique()) + # report.at[last_row+1, "requestId"] = "Total images:" + # report.at[last_row+1, "redemptionNumber"] = n_total_images + # report.at[last_row+2, "requestId"] = "No. 
imei images:" + # report.at[last_row+2, "redemptionNumber"] = len(report[report.class_name == "imei_number"]) + # report.at[last_row+3, "requestId"] = "No. invoice images:" + # report.at[last_row+3, "redemptionNumber"] = len(report[report.class_name == "retailername"]) + # report.at[last_row+4, "requestId"] = "No. bad quality images:" + # report.at[last_row+4, "redemptionNumber"] = n_bad_images + # report.at[last_row+5, "requestId"] = "No. valid images:" + # report.at[last_row+5, "redemptionNumber"] = n_total_images - n_bad_images + # report.at[last_row+6, "requestId"] = "No. per of bad quality images:" + # report.at[last_row+6, "redemptionNumber"] = 100*n_bad_images/n_total_images + # report.at[last_row+7, "requestId"] = "Average accuracy:" + # report.at[last_row+7, "redemptionNumber"] = 100*average_acc + + + # report.drop(columns=["file_path", "class_name"]).to_csv(os.path.join(save_csv, f"SBT_report_{time.strftime('%Y%m%d')}.csv"), index=False) + # print("[INFO] Done") + + \ No newline at end of file diff --git a/cope2n-api/fwd_api/utils/ocr_utils/wiki_diff.py b/cope2n-api/fwd_api/utils/ocr_utils/wiki_diff.py new file mode 100644 index 0000000..dfbbb54 --- /dev/null +++ b/cope2n-api/fwd_api/utils/ocr_utils/wiki_diff.py @@ -0,0 +1,201 @@ +# https://stackoverflow.com/questions/774316/python-difflib-highlighting-differences-inline +import difflib +import unidecode +import os +import glob +import pandas as pd + +VOWELS = 'aeouiy' + 'AEOUIY' +CONSONANTS = 'bcdfghjklmnpqrstvxwz' + 'BCDFGHJKLMNPQRSTVXWZ' +# PREDICT_PATH = 'ocr/result' +# GROUNDTRUTH_PATH = '/mnt/hdd2T/AICR/Datasets/wiki/ground_truth' +PREDICT_PATH = 'ocr/result/cinamon' +GROUNDTRUTH_PATH = '/mnt/hdd2T/AICR/Datasets/Backup/1.Hand_writing/Lines/cinnamon_data' +# note that we also use different preprocess for cinamon data +# SAVE_PATH = 'wiki_diff' +SAVE_PATH = 'wiki_diff/cinamon' +RES_PATH = f'{SAVE_PATH}/result/' +WRONG_ACCENT_FILE = f'{SAVE_PATH}/wrong_accent.txt' +LOST_ACCENT_FILE = f'{SAVE_PATH}/lost_accent.txt' +TOTAL_WORD = 0 + + +def write_accent_error(path, err): + # path should be wrong_accent_file or lost_accent_file + with open(path, 'a') as f: + f.write(err) + f.write('\n') + + +def update_ddata_specialchars(ddata_specialchars, correction_key, char_key): + if char_key in ddata_specialchars[correction_key]: + ddata_specialchars[correction_key][char_key] += 1 + else: + ddata_specialchars[correction_key][char_key] = 1 + + +def process_replace_tag(matcher, i1, i2, j1, j2, ddata, ddata_specialchars): + a_char = matcher.a[i1:i2] + b_char = matcher.b[j1:j2] + ddata['res_text'] += ' ### {' + a_char + ' -> ' + b_char + '} ### ' + ddata['nwrongs'] += 1*len(b_char) + if len(a_char) == 1 and len(b_char) == 1: # single char case + if a_char.lower() == b_char.lower(): # wrong upper/lower case + ddata['UL_single'] += 1 + update_ddata_specialchars(ddata_specialchars, 'UL', (a_char, b_char)) + else: + ddata['nwrongs_single'] += 1 + a_ori = unidecode.unidecode(a_char).lower() + b_ori = unidecode.unidecode(b_char).lower() + if a_ori in VOWELS and b_ori in VOWELS: + if a_ori == b_ori: + err = a_char + ' -> ' + b_char + if b_ori == b_char.lower(): # e.g. 
Ơ -> O + ddata['nlost_accent'] += 1 + # write_accent_error(LOST_ACCENT_FILE, err) + else: # e.g Ơ -> Ớ + ddata['nwrong_accent'] += 1 + # write_accent_error(WRONG_ACCENT_FILE, err) + else: # e.g Ă ->  + ddata['nwrong_vowels'] += 1 + else: + if a_ori in CONSONANTS and b_ori in CONSONANTS: + ddata['nwrong_consonants'] += 1 + else: + ddata['nwrong_specialchars'] += 1 + update_ddata_specialchars(ddata_specialchars, 'wrong', (a_char, b_char)) + else: + if a_char.lower() == b_char.lower(): + ddata['UL_multiple'] += 1 + update_ddata_specialchars(ddata_specialchars, 'UL', (a_char, b_char)) + else: + ddata['nwrongs_multiple'] += 1 + if len(a_char) > 10 or len(b_char) > 10: + ddata['nlong_sequences'] += 1 + # print(a_char) + + +def process_delete_tag(matcher, i1, i2, ddata, ddata_specialchars): + a_char = matcher.a[i1:i2] + ddata['res_text'] += ' ### {- ' + a_char + '} ### ' + ddata['nadds'] += 1*len(a_char) + if len(a_char) == 1: + ddata['nadds_single'] += 1 + if a_char.lower() in CONSONANTS + VOWELS: + ddata['nadds_chars'] += 1 + else: + if a_char == ' ': + ddata['nadds_space'] += 1 + else: + ddata['nadds_specialchars'] += 1 + update_ddata_specialchars(ddata_specialchars, 'add', a_char) + + else: + ddata['nadds_multiple'] += 1 + if len(a_char) > 10: + ddata['nlong_sequences'] += 1 + # print(a_char) + + +def process_insert_tag(matcher, j1, j2, ddata, ddata_specialchars): + b_char = matcher.b[j1:j2] + ddata['nlosts'] += 1*len(b_char) + ddata['res_text'] += ' ### {+ ' + b_char + '} ### ' + if len(b_char) == 1: + ddata['nlosts_single'] += 1 + if b_char.lower() in CONSONANTS + VOWELS: + ddata['nlosts_chars'] += 1 + else: + if b_char == ' ': + ddata['nlosts_space'] += 1 + else: + ddata['nlosts_specialchars'] += 1 + update_ddata_specialchars(ddata_specialchars, 'lost', b_char) + + else: + ddata['nlosts_multiple'] += 1 + if len(b_char) > 10: + ddata['nlong_sequences'] += 1 + # print(b_char) + + +def inline_diff(a, b, ddata_specialchars={'lost': {}, 'add': {}, 'wrong': {}, 'UL': {}}): + matcher = difflib.SequenceMatcher(None, a, b) + ddata = {'res_text': ''} + # ddata = ddata | {key: 0 for key in ['nsingle', 'nmultiple']} + ddata = ddata | {key: 0 for key in ['UL_single', 'UL_multiple']} + ddata = ddata | { + key: 0 for key in + ['nlosts', 'nlosts_single', 'nlosts_multiple', 'nlosts_chars', 'nlosts_specialchars', 'nlosts_space']} + ddata = ddata | { + key: 0 for key in + ['nadds', 'nadds_single', 'nadds_multiple', 'nadds_chars', 'nadds_specialchars', 'nadds_space']} + ddata = ddata | { + key: 0 for key in + ['nwrongs', 'nwrongs_single', 'nwrongs_multiple', 'nwrong_accent', 'nlost_accent', 'nwrong_vowels', + 'nwrong_consonants', 'nwrong_specialchars']} + ddata['nlong_sequences'] = 0 + for tag, i1, i2, j1, j2 in matcher.get_opcodes(): + if tag == 'replace': # wrong + process_replace_tag(matcher, i1, i2, j1, j2, ddata, ddata_specialchars) + if tag == 'delete': # OCR add char so the matcher "delete" + process_delete_tag(matcher, i1, i2, ddata, ddata_specialchars) + if tag == 'equal': + ddata['res_text'] += matcher.a[i1:i2] + if tag == 'insert': # OCR lost char so the matcher "insert" + process_insert_tag(matcher, j1, j2, ddata, ddata_specialchars) + ddata["ned"] = ddata['nwrongs'] + ddata['nadds'] + ddata['nlosts'] + return ddata + + +def process_single_file(file_name, ddata_specialchars): + + # read predict file + with open(os.path.join(PREDICT_PATH, file_name), 'r') as f: + predict = f.readlines()[0].strip() + # predict = ''.join(predict) + # predict = predict.replace(' ', '') + # predict = 
predict.replace('\n', '') + # print(predict) + + # read groundtruth file + with open(os.path.join(GROUNDTRUTH_PATH, file_name), 'r') as f: + gt = f.readlines()[0].strip() + # gt = ''.join(gt) + # gt = gt.replace('\n', '') + + # get statistical data of difference between predict and ground truth + ddata = inline_diff(predict, gt, ddata_specialchars) + global TOTAL_WORD + TOTAL_WORD = TOTAL_WORD + len(gt.split()) + # write to save_path + res_text = ddata.pop('res_text', None) + save_file = os.path.join(RES_PATH, file_name) + with open(save_file, 'w') as f: + f.write(res_text) + + # generate csv file + ddata = {'file_name': save_file} | ddata + return ddata + + +def main(overwrite=False): + for accent_file in [WRONG_ACCENT_FILE, LOST_ACCENT_FILE]: + if os.path.exists(accent_file): + os.remove(accent_file) + lddata = [] + ddata_specialchars = {'lost': {}, 'add': {}, 'wrong': {}, 'UL': {}} + for file_ in glob.glob(f'{PREDICT_PATH}/*.txt'): + file_name = file_.split('/')[-1] + ddata = process_single_file(file_name, ddata_specialchars) + lddata.append(ddata) + if overwrite: + df = pd.DataFrame(lddata) + df.to_csv(f'{SAVE_PATH}/wiki_diff.csv', sep='\t') + df_ = pd.DataFrame(ddata_specialchars) + df_.to_csv(f'{SAVE_PATH}/wiki_diff_specialchars.csv', sep='\t') + print(TOTAL_WORD) + + +if __name__ == '__main__': + main(overwrite=True) diff --git a/cope2n-api/requirements.txt b/cope2n-api/requirements.txt index c204228..698f2db 100755 --- a/cope2n-api/requirements.txt +++ b/cope2n-api/requirements.txt @@ -36,7 +36,7 @@ requests==2.28.1 ruamel.yaml==0.17.21 ruamel.yaml.clib==0.2.7 sqlparse==0.4.3 -tzdata==2022.6 +tzdata==2022.7 uritemplate==4.1.1 urllib3==1.26.13 uvicorn==0.20.0 @@ -50,4 +50,13 @@ boto3==1.29.7 imagesize==1.4.1 pdf2image==1.16.3 redis==5.0.1 -django-celery-beat==2.5.0 \ No newline at end of file +django-celery-beat==2.5.0 +terminaltables==3.1.10 +rapidfuzz==3.6.1 +Unidecode==1.3.8 +pandas==2.2.0 +openpyxl==3.1.2 +# For sdsvkvu compatibility +# torch==1.13.1+cu116 +# torchvision==0.14.1+cu116 +# --extra-index-url https://download.pytorch.org/whl/cu116 \ No newline at end of file diff --git a/cope2n-api/scripts/database_cloning.sh b/cope2n-api/scripts/database_cloning.sh new file mode 100644 index 0000000..eaf54f6 --- /dev/null +++ b/cope2n-api/scripts/database_cloning.sh @@ -0,0 +1 @@ +pg_dump -U sbt -h sbt.cxetpslawu4p.ap-southeast-1.rds.amazonaws.com sbt2 >> sbt2.sql \ No newline at end of file diff --git a/cope2n-api/token.txt b/cope2n-api/token.txt new file mode 100644 index 0000000..5f06852 --- /dev/null +++ b/cope2n-api/token.txt @@ -0,0 +1 @@ +eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJpZCI6InNidCIsImV4cGlyZWRfYXQiOiIwMS8wMi8yMDI0IDEyOjQ2OjA3IiwiaW50ZXJuYWxfaWQiOjEsInN0YXR1cyI6MSwic3Vic2NyaXB0aW9uX2lkIjoxfQ.VFsoGm5BdeyNptMsdU4f4l70bDIYHTmB8Y-2-PXs7cKhzGB1pUpgqax-V39N_IEXriRl3caDiotzU0psR0WR3g \ No newline at end of file diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index f50412c..e9bf8c7 100755 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -83,12 +83,12 @@ services: depends_on: db-sbt: condition: service_started - command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic 
--no-input && + # python manage.py makemigrations && + # python manage.py migrate && + # python manage.py compilemessages && + # gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod + command: bash -c "tail -f > /dev/null" minio: image: minio/minio @@ -188,6 +188,8 @@ services: - POSTGRES_USER=${DB_USER} - POSTGRES_PASSWORD=${DB_PASSWORD} - POSTGRES_DB=${DB_SCHEMA} + ports: + - 5432:5432 rabbitmq-sbt: mem_reservation: 600m diff --git a/scripts/crawl_database_by_time.py b/scripts/crawl_database_by_time.py index 17f6570..4befe9a 100644 --- a/scripts/crawl_database_by_time.py +++ b/scripts/crawl_database_by_time.py @@ -10,9 +10,9 @@ from dotenv import load_dotenv load_dotenv("../.env_prod") -OUTPUT_NAME = "missing_records" -START_DATE = datetime(2023, 12, 28, tzinfo=timezone('Asia/Ho_Chi_Minh')) -END_DATE = datetime(2024, 1, 3, tzinfo=timezone('Asia/Ho_Chi_Minh')) +OUTPUT_NAME = "Jan" +START_DATE = datetime(2024, 1, 1, tzinfo=timezone('Asia/Ho_Chi_Minh')) +END_DATE = datetime(2024, 2, 1, tzinfo=timezone('Asia/Ho_Chi_Minh')) # Database connection details db_host = os.environ.get('DB_HOST', "") @@ -62,32 +62,32 @@ with open(csv_file_path, 'w', newline='') as csv_file: cursor.close() conn.close() -# Download folders from S3 -s3_client = boto3.client( - 's3', - aws_access_key_id=access_key, - aws_secret_access_key=secret_key -) +# # Download folders from S3 +# s3_client = boto3.client( +# 's3', +# aws_access_key_id=access_key, +# aws_secret_access_key=secret_key +# ) -request_ids = [] -for rq in data: - rq_id = rq[3] - request_ids.append(rq_id) +# request_ids = [] +# for rq in data: +# rq_id = rq[3] +# request_ids.append(rq_id) -for request_id in tqdm(request_ids): - folder_key = f"{s3_folder_prefix}/{request_id}/" # Assuming folder structure like: s3_bucket_name/s3_folder_prefix/request_id/ - local_folder_path = f"{OUTPUT_NAME}/{request_id}/" # Path to the local folder to save the downloaded files - os.makedirs(OUTPUT_NAME, exist_ok=True) - os.makedirs(local_folder_path, exist_ok=True) +# for request_id in tqdm(request_ids): +# folder_key = f"{s3_folder_prefix}/{request_id}/" # Assuming folder structure like: s3_bucket_name/s3_folder_prefix/request_id/ +# local_folder_path = f"{OUTPUT_NAME}/{request_id}/" # Path to the local folder to save the downloaded files +# os.makedirs(OUTPUT_NAME, exist_ok=True) +# os.makedirs(local_folder_path, exist_ok=True) - # List objects in the S3 folder - response = s3_client.list_objects_v2(Bucket=s3_bucket_name, Prefix=folder_key) - objects = response.get('Contents', []) +# # List objects in the S3 folder +# response = s3_client.list_objects_v2(Bucket=s3_bucket_name, Prefix=folder_key) +# objects = response.get('Contents', []) - for s3_object in objects: - object_key = s3_object['Key'] - local_file_path = local_folder_path + object_key.split('/')[-1] # Extracting the file name from the object key +# for s3_object in objects: +# object_key = s3_object['Key'] +# local_file_path = local_folder_path + object_key.split('/')[-1] # Extracting the file name from the object key - # Download the S3 object to the local file - s3_client.download_file(s3_bucket_name, object_key, local_file_path) \ No newline at end of file +# # Download the S3 object to the local file +# s3_client.download_file(s3_bucket_name, object_key, local_file_path) \ No newline at end of file diff --git a/scripts/database_cloning.sh b/scripts/database_cloning.sh new file mode 100644 index 0000000..eaf54f6 --- /dev/null +++ 
b/scripts/database_cloning.sh @@ -0,0 +1 @@ +pg_dump -U sbt -h sbt.cxetpslawu4p.ap-southeast-1.rds.amazonaws.com sbt2 >> sbt2.sql \ No newline at end of file From 394af1067cf90da47a5a14ae0ea9363ced1bd37e Mon Sep 17 00:00:00 2001 From: dx-tan Date: Thu, 1 Feb 2024 14:32:20 +0700 Subject: [PATCH 26/27] Add: multiple API --- cope2n-api/fwd_api/api/accuracy_view.py | 126 ++++++++++-------- .../fwd_api/celery_worker/client_connector.py | 7 +- .../fwd_api/celery_worker/internal_task.py | 18 ++- .../celery_worker/process_report_tasks.py | 37 ++--- cope2n-api/fwd_api/celery_worker/worker.py | 2 + .../migrations/0176_report_s3_file_name.py | 18 +++ .../0177_alter_report_subsidiary.py | 18 +++ cope2n-api/fwd_api/models/Report.py | 5 +- cope2n-api/fwd_api/utils/accuracy.py | 89 +++++++++++-- cope2n-api/fwd_api/utils/file.py | 40 +++++- docker-compose-dev.yml | 1 + 11 files changed, 273 insertions(+), 88 deletions(-) create mode 100644 cope2n-api/fwd_api/migrations/0176_report_s3_file_name.py create mode 100644 cope2n-api/fwd_api/migrations/0177_alter_report_subsidiary.py diff --git a/cope2n-api/fwd_api/api/accuracy_view.py b/cope2n-api/fwd_api/api/accuracy_view.py index 2159ad0..4338205 100644 --- a/cope2n-api/fwd_api/api/accuracy_view.py +++ b/cope2n-api/fwd_api/api/accuracy_view.py @@ -2,24 +2,21 @@ from rest_framework import status, viewsets from rest_framework.decorators import action from rest_framework.response import Response from django.core.paginator import Paginator -from django.http import JsonResponse +from django.http import JsonResponse, FileResponse, HttpResponse from django.utils import timezone from django.db.models import Q import uuid - +import os +from fwd import settings from drf_spectacular.utils import extend_schema, OpenApiParameter, OpenApiTypes # from drf_spectacular.types import OpenApiString import json -from ..exception.exceptions import InvalidException, RequiredFieldException +from ..exception.exceptions import InvalidException, RequiredFieldException, NotFoundException from ..models import SubscriptionRequest, Report, ReportFile -from ..utils.accuracy import shadow_report, MonthReportAccumulate -from ..utils.file import validate_report_list +from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list, extract_report_detail_list, IterAvg +from ..utils.file import download_from_S3 from ..utils.process import string_to_boolean - -def first_of_list(the_list): - if not the_list: - return None - return the_list[0] +from ..celery_worker.client_connector import c_connector class AccuracyViewSet(viewsets.ViewSet): lookup_field = "username" @@ -239,10 +236,10 @@ class AccuracyViewSet(viewsets.ViewSet): end_date_str = request.GET.get('end_date') request_id = request.GET.get('request_id', None) redemption_id = request.GET.get('redemption_id', None) - is_reviewed = string_to_boolean(request.data.get('is_reviewed', "false")) - include_test = string_to_boolean(request.data.get('include_test', "false")) + is_reviewed = string_to_boolean(request.GET.get('is_reviewed', "false")) + include_test = string_to_boolean(request.GET.get('include_test', "false")) subsidiary = request.GET.get("subsidiary", "all") - is_daily_report = string_to_boolean(request.data.get('is_daily_report', "false")) + is_daily_report = string_to_boolean(request.GET.get('is_daily_report', "false")) try: start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z') @@ -269,7 +266,10 @@ class AccuracyViewSet(viewsets.ViewSet): include_reviewed=is_reviewed, start_at=start_date, 
end_at=end_date, + status="Processing", ) + if is_daily_report: + new_report.created_at = end_date new_report.save() # Background job to calculate accuracy shadow_report(report_id, query_set) @@ -314,33 +314,13 @@ class AccuracyViewSet(viewsets.ViewSet): paginator = Paginator(report_files, page_size) page = paginator.get_page(page_number) - data = [] - for report_file in page: - data.append({ - "Request ID": report_file.correspond_request_id, - "Redemption Number": report_file.correspond_redemption_id, - "Image type": report_file.doc_type, - "IMEI_user submitted": first_of_list(report_file.feedback_result.get("imei_number", [None])), - "IMEI_OCR retrieved": first_of_list(report_file.predict_result.get("imei_number", [None])), - "IMEI1 Accuracy": first_of_list(report_file.feedback_accuracy.get("imei_number", [None])), - "Invoice_Purchase Date_Consumer": report_file.feedback_result.get("purchase_date", None), - "Invoice_Purchase Date_OCR": report_file.predict_result.get("purchase_date", []), - "Invoice_Purchase Date Accuracy": first_of_list(report_file.feedback_accuracy.get("purchase_date", [None])), - "Invoice_Retailer_Consumer": report_file.feedback_result.get("retailername", None), - "Invoice_Retailer_OCR": report_file.predict_result.get("retailername", None), - "Invoice_Retailer Accuracy": first_of_list(report_file.feedback_accuracy.get("retailername", [None])), - "OCR Image Accuracy": report_file.acc, - "OCR Image Speed (seconds)": report_file.time_cost, - "Reviewed?": "No", - "Bad Image Reasons": report_file.bad_image_reason, - "Countermeasures": report_file.counter_measures, - "IMEI_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("imei_number", [None])), - "Purchase Date_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("purchase_date", [None])), - "Retailer_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("retailername", [None])), - }) - + data = extract_report_detail_list(page, in_percent=False) + response = { 'report_detail': data, + 'metadata': {"subsidiary": report.subsidiary, + "start_at": report.start_at, + "end_at": report.end_at}, 'page': { 'number': page.number, 'total_pages': page.paginator.num_pages, @@ -428,7 +408,7 @@ class AccuracyViewSet(viewsets.ViewSet): "IMEI Acc": report.feedback_accuracy.get("imei_number", None) if report.reviewed_accuracy else None, "Avg. Accuracy": report.feedback_accuracy.get("avg", None) if report.reviewed_accuracy else None, "Avg. Client Request Time": report.average_client_time.get("avg", 0) if report.average_client_time else 0, - "Avg. OCR Processing Time": report.average_OCR_time.get("avg", 0) if report.average_client_time else 0, + "Avg. 
OCR Processing Time": report.average_OCR_time.get("avg", 0) if report.average_OCR_time else 0, "report_id": report.report_id, }) @@ -491,21 +471,21 @@ class AccuracyViewSet(viewsets.ViewSet): end_date_str = request.GET.get('end_date', "") page_number = int(request.GET.get('page', 1)) page_size = int(request.GET.get('page_size', 10)) - - if not start_date_str or not end_date_str: - reports = Report.objects.all() - else: + base_query = Q() + + if start_date_str and end_date_str: try: start_date = timezone.datetime.strptime(start_date_str, '%Y-%m-%dT%H:%M:%S%z') end_date = timezone.datetime.strptime(end_date_str, '%Y-%m-%dT%H:%M:%S%z') except ValueError: raise InvalidException(excArgs="Date format") - base_query = Q(created_at__range=(start_date, end_date)) - if subsidiary: - base_query &= Q(subsidiary=subsidiary) - base_query &= Q(is_daily_report=True) - reports = Report.objects.filter(base_query).order_by('created_at') + base_query &= Q(created_at__range=(start_date, end_date)) + + if subsidiary: + base_query &= Q(subsidiary=subsidiary) + base_query &= Q(is_daily_report=True) + reports = Report.objects.filter(base_query).order_by('created_at') paginator = Paginator(reports, page_size) page = paginator.get_page(page_number) @@ -525,8 +505,15 @@ class AccuracyViewSet(viewsets.ViewSet): _, _data, total = this_month_report() data += [total] data += _data + # Generate xlsx file + # workbook = dict2xlsx(data, _type="report") + # tmp_file = f"/tmp/{str(uuid.uuid4())}.xlsx" + # os.makedirs(os.path.dirname(tmp_file), exist_ok=True) + # workbook.save(tmp_file) + # c_connector.remove_local_file((tmp_file, "fake_request_id")) response = { + # 'file': load_xlsx_file(), 'overview_data': data, 'page': { 'number': page.number, @@ -538,28 +525,59 @@ class AccuracyViewSet(viewsets.ViewSet): return JsonResponse({'error': 'Invalid request method.'}, status=405) + @extend_schema( + parameters=[], + responses=None, tags=['Accuracy'] + ) + @action(detail=False, url_path=r"get_report_file/(?P[\w\-]+)", methods=["GET"]) + def get_report_file(self, request, report_id): + if request.method == 'GET': + # report_id = request.GET.get('report_id', None) + if not report_id: + raise RequiredFieldException(excArgs="report_id1") + report_num = Report.objects.filter(report_id=report_id).count() + if report_num == 0: + raise NotFoundException(excArgs=f"report: {report_id}") + report = Report.objects.filter(report_id=report_id).first() + # download from s3 to local + tmp_file = "/tmp/" + "report_" + uuid.uuid4().hex + ".xlsx" + os.makedirs("/tmp", exist_ok=True) + if not report.S3_file_name: + raise NotFoundException(excArgs="S3 file name") + download_from_S3(report.S3_file_name, tmp_file) + file = open(tmp_file, 'rb') + response = FileResponse(file, status=200) + + # Set the content type and content disposition headers + response['Content-Type'] = 'application/octet-stream' + response['Content-Disposition'] = 'attachment; filename="{0}"'.format(os.path.basename(tmp_file)) + return response + + return JsonResponse({'error': 'Invalid request method.'}, status=405) class RequestViewSet(viewsets.ViewSet): lookup_field = "username" - @extend_schema(request = { + @extend_schema( + request={ 'multipart/form-data': { 'type': 'object', 'properties': { 'reviewed_result': { 'type': 'string', + 'default': '''{"request_id": "Sample request_id", "imei_number": ["sample_imei1", "sample_imei2"], "retailername": "Sample Retailer", "purchase_date": "01/01/1970", "sold_to_party": "Sample party"}''', }, - } + }, }, - }, responses=None, 
tags=['Request'] + }, + responses=None, + tags=['Request'] ) @action(detail=False, url_path=r"request/(?P[\w\-]+)", methods=["GET", "POST"]) def get_subscription_request(self, request, request_id=None): if request.method == 'GET': base_query = Q(request_id=request_id) - subscription_request = SubscriptionRequest.objects.filter(base_query).first() - data = [] imeis = [] @@ -611,7 +629,7 @@ class RequestViewSet(viewsets.ViewSet): subscription_request = SubscriptionRequest.objects.filter(base_query).first() - reviewed_result = json.loads(data["reviewed_result"][1:-1]) + reviewed_result = json.loads(data["reviewed_result"]) for field in ['retailername', 'sold_to_party', 'purchase_date', 'imei_number']: if not field in reviewed_result.keys(): raise RequiredFieldException(excArgs=f'reviewed_result.{field}') diff --git a/cope2n-api/fwd_api/celery_worker/client_connector.py b/cope2n-api/fwd_api/celery_worker/client_connector.py index 5e0d59c..c10cbdd 100755 --- a/cope2n-api/fwd_api/celery_worker/client_connector.py +++ b/cope2n-api/fwd_api/celery_worker/client_connector.py @@ -32,6 +32,7 @@ class CeleryConnector: 'upload_file_to_s3': {'queue': "upload_file_to_s3"}, 'upload_feedback_to_s3': {'queue': "upload_feedback_to_s3"}, 'upload_obj_to_s3': {'queue': "upload_obj_to_s3"}, + 'upload_report_to_s3': {'queue': "upload_report_to_s3"}, 'remove_local_file': {'queue': "remove_local_file"}, 'csv_feedback': {'queue': "csv_feedback"}, 'make_a_report': {'queue': "report"}, @@ -50,8 +51,10 @@ class CeleryConnector: return self.send_task('do_pdf', args) def upload_file_to_s3(self, args): return self.send_task('upload_file_to_s3', args) - def upload_feedback_to_s3(self, args): - return self.send_task('upload_feedback_to_s3', args) + def upload_file_to_s3(self, args): + return self.send_task('upload_file_to_s3', args) + def upload_report_to_s3(self, args): + return self.send_task('upload_report_to_s3', args) def upload_obj_to_s3(self, args): return self.send_task('upload_obj_to_s3', args) def remove_local_file(self, args): diff --git a/cope2n-api/fwd_api/celery_worker/internal_task.py b/cope2n-api/fwd_api/celery_worker/internal_task.py index 4443ad6..bf12b3f 100755 --- a/cope2n-api/fwd_api/celery_worker/internal_task.py +++ b/cope2n-api/fwd_api/celery_worker/internal_task.py @@ -9,7 +9,7 @@ from fwd_api.models import SubscriptionRequest, UserProfile from fwd_api.celery_worker.worker import app from ..constant.common import FolderFileType, image_extensions from ..exception.exceptions import FileContentInvalidException -from fwd_api.models import SubscriptionRequestFile, FeedbackRequest +from fwd_api.models import SubscriptionRequestFile, FeedbackRequest, Report from ..utils import file as FileUtils from ..utils import process as ProcessUtil from ..utils import s3 as S3Util @@ -211,6 +211,22 @@ def upload_feedback_to_s3(local_file_path, s3_key, feedback_id): else: logger.info(f"S3 is not available, skipping,...") +@app.task(name='upload_report_to_s3') +def upload_report_to_s3(local_file_path, s3_key, report_id): + if s3_client.s3_client is not None: + try: + s3_client.upload_file(local_file_path, s3_key) + report = Report.objects.filter(report_id=report_id)[0] + report.S3_uploaded = True + report.S3_file_name = s3_key + report.save() + except Exception as e: + logger.error(f"Unable to set S3: {e}") + print(f"Unable to set S3: {e}") + return + else: + logger.info(f"S3 is not available, skipping,...") + @app.task(name='remove_local_file') def remove_local_file(local_file_path, request_id): print(f"[INFO] Removing 
local file: {local_file_path}, ...") diff --git a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py index c5b2a86..9dd6915 100644 --- a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py +++ b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py @@ -1,24 +1,12 @@ -import time -import uuid -import os -import base64 import traceback -from multiprocessing.pool import ThreadPool -from fwd_api.models import SubscriptionRequest, UserProfile +from fwd_api.models import SubscriptionRequest, Report, ReportFile from fwd_api.celery_worker.worker import app -from ..constant.common import FolderFileType, image_extensions -from ..exception.exceptions import FileContentInvalidException -from fwd_api.models import SubscriptionRequestFile, FeedbackRequest, Report -from ..utils import file as FileUtils -from ..utils import process as ProcessUtil from ..utils import s3 as S3Util -from ..utils.accuracy import update_temp_accuracy, IterAvg, calculate_and_save_subcription_file -from fwd_api.constant.common import ProcessType +from ..utils.accuracy import update_temp_accuracy, IterAvg, calculate_and_save_subcription_file, count_transactions, extract_report_detail_list +from ..utils.file import dict2xlsx, save_workbook_file, save_report_to_S3 from django.utils import timezone from django.db.models import Q -import csv -import json from celery.utils.log import get_task_logger from fwd import settings @@ -117,28 +105,45 @@ def make_a_report(report_id, query_set): errors += request_att["err"] num_request += 1 + transaction_att = count_transactions(start_date, end_date) # Do saving process report.number_request = num_request report.number_images = number_images report.number_imei = time_cost["imei"].count report.number_invoice = time_cost["invoice"].count report.number_bad_images = number_bad_images + # FIXME: refactor this data stream for durability report.average_OCR_time = {"invoice": time_cost["invoice"](), "imei": time_cost["imei"](), "invoice_count": time_cost["invoice"].count, "imei_count": time_cost["imei"].count} + total_ocr_count = report.average_OCR_time["imei_count"] + report.average_OCR_time["invoice_count"] + report.average_OCR_time["avg"] = (report.average_OCR_time["invoice"]*report.average_OCR_time["invoice_count"] + report.average_OCR_time["imei"]*report.average_OCR_time["imei_count"])/total_ocr_count if total_ocr_count else 0 + + report.number_imei_transaction = transaction_att.get("imei", 0) + report.number_invoice_transaction = transaction_att.get("invoice", 0) + acumulated_acc = {"feedback": {}, "reviewed": {}} - for acc_type in ["feedback", "reviewed"]: + avg_acc = IterAvg() for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]: acumulated_acc[acc_type][key] = accuracy[acc_type][key]() acumulated_acc[acc_type][key+"_count"] = accuracy[acc_type][key].count + avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key+"_count"]) + acumulated_acc[acc_type]["avg"] = avg_acc() report.feedback_accuracy = acumulated_acc["feedback"] report.reviewed_accuracy = acumulated_acc["reviewed"] report.errors = "|".join(errors) + report.status = "Ready" report.save() + # Saving a xlsx file + report_files = ReportFile.objects.filter(report=report) + data = extract_report_detail_list(report_files, lower=True) + data_workbook = dict2xlsx(data, _type='report_detail') + local_workbook = save_workbook_file(report.report_id + ".xlsx", report, data_workbook) + s3_key = save_report_to_S3(report.report_id, local_workbook) + except IndexError as e: print(e) 
traceback.print_exc() diff --git a/cope2n-api/fwd_api/celery_worker/worker.py b/cope2n-api/fwd_api/celery_worker/worker.py index 31a3262..5bb6963 100755 --- a/cope2n-api/fwd_api/celery_worker/worker.py +++ b/cope2n-api/fwd_api/celery_worker/worker.py @@ -38,6 +38,7 @@ app.conf.update({ Queue('upload_file_to_s3'), Queue('upload_feedback_to_s3'), Queue('upload_obj_to_s3'), + Queue('upload_report_to_s3'), Queue('remove_local_file'), Queue('csv_feedback'), Queue('report'), @@ -56,6 +57,7 @@ app.conf.update({ 'upload_file_to_s3': {'queue': "upload_file_to_s3"}, 'upload_feedback_to_s3': {'queue': "upload_feedback_to_s3"}, 'upload_obj_to_s3': {'queue': "upload_obj_to_s3"}, + 'upload_report_to_s3': {'queue': "upload_report_to_s3"}, 'remove_local_file': {'queue': "remove_local_file"}, 'csv_feedback': {'queue': "csv_feedback"}, 'make_a_report': {'queue': "report"}, diff --git a/cope2n-api/fwd_api/migrations/0176_report_s3_file_name.py b/cope2n-api/fwd_api/migrations/0176_report_s3_file_name.py new file mode 100644 index 0000000..aed983a --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0176_report_s3_file_name.py @@ -0,0 +1,18 @@ +# Generated by Django 4.1.3 on 2024-01-31 09:31 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0175_rename_number_ivoice_transaction_report_number_imei_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='report', + name='S3_file_name', + field=models.TextField(default=None, null=True), + ), + ] diff --git a/cope2n-api/fwd_api/migrations/0177_alter_report_subsidiary.py b/cope2n-api/fwd_api/migrations/0177_alter_report_subsidiary.py new file mode 100644 index 0000000..418712b --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0177_alter_report_subsidiary.py @@ -0,0 +1,18 @@ +# Generated by Django 4.1.3 on 2024-02-01 03:27 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0176_report_s3_file_name'), + ] + + operations = [ + migrations.AlterField( + model_name='report', + name='subsidiary', + field=models.CharField(default='', max_length=200, null=True), + ), + ] diff --git a/cope2n-api/fwd_api/models/Report.py b/cope2n-api/fwd_api/models/Report.py index 340b305..92a2755 100644 --- a/cope2n-api/fwd_api/models/Report.py +++ b/cope2n-api/fwd_api/models/Report.py @@ -16,12 +16,13 @@ class Report(models.Model): status = models.CharField(max_length=100) is_daily_report = models.BooleanField(default=False) errors = models.TextField(default="", null=True) - subsidiary = models.TextField(default="", null=True) - include_reviewed = models.TextField(default="", null=True) + subsidiary = models.CharField(default="", null=True, max_length=200) + include_reviewed = models.TextField(default="", null=True, ) include_test = models.CharField(max_length=200, default="") # Data S3_uploaded = models.BooleanField(default=False) + S3_file_name = models.TextField(default=None, null=True) number_request = models.IntegerField(default=0) number_images = models.IntegerField(default=0) number_bad_images = models.IntegerField(default=0) diff --git a/cope2n-api/fwd_api/utils/accuracy.py b/cope2n-api/fwd_api/utils/accuracy.py index 56152f6..06b9187 100644 --- a/cope2n-api/fwd_api/utils/accuracy.py +++ b/cope2n-api/fwd_api/utils/accuracy.py @@ -2,10 +2,12 @@ import re from datetime import datetime import copy +from typing import Any from .ocr_utils.ocr_metrics import eval_ocr_metric from .ocr_utils.sbt_report import post_processing_str from 
fwd_api.models import SubscriptionRequest, SubscriptionRequestFile, ReportFile from ..celery_worker.client_connector import c_connector +from django.db.models import Q BAD_THRESHOLD = 0.75 @@ -40,6 +42,8 @@ class MonthReportAccumulate: } self.data = [] self.data_format = { + 'subs': "", + 'extraction_date': "", 'num_imei': 0, 'num_invoice': 0, 'total_images': 0, @@ -69,6 +73,7 @@ class MonthReportAccumulate: self.total["images_quality"]["successful"] += report.number_images - report.number_bad_images self.total["images_quality"]["bad"] += report.number_bad_images + if sum([report.reviewed_accuracy[x] for x in report.reviewed_accuracy.keys() if "_count" not in x]) > 0 : self.total["average_accuracy_rate"]["imei"].add_avg(report.reviewed_accuracy.get("imei_number", 0), report.reviewed_accuracy.get("imei_number_count", 0)) self.total["average_accuracy_rate"]["purchase_date"].add_avg(report.reviewed_accuracy.get("purchase_date", 0), report.reviewed_accuracy.get("purchase_date_count", 0)) @@ -78,8 +83,8 @@ class MonthReportAccumulate: self.total["average_accuracy_rate"]["purchase_date"].add_avg(report.feedback_accuracy.get("purchase_date", 0), report.feedback_accuracy.get("purchase_date_count", 0)) self.total["average_accuracy_rate"]["retailer_name"].add_avg(report.feedback_accuracy.get("retailername", 0), report.feedback_accuracy.get("retailername_count", 0)) - self.total["average_processing_time"]["imei"].add_avg(report.average_OCR_time.get("imei", 0), report.average_OCR_time.get("imei_count", 0)) - self.total["average_processing_time"]["invoice"].add_avg(report.average_OCR_time.get("invoice", 0), report.average_OCR_time.get("invoice_count", 0)) + self.total["average_processing_time"]["imei"].add_avg(report.average_OCR_time.get("imei", 0), report.average_OCR_time.get("imei_count", 0)) if report.average_OCR_time else 0 + self.total["average_processing_time"]["invoice"].add_avg(report.average_OCR_time.get("invoice", 0), report.average_OCR_time.get("invoice_count", 0)) if report.average_OCR_time else 0 self.total["usage"]["imei"] += report.number_imei_transaction self.total["usage"]["invoice"] += report.number_invoice_transaction @@ -89,6 +94,7 @@ class MonthReportAccumulate: if self.month is None: self.month = report_month self.total["extraction_date"] = f"Subtotal ({self.month})" + elif self.month != report_month: self.total["images_quality"]["successful_percent"] += self.total["images_quality"]["successful"]/self.total["total_images"] self.total["images_quality"]["bad_percent"] += self.total["images_quality"]["bad"]/self.total["total_images"] @@ -96,11 +102,15 @@ class MonthReportAccumulate: # accumulate fields new_data = copy.deepcopy(self.data_format)[0] new_data["num_imei"] = report.number_imei + new_data["subs"] = report.subsidiary + new_data["extraction_date"] = report.created_at new_data["num_invoice"] = report.number_invoice new_data["total_images"] = report.number_images new_data["images_quality"]["successful"] = report.number_images - report.number_bad_images new_data["images_quality"]["bad"] = report.number_bad_images - + + report.reviewed_accuracy = {} if report.reviewed_accuracy is None else report.reviewed_accuracy + report.feedback_accuracy = {} if report.feedback_accuracy is None else report.feedback_accuracy if sum([ report.reviewed_accuracy[x] for x in report.reviewed_accuracy.keys() if "_count" not in x]): new_data["average_accuracy_rate"]["imei"] = report.reviewed_accuracy.get("imei_number", None) new_data["average_accuracy_rate"]["purchase_date"] = 
report.reviewed_accuracy.get("purchase_date", None) @@ -109,20 +119,20 @@ class MonthReportAccumulate: new_data["average_accuracy_rate"]["imei"] = report.feedback_accuracy.get("imei_number", None) new_data["average_accuracy_rate"]["purchase_date"] = report.feedback_accuracy.get("purchase_date", None) new_data["average_accuracy_rate"]["retailer_name"] = report.feedback_accuracy.get("retailername", None) - new_data["average_processing_time"]["imei"] = report.average_OCR_time.get("imei", 0) - new_data["average_processing_time"]["invoice"] = report.average_OCR_time.get("invoice", 0) + new_data["average_processing_time"]["imei"] = report.average_OCR_time.get("imei", 0) if report.average_OCR_time else 0 + new_data["average_processing_time"]["invoice"] = report.average_OCR_time.get("invoice", 0) if report.average_OCR_time else 0 new_data["usage"]["imei"] = report.number_imei_transaction new_data["usage"]["invoice"] = report.number_invoice_transaction - new_data["images_quality"]["successful_percent"] += new_data["images_quality"]["successful"]/new_data["total_images"] - new_data["images_quality"]["bad_percent"] += new_data["images_quality"]["bad"]/new_data["total_images"] + new_data["images_quality"]["successful_percent"] += new_data["images_quality"]["successful"]/new_data["total_images"] if new_data["total_images"] else 0 + new_data["images_quality"]["bad_percent"] += new_data["images_quality"]["bad"]/new_data["total_images"] if new_data["total_images"] else 0 self.data.append(new_data) self.accumulate(report) return True def __call__(self): - self.total["images_quality"]["successful_percent"] += self.total["images_quality"]["successful"]/self.total["total_images"] - self.total["images_quality"]["bad_percent"] += self.total["images_quality"]["bad"]/self.total["total_images"] + self.total["images_quality"]["successful_percent"] += self.total["images_quality"]["successful"]/self.total["total_images"] if self.total["total_images"] else 0 + self.total["images_quality"]["bad_percent"] += self.total["images_quality"]["bad"]/self.total["total_images"] if self.total["total_images"] else 0 total = copy.deepcopy(self.total) total["average_accuracy_rate"]["imei"] = total["average_accuracy_rate"]["imei"]() total["average_accuracy_rate"]["purchase_date"] = total["average_accuracy_rate"]["purchase_date"]() @@ -157,6 +167,67 @@ class IterAvg: def __call__(self): return self.avg +def first_of_list(the_list): + if not the_list: + return None + return the_list[0] + +def extract_report_detail_list(report_detail_list, lower=False, in_percent=True): + data = [] + for report_file in report_detail_list: + data.append({ + "Request ID": report_file.correspond_request_id, + "Redemption Number": report_file.correspond_redemption_id, + "Image type": report_file.doc_type, + "IMEI_user submitted": first_of_list(report_file.feedback_result.get("imei_number", [None])), + "IMEI_OCR retrieved": first_of_list(report_file.predict_result.get("imei_number", [None])), + "IMEI1 Accuracy": first_of_list(report_file.feedback_accuracy.get("imei_number", [None])), + "Invoice_Purchase Date_Consumer": report_file.feedback_result.get("purchase_date", None), + "Invoice_Purchase Date_OCR": report_file.predict_result.get("purchase_date", []), + "Invoice_Purchase Date Accuracy": first_of_list(report_file.feedback_accuracy.get("purchase_date", [None])), + "Invoice_Retailer_Consumer": report_file.feedback_result.get("retailername", None), + "Invoice_Retailer_OCR": report_file.predict_result.get("retailername", None), + "Invoice_Retailer 
Accuracy": first_of_list(report_file.feedback_accuracy.get("retailername", [None])), + "OCR Image Accuracy": report_file.acc, + "OCR Image Speed (seconds)": report_file.time_cost, + "Reviewed?": "No", + "Bad Image Reasons": report_file.bad_image_reason, + "Countermeasures": report_file.counter_measures, + "IMEI_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("imei_number", [None])), + "Purchase Date_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("purchase_date", [None])), + "Retailer_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("retailername", [None])), + }) + if lower: + for i, dat in enumerate(data): + keys = list(dat.keys()) + for old_key in keys: + data[i][old_key.lower().replace(" ", "_")] = data[i].pop(old_key) + if in_percent: + for i, dat in enumerate(data): + keys = [x for x in list(dat.keys()) if "accuracy" in x.lower()] + for key in keys: + if data[i][key]: + data[i][key] = data[i][key]*100 + return data + +def count_transactions(start_date, end_date): + base_query = Q(created_at__range=(start_date, end_date)) + base_query &= Q(is_test_request=False) + transaction_att = {} + + print(f"[DEBUG]: atracting transactions attribute...") + total_transaction_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at') + for request in total_transaction_requests: + if not request.doc_type: + continue + doc_types = request.doc_type.split(",") + for doc_type in doc_types: + if transaction_att.get(doc_type, None) == None: + transaction_att[doc_type] = 1 + else: + transaction_att[doc_type] += 1 + return transaction_att + def convert_datetime_format(date_string: str, is_gt=False) -> str: # pattern_date_string = "2023-02-28" input_format = "%Y-%m-%d" diff --git a/cope2n-api/fwd_api/utils/file.py b/cope2n-api/fwd_api/utils/file.py index a4d364c..d79ed96 100644 --- a/cope2n-api/fwd_api/utils/file.py +++ b/cope2n-api/fwd_api/utils/file.py @@ -9,10 +9,11 @@ from django.core.files.uploadedfile import TemporaryUploadedFile from django.utils import timezone from fwd import settings +from ..utils import s3 as S3Util from fwd_api.constant.common import allowed_file_extensions from fwd_api.exception.exceptions import GeneralException, RequiredFieldException, InvalidException, \ ServiceUnavailableException, FileFormatInvalidException, LimitReachedException, InvalidDecompressedSizeException, RequiredColumnException -from fwd_api.models import SubscriptionRequest, OcrTemplate, FeedbackRequest, SubscriptionRequestFile +from fwd_api.models import SubscriptionRequest, OcrTemplate, FeedbackRequest, SubscriptionRequestFile, Report, ReportFile from fwd_api.utils import process as ProcessUtil from fwd_api.utils.crypto import image_authenticator from fwd_api.utils.image import resize @@ -22,6 +23,13 @@ import csv from openpyxl import load_workbook from openpyxl.styles import Font, Border, Side, PatternFill, NamedStyle +s3_client = S3Util.MinioS3Client( + endpoint=settings.S3_ENDPOINT, + access_key=settings.S3_ACCESS_KEY, + secret_key=settings.S3_SECRET_KEY, + bucket_name=settings.S3_BUCKET_NAME +) + def validate_report_list(request): start_date_str = request.GET.get('start_date') end_date_str = request.GET.get('end_date') @@ -182,6 +190,16 @@ def save_feedback_file(file_name: str, rq: FeedbackRequest, uploaded_file: dict) csvfile.write(file_contents) return file_path +def save_workbook_file(file_name: str, rp: Report, workbook): + report_id = str(rp.report_id) + + folder_path = os.path.join(settings.MEDIA_ROOT, "report", report_id) + 
os.makedirs(folder_path, exist_ok = True) + + file_path = os.path.join(folder_path, file_name) + workbook.save(file_path) + return file_path + def delete_file_with_path(file_path: str) -> bool: try: os.remove(file_path) @@ -266,6 +284,19 @@ def save_feedback_to_S3(file_name, id, local_file_path): print(f"[ERROR]: {e}") raise ServiceUnavailableException() +def save_report_to_S3(id, local_file_path): + try: + s3_key = os.path.join("report", local_file_path.split("/")[-2], local_file_path.split("/")[-1]) + c_connector.upload_report_to_s3((local_file_path, s3_key, id)) + c_connector.remove_local_file((local_file_path, id)) + return s3_key + except Exception as e: + print(f"[ERROR]: {e}") + raise ServiceUnavailableException() + +def download_from_S3(s3_key, local_file_path): + s3_client.download_file(s3_key, local_file_path) + def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path): try: file_path = os.path.join(folder_path, file_name) @@ -363,10 +394,11 @@ def get_value(_dict, keys): else: value = value.get(key, {}) - if value != 0: - return value - else: + if not value: return "-" + elif isinstance(value, list): + value = str(value) + return value def dict2xlsx(input: json, _type='report'): diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index e9bf8c7..44f8c94 100755 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -67,6 +67,7 @@ services: - ADMIN_PASSWORD=${ADMIN_PASSWORD} - STANDARD_USER_NAME=${STANDARD_USER_NAME} - STANDARD_PASSWORD=${STANDARD_PASSWORD} + - S3_ENDPOINT=${S3_ENDPOINT} - S3_ACCESS_KEY=${S3_ACCESS_KEY} - S3_SECRET_KEY=${S3_SECRET_KEY} - S3_BUCKET_NAME=${S3_BUCKET_NAME} From 3f86cd10fec7b4cf2420fad53dcac2c196b12ac3 Mon Sep 17 00:00:00 2001 From: dx-tan Date: Thu, 1 Feb 2024 14:41:27 +0700 Subject: [PATCH 27/27] remove token --- cope2n-api/token.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 cope2n-api/token.txt diff --git a/cope2n-api/token.txt b/cope2n-api/token.txt deleted file mode 100644 index 5f06852..0000000 --- a/cope2n-api/token.txt +++ /dev/null @@ -1 +0,0 @@ -eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJpZCI6InNidCIsImV4cGlyZWRfYXQiOiIwMS8wMi8yMDI0IDEyOjQ2OjA3IiwiaW50ZXJuYWxfaWQiOjEsInN0YXR1cyI6MSwic3Vic2NyaXB0aW9uX2lkIjoxfQ.VFsoGm5BdeyNptMsdU4f4l70bDIYHTmB8Y-2-PXs7cKhzGB1pUpgqax-V39N_IEXriRl3caDiotzU0psR0WR3g \ No newline at end of file
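A note on the wiki_diff.py patch above: inline_diff() drives everything off difflib.SequenceMatcher.get_opcodes(), which yields (tag, i1, i2, j1, j2) tuples that cover both strings end to end. A minimal standalone sketch of that opcode walk follows; the sample strings are hypothetical, with a standing for the OCR prediction and b for the ground truth.

    import difflib

    a = "Ho tên: Nguyen Van A"   # hypothetical OCR output
    b = "Họ tên: Nguyễn Văn A"   # hypothetical ground truth

    matcher = difflib.SequenceMatcher(None, a, b)
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == 'equal':        # characters the OCR got right
            print(f"ok     {matcher.a[i1:i2]!r}")
        elif tag == 'replace':    # wrong chars: accent/case/consonant errors
            print(f"wrong  {matcher.a[i1:i2]!r} -> {matcher.b[j1:j2]!r}")
        elif tag == 'delete':     # chars the OCR added, so the matcher deletes
            print(f"added  {matcher.a[i1:i2]!r}")
        elif tag == 'insert':     # chars the OCR lost, so the matcher inserts
            print(f"lost   {matcher.b[j1:j2]!r}")

This is exactly the replace/delete/insert split that process_replace_tag, process_delete_tag, and process_insert_tag classify into accent, case, consonant, and special-character buckets.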
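The crawl_database_by_time.py change only moves the date window and comments out the S3 download; the SELECT itself sits outside the hunk, so the query below is an assumption about its shape. The point of the sketch is passing the timezone-aware window as bind parameters rather than formatting it into the SQL string (connection details are placeholders).

    from datetime import datetime

    import psycopg2
    from pytz import timezone

    tz = timezone('Asia/Ho_Chi_Minh')
    # Mirrors the script's style; note the pytz docs recommend
    # tz.localize(datetime(...)) over passing tzinfo= directly.
    start_date = datetime(2024, 1, 1, tzinfo=tz)
    end_date = datetime(2024, 2, 1, tzinfo=tz)

    # Placeholder credentials; the script reads these from the environment.
    conn = psycopg2.connect(host="localhost", database="sbt",
                            user="postgres", password="secret")
    cursor = conn.cursor()
    cursor.execute(
        "SELECT * FROM fwd_api_subscriptionrequest"
        " WHERE created_at BETWEEN %s AND %s",
        (start_date, end_date),
    )
    rows = cursor.fetchall()
    cursor.close()
    conn.close()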
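The reports-listing fix in PATCH 26 replaces the two-branch queryset with a single composable filter. A small sketch of that pattern in isolation (field names are taken from the patch; the subsidiary value and helper name are made up):

    from django.db.models import Q

    def build_report_filter(start_date=None, end_date=None, subsidiary=None):
        # Q() is an empty, always-true filter; AND-ing optional constraints
        # into it lets one queryset expression cover every parameter combination.
        base_query = Q()
        if start_date and end_date:
            base_query &= Q(created_at__range=(start_date, end_date))
        if subsidiary:
            base_query &= Q(subsidiary=subsidiary)
        base_query &= Q(is_daily_report=True)
        return base_query

    # reports = Report.objects.filter(
    #     build_report_filter(subsidiary="SEAU")).order_by('created_at')

Compared with the old code, missing dates no longer fall through to an unfiltered Report.objects.all(), and the is_daily_report constraint applies on every path.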
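PATCH 26 wires the new upload_report_to_s3 task through three places: the connector's route table, the worker's queue list, and the @app.task registration. A condensed sketch of that producer/consumer pattern, with the broker URL as a placeholder:

    from celery import Celery

    app = Celery('fwd', broker='amqp://guest:guest@localhost:5672')

    # Producer side: route by task name so each task type gets its own queue,
    # mirroring CeleryConnector.task_routes in the patch.
    task_routes = {'upload_report_to_s3': {'queue': 'upload_report_to_s3'}}

    def upload_report_to_s3(args):
        return app.send_task('upload_report_to_s3', args=args,
                             queue=task_routes['upload_report_to_s3']['queue'])

    # Consumer side: a worker subscribed to that queue executes the task, e.g.
    #   celery -A fwd worker -Q upload_report_to_s3
    # upload_report_to_s3((local_file_path, s3_key, report_id))

Dedicating a queue per task type lets slow report uploads be scaled or drained independently of the latency-sensitive OCR queues.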
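Both make_a_report and MonthReportAccumulate lean on IterAvg.add_avg() to merge per-field averages without keeping raw samples. A self-contained sketch of the underlying weighted-mean update, assuming the (avg, count) semantics visible in accuracy.py (the class name here is a hypothetical stand-in, shown only to make the math explicit):

    class RunningMean:
        def __init__(self):
            self.avg = 0.0
            self.count = 0

        def add_avg(self, avg, count):
            if avg is None or not count:
                return  # nothing to merge; also avoids dividing by zero
            # new_mean = (old_mean*old_n + sub_mean*sub_n) / (old_n + sub_n)
            self.avg = (self.avg * self.count + avg * count) / (self.count + count)
            self.count += count

        def __call__(self):
            return self.avg

    m = RunningMean()
    m.add_avg(0.9, 8)   # e.g. 8 imei fields at 90% accuracy
    m.add_avg(0.5, 2)   # 2 purchase_date fields at 50%
    assert abs(m() - 0.82) < 1e-9

Because each sub-population is weighted by its count, merging (0.9, 8) and (0.5, 2) yields 0.82 rather than the unweighted 0.7, which is why the patches track a _count alongside every accuracy figure.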