From cecee15cd67263d14083f6ebe0aacaa7a33cf323 Mon Sep 17 00:00:00 2001 From: dx-tan Date: Tue, 9 Jan 2024 19:41:17 +0700 Subject: [PATCH] Feature: feedback csv API --- cope2n-api/fwd_api/api/ctel_view.py | 53 ++++++++++++- .../fwd_api/celery_worker/client_connector.py | 6 ++ .../fwd_api/celery_worker/internal_task.py | 75 ++++++++++++++++++- cope2n-api/fwd_api/celery_worker/worker.py | 7 +- cope2n-api/fwd_api/exception/exceptions.py | 5 ++ .../migrations/0165_feedbackrequest.py | 29 +++++++ cope2n-api/fwd_api/models/FeedbackRequest.py | 14 ++++ cope2n-api/fwd_api/models/__init__.py | 2 + cope2n-api/fwd_api/utils/file.py | 66 +++++++++++++++- cope2n-api/fwd_api/utils/process.py | 4 +- .../swagger-ui-bundle.js.LICENSE.txt | 1 + deploy_images.sh | 2 +- docker-compose-dev.yml | 3 + 13 files changed, 256 insertions(+), 11 deletions(-) create mode 100644 cope2n-api/fwd_api/migrations/0165_feedbackrequest.py create mode 100644 cope2n-api/fwd_api/models/FeedbackRequest.py diff --git a/cope2n-api/fwd_api/api/ctel_view.py b/cope2n-api/fwd_api/api/ctel_view.py index 6b77471..6d4c632 100755 --- a/cope2n-api/fwd_api/api/ctel_view.py +++ b/cope2n-api/fwd_api/api/ctel_view.py @@ -20,7 +20,7 @@ from ..annotation.api import throw_on_failure from ..constant.common import ProcessType, REQUEST_ID, FOLDER_TYPE, EntityStatus, pdf_extensions, allowed_file_extensions, image_extensions, standard_ocr_list from ..exception.exceptions import RequiredFieldException, InvalidException, NotFoundException, \ PermissionDeniedException, LockedEntityException, FileContentInvalidException, ServiceTimeoutException -from ..models import SubscriptionRequest, SubscriptionRequestFile, OcrTemplate +from ..models import SubscriptionRequest, SubscriptionRequestFile, OcrTemplate, FeedbackRequest from ..response.ReportSerializer import ReportSerializer from ..utils import file as FileUtils from ..utils import process as ProcessUtil @@ -348,6 +348,57 @@ class CtelViewSet(viewsets.ViewSet): return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id}) + @extend_schema(request={ + 'multipart/form-data': { + 'type': 'object', + 'properties': { + 'files': { + 'type': 'array', + 'items': { + 'type': 'string', + 'format': 'binary' + } + }, + }, + 'required': ['files'] + } + }, responses=None, tags=['OCR']) + @action(detail=False, url_path="images/feedback_file", methods=["POST"]) + def feedback_file(self, request): + files = request.data.getlist('files') + FileUtils.validate_csv_feedback(files) + + user_info = ProcessUtil.get_user(request) + user = user_info.user + sub = user_info.current_sub + + feedback_id = "FB_" + datetime.now().strftime("%Y%m%d%H%M%S") + "_" + uuid.uuid4().hex + + origin_name = "" + file_names = "" + for i, file in enumerate(files): + origin_name += file.name + "," + file_names += f"{feedback_id}_{i}.csv" + origin_name = origin_name[:-1] + + new_request: FeedbackRequest = FeedbackRequest(feedback_id=feedback_id, + origin_name=origin_name, + file_name=file_names, + subscription=sub) + new_request.save() + + for i, file in enumerate(files): + file_name = f"{feedback_id}_{i}.csv" + # Save to local + file_path = FileUtils.save_feedback_file(file_name, new_request, file) + FileUtils.validate_feedback_file(file_path) + # Upload to S3 + S3_path = FileUtils.save_feedback_to_S3(file_name, feedback_id, file_path) + # Process csv file in the background + ProcessUtil.process_feedback(feedback_id, file_path) + + return JsonResponse(status=status.HTTP_200_OK, data={"feedback_id": feedback_id}) + @extend_schema(request=None, responses=None, tags=['Data']) @extend_schema(request=None, responses=None, tags=['templates'], methods=['GET']) @action(detail=False, url_path=r"media/(?P\w+)/(?P\w+)", methods=["GET"]) diff --git a/cope2n-api/fwd_api/celery_worker/client_connector.py b/cope2n-api/fwd_api/celery_worker/client_connector.py index 3ff8f88..16c7dd5 100755 --- a/cope2n-api/fwd_api/celery_worker/client_connector.py +++ b/cope2n-api/fwd_api/celery_worker/client_connector.py @@ -30,8 +30,10 @@ class CeleryConnector: 'process_sbt_invoice': {'queue': "invoice_sbt"}, 'do_pdf': {'queue': "do_pdf"}, 'upload_file_to_s3': {'queue': "upload_file_to_s3"}, + 'upload_feedback_to_s3': {'queue': "upload_feedback_to_s3"}, 'upload_obj_to_s3': {'queue': "upload_obj_to_s3"}, 'remove_local_file': {'queue': "remove_local_file"}, + 'csv_feedback': {'queue': "csv_feedback"}, } app = Celery( @@ -39,10 +41,14 @@ class CeleryConnector: broker=settings.BROKER_URL, broker_transport_options={'confirm_publish': False}, ) + def csv_feedback(self, args): + return self.send_task('csv_feedback', args) def do_pdf(self, args): return self.send_task('do_pdf', args) def upload_file_to_s3(self, args): return self.send_task('upload_file_to_s3', args) + def upload_feedback_to_s3(self, args): + return self.send_task('upload_feedback_to_s3', args) def upload_obj_to_s3(self, args): return self.send_task('upload_obj_to_s3', args) def remove_local_file(self, args): diff --git a/cope2n-api/fwd_api/celery_worker/internal_task.py b/cope2n-api/fwd_api/celery_worker/internal_task.py index 04d875f..5ea175b 100755 --- a/cope2n-api/fwd_api/celery_worker/internal_task.py +++ b/cope2n-api/fwd_api/celery_worker/internal_task.py @@ -9,12 +9,13 @@ from fwd_api.models import SubscriptionRequest, UserProfile from fwd_api.celery_worker.worker import app from ..constant.common import FolderFileType, image_extensions from ..exception.exceptions import FileContentInvalidException -from fwd_api.models import SubscriptionRequestFile +from fwd_api.models import SubscriptionRequestFile, FeedbackRequest from ..utils import file as FileUtils from ..utils import process as ProcessUtil from ..utils import s3 as S3Util from fwd_api.constant.common import ProcessType - +import csv +import json from celery.utils.log import get_task_logger from fwd import settings @@ -59,6 +60,61 @@ def process_image_file(file_name: str, file_path, request, user) -> list: 'request_file_id': new_request_file.code }] +@app.task(name="csv_feedback") +def process_csv_feedback(csv_file_path, feedback_id): + # load file to RAM + status = {} + with open(csv_file_path, 'r') as file: + reader = csv.DictReader(file) + # for rq in rqs + for row in reader: + # get request_subcription + request_id = row.get('requestId') + sub_rqs = SubscriptionRequest.objects.filter(request_id=request_id) + if len(sub_rqs) != 1: + status[request_id] = f"Found {len(sub_rqs)} records of request id {request_id}" + continue + else: + sub_rq = sub_rqs[0] + fb = {} + # update user result (with validate) + redemption_id = row.get('redemptionNumber') + imei1 = row.get('imeiNumber') + imei2 = row.get('imeiNumber2') + purchase_date = row.get('Purchase Date') + retailer = row.get('retailer') + sold_to_party = row.get('Sold to party') + server_time = float(row.get('timetakenmilli')) + fb['request_id'] = request_id + fb['retailername'] = retailer + fb['sold_to_party'] = sold_to_party + fb['purchase_date'] = purchase_date + fb['imei_number'] = [imei1, imei2] + sub_rq.feedback_result = fb + sub_rq.client_request_time = server_time + # update redemption_id if exist + if len(redemption_id) > 0: + sub_rq.redemption_id = redemption_id + sub_rq.save() + # update log into database + feedback_rq = FeedbackRequest.objects.filter(feedback_id=feedback_id).first() + feedback_rq.error_status = status + # save log to local + directory_name = os.path.dirname(csv_file_path) + file_path = csv_file_path.replace(".csv", "_error.json") + with open(file_path, "w") as outfile: + json.dump(status, outfile) + # save to s3 + s3_key = os.path.join("feedback", directory_name.split("/")[-1], file_path.split("/")[-1]) + if s3_client.s3_client is not None: + try: + # check if saved then delete local + s3_client.upload_file(file_path, s3_key) + os.remove(file_path) + except Exception as e: + logger.error(f"Unable to set S3: {e}") + print(f"Unable to set S3: {e}") + feedback_rq.save() @app.task(name='do_pdf') def process_pdf(rq_id, sub_id, p_type, user_id, files): @@ -136,6 +192,21 @@ def upload_file_to_s3(local_file_path, s3_key, request_id): else: logger.info(f"S3 is not available, skipping,...") +@app.task(name='upload_feedback_to_s3') +def upload_feedback_to_s3(local_file_path, s3_key, feedback_id): + if s3_client.s3_client is not None: + try: + s3_client.upload_file(local_file_path, s3_key) + feed_request = FeedbackRequest.objects.filter(feedback_id=feedback_id)[0] + feed_request.S3_uploaded = True + feed_request.save() + except Exception as e: + logger.error(f"Unable to set S3: {e}") + print(f"Unable to set S3: {e}") + return + else: + logger.info(f"S3 is not available, skipping,...") + @app.task(name='remove_local_file') def remove_local_file(local_file_path, request_id): print(f"[INFO] Removing local file: {local_file_path}, ...") diff --git a/cope2n-api/fwd_api/celery_worker/worker.py b/cope2n-api/fwd_api/celery_worker/worker.py index ee497cd..a47b5c9 100755 --- a/cope2n-api/fwd_api/celery_worker/worker.py +++ b/cope2n-api/fwd_api/celery_worker/worker.py @@ -38,7 +38,7 @@ app.conf.update({ Queue('upload_file_to_s3'), Queue('upload_obj_to_s3'), Queue('remove_local_file'), - + Queue('csv_feedback'), ], 'task_routes': { @@ -52,9 +52,10 @@ app.conf.update({ 'process_sbt_invoice': {'queue': "invoice_sbt"}, 'do_pdf': {'queue': "do_pdf"}, 'upload_file_to_s3': {'queue': "upload_file_to_s3"}, - 'upload_obj_to_s3': {'queue': "upload_obj_to_s3"}, - 'upload_file_to_s3': {'queue': "upload_file_to_s3"}, + 'upload_feedback_to_s3': {'queue': "upload_feedback_to_s3"}, + 'upload_obj_to_s3': {'queue': "upload_obj_to_s3"}, 'remove_local_file': {'queue': "remove_local_file"}, + 'csv_feedback': {'queue': "csv_feedback"}, } }) diff --git a/cope2n-api/fwd_api/exception/exceptions.py b/cope2n-api/fwd_api/exception/exceptions.py index 3292a5e..d584fb2 100755 --- a/cope2n-api/fwd_api/exception/exceptions.py +++ b/cope2n-api/fwd_api/exception/exceptions.py @@ -67,6 +67,11 @@ class RequiredFieldException(GeneralException): default_detail = 'Field required' detail_with_arg = '{} param is required' +class RequiredColumnException(GeneralException): + status_code = status.HTTP_400_BAD_REQUEST + default_code = 4003 + default_detail = 'Collumns required' + detail_with_arg = '{} collumns are required' class DuplicateEntityException(GeneralException): status_code = status.HTTP_400_BAD_REQUEST diff --git a/cope2n-api/fwd_api/migrations/0165_feedbackrequest.py b/cope2n-api/fwd_api/migrations/0165_feedbackrequest.py new file mode 100644 index 0000000..8d18c02 --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0165_feedbackrequest.py @@ -0,0 +1,29 @@ +# Generated by Django 4.1.3 on 2024-01-09 10:08 + +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0164_subscriptionrequest_client_request_time_and_more'), + ] + + operations = [ + migrations.CreateModel( + name='FeedbackRequest', + fields=[ + ('id', models.AutoField(primary_key=True, serialize=False)), + ('feedback_id', models.CharField(max_length=200)), + ('file_name', models.CharField(max_length=200)), + ('origin_name', models.CharField(max_length=200)), + ('error_status', models.JSONField(null=True)), + ('created_at', models.DateTimeField(db_index=True, default=django.utils.timezone.now)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('S3_uploaded', models.BooleanField(default=False)), + ('subscription', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='fwd_api.subscription')), + ], + ), + ] diff --git a/cope2n-api/fwd_api/models/FeedbackRequest.py b/cope2n-api/fwd_api/models/FeedbackRequest.py new file mode 100644 index 0000000..b4448fa --- /dev/null +++ b/cope2n-api/fwd_api/models/FeedbackRequest.py @@ -0,0 +1,14 @@ +from django.db import models +from django.utils import timezone +from fwd_api.models.Subscription import Subscription + +class FeedbackRequest(models.Model): + id = models.AutoField(primary_key=True) + feedback_id = models.CharField(max_length=200) # Change to request_id + file_name = models.CharField(max_length=200) # Change to request_id + origin_name = models.CharField(max_length=200) # Change to request_id + error_status = models.JSONField(null=True) + created_at = models.DateTimeField(default=timezone.now, db_index=True) + updated_at = models.DateTimeField(auto_now=True) + subscription = models.ForeignKey(Subscription, on_delete=models.CASCADE) + S3_uploaded = models.BooleanField(default=False) \ No newline at end of file diff --git a/cope2n-api/fwd_api/models/__init__.py b/cope2n-api/fwd_api/models/__init__.py index 0308fc6..3cfcd22 100755 --- a/cope2n-api/fwd_api/models/__init__.py +++ b/cope2n-api/fwd_api/models/__init__.py @@ -5,3 +5,5 @@ from .OcrTemplate import OcrTemplate from .OcrTemplateBox import OcrTemplateBox from .PricingPlan import PricingPlan from .Subscription import Subscription +from .FeedbackRequest import FeedbackRequest + diff --git a/cope2n-api/fwd_api/utils/file.py b/cope2n-api/fwd_api/utils/file.py index f3af27e..0fa22ed 100644 --- a/cope2n-api/fwd_api/utils/file.py +++ b/cope2n-api/fwd_api/utils/file.py @@ -10,13 +10,29 @@ from django.core.files.uploadedfile import TemporaryUploadedFile from fwd import settings from fwd_api.constant.common import allowed_file_extensions from fwd_api.exception.exceptions import GeneralException, RequiredFieldException, InvalidException, \ - ServiceUnavailableException, FileFormatInvalidException, LimitReachedException, InvalidDecompressedSizeException -from fwd_api.models import SubscriptionRequest, OcrTemplate + ServiceUnavailableException, FileFormatInvalidException, LimitReachedException, InvalidDecompressedSizeException, RequiredColumnException +from fwd_api.models import SubscriptionRequest, OcrTemplate, FeedbackRequest from fwd_api.utils import process as ProcessUtil from fwd_api.utils.crypto import image_authenticator from fwd_api.utils.image import resize from ..celery_worker.client_connector import c_connector import imagesize +import csv + +def validate_feedback_file(csv_file_path): + required_columns = ['redemptionNumber', 'requestId', 'imeiNumber', 'imeiNumber2', 'Purchase Date', 'retailer', 'Sold to party', 'timetakenmilli'] + missing_columns = [] + + with open(csv_file_path, 'r') as file: + reader = csv.DictReader(file) + + # Check if all required columns are present + for column in required_columns: + if column not in reader.fieldnames: + missing_columns.append(column) + + if missing_columns: + raise RequiredColumnException(excArgs=str(missing_columns)) def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUEST, min_file_num=1, file_field="files"): total_file_size = 0 @@ -39,6 +55,26 @@ def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUES raise LimitReachedException(excArgs=('Total size of all files', str(settings.MAX_UPLOAD_SIZE_OF_A_FILE / 1024 / 1024), 'MB')) +def validate_csv_feedback(files, max_file_num=1, min_file_num=1, file_field="csv files"): + total_file_size = 0 + if len(files) < min_file_num: + raise RequiredFieldException(excArgs=file_field) + if len(files) > max_file_num: + raise LimitReachedException(excArgs=(f'Number of {file_field}', str(max_file_num), '')) + + for f in files: + if not isinstance(f, TemporaryUploadedFile): + # print(f'[DEBUG]: {f.name}') + raise InvalidException(excArgs="files") + extension = f.name.split(".")[-1].lower() in ["csv"] + if not extension or "." not in f.name: + raise FileFormatInvalidException(excArgs=[".csv"]) + if f.size > settings.MAX_UPLOAD_SIZE_OF_A_FILE: + raise LimitReachedException(excArgs=('A file', str(settings.MAX_UPLOAD_SIZE_OF_A_FILE / 1024 / 1024), 'MB')) + total_file_size += f.size + if total_file_size > settings.MAX_UPLOAD_FILE_SIZE_OF_A_REQUEST: + raise LimitReachedException(excArgs=('Total size of all files', str(settings.MAX_UPLOAD_SIZE_OF_A_FILE / 1024 / 1024), 'MB')) + def get_file(file_path: str): try: return open(file_path, 'rb') @@ -105,6 +141,21 @@ def save_json_file(file_name: str, rq: SubscriptionRequest, data: dict): json.dump(data, json_file) return file_path +def save_feedback_file(file_name: str, rq: FeedbackRequest, uploaded_file: dict): + user_id = str(rq.subscription.user.id) + feedback_id = str(rq.id) + + folder_path = os.path.join(settings.MEDIA_ROOT, 'users', user_id, "feedbacks", feedback_id, 'requests', feedback_id) + os.makedirs(folder_path, exist_ok = True) + + file_path = os.path.join(folder_path, file_name) + with uploaded_file.open() as file: + # Read the contents of the file + file_contents = file.read().decode('utf-8') + with open(file_path, 'w', newline='') as csvfile: + csvfile.write(file_contents) + return file_path + def delete_file_with_path(file_path: str) -> bool: try: os.remove(file_path) @@ -166,6 +217,17 @@ def save_to_S3(file_name, rq, local_file_path): print(f"[ERROR]: {e}") raise ServiceUnavailableException() +def save_feedback_to_S3(file_name, id, local_file_path): + try: + assert len(local_file_path.split("/")) >= 2, "file_path must have at least feedback_folder and feedback_id" + s3_key = os.path.join(local_file_path.split("/")[-2], local_file_path.split("/")[-1], file_name) + c_connector.upload_feedback_to_s3((local_file_path, s3_key, id)) + c_connector.remove_local_file((local_file_path, id)) + return s3_key + except Exception as e: + print(f"[ERROR]: {e}") + raise ServiceUnavailableException() + def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path): try: file_path = os.path.join(folder_path, file_name) diff --git a/cope2n-api/fwd_api/utils/process.py b/cope2n-api/fwd_api/utils/process.py index 6b6e43b..b5d6c16 100644 --- a/cope2n-api/fwd_api/utils/process.py +++ b/cope2n-api/fwd_api/utils/process.py @@ -306,7 +306,6 @@ def token_value(token_type): return 5 return 1 # Basic OCR - def send_to_queue2(rq_id, sub_id, file_url, user_id, typez, metadata={}): try: if typez == ProcessType.ID_CARD.value: @@ -324,7 +323,6 @@ def send_to_queue2(rq_id, sub_id, file_url, user_id, typez, metadata={}): print(e) raise BadGatewayException() - def build_template_matching_data(template): temp_dict = { @@ -362,6 +360,8 @@ def send_template_queue(rq_id, file_url, template: OcrTemplate, uid): print(e) raise BadGatewayException() +def process_feedback(feedback_id, local_file_path): + c_connector.csv_feedback((local_file_path, feedback_id)) def process_pdf_file(file_name: str, file_obj: TemporaryUploadedFile, request: SubscriptionRequest, user) -> list: doc: fitz.Document = fitz.open(stream=file_obj.file.read()) diff --git a/cope2n-api/static/drf_spectacular_sidecar/swagger-ui-dist/swagger-ui-bundle.js.LICENSE.txt b/cope2n-api/static/drf_spectacular_sidecar/swagger-ui-dist/swagger-ui-bundle.js.LICENSE.txt index d645695..5471dc1 100644 --- a/cope2n-api/static/drf_spectacular_sidecar/swagger-ui-dist/swagger-ui-bundle.js.LICENSE.txt +++ b/cope2n-api/static/drf_spectacular_sidecar/swagger-ui-dist/swagger-ui-bundle.js.LICENSE.txt @@ -1,4 +1,5 @@ + Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ diff --git a/deploy_images.sh b/deploy_images.sh index cb89a0e..11e5360 100755 --- a/deploy_images.sh +++ b/deploy_images.sh @@ -2,7 +2,7 @@ set -e tag=$1 -is_prod=${$2:-False} +# is_prod=${$2:-False} echo "[INFO] Tag received from Python: $tag" diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 5d58c16..580665a 100755 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -98,6 +98,9 @@ services: - MINIO_SECRET_KEY=${S3_SECRET_KEY} volumes: - ./data/minio_data:/data + ports: + - 9884:9884 + - 9885:9885 networks: - ctel-sbt restart: always