diff --git a/.gitignore b/.gitignore index b5cd87e..7f48c33 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,4 @@ Jan.csv cope2n-api/reviewed/date.xlsx cope2n-api/reviewed/retailer.xlsx /scripts/* +scripts/crawl_database.py diff --git a/api-cronjob/run.py b/api-cronjob/run.py index 3a2f225..90ae97a 100644 --- a/api-cronjob/run.py +++ b/api-cronjob/run.py @@ -4,7 +4,7 @@ import requests from datetime import datetime # Get the proxy URL from the environment variable -interval = 60*60*1 # 1 minute +interval = 60*60*3 # 1 minute update_cost = int(60*2) proxy_url = os.getenv('PROXY', "localhost") user = os.getenv('ADMIN_USER_NAME', "") diff --git a/cope2n-ai-fi/modules/sdsvkvu b/cope2n-ai-fi/modules/sdsvkvu index 671d791..bae5f73 160000 --- a/cope2n-ai-fi/modules/sdsvkvu +++ b/cope2n-ai-fi/modules/sdsvkvu @@ -1 +1 @@ -Subproject commit 671d7917c657ad185a06772e0b707b45fe59788a +Subproject commit bae5f732e12c5d5307b3ce14a98fa198ea49e372 diff --git a/cope2n-api/Dockerfile b/cope2n-api/Dockerfile index c841ccb..bc603be 100755 --- a/cope2n-api/Dockerfile +++ b/cope2n-api/Dockerfile @@ -17,6 +17,7 @@ RUN pip install uvicorn gunicorn Celery RUN pip install pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116 RUN pip install -U openmim==0.3.7 --no-cache-dir RUN mim install mmcv-full==1.7.2 +RUN pip install fastdeploy-gpu-python==1.0.7 -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html --no-cache-dir # End intergration with sdskvu USER ${UID} diff --git a/cope2n-api/fwd_api/api/accuracy_view.py b/cope2n-api/fwd_api/api/accuracy_view.py index 6c751bf..5ba4bed 100755 --- a/cope2n-api/fwd_api/api/accuracy_view.py +++ b/cope2n-api/fwd_api/api/accuracy_view.py @@ -16,7 +16,7 @@ import json from ..exception.exceptions import InvalidException, RequiredFieldException, NotFoundException from ..models import SubscriptionRequest, Report, ReportFile, SubscriptionRequestFile from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list, extract_report_detail_list, IterAvg -from ..utils.file import download_from_S3, dict2xlsx, save_report_to_S3, build_S3_url +from ..utils.file import download_from_S3, dict2xlsx, save_report_to_S3, build_S3_url, validate_review from ..utils.redis import RedisUtils from ..utils.process import string_to_boolean from ..utils.cache import get_cache, set_cache @@ -695,8 +695,10 @@ class AccuracyViewSet(viewsets.ViewSet): if not reviewed_result: reviewed_result = copy.deepcopy(sample_result) + reviewed_result["imei_number"] = [None for _ in range(subscription_request.doc_type.split(",").count("imei"))] if not feedback_result: feedback_result = copy.deepcopy(sample_result) + feedback_result["imei_number"] = [None for _ in range(subscription_request.doc_type.split(",").count("imei"))] if not predicted_result: predicted_result = copy.deepcopy(sample_result) @@ -726,8 +728,10 @@ class AccuracyViewSet(viewsets.ViewSet): if not reviewed_result: reviewed_result = copy.deepcopy(sample_result) + reviewed_result["imei_number"] = [None for _ in range(subscription_request.doc_type.split(",").count("imei"))] if not feedback_result: feedback_result = copy.deepcopy(sample_result) + feedback_result["imei_number"] = [None for _ in range(subscription_request.doc_type.split(",").count("imei"))] if not predicted_result: predicted_result = copy.deepcopy(sample_result) @@ -760,25 +764,22 @@ class AccuracyViewSet(viewsets.ViewSet): elif request.method == 'POST': data = request.data - base_query = Q(request_id=request_id) - subscription_request = SubscriptionRequest.objects.filter(base_query) if subscription_request.count() == 0: raise NotFoundException(excArgs=request_id) subscription_request = subscription_request.first() - subscription_request_files = SubscriptionRequestFile.objects.filter(request=subscription_request.id) if "reviewed_result" not in data: raise InvalidException(excArgs=f'reviewed_result') reviewed_result = data["reviewed_result"] - for field in ['retailername', 'sold_to_party', 'invoice_no', 'purchase_date', 'imei_number']: - if not field in reviewed_result.keys(): - raise RequiredFieldException(excArgs=f'reviewed_result.{field}') + if not subscription_request.predict_result: + raise InvalidException(excArgs=f'request_id') + validate_review(reviewed_result, len(subscription_request.predict_result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[3].get("value", []))) reviewed_result['request_id'] = request_id for subscription_request_file in subscription_request_files: diff --git a/cope2n-api/fwd_api/management/commands/migrate-database-mark-requests.py b/cope2n-api/fwd_api/management/commands/migrate-database-mark-requests.py new file mode 100644 index 0000000..d1be5f1 --- /dev/null +++ b/cope2n-api/fwd_api/management/commands/migrate-database-mark-requests.py @@ -0,0 +1,79 @@ +# myapp/management/commands/mycustomcommand.py +from django.core.management.base import BaseCommand +from tqdm import tqdm +from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest +from fwd_api.exception.exceptions import InvalidException +from fwd_api.utils.accuracy import predict_result_to_ready +import traceback +import copy +from django.utils import timezone + +TEST_IMEI_VALUES = ["357822611219904", "RFAW2022FED", "5AWH14MT400396N", "0HU33NIW200044K", "0GJG4DBW200318X", "358975990917032", "350731691693549", "R52W70BHDWX", "R52W70BHDWX", "350073345090297", "0PBL3NHW500023N"] +TEST_RETAILER_VALUES = ["Best Denki", "Arrow Communication"] +KEY = "invoice_no" + +class Command(BaseCommand): + help = 'Refactor database for image level' + + def add_arguments(self, parser): + # Add your command-line arguments here + parser.add_argument('start', type=str, help='start date, sample: 2023-01-02T00:00:00+0700') + parser.add_argument('end', type=str, help='end date, sample: 2024-01-25T00:00:00+0800') + + def process_request(self, request, test_requests, supicious_requests): + if len(request.request_id.split(".")[0].split("_")) < 2: + return + + if not request.predict_result: + self.stdout.write(self.style.WARNING(f"Key predict_result not found in {request.request_id}")) + return + if request.predict_result.get("status", 200) != 200: + self.stdout.write(self.style.WARNING(f"Key predict_result not found in {request.request_id}")) + return + + imeis = [] + retailername = None + + for content in request.predict_result["content"]["document"][0]["content"]: + if content["label"] == "retailername": + retailername = content["value"] + elif content["label"] == "imei_number": + imeis = content["value"] + if retailername in TEST_RETAILER_VALUES and any(imei in TEST_IMEI_VALUES for imei in imeis): + request.is_test_request= True + request.save() + test_requests.append(request.request_id) + + elif any(imei in TEST_IMEI_VALUES for imei in imeis): + request.is_test_request= True + request.save() + test_requests.append(request.request_id) + + elif retailername in TEST_RETAILER_VALUES: + supicious_requests.append(request.request_id) + + def handle(self, *args, **options): + start = options['start'] + end = options['end'] + + test_requests = [] + supicious_requests = [] + + if start or end: + try: + start_date = timezone.datetime.strptime(start, '%Y-%m-%dT%H:%M:%S%z') # We care only about day precision only + end_date = timezone.datetime.strptime(end, '%Y-%m-%dT%H:%M:%S%z') + except Exception as e: + print(f"[INFO]: start: {start}") + print(f"[INFO]: end: {end}") + raise InvalidException(excArgs="Date format") + subcription_iter = SubscriptionRequest.objects.filter(created_at__range=(start_date, end_date)) + else: + subcription_iter = SubscriptionRequest.objects.all() + + for request in tqdm(subcription_iter.iterator()): + self.process_request(request, test_requests, supicious_requests) + self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!')) + self.stdout.write(self.style.SUCCESS(f'Test request: \n {test_requests} \n =============================')) + self.stdout.write(self.style.SUCCESS(f'Supicious request: \n {supicious_requests} \n =============================')) + diff --git a/cope2n-api/fwd_api/management/commands/migrate-database-test-request-24-Feb.py b/cope2n-api/fwd_api/management/commands/migrate-database-test-request-24-Feb.py new file mode 100644 index 0000000..7a8cda3 --- /dev/null +++ b/cope2n-api/fwd_api/management/commands/migrate-database-test-request-24-Feb.py @@ -0,0 +1,49 @@ +# myapp/management/commands/mycustomcommand.py +from django.core.management.base import BaseCommand +from tqdm import tqdm +from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest +from fwd_api.exception.exceptions import InvalidException +from fwd_api.utils.accuracy import predict_result_to_ready +import traceback +import copy +from django.utils import timezone + +class Command(BaseCommand): + help = 'Refactor database for image level' + + def add_arguments(self, parser): + # Add your command-line arguments here + parser.add_argument('start', type=str, help='start date, sample: 2023-01-02T00:00:00+0700') + parser.add_argument('end', type=str, help='end date, sample: 2024-01-25T00:00:00+0800') + + def process_request(self, request): + # if not request.predict_result: + # self.stdout.write(self.style.WARNING(f"Key predict_result not found in {request.request_id}")) + # return + # if request.predict_result.get("status", 200) != 200: + # self.stdout.write(self.style.WARNING(f"Key predict_result not found in {request.request_id}")) + # return + + if not request.feedback_result or not request.redemption_id: + request.is_test_request= True + request.save() + + def handle(self, *args, **options): + start = options['start'] + end = options['end'] + + if start or end: + try: + start_date = timezone.datetime.strptime(start, '%Y-%m-%dT%H:%M:%S%z') # We care only about day precision only + end_date = timezone.datetime.strptime(end, '%Y-%m-%dT%H:%M:%S%z') + except Exception as e: + print(f"[INFO]: start: {start}") + print(f"[INFO]: end: {end}") + raise InvalidException(excArgs="Date format") + subcription_iter = SubscriptionRequest.objects.filter(created_at__range=(start_date, end_date)) + else: + subcription_iter = SubscriptionRequest.objects.all() + + for request in tqdm(subcription_iter.iterator()): + self.process_request(request) + self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!')) \ No newline at end of file diff --git a/cope2n-api/fwd_api/utils/accuracy.py b/cope2n-api/fwd_api/utils/accuracy.py index 4aaaa21..ddb3d81 100755 --- a/cope2n-api/fwd_api/utils/accuracy.py +++ b/cope2n-api/fwd_api/utils/accuracy.py @@ -536,7 +536,7 @@ def extract_report_detail_list(report_detail_list, lower=False, in_percent=True) "Request ID": report_file.correspond_request_id, "Redemption Number": report_file.correspond_redemption_id, "Image type": report_file.doc_type, - "IMEI_user submitted": first_of_list(report_file.feedback_result.get("imei_number", [None])), + "IMEI_user submitted": first_of_list(report_file.feedback_result.get("imei_number", [None])) if report_file.feedback_result else None, "IMEI_OCR retrieved": first_of_list(report_file.predict_result.get("imei_number", [None])), "IMEI Revised": first_of_list(report_file.reviewed_result.get("imei_number", [None])) if report_file.reviewed_result else None, "IMEI1 Accuracy": first_of_list(report_file.feedback_accuracy.get("imei_number", [None])), @@ -623,9 +623,9 @@ def predict_result_to_ready(result): return dict_result dict_result["retailername"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}])[0].get("value", None) dict_result["sold_to_party"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}])[1].get("value", None) - dict_result["invoice_no"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}])[2].get("value", None) - dict_result["purchase_date"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}])[3].get("value", []) - dict_result["imei_number"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[4].get("value", []) + dict_result["purchase_date"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}])[2].get("value", []) + dict_result["imei_number"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[3].get("value", []) + dict_result["invoice_no"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}, {}])[4].get("value", None) return dict_result def align_fine_result(ready_predict, fine_result): diff --git a/cope2n-api/fwd_api/utils/file.py b/cope2n-api/fwd_api/utils/file.py index fea47c0..fa2a1cd 100755 --- a/cope2n-api/fwd_api/utils/file.py +++ b/cope2n-api/fwd_api/utils/file.py @@ -80,6 +80,13 @@ def validate_feedback_file(csv_file_path): if missing_columns: raise RequiredColumnException(excArgs=str(missing_columns)) +def validate_review(review, num_imei): + for field in settings.FIELD: + if not field in review.keys(): + raise RequiredFieldException(excArgs=f'reviewed_result.{field}') + if not isinstance(review["imei_number"], list) or len(review["imei_number"]) != num_imei: + raise InvalidException(excArgs=f'imei_number') + def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUEST, min_file_num=1, file_field="files"): total_file_size = 0 if len(files) < min_file_num: @@ -140,7 +147,7 @@ def get_folder_path(rq: SubscriptionRequest): logger = get_task_logger(__name__) request_id = str(rq.request_id) - logger.info(f"[DEBUG]: rq.process_type: {rq.process_type}") + logger.debug(f"rq.process_type: {rq.process_type}") p_type = ProcessUtil.map_process_type_to_folder_name(int(rq.process_type)) sub_id = str(rq.subscription.id) user_id = str(rq.subscription.user.id)