diff --git a/cope2n-ai-fi/Dockerfile b/cope2n-ai-fi/Dockerfile index 2a1f54d..8968edf 100755 --- a/cope2n-ai-fi/Dockerfile +++ b/cope2n-ai-fi/Dockerfile @@ -15,7 +15,9 @@ RUN pip install mmcv==1.6.0 -f https://download.openmmlab.com/mmcv/dst/cu116/tor RUN ln -s /opt/conda/lib/python3.10/site-packages/torch/lib/libcudnn.so.8 /usr/lib/libcudnn.so && \ ln -s /opt/conda/lib/libcublas.so /usr/lib/libcublas.so -RUN python -m pip install paddlepaddle-gpu==2.4.2.post116 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html --no-cache-dir +# RUN python -m pip install paddlepaddle-gpu==2.4.2.post116 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html --no-cache-dir +RUN pip install fastdeploy-gpu-python==1.0.7 -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html --no-cache-dir + RUN python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' --no-cache-dir diff --git a/cope2n-ai-fi/modules/sdsvkvu b/cope2n-ai-fi/modules/sdsvkvu index d01de31..671d791 160000 --- a/cope2n-ai-fi/modules/sdsvkvu +++ b/cope2n-ai-fi/modules/sdsvkvu @@ -1 +1 @@ -Subproject commit d01de312ab86db554ffa2f1b01396ef8d56b78ed +Subproject commit 671d7917c657ad185a06772e0b707b45fe59788a diff --git a/cope2n-api/fwd/settings.py b/cope2n-api/fwd/settings.py index 9bdaf69..7513fcd 100755 --- a/cope2n-api/fwd/settings.py +++ b/cope2n-api/fwd/settings.py @@ -224,7 +224,7 @@ OVERVIEW_REFRESH_INTERVAL = 2 OVERVIEW_REPORT_ROOT = "overview" OVERVIEW_REPORT_DURATION = ["30d", "7d"] -ACC_EXCLUDE_RESEASONS = ["Invalid Input", "Handwritten information", "handwritten"] +ACC_EXCLUDE_RESEASONS = ["Invalid Input", "Handwritten information", "handwritten", "invalid_image", "missing_information", "too_blurry_text", "too_small_text"] SUBS = { "SEAU": "AU", @@ -233,13 +233,17 @@ SUBS = { "SEPCO": "PH", "TSE": "TH", "SEIN": "ID", - "ALL": "all" + "ALL": "all", # all_detail + "SEAO": "seao" } +BAD_THRESHOLD = 0.75 +NEED_REVIEW = 1.0 + +SUB_FOR_BILLING = ["all", "seao"] + CACHES = { 'default': { 'BACKEND': 'django.core.cache.backends.dummy.DummyCache', } -} - - +} \ No newline at end of file diff --git a/cope2n-api/fwd_api/api/accuracy_view.py b/cope2n-api/fwd_api/api/accuracy_view.py index f0498cd..03659f9 100644 --- a/cope2n-api/fwd_api/api/accuracy_view.py +++ b/cope2n-api/fwd_api/api/accuracy_view.py @@ -7,6 +7,7 @@ from django.utils import timezone from django.db.models import Q import uuid import os +import copy import pytz from fwd import settings from drf_spectacular.utils import extend_schema, OpenApiParameter, OpenApiTypes @@ -15,11 +16,12 @@ import json from ..exception.exceptions import InvalidException, RequiredFieldException, NotFoundException from ..models import SubscriptionRequest, Report, ReportFile, SubscriptionRequestFile from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list, extract_report_detail_list, IterAvg -from ..utils.file import download_from_S3, convert_date_string, build_media_url_v2, build_url, build_S3_url +from ..utils.file import download_from_S3, dict2xlsx, save_report_to_S3, build_S3_url from ..utils.redis import RedisUtils from ..utils.process import string_to_boolean from ..request.ReportCreationSerializer import ReportCreationSerializer from ..utils.subsidiary import map_subsidiary_long_to_short, map_subsidiary_short_to_long +from ..utils.report import aggregate_overview from fwd_api.utils.accuracy import predict_result_to_ready import copy @@ -455,8 +457,8 @@ class AccuracyViewSet(viewsets.ViewSet): ], responses=None, tags=['Accuracy'] ) - @action(detail=False, url_path="overview", methods=["GET"]) - def overview(self, request): + @action(detail=False, url_path="overview_sumary", methods=["GET"]) + def overview_sumary(self, request): if request.method == 'GET': _subsidiary = request.GET.get('subsidiary', "ALL") duration = request.GET.get('duration', "") @@ -472,6 +474,89 @@ class AccuracyViewSet(viewsets.ViewSet): return JsonResponse(response, status=200) return JsonResponse({'error': 'Invalid request method.'}, status=405) + + @extend_schema( + parameters=[ + OpenApiParameter( + name='duration', + location=OpenApiParameter.QUERY, + description='one of [30d, 7d]', + type=OpenApiTypes.STR, + default='30d', + ), + OpenApiParameter( + name='subsidiary', + location=OpenApiParameter.QUERY, + description='Subsidiary', + type=OpenApiTypes.STR, + ) + ], + responses=None, tags=['Accuracy'] + ) + @action(detail=False, url_path="overview", methods=["GET"]) + def overview(self, request): + if request.method == 'GET': + _subsidiary = request.GET.get('subsidiary', "ALL") + duration = request.GET.get('duration', "") + + subsidiary = map_subsidiary_long_to_short(_subsidiary) + + if _subsidiary == "ALL": + # aggregate_overview from subsibdiaries + subsidiaries_to_include = list(settings.SUBS.values()) + subsidiaries_to_include.remove("all") + # subsidiaries_to_include.remove("seao") + subsidiary_overview_reports = [] + for sub in subsidiaries_to_include: + key = f"{sub}_{duration}" + try: + this_overview = json.loads(redis_client.get_specific_cache(settings.OVERVIEW_REPORT_ROOT, key)).get("data", []) + if sub != "seao": + this_overview = [d for d in this_overview if d.get("subs") != "+"] + else: + for item in this_overview: + if item.get("subs") == "+": + item["extraction_date"] = item["extraction_date"].replace("Subtotal ", "").replace("(", "").replace(")", "") + "-32" + subsidiary_overview_reports += this_overview + + except Exception as e: + print(f"[WARM]: Unable to retrive data {key} from Redis, skipping...") + data = aggregate_overview(subsidiary_overview_reports) + for item in data: + if item.get("subs") == "+": + item["extraction_date"] = "Subtotal (" + item["extraction_date"].replace("-32", "") + ")" + # Do the saving process + report_fine_data = copy.deepcopy(data) + for i, dat in enumerate(report_fine_data): + keys = [x for x in list(dat.keys()) if "accuracy" in x.lower()] + keys_percent = "images_quality" + for x_key in report_fine_data[i][keys_percent].keys(): + if "percent" not in x_key: + continue + report_fine_data[i][keys_percent][x_key] = report_fine_data[i][keys_percent][x_key]*100 + for key in keys: + if report_fine_data[i][key]: + for x_key in report_fine_data[i][key].keys(): + report_fine_data[i][key][x_key] = report_fine_data[i][key][x_key]*100 + overview_filename = _subsidiary + "_" + duration + ".xlsx" + data_workbook = dict2xlsx(report_fine_data, _type='report') + + folder_path = os.path.join(settings.MEDIA_ROOT, "report", settings.OVERVIEW_REPORT_ROOT) + os.makedirs(folder_path, exist_ok = True) + file_path = os.path.join(folder_path, overview_filename) + data_workbook.save(file_path) + + s3_key=save_report_to_S3(None, file_path) + # redis_client.set_cache(settings.OVERVIEW_REPORT_ROOT, overview_filename.replace(".xlsx", ""), json.dumps(save_data)) + else: + # Retrive data from Redis + key = f"{subsidiary}_{duration}" + data = json.loads(redis_client.get_specific_cache(settings.OVERVIEW_REPORT_ROOT, key)).get("data", []) + response = { + 'overview_data': data, + } + return JsonResponse(response, status=200) + return JsonResponse({'error': 'Invalid request method.'}, status=405) @extend_schema( parameters=[ diff --git a/cope2n-api/fwd_api/celery_worker/internal_task.py b/cope2n-api/fwd_api/celery_worker/internal_task.py index 07de96a..36523f5 100755 --- a/cope2n-api/fwd_api/celery_worker/internal_task.py +++ b/cope2n-api/fwd_api/celery_worker/internal_task.py @@ -119,7 +119,7 @@ def process_csv_feedback(csv_file_path, feedback_id): continue _predict_result = copy.deepcopy(predict_result_to_ready(sub_rq.predict_result)) _feedback_result = copy.deepcopy(sub_rq.feedback_result) - _reviewed_result = copy.deepcopy(sub_rq.reviewed_result) + # _reviewed_result = copy.deepcopy(sub_rq.reviewed_result) try: image.processing_time = time_cost.get(image.doc_type, [0 for _ in range(image.index_in_request)])[image.index_in_request] except Exception as e: @@ -132,19 +132,19 @@ def process_csv_feedback(csv_file_path, feedback_id): if _feedback_result: _feedback_result["imei_number"] = [] - if _reviewed_result: - _reviewed_result["imei_number"] = [] + # if _reviewed_result: + # _reviewed_result["imei_number"] = [] else: try: _predict_result = {"retailername": None, "sold_to_party": None, "purchase_date": [], "imei_number": [_predict_result["imei_number"][image.index_in_request]]} _feedback_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_feedback_result["imei_number"][image.index_in_request]]} if _feedback_result else None - _reviewed_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_reviewed_result["imei_number"][image.index_in_request]]} if _reviewed_result else None + # _reviewed_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_reviewed_result["imei_number"][image.index_in_request]]} if _reviewed_result else None except Exception as e: print (f"[ERROR]: {request_id} - {e}") image.predict_result = _predict_result image.feedback_result = _feedback_result - image.reviewed_result = _reviewed_result + # image.reviewed_result = _reviewed_result image.save() # update log into database feedback_rq = FeedbackRequest.objects.filter(feedback_id=feedback_id).first() @@ -264,10 +264,11 @@ def upload_report_to_s3(local_file_path, s3_key, report_id, delay): try: time.sleep(delay) s3_client.upload_file(local_file_path, s3_key) - report = Report.objects.filter(report_id=report_id)[0] - report.S3_uploaded = True - report.S3_file_name = s3_key - report.save() + if report_id: + report = Report.objects.filter(report_id=report_id)[0] + report.S3_uploaded = True + report.S3_file_name = s3_key + report.save() except Exception as e: logger.error(f"Unable to set S3: {e}") print(f"Unable to set S3: {e}") diff --git a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py index 6755432..7e483da 100644 --- a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py +++ b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py @@ -180,7 +180,7 @@ def make_a_report_2(report_id, query_set): if query_set["include_test"]: base_query = ~base_query if isinstance(query_set["subsidiary"], str): - if query_set["subsidiary"] and query_set["subsidiary"].lower().replace(" ", "")!="all": + if query_set["subsidiary"] and query_set["subsidiary"].lower().replace(" ", "") not in settings.SUB_FOR_BILLING: base_query &= Q(redemption_id__startswith=query_set["subsidiary"]) if isinstance(query_set["is_reviewed"], str): if query_set["is_reviewed"] == "reviewed": @@ -208,6 +208,7 @@ def make_a_report_2(report_id, query_set): bad_image_list = [] number_images = 0 number_bad_images = 0 + review_progress = [] # TODO: Multithreading # Calculate accuracy, processing time, ....Then save. subscription_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at') @@ -245,6 +246,7 @@ def make_a_report_2(report_id, query_set): errors += request_att["err"] num_request += 1 + review_progress += request_att.get("is_reviewed", []) report_fine_data, _save_data = report_engine.save(report.report_id, query_set.get("is_daily_report", False), query_set["include_test"]) transaction_att = count_transactions(start_date, end_date, report.subsidiary) @@ -277,6 +279,10 @@ def make_a_report_2(report_id, query_set): report.feedback_accuracy = acumulated_acc["feedback"] report.reviewed_accuracy = acumulated_acc["reviewed"] report.combined_accuracy = acumulated_acc["acumulated"] + + report.num_reviewed = review_progress.count(1) + report.num_not_reviewed = review_progress.count(0) + report.num_no_reviewed = review_progress.count(-1) report.errors = "|".join(errors) report.status = "Ready" @@ -292,7 +298,9 @@ def make_a_report_2(report_id, query_set): # Save overview dashboard # multiple accuracy by 100 save_data = copy.deepcopy(_save_data) + review_key = "review_progress" for i, dat in enumerate(report_fine_data): + report_fine_data[i][review_key] = report_fine_data[i][review_key]*100 keys = [x for x in list(dat.keys()) if "accuracy" in x.lower()] keys_percent = "images_quality" for x_key in report_fine_data[i][keys_percent].keys(): diff --git a/cope2n-api/fwd_api/management/commands/migrate-csv-revert.py b/cope2n-api/fwd_api/management/commands/migrate-csv-revert.py new file mode 100644 index 0000000..f178819 --- /dev/null +++ b/cope2n-api/fwd_api/management/commands/migrate-csv-revert.py @@ -0,0 +1,128 @@ +# myapp/management/commands/mycustomcommand.py +from io import StringIO +from django.core.management.base import BaseCommand +from tqdm import tqdm +from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest +from fwd_api.utils.accuracy import predict_result_to_ready +import traceback +import copy +import csv +from fwd_api.constant.common import FileCategory +import re + +PREDICT_INDEX = 4 +FEEDBACK_INDEX = 3 +REVIEWED_INDEX = 5 +REASON_INDEX = 6 +COUNTER_INDEX = 9 + +def detect_date_format(date_string): + pattern = r'^\d{2}/\d{2}/\d{4}$' + match = re.match(pattern, date_string) + if match: + return True + else: + return False + +class Command(BaseCommand): + help = 'Refactor database for image level' + + def add_arguments(self, parser): + # Add your command-line arguments here + parser.add_argument('test', type=str, help='Value for the argument') + + def process_request(self, request_id): + request = SubscriptionRequest.objects.filter(request_id=request_id).first() + images = SubscriptionRequestFile.objects.filter(request=request, file_category=FileCategory.Origin.value).order_by('index_in_request') + request_review = {"imei_number": [], "retailername": None, "purchase_date": None, "sold_to_party": None} + for image in images: + if image.doc_type == "imei": + request_review["imei_number"] += image.reviewed_result.get("imei_number", []) + elif image.doc_type == "invoice": + request_review["retailername"] = image.reviewed_result.get("retailername", None) + request_review["purchase_date"] = image.reviewed_result.get("purchase_date", None) + request_review["sold_to_party"] = image.reviewed_result.get("sold_to_party", None) + + request.reviewed_result = request_review + request.save() + + def process_requestfile(self, request_list, traversed_requestfiles, request_filename, predict_result, user_feedback, reviewed_result, reason=None, counter=None): + image = SubscriptionRequestFile.objects.filter(file_name=request_filename).first() + try: + if image.doc_type == "imei": + if request_filename not in traversed_requestfiles: + image.reviewed_result = {"imei_number": [reviewed_result], "retailername": None, "purchase_date": None, "sold_to_party": None} + else: + image.reviewed_result["imei_number"].append(reviewed_result) + if request_filename not in traversed_requestfiles: + image.feedback_result = {"imei_number": [user_feedback], "retailername": None, "purchase_date": None, "sold_to_party": None} + else: + image.feedback_result["imei_number"].append(user_feedback) + if request_filename not in traversed_requestfiles: + image.predict_result = {"imei_number": [predict_result], "retailername": None, "purchase_date": [], "sold_to_party": None} + else: + image.predict_result["imei_number"].append(predict_result) + + elif image.doc_type == "invoice": + if detect_date_format(reviewed_result): + if not image.reviewed_result: + image.reviewed_result = {"imei_number": [], "retailername": None, "purchase_date": reviewed_result, "sold_to_party": None} + else: + image.reviewed_result["purchase_date"] = reviewed_result + if not image.feedback_result: + image.feedback_result = {"imei_number": [], "retailername": None, "purchase_date": user_feedback, "sold_to_party": None} + else: + image.feedback_result["purchase_date"] = user_feedback + if not image.predict_result: + image.predict_result = {"imei_number": [], "retailername": None, "purchase_date": [predict_result], "sold_to_party": None} + else: + image.predict_result["purchase_date"] = [predict_result] + else: + if not image.reviewed_result: + image.reviewed_result = {"imei_number": [], "retailername": reviewed_result, "purchase_date": None, "sold_to_party": None} + else: + image.reviewed_result["retailername"] = reviewed_result + if not image.feedback_result: + image.feedback_result = {"imei_number": [], "retailername": user_feedback, "purchase_date": None, "sold_to_party": None} + else: + image.feedback_result["retailername"] = user_feedback + if not image.predict_result: + image.predict_result = {"imei_number": [], "retailername": predict_result, "purchase_date": [], "sold_to_party": None} + else: + image.predict_result["retailername"] = predict_result + + if reason: + image.reason = reason + if counter: + image.counter_measures = counter + image.save() + request_list.append(image.request.request_id) + traversed_requestfiles.append(request_filename) + if request_filename == "temp_imei_SAP_20240201130151_7e7fa87017af40c1bd079b7da6950193_0.pdf": + print(f"[INFO]: {image.reviewed_result}") + print(f"[INFO]: {image.predict_result}") + print(f"[INFO]: {image.feedback_result}") + + except Exception as e: + self.stdout.write(self.style.ERROR(f"Request File: {request_filename} failed with {e}")) + print(traceback.format_exc()) + + def handle(self, *args, **options): + test = options['test'] + request_list = [] + traversed_requestfiles = [] + #open csv file + with open(test, 'r') as csvfile: + reader = csv.reader(csvfile) + index = 0 + for row in reader: + if index != 0: + # request_list, traversed_requestfiles, request_filename, predict_result, user_feedback, reviewed_result, reason=None, counter=None + self.process_requestfile(request_list, traversed_requestfiles, row[2], row[PREDICT_INDEX], row[FEEDBACK_INDEX], row[REVIEWED_INDEX]) + index += 1 + + self.stdout.write(self.style.SUCCESS(f"Reverting {len(list(set(request_list)))} from images")) + for request in request_list: + self.process_request(request) + self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!')) + diff --git a/cope2n-api/fwd_api/management/commands/migrate-csv.py b/cope2n-api/fwd_api/management/commands/migrate-csv.py index ddb63d1..f3c8ebd 100644 --- a/cope2n-api/fwd_api/management/commands/migrate-csv.py +++ b/cope2n-api/fwd_api/management/commands/migrate-csv.py @@ -1,4 +1,5 @@ # myapp/management/commands/mycustomcommand.py +from io import StringIO from django.core.management.base import BaseCommand from tqdm import tqdm from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest @@ -7,6 +8,12 @@ import traceback import copy import csv +PREDICT_INDEX = 3 +FEEDBACK_INDEX = 2 +REVIEWED_INDEX = 4 +REASON_INDEX = 6 +COUNTER_INDEX = 9 + class Command(BaseCommand): help = 'Refactor database for image level' @@ -14,21 +21,12 @@ class Command(BaseCommand): # Add your command-line arguments here parser.add_argument('test', type=str, help='Value for the argument') - def process_request(self, request, predict_result, user_feedback, reviewed_result): + def process_request(self, total, failed, request, predict_result, user_feedback, reviewed_result, reason, counter): if len(request.request_id.split(".")[0].split("_")) < 2: return - - request_feedback = copy.deepcopy(request.feedback_result) + total[0] += 1 + request_review = copy.deepcopy(request.reviewed_result) - - if not request_feedback: - request_feedback = { - "request_id": request.request_id, - "imei_number": [], - "retailername": "", - "purchase_date": "", - "sold_to_party": "" - } if not request_review: request_review = { @@ -53,87 +51,60 @@ class Command(BaseCommand): is_match = True if field == 'imei_number': - if not reviewed_result in request_review["imei_number"]: + if not (reviewed_result in request_review["imei_number"]): request_review["imei_number"].append(reviewed_result) - if not user_feedback in request_feedback["imei_number"]: - request_feedback["imei_number"].append(user_feedback) else: if not reviewed_result == request_review[field]: request_review[field] = reviewed_result - if not user_feedback == request_feedback[field]: - request_feedback[field] = user_feedback - _predict_result = copy.deepcopy(predict_result_to_ready(request.predict_result)) - _feedback_result = copy.deepcopy(request.feedback_result) _reviewed_result = copy.deepcopy(request.reviewed_result) - if not _feedback_result: - _feedback_result = { - "imei_number": [], - "retailername": "", - "purchase_date": "", - "sold_to_party": "" - } if not _reviewed_result: _reviewed_result = { + "request_id": image.request_id, "imei_number": [], "retailername": "", "purchase_date": "", "sold_to_party": "" } - if image.doc_type == "invoice": - _predict_result[field] = predict_result - _predict_result["imei_number"] = [] - if _feedback_result: - _feedback_result[field] = user_feedback - _feedback_result["imei_number"] = [] - else: - None + if image.doc_type == "invoice" and field in ['retailername', 'purchase_date']: if _reviewed_result: _reviewed_result[field] = reviewed_result _reviewed_result["imei_number"] = [] else: None - else: - _predict_result = { - "retailername": None, - "sold_to_party": None, - "purchase_date": [], - "imei_number": [predict_result] - } - _feedback_result = { - "retailername": None, - "sold_to_party": None, - "purchase_date": None, - "imei_number": [user_feedback] - } if _feedback_result else None + elif image.doc_type == "imei" and field == "imei_number": _reviewed_result = { "retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [reviewed_result] } if _reviewed_result else None - image.predict_result = _predict_result - image.feedback_result = _feedback_result image.reviewed_result = _reviewed_result + if reason: + image.reason = reason + if counter: + image.counter_measures = counter image.save() - request.feedback_result = request_feedback request.reviewed_result = request_review - request.feedback_result["request_id"] = request.request_id request.reviewed_result["request_id"] = request.request_id request.is_reviewed = True request.save() except Exception as e: self.stdout.write(self.style.ERROR(f"Request: {request.request_id} failed with {e}")) + failed[0] += 1 print(traceback.format_exc()) if not is_match: + failed[0] += 1 print("FAIL =====>", image.feedback_result, predict_result, user_feedback, reviewed_result) def handle(self, *args, **options): - test = options['test'] + test = options['test'] + total = [0] + failed = [0] #open csv file with open(test, 'r') as csvfile: reader = csv.reader(csvfile) @@ -144,7 +115,10 @@ class Command(BaseCommand): if not request: print("Not found ====>", row) else: - self.process_request(request, row[3], row[2], row[4]) + # request, predict_result, user_feedback, reviewed_result, reason, counter + self.process_request(total, failed, request, row[PREDICT_INDEX], row[FEEDBACK_INDEX], row[REVIEWED_INDEX], row[REASON_INDEX], row[COUNTER_INDEX]) index += 1 + self.stdout.write(self.style.SUCCESS(f"Failed/Total: {failed[0]}/{total[0]}")) self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!')) + diff --git a/cope2n-api/fwd_api/management/commands/migrate-database-010224.py b/cope2n-api/fwd_api/management/commands/migrate-database-010224.py index bc81388..8788c3c 100644 --- a/cope2n-api/fwd_api/management/commands/migrate-database-010224.py +++ b/cope2n-api/fwd_api/management/commands/migrate-database-010224.py @@ -2,17 +2,57 @@ from django.core.management.base import BaseCommand from tqdm import tqdm from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest +from fwd_api.exception.exceptions import InvalidException from fwd_api.utils.accuracy import predict_result_to_ready import traceback import copy +from django.utils import timezone + +RETAILER_LIST = [ +'', +'Amazon.sg', +'Gain City (TV/AV)', +'Harvey Norman (TV/AV)', +'KRIS SHOP', +'Lazada (Samsung Brand Store)', +'M1 Shop', +'Mohamed Mustafa & Samsuddin CO (TV/AV)', +'Parisilk (TV/AV)', +'Shopee (Samsung Brand Store)', +'StarHub Shop', +'M1 Shop', +'Ectri', +'Whandising', +'3 Mobile', +'Mister Mobile', +'Lazada', +'Mister Mobile', +'Samsung Experience Store', +'A & Samsuddin Co.', +'Parisilk', +'Samsung Brand Store', +'Shopee', +'M1 Shop', +'Onephone Online', +'3 Mobile', +'Samsung Experience Store', +'Challenger', +'Eas Marketing', +'Ing Mobile', +'Starhub Shop', +'Mister Mobile', +'Onephone Online', +'Starho' +] +RETAILER_LIST = list(set(RETAILER_LIST)) class Command(BaseCommand): help = 'Refactor database for image level' def add_arguments(self, parser): # Add your command-line arguments here - parser.add_argument('test', type=str, help='Value for the argument') - + parser.add_argument('start', type=str, help='start date, sample: 2023-01-02T00:00:00+0700') + parser.add_argument('end', type=str, help='end date, sample: 2023-01-03T00:00:00+0700') def process_request(self, request): if len(request.request_id.split(".")[0].split("_")) < 2: @@ -29,7 +69,7 @@ class Command(BaseCommand): for i, image in enumerate(images): # temp_imei_SAP_20240127223644_a493434edbf84fc08aeb87ef6cdde102_0.jpg try: - image.index_in_request = int(image.file_name.split(".")[0].split("_")[-1]) if len(image.file_name.split(".")[0].split("_")) > 4 else 0 + # image.index_in_request = int(image.file_name.split(".")[0].split("_")[-1]) if len(image.file_name.split(".")[0].split("_")) > 4 else 0 image.doc_type = image.file_name.split(".")[0].split("_")[1] if len(image.file_name.split(".")[0].split("_")) > 4 else "all" image.processing_time = time_cost[image.doc_type][image.index_in_request] if not request.predict_result: @@ -53,7 +93,13 @@ class Command(BaseCommand): else: _predict_result = {"retailername": None, "sold_to_party": None, "purchase_date": [], "imei_number": [_predict_result["imei_number"][image.index_in_request]]} _feedback_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_feedback_result["imei_number"][image.index_in_request]]} if _feedback_result else None - _reviewed_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_reviewed_result["imei_number"][image.index_in_request]]} if _reviewed_result else None + if isinstance(_reviewed_result, dict) and (len(_reviewed_result.get("imei_number", [])) or any(element in RETAILER_LIST for element in _reviewed_result.get("imei_number", []))) : + _reviewed_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_reviewed_result["imei_number"][image.index_in_request]]} if _reviewed_result else None + else: + _reviewed_result = None + request.reviewed_result = None + request.is_reviewed = False + request.save() image.predict_result = _predict_result image.feedback_result = _feedback_result image.reviewed_result = _reviewed_result @@ -64,8 +110,28 @@ class Command(BaseCommand): continue def handle(self, *args, **options): - test = options['test'] - subcription_iter = SubscriptionRequest.objects.all() + start = options['start'] + end = options['end'] + + white_list = [ + ] + + if start or end: + try: + start_date = timezone.datetime.strptime(start, '%Y-%m-%dT%H:%M:%S%z') # We care only about day precision only + end_date = timezone.datetime.strptime(end, '%Y-%m-%dT%H:%M:%S%z') + # start_date = timezone.make_aware(start_date) + # end_date = timezone.make_aware(end_date) + except Exception as e: + print(f"[INFO]: start: {start}") + print(f"[INFO]: end: {end}") + raise InvalidException(excArgs="Date format") + subcription_iter = SubscriptionRequest.objects.filter(created_at__range=(start_date, end_date)) + else: + subcription_iter = SubscriptionRequest.objects.all() + for request in tqdm(subcription_iter.iterator()): + if request.request_id not in white_list: + continue self.process_request(request) self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!')) diff --git a/cope2n-api/fwd_api/migrations/0183_report_num_no_reviewed_report_num_not_reviewed_and_more.py b/cope2n-api/fwd_api/migrations/0183_report_num_no_reviewed_report_num_not_reviewed_and_more.py new file mode 100644 index 0000000..474b2f0 --- /dev/null +++ b/cope2n-api/fwd_api/migrations/0183_report_num_no_reviewed_report_num_not_reviewed_and_more.py @@ -0,0 +1,33 @@ +# Generated by Django 4.1.3 on 2024-02-28 09:30 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('fwd_api', '0182_report_combined_accuracy'), + ] + + operations = [ + migrations.AddField( + model_name='report', + name='num_no_reviewed', + field=models.IntegerField(default=0), + ), + migrations.AddField( + model_name='report', + name='num_not_reviewed', + field=models.IntegerField(default=0), + ), + migrations.AddField( + model_name='report', + name='num_reviewed', + field=models.IntegerField(default=0), + ), + migrations.AddField( + model_name='reportfile', + name='review_status', + field=models.IntegerField(default=-1), + ), + ] diff --git a/cope2n-api/fwd_api/models/Report.py b/cope2n-api/fwd_api/models/Report.py index 05236cb..22df35a 100644 --- a/cope2n-api/fwd_api/models/Report.py +++ b/cope2n-api/fwd_api/models/Report.py @@ -28,6 +28,9 @@ class Report(models.Model): number_bad_images = models.IntegerField(default=0) number_imei = models.IntegerField(default=0) number_invoice = models.IntegerField(default=0) + num_reviewed = models.IntegerField(default=0) + num_not_reviewed = models.IntegerField(default=0) + num_no_reviewed = models.IntegerField(default=0) number_imei_transaction = models.IntegerField(default=0) number_invoice_transaction = models.IntegerField(default=0) diff --git a/cope2n-api/fwd_api/models/ReportFile.py b/cope2n-api/fwd_api/models/ReportFile.py index a4559d3..2ba5247 100644 --- a/cope2n-api/fwd_api/models/ReportFile.py +++ b/cope2n-api/fwd_api/models/ReportFile.py @@ -32,6 +32,7 @@ class ReportFile(models.Model): bad_image_reason = models.TextField(default="") counter_measures = models.TextField(default="") error = models.TextField(default="") + review_status = models.IntegerField(default=-1) # -1: No need review, 0: not reviewed, 1: reviewed diff --git a/cope2n-api/fwd_api/utils/accuracy.py b/cope2n-api/fwd_api/utils/accuracy.py index 1e8016d..5651de3 100644 --- a/cope2n-api/fwd_api/utils/accuracy.py +++ b/cope2n-api/fwd_api/utils/accuracy.py @@ -17,8 +17,6 @@ from fwd import settings from ..models import SubscriptionRequest, Report, ReportFile import json -BAD_THRESHOLD = 0.75 - valid_keys = ["retailername", "sold_to_party", "purchase_date", "imei_number"] class ReportAccumulateByRequest: @@ -67,7 +65,8 @@ class ReportAccumulateByRequest: 'retailername': IterAvg(), 'sold_to_party': IterAvg() }, - 'num_request': 0 + 'num_request': 0, + "review_progress": [] } self.day_format = { 'subs': sub, @@ -110,7 +109,8 @@ class ReportAccumulateByRequest: 'sold_to_party': IterAvg() }, "report_files": [], - 'num_request': 0 + "num_request": 0, + "review_progress": [] }, @staticmethod @@ -155,7 +155,7 @@ class ReportAccumulateByRequest: total["usage"]["imei"] += 1 if doc_type == "imei" else 0 total["usage"]["invoice"] += 1 if doc_type == "invoice" else 0 total["usage"]["total_images"] += 1 - + total["review_progress"].append(report_file.review_status) return total @staticmethod @@ -192,7 +192,7 @@ class ReportAccumulateByRequest: print(f"[WARM]: Weird doctype: {report_file.doc_type}") day_data["average_processing_time"][report_file.doc_type] = IterAvg() day_data["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0 - + day_data["review_progress"].append(report_file.review_status) return day_data def add(self, request, report_files): @@ -212,9 +212,18 @@ class ReportAccumulateByRequest: self.data[this_month][1][this_day]['num_request'] += 1 self.data[this_month][0]['num_request'] += 1 + for report_file in report_files: - self.data[this_month][0] = self.update_total(self.data[this_month][0], report_file) # Update the subtotal within the month - self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], report_file) # Update the subtotal of the day + _report_file = copy.deepcopy(report_file) + if _report_file.is_bad_image or _report_file.bad_image_reason in settings.ACC_EXCLUDE_RESEASONS: + _report_file.acc = None + for t in _report_file.feedback_accuracy.keys(): + _report_file.feedback_accuracy[t] = [] + for t in _report_file.reviewed_accuracy.keys(): + _report_file.reviewed_accuracy[t] = [] + + self.data[this_month][0] = self.update_total(self.data[this_month][0], _report_file) # Update the subtotal within the month + self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], _report_file) # Update the subtotal of the day def count_transactions_within_day(self, date_string): # convert this day into timezone.datetime at UTC @@ -310,6 +319,7 @@ class ReportAccumulateByRequest: _data[month][1][day]["reviewed_accuracy"]["purchase_date"] = _data[month][1][day]["reviewed_accuracy"]["purchase_date"]() _data[month][1][day]["reviewed_accuracy"]["retailername"] = _data[month][1][day]["reviewed_accuracy"]["retailername"]() _data[month][1][day]["reviewed_accuracy"]["sold_to_party"] = _data[month][1][day]["reviewed_accuracy"]["sold_to_party"]() + _data[month][1][day]["review_progress"] = _data[month][1][day]["review_progress"].count(1)/(_data[month][1][day]["review_progress"].count(0)+ _data[month][1][day]["review_progress"].count(1)) if (_data[month][1][day]["review_progress"].count(0)+ _data[month][1][day]["review_progress"].count(1)) >0 else 0 _data[month][1][day].pop("report_files") _data[month][1][day]["images_quality"]["successful_percent"] = _data[month][1][day]["images_quality"]["successful"]/_data[month][1][day]["total_images"] if _data[month][1][day]["total_images"] > 0 else 0 @@ -333,6 +343,7 @@ class ReportAccumulateByRequest: _data[month][0]["reviewed_accuracy"]["purchase_date"] = _data[month][0]["reviewed_accuracy"]["purchase_date"]() _data[month][0]["reviewed_accuracy"]["retailername"] = _data[month][0]["reviewed_accuracy"]["retailername"]() _data[month][0]["reviewed_accuracy"]["sold_to_party"] = _data[month][0]["reviewed_accuracy"]["sold_to_party"]() + _data[month][0]["review_progress"] = _data[month][0]["review_progress"].count(1)/(_data[month][0]["review_progress"].count(0)+ _data[month][0]["review_progress"].count(1)) if (_data[month][0]["review_progress"].count(0)+ _data[month][0]["review_progress"].count(1)) >0 else 0 return _data @@ -575,7 +586,7 @@ def extract_report_detail_list(report_detail_list, lower=False, in_percent=True) def count_transactions(start_date, end_date, subsidiary="all"): base_query = Q(created_at__range=(start_date, end_date)) base_query &= Q(is_test_request=False) - if subsidiary and subsidiary.lower().replace(" ", "")!="all": + if subsidiary and subsidiary.lower().replace(" ", "") not in settings.SUB_FOR_BILLING: base_query &= Q(redemption_id__startswith=subsidiary) transaction_att = {} @@ -707,6 +718,7 @@ def calculate_avg_accuracy(acc, type, keys=[]): return sum(acc_list)/len(acc_list) if len(acc_list) > 0 else None +# Deprecated def calculate_and_save_subcription_file(report, request): request_att = {"acc": {"feedback": {"imei_number": [], "purchase_date": [], @@ -772,11 +784,18 @@ def calculate_and_save_subcription_file(report, request): return request_att +# def result_maximize_list_values(result, acc): +# for k in acc.keys(): +# if isinstance(acc[k], list) and len(acc[k]) > 0: + def acc_maximize_list_values(acc): + pos = {} for k in acc.keys(): + pos[k] = 0 if isinstance(acc[k], list) and len(acc[k]) > 0: acc[k] = [max(acc[k])] - return acc + pos[k] = acc[k].index(acc[k][0]) + return acc, pos def calculate_a_request(report, request): request_att = {"acc": {"feedback": {"imei_number": [], @@ -795,15 +814,23 @@ def calculate_a_request(report, request): "sold_to_party": [], }}, "err": [], - "time_cost": {}, + "time_cost": {"imei": [], + "invoice": []}, "total_images": 0, "bad_images": 0, "bad_image_list": [], + "is_reviewed": [], # -1: No need to review, 0: Not reviewed, 1: Reviewed } images = SubscriptionRequestFile.objects.filter(request=request, file_category=FileCategory.Origin.value) report_files = [] for image in images: status, att = calculate_subcription_file(image) + + att["acc"]["feedback"], fb_max_indexes = acc_maximize_list_values(att["acc"]["feedback"]) + att["acc"]["reviewed"], rv_max_indexes = acc_maximize_list_values(att["acc"]["reviewed"]) + + _att = copy.deepcopy(att) + if status != 200: continue image.feedback_accuracy = att["acc"]["feedback"] # dict {key: [values]} @@ -820,6 +847,17 @@ def calculate_a_request(report, request): _sub = map_subsidiary_short_to_long(request.redemption_id[:2]) else: print(f"[WARM]: empty redemption_id, check request: {request.request_id}") + + # Little trick to replace purchase date to normalized + if len(att["normalized_data"]["feedback"].get("purchase_date", [])) > 0: + image.predict_result["purchase_date"] = [att["normalized_data"]["feedback"]["purchase_date"][i][0] for i in range(len(att["normalized_data"]["feedback"]["purchase_date"]))] + image.feedback_result["purchase_date"] = att["normalized_data"]["feedback"]["purchase_date"][fb_max_indexes["purchase_date"]][1] + if len(att["normalized_data"]["reviewed"].get("purchase_date", [])) > 0: + image.predict_result["purchase_date"] = [att["normalized_data"]["reviewed"]["purchase_date"][i][0] for i in range(len(att["normalized_data"]["reviewed"]["purchase_date"]))] + image.reviewed_result["purchase_date"] = att["normalized_data"]["reviewed"]["purchase_date"][rv_max_indexes["purchase_date"]][1] + if request.is_reviewed: + att["is_reviewed"] = 1 + request_att["is_reviewed"].append(att["is_reviewed"]) new_report_file = ReportFile(report=report, subsidiary=_sub, correspond_request_id=request.request_id, @@ -828,31 +866,34 @@ def calculate_a_request(report, request): predict_result=image.predict_result, feedback_result=image.feedback_result, reviewed_result=image.reviewed_result, - feedback_accuracy=acc_maximize_list_values(att["acc"]["feedback"]), - reviewed_accuracy=acc_maximize_list_values(att["acc"]["reviewed"]), + feedback_accuracy=att["acc"]["feedback"], + reviewed_accuracy=att["acc"]["reviewed"], acc=att["avg_acc"], is_bad_image=att["is_bad_image"], - is_reviewed="Yes" if request.is_reviewed else "No", + is_reviewed= "Yes" if request.is_reviewed else "No", time_cost=image.processing_time, bad_image_reason=image.reason, counter_measures=image.counter_measures, - error="|".join(att["err"]) + error="|".join(att["err"]), + review_status=att["is_reviewed"], ) report_files.append(new_report_file) - _att = copy.deepcopy(att) + if att["is_bad_image"] or image.reason in settings.ACC_EXCLUDE_RESEASONS: - request_att["bad_image_list"].append(image.file_name) + if att["is_bad_image"]: + request_att["bad_image_list"].append(image.file_name) + # if image.reason in settings.ACC_EXCLUDE_RESEASONS: + # print(f"[DEBUG]: {image.reason}") # TODO: Exclude bad image accuracy from average accuracy _att["avg_acc"] = None - for t in ["feedback", "reviewed"]: - for k in ["imei_number", "purchase_date", "retailername", "sold_to_party"]: + for t in _att["acc"].keys(): + for k in _att["acc"][t].keys(): _att["acc"][t][k] = [] - - - if request_att["time_cost"].get(image.doc_type, None): - request_att["time_cost"][image.doc_type].append(image.processing_time) else: - request_att["time_cost"][image.doc_type] = [image.processing_time] + if request_att["time_cost"].get(image.doc_type, None): + request_att["time_cost"][image.doc_type].append(image.processing_time) + else: + request_att["time_cost"][image.doc_type] = [image.processing_time] try: request_att["acc"]["feedback"]["imei_number"] += _att["acc"]["feedback"]["imei_number"] @@ -865,13 +906,14 @@ def calculate_a_request(report, request): request_att["acc"]["reviewed"]["retailername"] += _att["acc"]["reviewed"]["retailername"] request_att["acc"]["reviewed"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] - request_att["acc"]["acumulated"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if att["acc"]["reviewed"]["imei_number"] else att["acc"]["feedback"]["imei_number"] - request_att["acc"]["acumulated"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if att["acc"]["reviewed"]["purchase_date"] else att["acc"]["feedback"]["purchase_date"] - request_att["acc"]["acumulated"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if att["acc"]["reviewed"]["retailername"] else att["acc"]["feedback"]["retailername"] - request_att["acc"]["acumulated"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if att["acc"]["reviewed"]["sold_to_party"] else att["acc"]["feedback"]["sold_to_party"] + request_att["acc"]["acumulated"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if _att["acc"]["reviewed"]["imei_number"] else _att["acc"]["feedback"]["imei_number"] + request_att["acc"]["acumulated"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if _att["acc"]["reviewed"]["purchase_date"] else _att["acc"]["feedback"]["purchase_date"] + request_att["acc"]["acumulated"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if _att["acc"]["reviewed"]["retailername"] else _att["acc"]["feedback"]["retailername"] + request_att["acc"]["acumulated"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if _att["acc"]["reviewed"]["sold_to_party"] else _att["acc"]["feedback"]["sold_to_party"] - request_att["bad_images"] += int(_att["is_bad_image"]) - request_att["total_images"] += 1 + if image.reason not in settings.ACC_EXCLUDE_RESEASONS: + request_att["bad_images"] += int(_att["is_bad_image"]) + request_att["total_images"] += 1 request_att["err"] += _att["err"] except Exception as e: print(f"[ERROR]: failed to calculate request: {request.request_id} - request_file: {image.file_name} because of {e}") @@ -882,9 +924,13 @@ def calculate_a_request(report, request): def calculate_subcription_file(subcription_request_file): att = {"acc": {"feedback": {}, "reviewed": {}}, + "normalized_data": {"feedback": {}, + "reviewed": {}}, "err": [], "is_bad_image": False, - "avg_acc": None} + "avg_acc": None, + "is_reviewed": -1, # -1: No need to review, 0: Not reviewed, 1: Reviewed + } if not subcription_request_file.predict_result: return 400, att @@ -898,32 +944,29 @@ def calculate_subcription_file(subcription_request_file): for key_name in valid_keys: try: - att["acc"]["feedback"][key_name], _ = calculate_accuracy(key_name, inference_result, feedback_result) - att["acc"]["reviewed"][key_name], _ = calculate_accuracy(key_name, inference_result, reviewed_result) + att["acc"]["feedback"][key_name], att["normalized_data"]["feedback"][key_name] = calculate_accuracy(key_name, inference_result, feedback_result) + att["acc"]["reviewed"][key_name], att["normalized_data"]["reviewed"][key_name] = calculate_accuracy(key_name, inference_result, reviewed_result) except Exception as e: att["err"].append(str(e)) # print(f"[DEBUG]: predict_result: {subcription_request_file.predict_result}") # print(f"[DEBUG]: e: {e} -key_name: {key_name}") subcription_request_file.feedback_accuracy = att["acc"]["feedback"] subcription_request_file.reviewed_accuracy = att["acc"]["reviewed"] - subcription_request_file.save() avg_reviewed = calculate_avg_accuracy(att["acc"], "reviewed", ["retailername", "sold_to_party", "purchase_date", "imei_number"]) avg_feedback = calculate_avg_accuracy(att["acc"], "feedback", ["retailername", "sold_to_party", "purchase_date", "imei_number"]) if avg_feedback is not None or avg_reviewed is not None: avg_acc = 0 if avg_feedback is not None: avg_acc = avg_feedback + if avg_feedback < settings.NEED_REVIEW: + att["is_reviewed"] = 0 if avg_reviewed is not None: avg_acc = avg_reviewed + att["is_reviewed"] = 1 att["avg_acc"] = avg_acc - if avg_acc < BAD_THRESHOLD: + if avg_acc < settings.BAD_THRESHOLD: att["is_bad_image"] = True - # exclude bad images - # for key_name in valid_keys: - # att["acc"]["feedback"][key_name] = [] - # att["acc"]["reviewed"][key_name] = [] - # att["avg_acc"] = None return 200, att def calculate_attributions(request): # for one request, return in order @@ -969,11 +1012,11 @@ def calculate_attributions(request): # for one request, return in order avg_invoice_feedback = calculate_avg_accuracy(acc, "feedback", ["retailername", "sold_to_party", "purchase_date"]) avg_invoice_reviewed = calculate_avg_accuracy(acc, "reviewed", ["retailername", "sold_to_party", "purchase_date"]) if avg_invoice_feedback is not None or avg_invoice_reviewed is not None: - if max([x for x in [avg_invoice_feedback, avg_invoice_reviewed] if x is not None]) < BAD_THRESHOLD: + if max([x for x in [avg_invoice_feedback, avg_invoice_reviewed] if x is not None]) < settings.BAD_THRESHOLD: image_quality_num[1] += 1 for i, _ in enumerate(acc["feedback"]["imei_number"]): if acc["feedback"]["imei_number"][i] is not None and acc["reviewed"]["imei_number"][i] is not None: - if max([x for x in [acc["feedback"]["imei_number"][i], acc["reviewed"]["imei_number"][i]] if x is not None]) < BAD_THRESHOLD: + if max([x for x in [acc["feedback"]["imei_number"][i], acc["reviewed"]["imei_number"][i]] if x is not None]) < settings.BAD_THRESHOLD: image_quality_num[1] += 1 # time cost and quality calculation # TODO: to be deprecated, doc_type would be in file level in the future diff --git a/cope2n-api/fwd_api/utils/file.py b/cope2n-api/fwd_api/utils/file.py index c92a3b0..f7be5e7 100644 --- a/cope2n-api/fwd_api/utils/file.py +++ b/cope2n-api/fwd_api/utils/file.py @@ -489,7 +489,7 @@ def dict2xlsx(input: json, _type='report'): 'O': 'average_accuracy_rate.retailer_name', 'P': 'average_processing_time.imei', 'Q': 'average_processing_time.invoice', - 'R': 'preview_progress' + 'R': 'review_progress' } start_index = 5 @@ -529,13 +529,13 @@ def dict2xlsx(input: json, _type='report'): ws[key + str(start_index)] = "-" ws[key + str(start_index)].border = border ws[key + str(start_index)].font = font_black - if 'accuracy' in mapping[key] or 'time' in mapping[key] or 'percent' in mapping[key] or 'speed' in mapping[key]: + if 'accuracy' in mapping[key] or 'time' in mapping[key] or 'percent' in mapping[key] or 'speed' in mapping[key] or mapping[key] in ["review_progress"]: ws[key + str(start_index)].number_format = '0.0' if _type == 'report': if subtotal['subs'] == '+': ws[key + str(start_index)].font = font_black_bold - if key in ['A', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q']: + if key in ['A', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R']: ws[key + str(start_index)].fill = fill_gray elif key == 'B': ws[key + str(start_index)].fill = fill_green diff --git a/cope2n-api/fwd_api/utils/report.py b/cope2n-api/fwd_api/utils/report.py new file mode 100644 index 0000000..b1e4462 --- /dev/null +++ b/cope2n-api/fwd_api/utils/report.py @@ -0,0 +1,7 @@ +CAT_VALUES = { + "ALL": "ZZZZZZZZ", + "SEAO": "ZZ" +} +def aggregate_overview(overview_list): + overview_list = sorted(overview_list, key=lambda x: x["extraction_date"] + CAT_VALUES.get(x["subs"], x["subs"]), reverse=True) + return overview_list \ No newline at end of file diff --git a/cope2n-api/fwd_api/utils/sdsvkvu b/cope2n-api/fwd_api/utils/sdsvkvu index d01de31..671d791 160000 --- a/cope2n-api/fwd_api/utils/sdsvkvu +++ b/cope2n-api/fwd_api/utils/sdsvkvu @@ -1 +1 @@ -Subproject commit d01de312ab86db554ffa2f1b01396ef8d56b78ed +Subproject commit 671d7917c657ad185a06772e0b707b45fe59788a diff --git a/cope2n-api/fwd_api/utils/subsidiary.py b/cope2n-api/fwd_api/utils/subsidiary.py index d10c879..78a8b55 100644 --- a/cope2n-api/fwd_api/utils/subsidiary.py +++ b/cope2n-api/fwd_api/utils/subsidiary.py @@ -2,10 +2,10 @@ from fwd.settings import SUBS def map_subsidiary_long_to_short(long_sub): short_sub = SUBS.get(long_sub.upper(), "all") - return short_sub.upper() + return short_sub def map_subsidiary_short_to_long(short_sub): for k, v in SUBS.items(): - if v == short_sub.upper(): + if v == short_sub.upper() or v == short_sub: return k - return "ALL" \ No newline at end of file + return "SEAO" \ No newline at end of file diff --git a/cope2n-api/scripts/note.txt b/cope2n-api/scripts/note.txt new file mode 100644 index 0000000..0842f98 --- /dev/null +++ b/cope2n-api/scripts/note.txt @@ -0,0 +1,2 @@ +python manage.py migrate-csv-revert reviewed/0131-0206-Mai-.csv +python manage.py migrate-database-010224 2024-01-28T00:00:00+0700 2024-02-07T00:00:00+0700 \ No newline at end of file diff --git a/cope2n-api/scripts/script.py b/cope2n-api/scripts/script.py index a906a44..f6f0c64 100644 --- a/cope2n-api/scripts/script.py +++ b/cope2n-api/scripts/script.py @@ -5,7 +5,7 @@ from datetime import datetime # Get the proxy URL from the environment variable interval = 60*60*1 # 1 minute -update_cost = int(60*1.5) +update_cost = int(60*2) proxy_url = os.getenv('PROXY', "localhost") # Define the login API URL @@ -15,8 +15,8 @@ login_token = None # Define the login credentials login_credentials = { 'username': 'sbt', - # 'password': '7Eg4AbWIXDnufgn' - 'password': 'abc' + 'password': '7Eg4AbWIXDnufgn' + # 'password': 'abc' } # Define the command to call the update API @@ -29,7 +29,8 @@ update_data = { "report_overview_duration" -def update_report(login_token, report_overview_duration=["30d", "7d"], subsidiary=["all", "SEAU", "SESP", "SME", "SEPCO", "TSE", "SEIN"]): +# def update_report(login_token, report_overview_duration=["30d", "7d"], subsidiary=["all", "SEAU", "SESP", "SME", "SEPCO", "TSE", "SEIN"]): +def update_report(login_token, report_overview_duration=["7d", "30d"], subsidiary=["SEAO", "SEAU", "SESP", "SME", "SEPCO", "TSE", "SEIN"]): headers = {'Authorization': login_token} for dur in report_overview_duration: for sub in subsidiary: diff --git a/cope2n-fe/src/components/report-detail/report-overview-table.tsx b/cope2n-fe/src/components/report-detail/report-overview-table.tsx index f6aab1f..5a40ea6 100644 --- a/cope2n-fe/src/components/report-detail/report-overview-table.tsx +++ b/cope2n-fe/src/components/report-detail/report-overview-table.tsx @@ -21,6 +21,7 @@ interface DataType { invoiceAPT: number; snImeiTC: number; // TC: transaction count invoiceTC: number; + reviewProgress: number; } const columns: TableColumnsType = [ @@ -213,7 +214,7 @@ const columns: TableColumnsType = [ ], }, { - title: 'Average Processing Per Image (Seconds)', + title: 'Average Processing Time Per Image (Seconds)', children: [ { title: 'SN/IMEI', @@ -223,7 +224,7 @@ const columns: TableColumnsType = [ const isAbnormal = ensureMax(record.snImeiAPT, 2); return ( - {record?.snImeiAPT?.toFixed(2)} + {record?.snImeiAPT?.toFixed(1)} ); }, @@ -236,13 +237,26 @@ const columns: TableColumnsType = [ const isAbnormal = ensureMax(record.invoiceAPT, 2); return ( - {record?.invoiceAPT?.toFixed(2)} + {record?.invoiceAPT?.toFixed(1)} ); }, }, ], }, + { + title: 'Review Progress', + dataIndex: 'review_progress', + key: 'review_progress', + width: '100px', + render: (_, record) => { + return ( + + {formatPercent(record.reviewProgress)==='-'? 0:formatPercent(record.reviewProgress)} + + ); + }, + }, ]; interface ReportOverViewTableProps { @@ -275,6 +289,7 @@ const ReportOverViewTable: React.FC = ({ invoiceAPT: item.average_processing_time.invoice, snImeiTC: item.usage.imei, invoiceTC: item.usage.invoice, + reviewProgress:item.review_progress, }; }, ); diff --git a/cope2n-fe/src/components/report-detail/report-table.tsx b/cope2n-fe/src/components/report-detail/report-table.tsx index e0091be..0156358 100644 --- a/cope2n-fe/src/components/report-detail/report-table.tsx +++ b/cope2n-fe/src/components/report-detail/report-table.tsx @@ -91,7 +91,7 @@ const ReportTable: React.FC = () => { }, }, { - title: 'Purchase Date Acc', + title: 'Purchase Date Accuracy', dataIndex: 'Purchase Date Acc', key: 'Purchase Date Acc', render: (_, record) => { @@ -105,7 +105,7 @@ const ReportTable: React.FC = () => { }, { - title: 'Retailer Acc', + title: 'Retailer Accuracy', dataIndex: 'Retailer Acc', key: 'Retailer Acc', render: (_, record) => { @@ -118,7 +118,7 @@ const ReportTable: React.FC = () => { }, }, { - title: 'IMEI Acc', + title: 'IMEI Accuracy', dataIndex: 'IMEI Acc', key: 'IMEI Acc', render: (_, record) => { diff --git a/deploy_images.sh b/deploy_images.sh index 648d6ba..3b57e42 100755 --- a/deploy_images.sh +++ b/deploy_images.sh @@ -6,8 +6,8 @@ tag=$1 echo "[INFO] Tag received from Python: $tag" -echo "[INFO] Updating everything the remote..." -git submodule update --recursive --remote +# echo "[INFO] Updating everything the remote..." +# git submodule update --recursive --remote echo "[INFO] Pushing AI image with tag: $tag..." docker compose -f docker-compose-dev.yml build cope2n-fi-sbt diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index f6d8386..4de33ee 100755 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -77,7 +77,7 @@ services: networks: - ctel-sbt volumes: - # - BE_media:${MEDIA_ROOT} + - BE_media:${MEDIA_ROOT} - BE_static:/app/static - ./cope2n-api:/app working_dir: /app @@ -170,12 +170,12 @@ services: rabbitmq-sbt: condition: service_started volumes: - # - BE_media:${MEDIA_ROOT} + - BE_media:${MEDIA_ROOT} - ./cope2n-api:/app working_dir: /app - # command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5" - command: bash -c "tail -f > /dev/null" + command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5" + # command: bash -c "tail -f > /dev/null" # Back-end persistent db-sbt: diff --git a/scripts/crawl_database_by_time.py b/scripts/crawl_database_by_time.py index 4befe9a..2671181 100644 --- a/scripts/crawl_database_by_time.py +++ b/scripts/crawl_database_by_time.py @@ -10,9 +10,9 @@ from dotenv import load_dotenv load_dotenv("../.env_prod") -OUTPUT_NAME = "Jan" -START_DATE = datetime(2024, 1, 1, tzinfo=timezone('Asia/Ho_Chi_Minh')) -END_DATE = datetime(2024, 2, 1, tzinfo=timezone('Asia/Ho_Chi_Minh')) +OUTPUT_NAME = "all_0219_0226" +START_DATE = datetime(2024, 2, 19, tzinfo=timezone('Asia/Ho_Chi_Minh')) +END_DATE = datetime(2024, 2, 27, tzinfo=timezone('Asia/Ho_Chi_Minh')) # Database connection details db_host = os.environ.get('DB_HOST', "") @@ -63,31 +63,31 @@ cursor.close() conn.close() # # Download folders from S3 -# s3_client = boto3.client( -# 's3', -# aws_access_key_id=access_key, -# aws_secret_access_key=secret_key -# ) +s3_client = boto3.client( + 's3', + aws_access_key_id=access_key, + aws_secret_access_key=secret_key +) -# request_ids = [] -# for rq in data: -# rq_id = rq[3] -# request_ids.append(rq_id) +request_ids = [] +for rq in data: + rq_id = rq[3] + request_ids.append(rq_id) -# for request_id in tqdm(request_ids): -# folder_key = f"{s3_folder_prefix}/{request_id}/" # Assuming folder structure like: s3_bucket_name/s3_folder_prefix/request_id/ -# local_folder_path = f"{OUTPUT_NAME}/{request_id}/" # Path to the local folder to save the downloaded files -# os.makedirs(OUTPUT_NAME, exist_ok=True) -# os.makedirs(local_folder_path, exist_ok=True) +for request_id in tqdm(request_ids): + folder_key = f"{s3_folder_prefix}/{request_id}/" # Assuming folder structure like: s3_bucket_name/s3_folder_prefix/request_id/ + local_folder_path = f"{OUTPUT_NAME}/{request_id}/" # Path to the local folder to save the downloaded files + os.makedirs(OUTPUT_NAME, exist_ok=True) + os.makedirs(local_folder_path, exist_ok=True) -# # List objects in the S3 folder -# response = s3_client.list_objects_v2(Bucket=s3_bucket_name, Prefix=folder_key) -# objects = response.get('Contents', []) + # List objects in the S3 folder + response = s3_client.list_objects_v2(Bucket=s3_bucket_name, Prefix=folder_key) + objects = response.get('Contents', []) -# for s3_object in objects: -# object_key = s3_object['Key'] -# local_file_path = local_folder_path + object_key.split('/')[-1] # Extracting the file name from the object key + for s3_object in objects: + object_key = s3_object['Key'] + local_file_path = local_folder_path + object_key.split('/')[-1] # Extracting the file name from the object key -# # Download the S3 object to the local file -# s3_client.download_file(s3_bucket_name, object_key, local_file_path) \ No newline at end of file + # Download the S3 object to the local file + s3_client.download_file(s3_bucket_name, object_key, local_file_path) \ No newline at end of file