From 3eb6ab61a9fa75adbb6166cb9aecd278688e32c0 Mon Sep 17 00:00:00 2001 From: dx-tan Date: Fri, 23 Feb 2024 03:58:10 +0700 Subject: [PATCH] Update: issues on 23 Feb --- cope2n-api/fwd/settings.py | 9 +- cope2n-api/fwd_api/api/accuracy_view.py | 90 ++++++++++++++++++- .../fwd_api/celery_worker/internal_task.py | 9 +- .../celery_worker/process_report_tasks.py | 2 + .../management/commands/migrate-csv.py | 61 ++----------- cope2n-api/fwd_api/utils/accuracy.py | 77 +++++++++++----- cope2n-api/fwd_api/utils/report.py | 6 ++ 7 files changed, 169 insertions(+), 85 deletions(-) create mode 100644 cope2n-api/fwd_api/utils/report.py diff --git a/cope2n-api/fwd/settings.py b/cope2n-api/fwd/settings.py index 9bdaf69..39860be 100755 --- a/cope2n-api/fwd/settings.py +++ b/cope2n-api/fwd/settings.py @@ -224,7 +224,7 @@ OVERVIEW_REFRESH_INTERVAL = 2 OVERVIEW_REPORT_ROOT = "overview" OVERVIEW_REPORT_DURATION = ["30d", "7d"] -ACC_EXCLUDE_RESEASONS = ["Invalid Input", "Handwritten information", "handwritten"] +ACC_EXCLUDE_RESEASONS = ["Invalid Input", "Handwritten information", "handwritten", "invalid_image", "missing_information", "too_blurry_text", "too_small_text"] SUBS = { "SEAU": "AU", @@ -233,13 +233,12 @@ SUBS = { "SEPCO": "PH", "TSE": "TH", "SEIN": "ID", - "ALL": "all" + "ALL": "all", # all_detail + "ALL_SUMARY": "ALL_SUMARY" } CACHES = { 'default': { 'BACKEND': 'django.core.cache.backends.dummy.DummyCache', } -} - - +} \ No newline at end of file diff --git a/cope2n-api/fwd_api/api/accuracy_view.py b/cope2n-api/fwd_api/api/accuracy_view.py index db6f0cd..ea08187 100644 --- a/cope2n-api/fwd_api/api/accuracy_view.py +++ b/cope2n-api/fwd_api/api/accuracy_view.py @@ -7,6 +7,7 @@ from django.utils import timezone from django.db.models import Q import uuid import os +import copy import pytz from fwd import settings from drf_spectacular.utils import extend_schema, OpenApiParameter, OpenApiTypes @@ -15,11 +16,12 @@ import json from ..exception.exceptions import InvalidException, RequiredFieldException, NotFoundException from ..models import SubscriptionRequest, Report, ReportFile, SubscriptionRequestFile from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list, extract_report_detail_list, IterAvg -from ..utils.file import download_from_S3, convert_date_string, build_media_url_v2, build_url +from ..utils.file import download_from_S3, convert_date_string, build_media_url_v2, build_url, dict2xlsx, save_report_to_S3 from ..utils.redis import RedisUtils from ..utils.process import string_to_boolean from ..request.ReportCreationSerializer import ReportCreationSerializer from ..utils.subsidiary import map_subsidiary_long_to_short, map_subsidiary_short_to_long +from ..utils.report import aggregate_overview redis_client = RedisUtils() @@ -454,8 +456,8 @@ class AccuracyViewSet(viewsets.ViewSet): ], responses=None, tags=['Accuracy'] ) - @action(detail=False, url_path="overview", methods=["GET"]) - def overview(self, request): + @action(detail=False, url_path="overview_sumary", methods=["GET"]) + def overview_sumary(self, request): if request.method == 'GET': _subsidiary = request.GET.get('subsidiary', "ALL") duration = request.GET.get('duration', "") @@ -471,6 +473,88 @@ class AccuracyViewSet(viewsets.ViewSet): return JsonResponse(response, status=200) return JsonResponse({'error': 'Invalid request method.'}, status=405) + + @extend_schema( + parameters=[ + OpenApiParameter( + name='duration', + location=OpenApiParameter.QUERY, + description='one of [30d, 7d]', + type=OpenApiTypes.STR, + 
default='30d',
+            ),
+            OpenApiParameter(
+                name='subsidiary',
+                location=OpenApiParameter.QUERY,
+                description='Subsidiary',
+                type=OpenApiTypes.STR,
+            )
+        ],
+        responses=None, tags=['Accuracy']
+    )
+    @action(detail=False, url_path="overview", methods=["GET"])
+    def overview(self, request):
+        if request.method == 'GET':
+            _subsidiary = request.GET.get('subsidiary', "ALL")
+            duration = request.GET.get('duration', "")
+
+            subsidiary = map_subsidiary_long_to_short(_subsidiary)
+
+            if _subsidiary == "ALL":
+                # aggregate_overview from subsidiaries
+                subsidiaries_to_include = list(settings.SUBS.values())
+                subsidiaries_to_include.remove("all")
+                subsidiary_overview_reports = []
+                for sub in subsidiaries_to_include:
+                    key = f"{sub}_{duration}"
+                    try:
+                        this_overview = json.loads(redis_client.get_specific_cache(settings.OVERVIEW_REPORT_ROOT, key)).get("data", [])
+                        if sub != "ALL_SUMARY":
+                            this_overview = [d for d in this_overview if d.get("subs") != "+"]
+                        else:
+                            for item in this_overview:
+                                if item.get("subs") == "+":
+                                    item["extraction_date"] = item["extraction_date"].replace("Subtotal ", "").replace("(", "").replace(")", "") + "-32"
+                        subsidiary_overview_reports += this_overview
+
+                    except Exception as e:
+                        print(f"[WARN]: Unable to retrieve data {key} from Redis, skipping...")
+                data = aggregate_overview(subsidiary_overview_reports)
+                for item in data:
+                    if item.get("subs") == "+":
+                        item["extraction_date"] = "Subtotal (" + item["extraction_date"].replace("-32", "") + ")"
+                # Do the saving process
+                report_fine_data = copy.deepcopy(data)
+                for i, dat in enumerate(report_fine_data):
+                    keys = [x for x in list(dat.keys()) if "accuracy" in x.lower()]
+                    keys_percent = "images_quality"
+                    for x_key in report_fine_data[i][keys_percent].keys():
+                        if "percent" not in x_key:
+                            continue
+                        report_fine_data[i][keys_percent][x_key] = report_fine_data[i][keys_percent][x_key]*100
+                    for key in keys:
+                        if report_fine_data[i][key]:
+                            for x_key in report_fine_data[i][key].keys():
+                                report_fine_data[i][key][x_key] = report_fine_data[i][key][x_key]*100
+                overview_filename = _subsidiary + "_" + duration + ".xlsx"
+                data_workbook = dict2xlsx(report_fine_data, _type='report')
+
+                folder_path = os.path.join(settings.MEDIA_ROOT, "report", settings.OVERVIEW_REPORT_ROOT)
+                os.makedirs(folder_path, exist_ok = True)
+                file_path = os.path.join(folder_path, overview_filename)
+                data_workbook.save(file_path)
+
+                s3_key=save_report_to_S3(None, file_path)
+                # redis_client.set_cache(settings.OVERVIEW_REPORT_ROOT, overview_filename.replace(".xlsx", ""), json.dumps(save_data))
+            else:
+                # Retrieve data from Redis
+                key = f"{subsidiary}_{duration}"
+                data = json.loads(redis_client.get_specific_cache(settings.OVERVIEW_REPORT_ROOT, key)).get("data", [])
+            response = {
+                'overview_data': data,
+            }
+            return JsonResponse(response, status=200)
+        return JsonResponse({'error': 'Invalid request method.'}, status=405)

     @extend_schema(
         parameters=[
diff --git a/cope2n-api/fwd_api/celery_worker/internal_task.py b/cope2n-api/fwd_api/celery_worker/internal_task.py
index 07de96a..91953ae 100755
--- a/cope2n-api/fwd_api/celery_worker/internal_task.py
+++ b/cope2n-api/fwd_api/celery_worker/internal_task.py
@@ -264,10 +264,11 @@ def upload_report_to_s3(local_file_path, s3_key, report_id, delay):
     try:
         time.sleep(delay)
         s3_client.upload_file(local_file_path, s3_key)
-        report = Report.objects.filter(report_id=report_id)[0]
-        report.S3_uploaded = True
-        report.S3_file_name = s3_key
-        report.save()
+        if report_id:
+            report = 
Report.objects.filter(report_id=report_id)[0] + report.S3_uploaded = True + report.S3_file_name = s3_key + report.save() except Exception as e: logger.error(f"Unable to set S3: {e}") print(f"Unable to set S3: {e}") diff --git a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py index 6755432..0e1e710 100644 --- a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py +++ b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py @@ -304,6 +304,8 @@ def make_a_report_2(report_id, query_set): for x_key in report_fine_data[i][key].keys(): report_fine_data[i][key][x_key] = report_fine_data[i][key][x_key]*100 data_workbook = dict2xlsx(report_fine_data, _type='report') + if query_set["subsidiary"] == "ALL": + query_set["subsidiary"] = "ALL_SUMARY" overview_filename = query_set["subsidiary"] + "_" + query_set["report_overview_duration"] + ".xlsx" local_workbook = save_workbook_file(overview_filename, report, data_workbook, settings.OVERVIEW_REPORT_ROOT) s3_key=save_report_to_S3(report.report_id, local_workbook) diff --git a/cope2n-api/fwd_api/management/commands/migrate-csv.py b/cope2n-api/fwd_api/management/commands/migrate-csv.py index ddb63d1..279bb59 100644 --- a/cope2n-api/fwd_api/management/commands/migrate-csv.py +++ b/cope2n-api/fwd_api/management/commands/migrate-csv.py @@ -14,21 +14,11 @@ class Command(BaseCommand): # Add your command-line arguments here parser.add_argument('test', type=str, help='Value for the argument') - def process_request(self, request, predict_result, user_feedback, reviewed_result): + def process_request(self, request, predict_result, user_feedback, reviewed_result, reason): if len(request.request_id.split(".")[0].split("_")) < 2: return - - request_feedback = copy.deepcopy(request.feedback_result) + request_review = copy.deepcopy(request.reviewed_result) - - if not request_feedback: - request_feedback = { - "request_id": request.request_id, - "imei_number": [], - "retailername": "", - "purchase_date": "", - "sold_to_party": "" - } if not request_review: request_review = { @@ -53,74 +43,40 @@ class Command(BaseCommand): is_match = True if field == 'imei_number': - if not reviewed_result in request_review["imei_number"]: + if not (reviewed_result in request_review["imei_number"]): request_review["imei_number"].append(reviewed_result) - if not user_feedback in request_feedback["imei_number"]: - request_feedback["imei_number"].append(user_feedback) else: if not reviewed_result == request_review[field]: request_review[field] = reviewed_result - if not user_feedback == request_feedback[field]: - request_feedback[field] = user_feedback - _predict_result = copy.deepcopy(predict_result_to_ready(request.predict_result)) - _feedback_result = copy.deepcopy(request.feedback_result) _reviewed_result = copy.deepcopy(request.reviewed_result) - if not _feedback_result: - _feedback_result = { - "imei_number": [], - "retailername": "", - "purchase_date": "", - "sold_to_party": "" - } if not _reviewed_result: _reviewed_result = { + "request_id": image.request_id, "imei_number": [], "retailername": "", "purchase_date": "", "sold_to_party": "" } - if image.doc_type == "invoice": - _predict_result[field] = predict_result - _predict_result["imei_number"] = [] - if _feedback_result: - _feedback_result[field] = user_feedback - _feedback_result["imei_number"] = [] - else: - None + if image.doc_type == "invoice" and field in ['retailername', 'purchase_date']: if _reviewed_result: _reviewed_result[field] = reviewed_result 
_reviewed_result["imei_number"] = [] else: None - else: - _predict_result = { - "retailername": None, - "sold_to_party": None, - "purchase_date": [], - "imei_number": [predict_result] - } - _feedback_result = { - "retailername": None, - "sold_to_party": None, - "purchase_date": None, - "imei_number": [user_feedback] - } if _feedback_result else None + elif image.doc_type == "imei" and field == "imei_number": _reviewed_result = { "retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [reviewed_result] } if _reviewed_result else None - image.predict_result = _predict_result - image.feedback_result = _feedback_result image.reviewed_result = _reviewed_result + image.reason = reason image.save() - request.feedback_result = request_feedback request.reviewed_result = request_review - request.feedback_result["request_id"] = request.request_id request.reviewed_result["request_id"] = request.request_id request.is_reviewed = True request.save() @@ -144,7 +100,8 @@ class Command(BaseCommand): if not request: print("Not found ====>", row) else: - self.process_request(request, row[3], row[2], row[4]) + # request, predict_result, user_feedback, reviewed_result + self.process_request(request, row[3], row[2], row[4], row[8]) index += 1 self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!')) diff --git a/cope2n-api/fwd_api/utils/accuracy.py b/cope2n-api/fwd_api/utils/accuracy.py index 36a1e27..cd460a0 100644 --- a/cope2n-api/fwd_api/utils/accuracy.py +++ b/cope2n-api/fwd_api/utils/accuracy.py @@ -212,7 +212,14 @@ class ReportAccumulateByRequest: self.data[this_month][1][this_day]['num_request'] += 1 self.data[this_month][0]['num_request'] += 1 + for report_file in report_files: + if report_file.is_bad_image or report_file.bad_image_reason in settings.ACC_EXCLUDE_RESEASONS: + report_file.acc = None + for t in report_file.feedback_accuracy.keys(): + report_file.feedback_accuracy[t] = [] + for t in report_file.reviewed_accuracy.keys(): + report_file.reviewed_accuracy[t] = [] self.data[this_month][0] = self.update_total(self.data[this_month][0], report_file) # Update the subtotal within the month self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], report_file) # Update the subtotal of the day @@ -770,11 +777,18 @@ def calculate_and_save_subcription_file(report, request): return request_att +# def result_maximize_list_values(result, acc): +# for k in acc.keys(): +# if isinstance(acc[k], list) and len(acc[k]) > 0: + def acc_maximize_list_values(acc): + pos = {} for k in acc.keys(): + pos[k] = 0 if isinstance(acc[k], list) and len(acc[k]) > 0: acc[k] = [max(acc[k])] - return acc + pos[k] = acc[k].index(acc[k][0]) + return acc, pos def calculate_a_request(report, request): request_att = {"acc": {"feedback": {"imei_number": [], @@ -793,7 +807,8 @@ def calculate_a_request(report, request): "sold_to_party": [], }}, "err": [], - "time_cost": {}, + "time_cost": {"imei": [], + "invoice": []}, "total_images": 0, "bad_images": 0, "bad_image_list": [], @@ -802,6 +817,13 @@ def calculate_a_request(report, request): report_files = [] for image in images: status, att = calculate_subcription_file(image) + + att["acc"]["feedback"], fb_max_indexes = acc_maximize_list_values(att["acc"]["feedback"]) + att["acc"]["reviewed"], rv_max_indexes = acc_maximize_list_values(att["acc"]["reviewed"]) + + + _att = copy.deepcopy(att) + if status != 200: continue image.feedback_accuracy = att["acc"]["feedback"] # dict {key: [values]} @@ 
-818,6 +840,14 @@ def calculate_a_request(report, request): _sub = map_subsidiary_short_to_long(request.redemption_id[:2]) else: print(f"[WARM]: empty redemption_id, check request: {request.request_id}") + + # Little trick to replace purchase date to normalized + if len(att["normalized_data"]["feedback"].get("purchase_date", [])) > 0: + image.predict_result["purchase_date"] = [att["normalized_data"]["feedback"]["purchase_date"][i][0] for i in range(len(att["normalized_data"]["feedback"]["purchase_date"]))] + image.feedback_result["purchase_date"] = att["normalized_data"]["feedback"]["purchase_date"][fb_max_indexes["purchase_date"]][1] + if len(att["normalized_data"]["reviewed"].get("purchase_date", [])) > 0: + image.predict_result["purchase_date"] = [att["normalized_data"]["reviewed"]["purchase_date"][i][0] for i in range(len(att["normalized_data"]["reviewed"]["purchase_date"]))] + image.reviewed_result["purchase_date"] = att["normalized_data"]["reviewed"]["purchase_date"][rv_max_indexes["purchase_date"]][1] new_report_file = ReportFile(report=report, subsidiary=_sub, correspond_request_id=request.request_id, @@ -826,8 +856,8 @@ def calculate_a_request(report, request): predict_result=image.predict_result, feedback_result=image.feedback_result, reviewed_result=image.reviewed_result, - feedback_accuracy=acc_maximize_list_values(att["acc"]["feedback"]), - reviewed_accuracy=acc_maximize_list_values(att["acc"]["reviewed"]), + feedback_accuracy=att["acc"]["feedback"], + reviewed_accuracy=att["acc"]["reviewed"], acc=att["avg_acc"], is_bad_image=att["is_bad_image"], is_reviewed="Yes" if request.is_reviewed else "No", @@ -837,20 +867,22 @@ def calculate_a_request(report, request): error="|".join(att["err"]) ) report_files.append(new_report_file) - _att = copy.deepcopy(att) + if att["is_bad_image"] or image.reason in settings.ACC_EXCLUDE_RESEASONS: - request_att["bad_image_list"].append(image.file_name) + if att["is_bad_image"]: + request_att["bad_image_list"].append(image.file_name) + # if image.reason in settings.ACC_EXCLUDE_RESEASONS: + # print(f"[DEBUG]: {image.reason}") # TODO: Exclude bad image accuracy from average accuracy _att["avg_acc"] = None - for t in ["feedback", "reviewed"]: - for k in ["imei_number", "purchase_date", "retailername", "sold_to_party"]: + for t in _att["acc"].keys(): + for k in _att["acc"][t].keys(): _att["acc"][t][k] = [] - - - if request_att["time_cost"].get(image.doc_type, None): - request_att["time_cost"][image.doc_type].append(image.processing_time) else: - request_att["time_cost"][image.doc_type] = [image.processing_time] + if request_att["time_cost"].get(image.doc_type, None): + request_att["time_cost"][image.doc_type].append(image.processing_time) + else: + request_att["time_cost"][image.doc_type] = [image.processing_time] try: request_att["acc"]["feedback"]["imei_number"] += _att["acc"]["feedback"]["imei_number"] @@ -863,13 +895,14 @@ def calculate_a_request(report, request): request_att["acc"]["reviewed"]["retailername"] += _att["acc"]["reviewed"]["retailername"] request_att["acc"]["reviewed"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] - request_att["acc"]["acumulated"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if att["acc"]["reviewed"]["imei_number"] else att["acc"]["feedback"]["imei_number"] - request_att["acc"]["acumulated"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if att["acc"]["reviewed"]["purchase_date"] else att["acc"]["feedback"]["purchase_date"] - request_att["acc"]["acumulated"]["retailername"] += 
_att["acc"]["reviewed"]["retailername"] if att["acc"]["reviewed"]["retailername"] else att["acc"]["feedback"]["retailername"] - request_att["acc"]["acumulated"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if att["acc"]["reviewed"]["sold_to_party"] else att["acc"]["feedback"]["sold_to_party"] + request_att["acc"]["acumulated"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if _att["acc"]["reviewed"]["imei_number"] else _att["acc"]["feedback"]["imei_number"] + request_att["acc"]["acumulated"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if _att["acc"]["reviewed"]["purchase_date"] else _att["acc"]["feedback"]["purchase_date"] + request_att["acc"]["acumulated"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if _att["acc"]["reviewed"]["retailername"] else _att["acc"]["feedback"]["retailername"] + request_att["acc"]["acumulated"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if _att["acc"]["reviewed"]["sold_to_party"] else _att["acc"]["feedback"]["sold_to_party"] - request_att["bad_images"] += int(_att["is_bad_image"]) - request_att["total_images"] += 1 + if image.reason not in settings.ACC_EXCLUDE_RESEASONS: + request_att["bad_images"] += int(_att["is_bad_image"]) + request_att["total_images"] += 1 request_att["err"] += _att["err"] except Exception as e: print(f"[ERROR]: failed to calculate request: {request.request_id} - request_file: {image.file_name} because of {e}") @@ -880,6 +913,8 @@ def calculate_a_request(report, request): def calculate_subcription_file(subcription_request_file): att = {"acc": {"feedback": {}, "reviewed": {}}, + "normalized_data": {"feedback": {}, + "reviewed": {}}, "err": [], "is_bad_image": False, "avg_acc": None} @@ -896,8 +931,8 @@ def calculate_subcription_file(subcription_request_file): for key_name in valid_keys: try: - att["acc"]["feedback"][key_name], _ = calculate_accuracy(key_name, inference_result, feedback_result) - att["acc"]["reviewed"][key_name], _ = calculate_accuracy(key_name, inference_result, reviewed_result) + att["acc"]["feedback"][key_name], att["normalized_data"]["feedback"][key_name] = calculate_accuracy(key_name, inference_result, feedback_result) + att["acc"]["reviewed"][key_name], att["normalized_data"]["reviewed"][key_name] = calculate_accuracy(key_name, inference_result, reviewed_result) except Exception as e: att["err"].append(str(e)) # print(f"[DEBUG]: predict_result: {subcription_request_file.predict_result}") diff --git a/cope2n-api/fwd_api/utils/report.py b/cope2n-api/fwd_api/utils/report.py new file mode 100644 index 0000000..fcc2a37 --- /dev/null +++ b/cope2n-api/fwd_api/utils/report.py @@ -0,0 +1,6 @@ +CAT_VALUES = { + "ALL": "ZZZZZZZZ", +} +def aggregate_overview(overview_list): + overview_list = sorted(overview_list, key=lambda x: x["extraction_date"] + CAT_VALUES.get(x["subs"], x["subs"]), reverse=True) + return overview_list \ No newline at end of file