From 74431c3bc9e7166d593d60c192a2211a85d7ce5d Mon Sep 17 00:00:00 2001 From: TannedCung Date: Thu, 20 Jun 2024 15:02:10 +0700 Subject: [PATCH] Update: Accuracy formula --- cope2n-api/fwd/settings.py | 7 ++ .../celery_worker/process_report_tasks.py | 12 ++-- ...se-populate-redemption-id-to-subsidiary.py | 65 +++++++++++++++++++ cope2n-api/fwd_api/utils/accuracy.py | 18 +++-- 4 files changed, 87 insertions(+), 15 deletions(-) create mode 100644 cope2n-api/fwd_api/management/commands/migrate-datebase-populate-redemption-id-to-subsidiary.py diff --git a/cope2n-api/fwd/settings.py b/cope2n-api/fwd/settings.py index 34f9655..48270c6 100755 --- a/cope2n-api/fwd/settings.py +++ b/cope2n-api/fwd/settings.py @@ -238,6 +238,13 @@ SUBS = { "SEAO": "seao" } +FIELDS_BY_SUB = { + "SG": {"imei": ["imei_number"], + "invoice": ["retailername", "purchase_date"]}, + "default": {"imei": ["imei_number"], + "invoice": ["retailername", "invoice_no", "purchase_date"]}, + } + BAD_THRESHOLD = 0.75 NEED_REVIEW = 1.0 diff --git a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py index ea01557..9667d2f 100755 --- a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py +++ b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py @@ -126,11 +126,13 @@ def create_accuracy_report(report_id, **kwargs): "invoice_no": mean_list(request_att["acc"]["reviewed"].get("invoice_no", [None]))} rq_accuracy = [] - for rpf in _report_files: - if sum(len(value_list) for value_list in rpf.reviewed_accuracy.values()): - rq_accuracy += list(chain(*rpf.reviewed_accuracy.values())) - elif sum(len(value_list) for value_list in rpf.feedback_accuracy.values()): - rq_accuracy += list(chain(*rpf.feedback_accuracy.values())) + for i, _att in enumerate(_atts): + if _report_files[i].bad_image_reason in settings.ACC_EXCLUDE_RESEASONS: + continue + if sum(len(value_list) for value_list in _att["acc"]["reviewed"].values()): + rq_accuracy += 
list(chain(*_att["acc"]["reviewed"].values())) + elif sum(len(value_list) for value_list in _att["acc"]["feedback"].values()): + rq_accuracy += list(chain(*_att["acc"]["feedback"].values())) request.is_required = False if len(rq_accuracy) > 0: diff --git a/cope2n-api/fwd_api/management/commands/migrate-datebase-populate-redemption-id-to-subsidiary.py b/cope2n-api/fwd_api/management/commands/migrate-datebase-populate-redemption-id-to-subsidiary.py new file mode 100644 index 0000000..c40b4b1 --- /dev/null +++ b/cope2n-api/fwd_api/management/commands/migrate-datebase-populate-redemption-id-to-subsidiary.py @@ -0,0 +1,65 @@ +from django.core.management.base import BaseCommand +from tqdm import tqdm +from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest +from fwd_api.exception.exceptions import InvalidException +from fwd_api.utils.accuracy import predict_result_to_ready +import traceback +import copy +from django.utils import timezone + +KEY = "imei_number" +VALUE = "None" +EXPECTED_VALUE = [] + +class Command(BaseCommand): + help = 'Refactor database for image level' + + def add_arguments(self, parser): + # Add your command-line arguments here + parser.add_argument('start', type=str, help='start date, sample: 2023-01-02T00:00:00+0700') + parser.add_argument('end', type=str, help='end date, sample: 2023-01-03T00:00:00+0700') + + def process_request(self, request, result): + if len(request.request_id.split(".")[0].split("_")) < 2: + return + images = SubscriptionRequestFile.objects.filter(request=request) + if not request.predict_result: + # self.stdout.write(self.style.WARNING(f"Key predict_result not found in {request.request_id}")) + return + if request.predict_result.get("status", 200) != 200: + # self.stdout.write(self.style.WARNING(f"Not a sucess request {request.request_id}")) + return + + if isinstance(request.redemption_id, str) and request.subsidiary is not None: + try: + request.subsidiary = request.redemption_id[:2] + request.save() + 
result['total'] += 1 + result['subs'].add(request.redemption_id[:2]) + except Exception as e: + print(e) + result["failed"] += 1 + + def handle(self, *args, **options): + start = options['start'] + end = options['end'] + result = {'total':0, + 'failed':0, + 'subs': set()} + if start or end: + try: + start_date = timezone.datetime.strptime(start, '%Y-%m-%dT%H:%M:%S%z') # We care only about day precision only + end_date = timezone.datetime.strptime(end, '%Y-%m-%dT%H:%M:%S%z') + except Exception as e: + print(f"[INFO]: start: {start}") + print(f"[INFO]: end: {end}") + raise InvalidException(excArgs="Date format") + subcription_iter = SubscriptionRequest.objects.filter(created_at__range=(start_date, end_date)) + else: + subcription_iter = SubscriptionRequest.objects.all() + + # file = open("modified.txt", "w") + for request in tqdm(subcription_iter.iterator()): + self.process_request(request, result) + # file.close() + self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully! 
total/failed: {}/{} - subs: {}'.format(result['total'], result['failed'], result['subs']))) \ No newline at end of file diff --git a/cope2n-api/fwd_api/utils/accuracy.py b/cope2n-api/fwd_api/utils/accuracy.py index 8e23fbd..8329894 100755 --- a/cope2n-api/fwd_api/utils/accuracy.py +++ b/cope2n-api/fwd_api/utils/accuracy.py @@ -19,8 +19,6 @@ import json from typing import Union, List, Dict VALID_KEYS = ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"] -KEYS_BY_FILE_TYPE = {"imei": ["imei_number"], - "invoice": ["retailername", "invoice_no", "purchase_date"]} OPTIONAL_KEYS = ['invoice_no'] class ReportAccumulateByRequest: @@ -776,7 +774,7 @@ def calculate_accuracy(key_name: str, inference: Dict[str, Union[str, List]], ta _inference = inference[key_name] _target = target[key_name] - _will_acc_be_ignored = _acc_will_be_ignored(key_name, _target) + # _will_acc_be_ignored = _acc_will_be_ignored(key_name, _target) _inference, _target = _accuracy_calculate_formatter(_inference, _target) for i, v in enumerate(_inference): @@ -793,8 +791,7 @@ def calculate_accuracy(key_name: str, inference: Dict[str, Union[str, List]], ta # "line_acc", # "one_minus_ned_word", ]) - if not _will_acc_be_ignored: - acc.append(list(score.values())[0]) + acc.append(list(score.values())[0]) data.append([x, y]) return acc, data @@ -908,12 +905,11 @@ def calculate_a_request(report, request): images = SubscriptionRequestFile.objects.filter(request=request, file_category=FileCategory.Origin.value) report_files = [] for image in images: - status, att = calculate_subcription_file(image) - atts.append(att) + status, att = calculate_subcription_file(image, request.subsidiary) att["acc"]["feedback"], fb_max_indexes = acc_maximize_list_values(att["acc"]["feedback"]) att["acc"]["reviewed"], rv_max_indexes = acc_maximize_list_values(att["acc"]["reviewed"]) - _att = copy.deepcopy(att) + _att = copy.deepcopy(att) # deep copy right here to advoid removing acc for bad images in the 
next steps fb_avg_acc = avg_dict(att["acc"]["feedback"]) rv_avg_acc = avg_dict(att["acc"]["reviewed"]) @@ -924,6 +920,8 @@ def calculate_a_request(report, request): continue if status != 200: continue + + atts.append(att) image.feedback_accuracy = att["acc"]["feedback"] # dict {key: [values]} image.is_bad_image_quality = att["is_bad_image"] # is_bad_image=avg_acc