From c3114534fcc25ecaf1cfcd6de80acf47868ad44b Mon Sep 17 00:00:00 2001 From: TannedCung Date: Mon, 17 Jun 2024 19:05:20 +0700 Subject: [PATCH] Fix: misleading avg, update accuracy formula --- cope2n-api/fwd_api/utils/accuracy.py | 43 +++++++++++++++++++--------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/cope2n-api/fwd_api/utils/accuracy.py b/cope2n-api/fwd_api/utils/accuracy.py index ef88970..8e23fbd 100755 --- a/cope2n-api/fwd_api/utils/accuracy.py +++ b/cope2n-api/fwd_api/utils/accuracy.py @@ -18,8 +18,10 @@ from ..models import SubscriptionRequest, Report, ReportFile import json from typing import Union, List, Dict -valid_keys = ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"] -optional_keys = ['invoice_no'] +VALID_KEYS = ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"] +KEYS_BY_FILE_TYPE = {"imei": ["imei_number"], + "invoice": ["retailername", "invoice_no", "purchase_date"]} +OPTIONAL_KEYS = ['invoice_no'] class ReportAccumulateByRequest: def __init__(self, sub): @@ -123,6 +125,7 @@ class ReportAccumulateByRequest: "review_progress": [] }, self.report = copy.deepcopy(self.month_format) + self.report["average_accuracy_rate"]["avg"] = IterAvg() @staticmethod def update_total(total, report_file): @@ -142,8 +145,10 @@ class ReportAccumulateByRequest: for key in settings.FIELD: if sum([len(report_file.reviewed_accuracy[x]) for x in report_file.reviewed_accuracy.keys() if "_count" not in x]) > 0 : total["average_accuracy_rate"][key].add(report_file.reviewed_accuracy.get(key, [])) + total["average_accuracy_rate"]['avg'].add(report_file.reviewed_accuracy.get(key, [])) elif sum([len(report_file.feedback_accuracy[x]) for x in report_file.feedback_accuracy.keys() if "_count" not in x]) > 0: total["average_accuracy_rate"][key].add(report_file.feedback_accuracy.get(key, [])) + total["average_accuracy_rate"]['avg'].add(report_file.feedback_accuracy.get(key, [])) total["feedback_accuracy"][key].add(report_file.feedback_accuracy.get(key, [])) total["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, [])) @@ -346,17 +351,17 @@ class ReportAccumulateByRequest: for key in _report["average_processing_time"].keys(): _report["average_processing_time"][key] = _report["average_processing_time"][key]() - avg_acc = 0 - count_acc = 0 + # avg_acc = 0 + # count_acc = 0 for key in settings.FIELD: _report["average_accuracy_rate"][key] = _report["average_accuracy_rate"][key]() for accuracy_type in ["feedback_accuracy", "reviewed_accuracy"]: - if (_report[accuracy_type][key].count + count_acc) > 0: - avg_acc = (avg_acc*count_acc + _report[accuracy_type][key].avg*_report[accuracy_type][key].count) / (_report[accuracy_type][key].count + count_acc) - count_acc += _report[accuracy_type][key].count + # if (_report[accuracy_type][key].count + count_acc) > 0: + # avg_acc = (avg_acc*count_acc + _report[accuracy_type][key].avg*_report[accuracy_type][key].count) / (_report[accuracy_type][key].count + count_acc) + # count_acc += _report[accuracy_type][key].count _report[accuracy_type][key] = _report[accuracy_type][key]() - _report["average_accuracy_rate"]["avg"] = avg_acc + _report["average_accuracy_rate"]["avg"] = _report["average_accuracy_rate"]["avg"]() _report["review_progress"] = _report["review_progress"].count(1)/(_report["review_progress"].count(0)+ _report["review_progress"].count(1)) if (_report["review_progress"].count(0)+ _report["review_progress"].count(1)) >0 else 0 _report["images_quality"]["successful_percent"] = _report["images_quality"]["successful"]/_report["total_images"] if _report["total_images"] > 0 else 0 @@ -734,9 +739,11 @@ def _accuracy_calculate_formatter(inference, target): Make both list inference and target to be the same length. """ if not isinstance(inference, list): - inference = [] if inference is None else [inference] + # inference = [] if inference is None else [inference] + inference = [inference] if not isinstance(target, list): - target = [] if target is None else [target] + # target = [] if target is None else [target] + target = [target] length = max(len(target), len(inference)) target = target + (length - len(target))*[target[0]] if len(target) > 0 else target + (length - len(target))*[None] @@ -745,7 +752,7 @@ def _accuracy_calculate_formatter(inference, target): return inference, target def _acc_will_be_ignored(key_name, _target): - is_optional_key = key_name in optional_keys + is_optional_key = key_name in OPTIONAL_KEYS is_empty_target = _target in [[], None, ''] if is_optional_key and is_empty_target: return True @@ -1043,7 +1050,15 @@ def calculate_subcription_file(subcription_request_file): feedback_result = copy.deepcopy(subcription_request_file.feedback_result) reviewed_result = copy.deepcopy(subcription_request_file.reviewed_result) - for key_name in valid_keys: + accuracy_keys_for_this_image = KEYS_BY_FILE_TYPE.get(subcription_request_file.doc_type, []) + + for key_name in VALID_KEYS: + att["acc"]["feedback"][key_name] = [] + att["normalized_data"]["feedback"][key_name] = [] + att["acc"]["reviewed"][key_name] = [] + att["normalized_data"]["reviewed"][key_name] = [] + + for key_name in accuracy_keys_for_this_image: try: att["acc"]["feedback"][key_name], att["normalized_data"]["feedback"][key_name] = calculate_accuracy(key_name, inference_result, feedback_result, "feedback", sub=subcription_request_file.request.subsidiary) att["acc"]["reviewed"][key_name], att["normalized_data"]["reviewed"][key_name] = calculate_accuracy(key_name, inference_result, reviewed_result, "reviewed", sub=subcription_request_file.request.subsidiary) @@ -1052,8 +1067,8 @@ def calculate_subcription_file(subcription_request_file): subcription_request_file.feedback_accuracy = att["acc"]["feedback"] subcription_request_file.reviewed_accuracy = att["acc"]["reviewed"] - avg_reviewed = calculate_avg_accuracy(att["acc"], "reviewed", valid_keys) - avg_feedback = calculate_avg_accuracy(att["acc"], "feedback", valid_keys) + avg_reviewed = calculate_avg_accuracy(att["acc"], "reviewed", VALID_KEYS) + avg_feedback = calculate_avg_accuracy(att["acc"], "feedback", VALID_KEYS) if avg_feedback is not None or avg_reviewed is not None: avg_acc = 0