This commit is contained in:
PhanThanhTrung 2024-04-04 13:58:16 +07:00
parent b44f593430
commit 9b253c3352

View File

@ -16,8 +16,10 @@ import redis
from fwd import settings from fwd import settings
from ..models import SubscriptionRequest, Report, ReportFile from ..models import SubscriptionRequest, Report, ReportFile
import json import json
from typing import Union, List, Dict
valid_keys = ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"] valid_keys = ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"]
optional_keys = ['invoice_no']
class ReportAccumulateByRequest: class ReportAccumulateByRequest:
def __init__(self, sub): def __init__(self, sub):
@ -533,6 +535,13 @@ def first_of_list(the_list):
return None return None
return the_list[0] return the_list[0]
def _feedback_invoice_no_exist(feedback_result):
invoice_no = feedback_result.get("invoice_no", None)
if invoice_no in ["", [], None]:
return False
else:
return True
def extract_report_detail_list(report_detail_list, lower=False, in_percent=True): def extract_report_detail_list(report_detail_list, lower=False, in_percent=True):
data = [] data = []
for report_file in report_detail_list: for report_file in report_detail_list:
@ -549,7 +558,7 @@ def extract_report_detail_list(report_detail_list, lower=False, in_percent=True)
"Invoice_Number_User": report_file.feedback_result.get("invoice_no", None) if report_file.feedback_result else None, "Invoice_Number_User": report_file.feedback_result.get("invoice_no", None) if report_file.feedback_result else None,
"Invoice_Number_OCR": report_file.predict_result.get("invoice_no", None), "Invoice_Number_OCR": report_file.predict_result.get("invoice_no", None),
"Invoice_Number Revised": report_file.reviewed_result.get("invoice_no", None) if report_file.reviewed_result else None, "Invoice_Number Revised": report_file.reviewed_result.get("invoice_no", None) if report_file.reviewed_result else None,
"Invoice_Number_Accuracy": first_of_list(report_file.feedback_accuracy.get("invoice_no", [None])), "Invoice_Number_Accuracy": first_of_list(report_file.feedback_accuracy.get("invoice_no", [None])) if _feedback_invoice_no_exist(report_file.feedback_result) else None,
"Invoice_Purchase Date_Consumer": report_file.feedback_result.get("purchase_date", None) if report_file.feedback_result else None, "Invoice_Purchase Date_Consumer": report_file.feedback_result.get("purchase_date", None) if report_file.feedback_result else None,
"Invoice_Purchase Date_OCR": format_purchase_date_ocr_for_report(report_file.predict_result.get("purchase_date", [])), "Invoice_Purchase Date_OCR": format_purchase_date_ocr_for_report(report_file.predict_result.get("purchase_date", [])),
"Invoice_Purchase Date Revised": report_file.reviewed_result.get("purchase_date", None) if report_file.reviewed_result else None, "Invoice_Purchase Date Revised": report_file.reviewed_result.get("purchase_date", None) if report_file.reviewed_result else None,
@ -644,57 +653,60 @@ def predict_result_to_ready(result):
dict_result["invoice_no"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}, {}])[4].get("value", None) dict_result["invoice_no"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}, {}])[4].get("value", None)
return dict_result return dict_result
def align_fine_result(ready_predict, fine_result):
    """Align an OCR prediction with a feedback/reviewed result, in place.

    Both dicts are mutated and returned, so callers should pass copies.

    Args:
        ready_predict (dict): prediction with keys such as ``purchase_date``,
            ``retailername`` and ``invoice_no``.
        fine_result (dict | None): feedback or reviewed result. When falsy,
            nothing is changed.

    Returns:
        tuple: ``(ready_predict, fine_result)`` after alignment.
    """
    if not fine_result:
        return ready_predict, fine_result

    # ``.get`` is used throughout so that a missing key (or a ``None``
    # prediction) is treated as "no value" instead of raising.
    fine_date = fine_result.get("purchase_date")
    predicted_dates = ready_predict.get("purchase_date") or []

    # The fine result has a value the prediction lacks: insert a placeholder
    # so both sides expose one slot for that key.
    if fine_date and len(predicted_dates) == 0:
        ready_predict["purchase_date"] = predicted_dates = [None]
    if fine_result.get("retailername") and not ready_predict.get("retailername"):
        ready_predict["retailername"] = [None]
    if ready_predict.get("invoice_no", None) and not fine_result.get("invoice_no", None):
        fine_result["invoice_no"] = [None]

    # Repeat the single fine purchase date once per predicted candidate.
    fine_result["purchase_date"] = [fine_date] * len(predicted_dates)
    return ready_predict, fine_result
def update_temp_accuracy(accuracy, acc, keys):
    """Fold one set of per-key scores into the running accumulators.

    Args:
        accuracy: mapping from key to an accumulator exposing ``add``.
        acc: mapping from key to the value to add.
        keys: iterable of keys to update.

    Returns:
        The same ``accuracy`` mapping, updated in place.
    """
    for field in keys:
        bucket = accuracy[field]
        bucket.add(acc[field])
    return accuracy
def calculate_accuracy(key_name, inference, target): def _accuracy_calculate_formatter(inference, target):
"""_summary_ """_summary_
format type of inference, and target from str/None to List of str/None.
Make both list inference and target to be the same length.
"""
if not isinstance(inference, list):
inference = [] if inference is None else [inference]
if not isinstance(target, list):
target = [] if target is None else [target]
length = max(len(target), len(inference))
target = target + (length - len(target))*[None]
inference = inference + (length - len(inference))*[None]
return inference, target
def _acc_will_be_ignored(key_name, _target, type):
is_optional_key = key_name in optional_keys
is_empty_target = _target in [[], None, '']
if is_optional_key and is_empty_target and type == 'feedback':
return True
else:
return False
def calculate_accuracy(key_name: str, inference: Dict[str, Union[str, List]], target: Dict[str, Union[str, List]], type: str):
"""_summary_
NOTE: This has been changed to return accuracy = None if
Args: Args:
key_name (string): key to calculate accuracy on, ex: retailername key_name (string): key to calculate accuracy on, ex: retailername
inference (dict): result from ocr, refined to align with the target down below inference (dict): result from ocr, refined to align with the target down below
target (dict): result of type target (dict): result of type
is_optional_keyname: default is set to False (which mean this is not an optional keyname)
currently we have invoice_no is an optional keyname.
""" """
acc = [] acc = []
data = [] data = []
if not target or not inference: if not target or not inference:
return acc, data return acc, data
if not isinstance(inference[key_name], list):
if inference[key_name] is None:
inference[key_name] = []
else:
inference[key_name] = [inference[key_name]]
if not isinstance(target[key_name], list):
if target[key_name] is None:
target[key_name] = []
else:
target[key_name] = [target[key_name]]
# Realign lenght for mis predicted/feedback/reivew result
if len(target[key_name]) == 0 and len(inference[key_name]) > 0:
target[key_name] = [None for _ in range(len(inference[key_name]))]
elif len(inference[key_name]) == 0 and len(target[key_name]) > 0:
target[key_name] = [None for _ in range(len(inference[key_name]))]
for i, v in enumerate(inference[key_name]): _inference = inference[key_name]
# TODO: target[key_name][i] is None, "" _target = target[key_name]
x = post_processing_str(key_name, inference[key_name][i], is_gt=False) _will_acc_be_ignored = _acc_will_be_ignored(key_name, _target, type)
y = post_processing_str(key_name, target[key_name][i], is_gt=True) _inference = _accuracy_calculate_formatter(_inference)
_target = _accuracy_calculate_formatter(_target)
for i, v in enumerate(_inference):
# TODO: target[i] is None, ""
x = post_processing_str(key_name, _inference[i], is_gt=False)
y = post_processing_str(key_name, _target[i], is_gt=True)
score = eval_ocr_metric( score = eval_ocr_metric(
[x], [x],
@ -705,6 +717,7 @@ def calculate_accuracy(key_name, inference, target):
# "line_acc", # "line_acc",
# "one_minus_ned_word", # "one_minus_ned_word",
]) ])
if not _will_acc_be_ignored:
acc.append(list(score.values())[0]) acc.append(list(score.values())[0])
data.append([x, y]) data.append([x, y])
return acc, data return acc, data
@ -821,30 +834,43 @@ def calculate_a_request(report, request):
if status != 200: if status != 200:
continue continue
image.feedback_accuracy = att["acc"]["feedback"] # dict {key: [values]} image.feedback_accuracy = att["acc"]["feedback"] # dict {key: [values]}
image.is_bad_image_quality = att["is_bad_image"] # is_bad_image=avg_acc<threshold (avg_acc=feedback_acc)
if att["is_reviewed"]==1: # Image is already reviewed
image.reviewed_accuracy = att["acc"]["reviewed"] # dict {key: [values]} image.reviewed_accuracy = att["acc"]["reviewed"] # dict {key: [values]}
image.is_bad_image_quality = att["is_bad_image"]
if not image.doc_type: if not image.doc_type:
# try to revert doc type from filename
_doc_type = image.file_name.split("_")[1] _doc_type = image.file_name.split("_")[1]
if _doc_type in ["imei", "invoice"]: if _doc_type in ["imei", "invoice"]:
image.doc_type = _doc_type image.doc_type = _doc_type
image.save() image.save()
_sub = "NA" _sub = "NA"
if request.redemption_id: if request.redemption_id:
_sub = map_subsidiary_short_to_long(request.redemption_id[:2]) _sub = map_subsidiary_short_to_long(request.redemption_id[:2])
else: else:
print(f"[WARM]: empty redemption_id, check request: {request.request_id}") print(f"[WARN]: empty redemption_id, check request: {request.request_id}")
# Little trick to replace purchase date to normalized # Little trick to replace purchase date to normalized
if len(att["normalized_data"]["feedback"].get("purchase_date", [])) > 0: if len(att["normalized_data"]["feedback"].get("purchase_date", [])) > 0:
image.predict_result["purchase_date"] = [att["normalized_data"]["feedback"]["purchase_date"][i][0] for i in range(len(att["normalized_data"]["feedback"]["purchase_date"]))] image.predict_result["purchase_date"] = [value_pair[0] for value_pair in att["normalized_data"]["feedback"]["purchase_date"]]
image.feedback_result["purchase_date"] = att["normalized_data"]["feedback"]["purchase_date"][fb_max_indexes["purchase_date"]][1] image.feedback_result["purchase_date"] = att["normalized_data"]["feedback"]["purchase_date"][fb_max_indexes["purchase_date"]][1]
if len(att["normalized_data"]["reviewed"].get("purchase_date", [])) > 0: if len(att["normalized_data"]["reviewed"].get("purchase_date", [])) > 0:
image.predict_result["purchase_date"] = [att["normalized_data"]["reviewed"]["purchase_date"][i][0] for i in range(len(att["normalized_data"]["reviewed"]["purchase_date"]))] image.predict_result["purchase_date"] = [value_pair[0] for value_pair in att["normalized_data"]["reviewed"]["purchase_date"]]
image.reviewed_result["purchase_date"] = att["normalized_data"]["reviewed"]["purchase_date"][rv_max_indexes["purchase_date"]][1] image.reviewed_result["purchase_date"] = att["normalized_data"]["reviewed"]["purchase_date"][rv_max_indexes["purchase_date"]][1]
# if request.is_reviewed:
# att["is_reviewed"] = 1
request_att["is_reviewed"].append(att["is_reviewed"]) request_att["is_reviewed"].append(att["is_reviewed"])
if att["is_reviewed"] == -1: # -1 means "not required"
att["acc"]["reviewed"] = {}
reviewed_result = {}
reason = None
counter_measure = None
else:
if att["is_reviewed"] == 1:
reviewed_result = image.reviewed_result
reason = image.reason
counter_measure = image.counter_measures
new_report_file = ReportFile(report=report, new_report_file = ReportFile(report=report,
subsidiary=_sub, subsidiary=_sub,
correspond_request_id=request.request_id, correspond_request_id=request.request_id,
@ -853,15 +879,15 @@ def calculate_a_request(report, request):
doc_type=image.doc_type, doc_type=image.doc_type,
predict_result=image.predict_result, predict_result=image.predict_result,
feedback_result=image.feedback_result, feedback_result=image.feedback_result,
reviewed_result=image.reviewed_result, reviewed_result=reviewed_result,
feedback_accuracy=att["acc"]["feedback"], feedback_accuracy=att["acc"]["feedback"],
reviewed_accuracy=att["acc"]["reviewed"], reviewed_accuracy=att["acc"]["reviewed"],
acc=att["avg_acc"], acc=att["avg_acc"],
is_bad_image=att["is_bad_image"], is_bad_image=att["is_bad_image"],
is_reviewed= review_status_map(att["is_reviewed"]), is_reviewed= review_status_map(att["is_reviewed"]),
time_cost=image.processing_time, time_cost=image.processing_time,
bad_image_reason=image.reason, bad_image_reason=reason,
counter_measures=image.counter_measures, counter_measures=counter_measure,
error="|".join(att["err"]), error="|".join(att["err"]),
review_status=att["is_reviewed"], review_status=att["is_reviewed"],
) )
@ -890,17 +916,17 @@ def calculate_a_request(report, request):
request_att["acc"]["feedback"]["sold_to_party"] += _att["acc"]["feedback"]["sold_to_party"] request_att["acc"]["feedback"]["sold_to_party"] += _att["acc"]["feedback"]["sold_to_party"]
request_att["acc"]["feedback"]["invoice_no"] += _att["acc"]["feedback"]["invoice_no"] request_att["acc"]["feedback"]["invoice_no"] += _att["acc"]["feedback"]["invoice_no"]
request_att["acc"]["reviewed"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] request_att["acc"]["reviewed"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if _att["is_reviewed"]==1 else []
request_att["acc"]["reviewed"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] request_att["acc"]["reviewed"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if _att["is_reviewed"]==1 else []
request_att["acc"]["reviewed"]["retailername"] += _att["acc"]["reviewed"]["retailername"] request_att["acc"]["reviewed"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if _att["is_reviewed"]==1 else []
request_att["acc"]["reviewed"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] request_att["acc"]["reviewed"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if _att["is_reviewed"]==1 else []
request_att["acc"]["reviewed"]["invoice_no"] += _att["acc"]["reviewed"]["invoice_no"] request_att["acc"]["reviewed"]["invoice_no"] += _att["acc"]["reviewed"]["invoice_no"] if _att["is_reviewed"]==1 else []
request_att["acc"]["acumulated"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if _att["acc"]["reviewed"]["imei_number"] else _att["acc"]["feedback"]["imei_number"] request_att["acc"]["acumulated"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if _att["acc"]["reviewed"]["imei_number"] and _att["is_reviewed"]==1 else _att["acc"]["feedback"]["imei_number"]
request_att["acc"]["acumulated"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if _att["acc"]["reviewed"]["purchase_date"] else _att["acc"]["feedback"]["purchase_date"] request_att["acc"]["acumulated"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if _att["acc"]["reviewed"]["purchase_date"] and _att["is_reviewed"]==1 else _att["acc"]["feedback"]["purchase_date"]
request_att["acc"]["acumulated"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if _att["acc"]["reviewed"]["retailername"] else _att["acc"]["feedback"]["retailername"] request_att["acc"]["acumulated"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if _att["acc"]["reviewed"]["retailername"] and _att["is_reviewed"]==1 else _att["acc"]["feedback"]["retailername"]
request_att["acc"]["acumulated"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if _att["acc"]["reviewed"]["sold_to_party"] else _att["acc"]["feedback"]["sold_to_party"] request_att["acc"]["acumulated"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if _att["acc"]["reviewed"]["sold_to_party"] and _att["is_reviewed"]==1 else _att["acc"]["feedback"]["sold_to_party"]
request_att["acc"]["acumulated"]["invoice_no"] += _att["acc"]["reviewed"]["invoice_no"] if _att["acc"]["reviewed"]["invoice_no"] else _att["acc"]["feedback"]["invoice_no"] request_att["acc"]["acumulated"]["invoice_no"] += _att["acc"]["reviewed"]["invoice_no"] if _att["acc"]["reviewed"]["invoice_no"] and _att["is_reviewed"]==1 else _att["acc"]["feedback"]["invoice_no"]
if image.reason not in settings.ACC_EXCLUDE_RESEASONS: if image.reason not in settings.ACC_EXCLUDE_RESEASONS:
request_att["bad_images"] += int(_att["is_bad_image"]) request_att["bad_images"] += int(_att["is_bad_image"])
@ -926,33 +952,35 @@ def calculate_subcription_file(subcription_request_file):
return 400, att return 400, att
inference_result = copy.deepcopy(subcription_request_file.predict_result) inference_result = copy.deepcopy(subcription_request_file.predict_result)
inference_result, feedback_result = align_fine_result(inference_result, copy.deepcopy(subcription_request_file.feedback_result)) feedback_result = copy.deepcopy(subcription_request_file.feedback_result)
inference_result, reviewed_result = align_fine_result(inference_result, copy.deepcopy(subcription_request_file.reviewed_result)) reviewed_result = copy.deepcopy(subcription_request_file.reviewed_result)
for key_name in valid_keys: for key_name in valid_keys:
try: try:
att["acc"]["feedback"][key_name], att["normalized_data"]["feedback"][key_name] = calculate_accuracy(key_name, inference_result, feedback_result) att["acc"]["feedback"][key_name], att["normalized_data"]["feedback"][key_name] = calculate_accuracy(key_name, inference_result, feedback_result, "feedback")
att["acc"]["reviewed"][key_name], att["normalized_data"]["reviewed"][key_name] = calculate_accuracy(key_name, inference_result, reviewed_result) att["acc"]["reviewed"][key_name], att["normalized_data"]["reviewed"][key_name] = calculate_accuracy(key_name, inference_result, reviewed_result, "reviewed")
except Exception as e: except Exception as e:
att["err"].append(str(e)) att["err"].append(str(e))
# print(f"[DEBUG]: predict_result: {subcription_request_file.predict_result}")
# print(f"[DEBUG]: e: {e} -key_name: {key_name}")
subcription_request_file.feedback_accuracy = att["acc"]["feedback"] subcription_request_file.feedback_accuracy = att["acc"]["feedback"]
subcription_request_file.reviewed_accuracy = att["acc"]["reviewed"] subcription_request_file.reviewed_accuracy = att["acc"]["reviewed"]
avg_reviewed = calculate_avg_accuracy(att["acc"], "reviewed", ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"])
avg_feedback = calculate_avg_accuracy(att["acc"], "feedback", ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"]) avg_reviewed = calculate_avg_accuracy(att["acc"], "reviewed", valid_keys)
avg_feedback = calculate_avg_accuracy(att["acc"], "feedback", valid_keys)
if avg_feedback is not None or avg_reviewed is not None: if avg_feedback is not None or avg_reviewed is not None:
avg_acc = 0 avg_acc = 0
if avg_feedback is not None: if avg_feedback is not None:
avg_acc = avg_feedback avg_acc = avg_feedback
if avg_feedback < settings.NEED_REVIEW: if avg_feedback < settings.NEED_REVIEW:
att["is_reviewed"] = 0 att["is_reviewed"] = 0
if avg_reviewed is not None: else:
att["is_reviewed"] = -1
if avg_reviewed is not None and att["is_reviewed"]!=-1:
avg_acc = avg_reviewed avg_acc = avg_reviewed
att["is_reviewed"] = 1 att["is_reviewed"] = 1
# Little trick to overcome issue caused by misleading manually review process # Little trick to overcome issue caused by misleading manually review process
if subcription_request_file.reason or subcription_request_file.counter_measures: if (subcription_request_file.reason or subcription_request_file.counter_measures) and att["is_reviewed"]!=-1:
att["is_reviewed"] = 1 att["is_reviewed"] = 1
att["avg_acc"] = avg_acc att["avg_acc"] = avg_acc