Merge pull request #106 from SDSRV-IDP/trungpt/not_required_fix
Trungpt/not required fix
This commit is contained in:
commit
d2f5052472
@ -16,8 +16,10 @@ import redis
|
||||
from fwd import settings
|
||||
from ..models import SubscriptionRequest, Report, ReportFile
|
||||
import json
|
||||
from typing import Union, List, Dict
|
||||
|
||||
# Field names iterated when computing per-file accuracy (see calculate_subcription_file).
valid_keys = ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"]
|
||||
# Keys treated as optional: _acc_will_be_ignored checks membership here to decide
# whether a feedback accuracy with an empty target is left out.
optional_keys = ['invoice_no']
|
||||
|
||||
class ReportAccumulateByRequest:
|
||||
def __init__(self, sub):
|
||||
@ -533,6 +535,13 @@ def first_of_list(the_list):
|
||||
return None
|
||||
return the_list[0]
|
||||
|
||||
def _feedback_invoice_no_exist(feedback_result):
|
||||
invoice_no = feedback_result.get("invoice_no", None)
|
||||
if invoice_no in ["", [], None]:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
def extract_report_detail_list(report_detail_list, lower=False, in_percent=True):
|
||||
data = []
|
||||
for report_file in report_detail_list:
|
||||
@ -549,7 +558,7 @@ def extract_report_detail_list(report_detail_list, lower=False, in_percent=True)
|
||||
"Invoice_Number_User": report_file.feedback_result.get("invoice_no", None) if report_file.feedback_result else None,
|
||||
"Invoice_Number_OCR": report_file.predict_result.get("invoice_no", None),
|
||||
"Invoice_Number Revised": report_file.reviewed_result.get("invoice_no", None) if report_file.reviewed_result else None,
|
||||
"Invoice_Number_Accuracy": first_of_list(report_file.feedback_accuracy.get("invoice_no", [None])),
|
||||
"Invoice_Number_Accuracy": first_of_list(report_file.feedback_accuracy.get("invoice_no", [None])) if _feedback_invoice_no_exist(report_file.feedback_result) else None,
|
||||
"Invoice_Purchase Date_Consumer": report_file.feedback_result.get("purchase_date", None) if report_file.feedback_result else None,
|
||||
"Invoice_Purchase Date_OCR": format_purchase_date_ocr_for_report(report_file.predict_result.get("purchase_date", [])),
|
||||
"Invoice_Purchase Date Revised": report_file.reviewed_result.get("purchase_date", None) if report_file.reviewed_result else None,
|
||||
@ -644,57 +653,60 @@ def predict_result_to_ready(result):
|
||||
dict_result["invoice_no"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}, {}])[4].get("value", None)
|
||||
return dict_result
|
||||
|
||||
def align_fine_result(ready_predict, fine_result):
    """Pad the prediction and the fine (feedback/reviewed) result against each other.

    Both mappings are modified in place and handed back.

    Args:
        ready_predict: prediction mapping; ``purchase_date`` and
            ``retailername`` are read by subscript, ``invoice_no`` via ``get``.
        fine_result: feedback or reviewed mapping. When falsy, nothing is
            changed.

    Returns:
        tuple: ``(ready_predict, fine_result)``.
    """
    # NOTE(review): the nesting below is reconstructed from a view with no
    # indentation; every statement dereferences fine_result, so all of them
    # are assumed to sit under the truthiness guard - confirm.
    if not fine_result:
        return ready_predict, fine_result

    # The fine result has a value the prediction lacks: give the prediction
    # a single empty slot.
    if fine_result["purchase_date"] and not len(ready_predict["purchase_date"]):
        ready_predict["purchase_date"] = [None]
    if fine_result["retailername"] and not ready_predict["retailername"]:
        ready_predict["retailername"] = [None]

    # The prediction has an invoice number the fine result lacks.
    predicted_invoice = ready_predict.get("invoice_no", None)
    if predicted_invoice and not fine_result.get("invoice_no", None):
        fine_result["invoice_no"] = [None]

    # Repeat the fine purchase date once per predicted purchase date.
    slots = len(ready_predict["purchase_date"])
    fine_result["purchase_date"] = [fine_result["purchase_date"]] * slots
    return ready_predict, fine_result
|
||||
|
||||
def update_temp_accuracy(accuracy, acc, keys):
    """Push one set of per-key values into the running accumulators.

    Args:
        accuracy: mapping of key -> accumulator object exposing ``add``;
            updated in place.
        acc: mapping of key -> value handed to the accumulator's ``add``.
        keys: keys to process; each must be present in both mappings.

    Returns:
        The same ``accuracy`` mapping that was passed in.
    """
    for name in keys:
        bucket = accuracy[name]
        bucket.add(acc[name])
    return accuracy
|
||||
|
||||
def calculate_accuracy(key_name, inference, target):
|
||||
def _accuracy_calculate_formatter(inference, target):
|
||||
"""_summary_
|
||||
format type of inference, and target from str/None to List of str/None.
|
||||
Make both list inference and target to be the same length.
|
||||
"""
|
||||
if not isinstance(inference, list):
|
||||
inference = [] if inference is None else [inference]
|
||||
if not isinstance(target, list):
|
||||
target = [] if target is None else [target]
|
||||
|
||||
length = max(len(target), len(inference))
|
||||
target = target + (length - len(target))*[None]
|
||||
inference = inference + (length - len(inference))*[None]
|
||||
|
||||
return inference, target
|
||||
|
||||
def _acc_will_be_ignored(key_name, _target, type):
    """Decide whether the accuracy of ``key_name`` is left out.

    The accuracy is ignored only when all three conditions hold: the key is
    listed in ``optional_keys``, the target equals ``[]``, ``None`` or ``''``,
    and ``type`` is ``'feedback'``.

    Args:
        key_name: name of the field being scored.
        _target: raw target value for that field.
        type: result kind the target comes from, compared against
            ``'feedback'``.

    Returns:
        bool: ``True`` when the accuracy is to be ignored, else ``False``.
    """
    optional = key_name in optional_keys
    empty = _target in [[], None, '']
    return bool(optional and empty and type == 'feedback')
|
||||
|
||||
def calculate_accuracy(key_name: str, inference: Dict[str, Union[str, List]], target: Dict[str, Union[str, List]], type: str):
|
||||
"""_summary_
|
||||
NOTE: This has been changed to return accuracy = None if
|
||||
Args:
|
||||
key_name (string): key to calculate accuracy on, ex: retailername
|
||||
inference (dict): result from ocr, refined to align with the target down below
|
||||
target (dict): result of type
|
||||
is_optional_keyname: default is set to False (which mean this is not an optional keyname)
|
||||
currently we have invoice_no is an optional keyname.
|
||||
"""
|
||||
acc = []
|
||||
data = []
|
||||
|
||||
if not target or not inference:
|
||||
return acc, data
|
||||
if not isinstance(inference[key_name], list):
|
||||
if inference[key_name] is None:
|
||||
inference[key_name] = []
|
||||
else:
|
||||
inference[key_name] = [inference[key_name]]
|
||||
if not isinstance(target[key_name], list):
|
||||
if target[key_name] is None:
|
||||
target[key_name] = []
|
||||
else:
|
||||
target[key_name] = [target[key_name]]
|
||||
# Realign lenght for mis predicted/feedback/reivew result
|
||||
if len(target[key_name]) == 0 and len(inference[key_name]) > 0:
|
||||
target[key_name] = [None for _ in range(len(inference[key_name]))]
|
||||
elif len(inference[key_name]) == 0 and len(target[key_name]) > 0:
|
||||
target[key_name] = [None for _ in range(len(inference[key_name]))]
|
||||
|
||||
for i, v in enumerate(inference[key_name]):
|
||||
# TODO: target[key_name][i] is None, ""
|
||||
x = post_processing_str(key_name, inference[key_name][i], is_gt=False)
|
||||
y = post_processing_str(key_name, target[key_name][i], is_gt=True)
|
||||
_inference = inference[key_name]
|
||||
_target = target[key_name]
|
||||
_will_acc_be_ignored = _acc_will_be_ignored(key_name, _target, type)
|
||||
_inference = _accuracy_calculate_formatter(_inference)
|
||||
_target = _accuracy_calculate_formatter(_target)
|
||||
|
||||
for i, v in enumerate(_inference):
|
||||
# TODO: target[i] is None, ""
|
||||
x = post_processing_str(key_name, _inference[i], is_gt=False)
|
||||
y = post_processing_str(key_name, _target[i], is_gt=True)
|
||||
|
||||
score = eval_ocr_metric(
|
||||
[x],
|
||||
@ -705,7 +717,8 @@ def calculate_accuracy(key_name, inference, target):
|
||||
# "line_acc",
|
||||
# "one_minus_ned_word",
|
||||
])
|
||||
acc.append(list(score.values())[0])
|
||||
if not _will_acc_be_ignored:
|
||||
acc.append(list(score.values())[0])
|
||||
data.append([x, y])
|
||||
return acc, data
|
||||
|
||||
@ -821,30 +834,43 @@ def calculate_a_request(report, request):
|
||||
if status != 200:
|
||||
continue
|
||||
image.feedback_accuracy = att["acc"]["feedback"] # dict {key: [values]}
|
||||
image.reviewed_accuracy = att["acc"]["reviewed"] # dict {key: [values]}
|
||||
image.is_bad_image_quality = att["is_bad_image"]
|
||||
image.is_bad_image_quality = att["is_bad_image"] # is_bad_image=avg_acc<threshold (avg_acc=feedback_acc)
|
||||
|
||||
if att["is_reviewed"]==1: # Image is already reviewed
|
||||
image.reviewed_accuracy = att["acc"]["reviewed"] # dict {key: [values]}
|
||||
|
||||
if not image.doc_type:
|
||||
# try to revert doc type from filename
|
||||
_doc_type = image.file_name.split("_")[1]
|
||||
if _doc_type in ["imei", "invoice"]:
|
||||
image.doc_type = _doc_type
|
||||
image.save()
|
||||
|
||||
_sub = "NA"
|
||||
if request.redemption_id:
|
||||
_sub = map_subsidiary_short_to_long(request.redemption_id[:2])
|
||||
else:
|
||||
print(f"[WARM]: empty redemption_id, check request: {request.request_id}")
|
||||
print(f"[WARN]: empty redemption_id, check request: {request.request_id}")
|
||||
|
||||
# Little trick to replace purchase date to normalized
|
||||
if len(att["normalized_data"]["feedback"].get("purchase_date", [])) > 0:
|
||||
image.predict_result["purchase_date"] = [att["normalized_data"]["feedback"]["purchase_date"][i][0] for i in range(len(att["normalized_data"]["feedback"]["purchase_date"]))]
|
||||
image.predict_result["purchase_date"] = [value_pair[0] for value_pair in att["normalized_data"]["feedback"]["purchase_date"]]
|
||||
image.feedback_result["purchase_date"] = att["normalized_data"]["feedback"]["purchase_date"][fb_max_indexes["purchase_date"]][1]
|
||||
if len(att["normalized_data"]["reviewed"].get("purchase_date", [])) > 0:
|
||||
image.predict_result["purchase_date"] = [att["normalized_data"]["reviewed"]["purchase_date"][i][0] for i in range(len(att["normalized_data"]["reviewed"]["purchase_date"]))]
|
||||
image.predict_result["purchase_date"] = [value_pair[0] for value_pair in att["normalized_data"]["reviewed"]["purchase_date"]]
|
||||
image.reviewed_result["purchase_date"] = att["normalized_data"]["reviewed"]["purchase_date"][rv_max_indexes["purchase_date"]][1]
|
||||
# if request.is_reviewed:
|
||||
# att["is_reviewed"] = 1
|
||||
request_att["is_reviewed"].append(att["is_reviewed"])
|
||||
|
||||
if att["is_reviewed"] == -1: # -1 means "not required"
|
||||
att["acc"]["reviewed"] = {}
|
||||
reviewed_result = {}
|
||||
reason = None
|
||||
counter_measure = None
|
||||
else:
|
||||
if att["is_reviewed"] == 1:
|
||||
reviewed_result = image.reviewed_result
|
||||
reason = image.reason
|
||||
counter_measure = image.counter_measures
|
||||
|
||||
new_report_file = ReportFile(report=report,
|
||||
subsidiary=_sub,
|
||||
correspond_request_id=request.request_id,
|
||||
@ -853,15 +879,15 @@ def calculate_a_request(report, request):
|
||||
doc_type=image.doc_type,
|
||||
predict_result=image.predict_result,
|
||||
feedback_result=image.feedback_result,
|
||||
reviewed_result=image.reviewed_result,
|
||||
reviewed_result=reviewed_result,
|
||||
feedback_accuracy=att["acc"]["feedback"],
|
||||
reviewed_accuracy=att["acc"]["reviewed"],
|
||||
acc=att["avg_acc"],
|
||||
is_bad_image=att["is_bad_image"],
|
||||
is_reviewed= review_status_map(att["is_reviewed"]),
|
||||
time_cost=image.processing_time,
|
||||
bad_image_reason=image.reason,
|
||||
counter_measures=image.counter_measures,
|
||||
bad_image_reason=reason,
|
||||
counter_measures=counter_measure,
|
||||
error="|".join(att["err"]),
|
||||
review_status=att["is_reviewed"],
|
||||
)
|
||||
@ -890,17 +916,17 @@ def calculate_a_request(report, request):
|
||||
request_att["acc"]["feedback"]["sold_to_party"] += _att["acc"]["feedback"]["sold_to_party"]
|
||||
request_att["acc"]["feedback"]["invoice_no"] += _att["acc"]["feedback"]["invoice_no"]
|
||||
|
||||
request_att["acc"]["reviewed"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"]
|
||||
request_att["acc"]["reviewed"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"]
|
||||
request_att["acc"]["reviewed"]["retailername"] += _att["acc"]["reviewed"]["retailername"]
|
||||
request_att["acc"]["reviewed"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"]
|
||||
request_att["acc"]["reviewed"]["invoice_no"] += _att["acc"]["reviewed"]["invoice_no"]
|
||||
request_att["acc"]["reviewed"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if _att["is_reviewed"]==1 else []
|
||||
request_att["acc"]["reviewed"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if _att["is_reviewed"]==1 else []
|
||||
request_att["acc"]["reviewed"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if _att["is_reviewed"]==1 else []
|
||||
request_att["acc"]["reviewed"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if _att["is_reviewed"]==1 else []
|
||||
request_att["acc"]["reviewed"]["invoice_no"] += _att["acc"]["reviewed"]["invoice_no"] if _att["is_reviewed"]==1 else []
|
||||
|
||||
request_att["acc"]["acumulated"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if _att["acc"]["reviewed"]["imei_number"] else _att["acc"]["feedback"]["imei_number"]
|
||||
request_att["acc"]["acumulated"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if _att["acc"]["reviewed"]["purchase_date"] else _att["acc"]["feedback"]["purchase_date"]
|
||||
request_att["acc"]["acumulated"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if _att["acc"]["reviewed"]["retailername"] else _att["acc"]["feedback"]["retailername"]
|
||||
request_att["acc"]["acumulated"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if _att["acc"]["reviewed"]["sold_to_party"] else _att["acc"]["feedback"]["sold_to_party"]
|
||||
request_att["acc"]["acumulated"]["invoice_no"] += _att["acc"]["reviewed"]["invoice_no"] if _att["acc"]["reviewed"]["invoice_no"] else _att["acc"]["feedback"]["invoice_no"]
|
||||
request_att["acc"]["acumulated"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if _att["acc"]["reviewed"]["imei_number"] and _att["is_reviewed"]==1 else _att["acc"]["feedback"]["imei_number"]
|
||||
request_att["acc"]["acumulated"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if _att["acc"]["reviewed"]["purchase_date"] and _att["is_reviewed"]==1 else _att["acc"]["feedback"]["purchase_date"]
|
||||
request_att["acc"]["acumulated"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if _att["acc"]["reviewed"]["retailername"] and _att["is_reviewed"]==1 else _att["acc"]["feedback"]["retailername"]
|
||||
request_att["acc"]["acumulated"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if _att["acc"]["reviewed"]["sold_to_party"] and _att["is_reviewed"]==1 else _att["acc"]["feedback"]["sold_to_party"]
|
||||
request_att["acc"]["acumulated"]["invoice_no"] += _att["acc"]["reviewed"]["invoice_no"] if _att["acc"]["reviewed"]["invoice_no"] and _att["is_reviewed"]==1 else _att["acc"]["feedback"]["invoice_no"]
|
||||
|
||||
if image.reason not in settings.ACC_EXCLUDE_RESEASONS:
|
||||
request_att["bad_images"] += int(_att["is_bad_image"])
|
||||
@ -926,33 +952,35 @@ def calculate_subcription_file(subcription_request_file):
|
||||
return 400, att
|
||||
|
||||
inference_result = copy.deepcopy(subcription_request_file.predict_result)
|
||||
inference_result, feedback_result = align_fine_result(inference_result, copy.deepcopy(subcription_request_file.feedback_result))
|
||||
inference_result, reviewed_result = align_fine_result(inference_result, copy.deepcopy(subcription_request_file.reviewed_result))
|
||||
feedback_result = copy.deepcopy(subcription_request_file.feedback_result)
|
||||
reviewed_result = copy.deepcopy(subcription_request_file.reviewed_result)
|
||||
|
||||
for key_name in valid_keys:
|
||||
try:
|
||||
att["acc"]["feedback"][key_name], att["normalized_data"]["feedback"][key_name] = calculate_accuracy(key_name, inference_result, feedback_result)
|
||||
att["acc"]["reviewed"][key_name], att["normalized_data"]["reviewed"][key_name] = calculate_accuracy(key_name, inference_result, reviewed_result)
|
||||
att["acc"]["feedback"][key_name], att["normalized_data"]["feedback"][key_name] = calculate_accuracy(key_name, inference_result, feedback_result, "feedback")
|
||||
att["acc"]["reviewed"][key_name], att["normalized_data"]["reviewed"][key_name] = calculate_accuracy(key_name, inference_result, reviewed_result, "reviewed")
|
||||
except Exception as e:
|
||||
att["err"].append(str(e))
|
||||
# print(f"[DEBUG]: predict_result: {subcription_request_file.predict_result}")
|
||||
# print(f"[DEBUG]: e: {e} -key_name: {key_name}")
|
||||
subcription_request_file.feedback_accuracy = att["acc"]["feedback"]
|
||||
subcription_request_file.reviewed_accuracy = att["acc"]["reviewed"]
|
||||
avg_reviewed = calculate_avg_accuracy(att["acc"], "reviewed", ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"])
|
||||
avg_feedback = calculate_avg_accuracy(att["acc"], "feedback", ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"])
|
||||
|
||||
avg_reviewed = calculate_avg_accuracy(att["acc"], "reviewed", valid_keys)
|
||||
avg_feedback = calculate_avg_accuracy(att["acc"], "feedback", valid_keys)
|
||||
|
||||
if avg_feedback is not None or avg_reviewed is not None:
|
||||
avg_acc = 0
|
||||
if avg_feedback is not None:
|
||||
avg_acc = avg_feedback
|
||||
if avg_feedback < settings.NEED_REVIEW:
|
||||
att["is_reviewed"] = 0
|
||||
if avg_reviewed is not None:
|
||||
else:
|
||||
att["is_reviewed"] = -1
|
||||
if avg_reviewed is not None and att["is_reviewed"]!=-1:
|
||||
avg_acc = avg_reviewed
|
||||
att["is_reviewed"] = 1
|
||||
|
||||
# Little trick to overcome issue caused by misleading manually review process
|
||||
if subcription_request_file.reason or subcription_request_file.counter_measures:
|
||||
if (subcription_request_file.reason or subcription_request_file.counter_measures) and att["is_reviewed"]!=-1:
|
||||
att["is_reviewed"] = 1
|
||||
|
||||
att["avg_acc"] = avg_acc
|
||||
|
Loading…
Reference in New Issue
Block a user