import copy
import logging
import re
from datetime import datetime
from typing import Any, Dict, List, Union

from django.db.models import Q
from django.utils import timezone

from fwd import settings
from fwd_api.constant.common import FileCategory
from fwd_api.models import Report, ReportFile, SubscriptionRequest, SubscriptionRequestFile

from .ocr_utils.ocr_metrics import eval_ocr_metric
from .ocr_utils.sbt_report import post_processing_str
from ..celery_worker.client_connector import c_connector
from ..utils.file import dict2xlsx, save_workbook_file, save_report_to_S3
from ..utils.processing_time import backend_cost
from ..utils.subsidiary import map_subsidiary_short_to_long

logger = logging.getLogger(__name__)

VALID_KEYS = ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"]
OPTIONAL_KEYS = ["invoice_no"]


class ReportAccumulateByRequest:
    def __init__(self, sub):
        # self.redis_client = redis.Redis(host=settings.REDIS_HOST, port=settings.REDIS_PORT, decode_responses=True)
        self.sub = sub
        self.current_time = None
        self.data = {}  # {"month": [total, {"day": day_data}]}
        self.month_format = {
            'subs': "+",
            'extraction_date': "Subtotal ()",
            'num_imei': 0,
            'num_invoice': 0,
            'total_images': 0,
            'images_quality': {
                'successful': 0,
                'successful_percent': 0,
                'bad': 0,
                'bad_percent': 0
            },
            'average_accuracy_rate': {
                'imei_number': IterAvg(),
                'purchase_date': IterAvg(),
                'retailername': IterAvg(),
                'sold_to_party': IterAvg(),
                'invoice_no': IterAvg()
            },
            'average_processing_time': {
                'imei': IterAvg(),
                'invoice': IterAvg(),
                'avg': IterAvg(),
            },
            'file_average_processing_time': {
                'imei': IterAvg(),
                'invoice': IterAvg(),
                'avg': IterAvg(),
            },
            'usage': {
                'imei': 0,
                'invoice': 0,
                'total_images': 0,
                'request': 0
            },
            'feedback_accuracy': {
                'imei_number': IterAvg(),
                'purchase_date': IterAvg(),
                'retailername': IterAvg(),
                'sold_to_party': IterAvg(),
                'invoice_no': IterAvg()
            },
            'reviewed_accuracy': {
                'imei_number': IterAvg(),
                'purchase_date': IterAvg(),
                'retailername': IterAvg(),
                'sold_to_party': IterAvg(),
                'invoice_no': IterAvg()
            },
            'num_request': 0,
            "review_progress": []
        }
        self.day_format = {
            'subs': sub,
            'extraction_date': "",
            'num_imei': 0,
            'num_invoice': 0,
            'total_images': 0,
            'images_quality': {
                'successful': 0,
                'successful_percent': 0,
                'bad': 0,
                'bad_percent': 0
            },
            'average_accuracy_rate': {
                'imei_number': IterAvg(),
                'purchase_date': IterAvg(),
                'retailername': IterAvg(),
                'sold_to_party': IterAvg(),
                'invoice_no': IterAvg()
            },
            'average_processing_time': {
                'imei': IterAvg(),
                'invoice': IterAvg(),
                'avg': IterAvg(),
            },
            'file_average_processing_time': {
                'imei': IterAvg(),
                'invoice': IterAvg(),
                'avg': IterAvg(),
            },
            'usage': {
                'imei': 0,
                'invoice': 0,
                'total_images': 0,
                'request': 0
            },
            'feedback_accuracy': {
                'imei_number': IterAvg(),
                'purchase_date': IterAvg(),
                'retailername': IterAvg(),
                'sold_to_party': IterAvg(),
                'invoice_no': IterAvg()
            },
            'reviewed_accuracy': {
                'imei_number': IterAvg(),
                'purchase_date': IterAvg(),
                'retailername': IterAvg(),
                'sold_to_party': IterAvg(),
                'invoice_no': IterAvg()
            },
            "report_files": [],
            "num_request": 0,
            "review_progress": []
        }
        self.report = copy.deepcopy(self.month_format)
        self.report["average_accuracy_rate"]["avg"] = IterAvg()

    @staticmethod
    def update_total(total, report_file, image_avg_cost):
        # Accumulate one ReportFile into the overall totals
        if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS:
            total["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
total["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0 total["total_images"] += 1 doc_type = "imei" if report_file.doc_type in ["imei", "invoice", "all"]: doc_type = report_file.doc_type else: logger.warning(f"Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}") total["num_imei"] += 1 if doc_type == "imei" else 0 total["num_invoice"] += 1 if doc_type == "invoice" else 0 for key in settings.FIELD: if sum([len(report_file.reviewed_accuracy[x]) for x in report_file.reviewed_accuracy.keys() if "_count" not in x]) > 0 : total["average_accuracy_rate"][key].add(report_file.reviewed_accuracy.get(key, [])) total["average_accuracy_rate"]['avg'].add(report_file.reviewed_accuracy.get(key, [])) elif sum([len(report_file.feedback_accuracy[x]) for x in report_file.feedback_accuracy.keys() if "_count" not in x]) > 0: total["average_accuracy_rate"][key].add(report_file.feedback_accuracy.get(key, [])) total["average_accuracy_rate"]['avg'].add(report_file.feedback_accuracy.get(key, [])) total["feedback_accuracy"][key].add(report_file.feedback_accuracy.get(key, [])) total["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, [])) if not total["average_processing_time"].get(report_file.doc_type, None): logger.warning(f"Weird doctype: {report_file.doc_type}") total["average_processing_time"][report_file.doc_type] = IterAvg() total["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0 total["average_processing_time"]["avg"].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0 if not total["file_average_processing_time"].get(report_file.doc_type, None): logger.warning(f"Weird doctype: {report_file.doc_type}") total["file_average_processing_time"][report_file.doc_type] = IterAvg() total["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0 total["file_average_processing_time"]["avg"].add_avg(image_avg_cost, 1) if image_avg_cost else 0 doc_type = "imei" if report_file.doc_type in ["imei", "invoice", "all"]: doc_type = report_file.doc_type else: logger.warning(f"Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}") total["usage"]["imei"] += 1 if doc_type == "imei" else 0 total["usage"]["invoice"] += 1 if doc_type == "invoice" else 0 total["usage"]["total_images"] += 1 total["review_progress"].append(report_file.review_status) return total @staticmethod def update_month(month, report_file, image_avg_cost): # Update month = update month if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS: month["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0 month["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0 month["total_images"] += 1 doc_type = "imei" if report_file.doc_type in ["imei", "invoice", "all"]: doc_type = report_file.doc_type else: logger.warning(f"Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}") month["num_imei"] += 1 if doc_type == "imei" else 0 month["num_invoice"] += 1 if doc_type == "invoice" else 0 for key in settings.FIELD: if sum([len(report_file.reviewed_accuracy[x]) for x in report_file.reviewed_accuracy.keys() if "_count" not in x]) > 0 : month["average_accuracy_rate"][key].add(report_file.reviewed_accuracy.get(key, [])) elif sum([len(report_file.feedback_accuracy[x]) for x in report_file.feedback_accuracy.keys() if "_count" not in x]) > 0: 
month["average_accuracy_rate"][key].add(report_file.feedback_accuracy.get(key, [])) month["feedback_accuracy"][key].add(report_file.feedback_accuracy.get(key, [])) month["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, [])) if not month["average_processing_time"].get(report_file.doc_type, None): logger.warning(f"Weird doctype: {report_file.doc_type}") month["average_processing_time"][report_file.doc_type] = IterAvg() month["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0 month["average_processing_time"]["avg"].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0 if not month["file_average_processing_time"].get(report_file.doc_type, None): logger.warning(f"Weird doctype: {report_file.doc_type}") month["file_average_processing_time"][report_file.doc_type] = IterAvg() month["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0 month["file_average_processing_time"]["avg"].add_avg(image_avg_cost, 1) if image_avg_cost else 0 doc_type = "imei" if report_file.doc_type in ["imei", "invoice", "all"]: doc_type = report_file.doc_type else: logger.warning(f"Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}") month["usage"]["imei"] += 1 if doc_type == "imei" else 0 month["usage"]["invoice"] += 1 if doc_type == "invoice" else 0 month["usage"]["total_images"] += 1 month["review_progress"].append(report_file.review_status) return month @staticmethod def update_day(day_data, report_file, image_avg_cost): if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS: day_data["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0 day_data["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0 day_data["total_images"] += 1 doc_type = "imei" if report_file.doc_type in ["imei", "invoice", "all"]: doc_type = report_file.doc_type else: logger.warning(f"Weird doc type {report_file.doc_type} if request id: {report_file.correspond_request_id}") day_data["num_imei"] += 1 if doc_type == "imei" else 0 day_data["num_invoice"] += 1 if doc_type == "invoice" else 0 day_data["report_files"].append(report_file) for key in settings.FIELD: if sum([len(report_file.reviewed_accuracy[x]) for x in report_file.reviewed_accuracy.keys() if "_count" not in x]) > 0: day_data["average_accuracy_rate"][key].add(report_file.reviewed_accuracy.get(key, [])) elif sum([len(report_file.feedback_accuracy[x]) for x in report_file.feedback_accuracy.keys() if "_count" not in x]) > 0: day_data["average_accuracy_rate"][key].add(report_file.feedback_accuracy.get(key, [])) day_data["feedback_accuracy"][key].add(report_file.feedback_accuracy.get(key, [])) day_data["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, [])) if not day_data["average_processing_time"].get(report_file.doc_type, None): logger.warning(f"Weird doctype: {report_file.doc_type}") day_data["average_processing_time"][report_file.doc_type] = IterAvg() day_data["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0 day_data["average_processing_time"]['avg'].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0 if not day_data["file_average_processing_time"].get(report_file.doc_type, None): logger.warning(f"Weird doctype: {report_file.doc_type}") day_data["file_average_processing_time"][report_file.doc_type] = IterAvg() 
day_data["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0 day_data["file_average_processing_time"]['avg'].add_avg(image_avg_cost, 1) if image_avg_cost else 0 day_data["review_progress"].append(report_file.review_status) return day_data def add(self, request, report_files, report): this_month = timezone.localtime(request.created_at).strftime("%Y%m") this_day = timezone.localtime(request.created_at).strftime("%Y%m%d") if not self.data.get(this_month, None): self.data[this_month] = [copy.deepcopy(self.month_format), {}] self.data[this_month][0]["extraction_date"] = "Subtotal (" + timezone.localtime(request.created_at).strftime("%Y-%m") + ")" if not self.data[this_month][1].get(this_day, None): logger.info(f" Adding a new day: {this_day} for report: {report.id} ...") self.data[this_month][1][this_day] = copy.deepcopy(self.day_format)[0] self.data[this_month][1][this_day]['extraction_date'] = timezone.localtime(request.created_at).strftime("%Y-%m-%d") usage = self.count_transactions_within_day(this_day) self.data[this_month][1][this_day]["usage"]["imei"] = usage.get("imei", 0) self.data[this_month][1][this_day]["usage"]["invoice"] = usage.get("invoice", 0) self.data[this_month][1][this_day]["usage"]["request"] = usage.get("request", 0) self.data[this_month][1][this_day]["usage"]["total_images"] = usage.get("imei", 0) + usage.get("invoice", 0) self.data[this_month][1][this_day]['num_request'] += 1 self.data[this_month][0]['num_request'] += 1 _number_of_file = request.pages _be_cost = backend_cost(request.created_at, request.ai_inference_start_time) _ai_cost = request.ai_inference_time processing_time_by_averaging_request_cost = (_be_cost + _ai_cost)/_number_of_file if _number_of_file > 0 else 0 for report_file in report_files: # report_file.time_cost = processing_time_by_averaging_request_cost _report_file = copy.deepcopy(report_file) if _report_file.is_bad_image or _report_file.bad_image_reason in settings.ACC_EXCLUDE_RESEASONS: _report_file.acc = None for t in _report_file.feedback_accuracy.keys(): _report_file.feedback_accuracy[t] = [] for t in _report_file.reviewed_accuracy.keys(): _report_file.reviewed_accuracy[t] = [] self.report = self.update_total(self.report, _report_file, processing_time_by_averaging_request_cost) self.data[this_month][0] = self.update_month(self.data[this_month][0], _report_file, processing_time_by_averaging_request_cost) # Update the subtotal within the month self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], _report_file, processing_time_by_averaging_request_cost) # Update the subtotal of the day def count_transactions_within_day(self, date_string): start_date = datetime.strptime(date_string, "%Y%m%d") start_date_with_timezone = timezone.make_aware(start_date) end_date_with_timezone = start_date_with_timezone + timezone.timedelta(days=1) return count_transactions(start_date_with_timezone, end_date_with_timezone, self.sub) def save(self, root_report_id, is_daily_report=False, include_test=False): report_data, overall_report = self.get() fine_data = [] save_data = {"file": {"overview": f"{root_report_id}/{root_report_id}.xlsx"}, "data": fine_data, # {"sub_report_id": "S3 location", "data": fine_data} "report": overall_report} # extract data month_keys = list(report_data.keys()) month_keys.sort(reverse=True) for month in month_keys: fine_data.append(report_data[month][0]) day_keys = list(report_data[month][1].keys()) day_keys.sort(reverse = True) for day in day_keys: 
                report_data[month][1][day]['subs'] = map_subsidiary_short_to_long(report_data[month][1][day]['subs'])
                fine_data.append(report_data[month][1][day])
                # save daily reports
                report_id = root_report_id + "_" + day
                start_date = datetime.strptime(day, "%Y%m%d")
                start_date_with_timezone = timezone.make_aware(start_date)
                end_date_with_timezone = start_date_with_timezone + timezone.timedelta(days=1)

                _average_OCR_time = {"invoice": self.data[month][1][day]["average_processing_time"]["invoice"](),
                                     "imei": self.data[month][1][day]["average_processing_time"]["imei"](),
                                     "invoice_count": self.data[month][1][day]["average_processing_time"]["invoice"].count,
                                     "imei_count": self.data[month][1][day]["average_processing_time"]["imei"].count}
                _average_OCR_time["invoice"] = 0 if _average_OCR_time["invoice"] is None else _average_OCR_time["invoice"]
                _average_OCR_time["imei"] = 0 if _average_OCR_time["imei"] is None else _average_OCR_time["imei"]
                if "avg" not in _average_OCR_time:
                    _average_OCR_time["avg"] = (_average_OCR_time["invoice"] * _average_OCR_time["invoice_count"] + _average_OCR_time["imei"] * _average_OCR_time["imei_count"]) / (_average_OCR_time["imei_count"] + _average_OCR_time["invoice_count"]) if (_average_OCR_time["imei_count"] + _average_OCR_time["invoice_count"]) > 0 else None

                _file_average_OCR_time = {"invoice": self.data[month][1][day]["file_average_processing_time"]["invoice"](),
                                          "imei": self.data[month][1][day]["file_average_processing_time"]["imei"](),
                                          "invoice_count": self.data[month][1][day]["file_average_processing_time"]["invoice"].count,
                                          "imei_count": self.data[month][1][day]["file_average_processing_time"]["imei"].count}
                _file_average_OCR_time["invoice"] = 0 if _file_average_OCR_time["invoice"] is None else _file_average_OCR_time["invoice"]
                _file_average_OCR_time["imei"] = 0 if _file_average_OCR_time["imei"] is None else _file_average_OCR_time["imei"]
                if "avg" not in _file_average_OCR_time:
                    _file_average_OCR_time["avg"] = (_file_average_OCR_time["invoice"] * _file_average_OCR_time["invoice_count"] + _file_average_OCR_time["imei"] * _file_average_OCR_time["imei_count"]) / (_file_average_OCR_time["imei_count"] + _file_average_OCR_time["invoice_count"]) if (_file_average_OCR_time["imei_count"] + _file_average_OCR_time["invoice_count"]) > 0 else None

                acumulated_acc = {"feedback_accuracy": {},
                                  "reviewed_accuracy": {}}
                for acc_type in ["feedback_accuracy", "reviewed_accuracy"]:
                    avg_acc = IterAvg()
                    for key in settings.FIELD:
                        acumulated_acc[acc_type][key] = self.data[month][1][day][acc_type][key]()
                        acumulated_acc[acc_type][key + "_count"] = self.data[month][1][day][acc_type][key].count
                        avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key + "_count"])
                    acumulated_acc[acc_type]["avg"] = avg_acc()
                    acumulated_acc[acc_type]["avg_count"] = avg_acc.count

                new_report: Report = Report(
                    report_id=report_id,
                    is_daily_report=is_daily_report,
                    subsidiary=self.sub.lower().replace(" ", ""),
                    include_test=include_test,
                    start_at=start_date_with_timezone,
                    end_at=end_date_with_timezone,
                    status="Ready",
                    number_request=report_data[month][1][day]["num_request"],
                    number_images=report_data[month][1][day]["total_images"],
                    number_imei=report_data[month][1][day]["num_imei"],
                    number_invoice=report_data[month][1][day]["num_invoice"],
                    number_bad_images=report_data[month][1][day]["images_quality"]["bad"],
                    average_OCR_time=_file_average_OCR_time,
                    number_imei_transaction=report_data[month][1][day]["usage"]["imei"],
                    number_invoice_transaction=report_data[month][1][day]["usage"]["invoice"],
                    feedback_accuracy=acumulated_acc["feedback_accuracy"],
                    reviewed_accuracy=acumulated_acc["reviewed_accuracy"],
                )
                if is_daily_report:
                    new_report.save()
                    data = extract_report_detail_list(self.data[month][1][day]["report_files"], lower=True)
                    data_workbook = dict2xlsx(data, _type='report_detail')
                    local_workbook = save_workbook_file(report_id + ".xlsx", new_report, data_workbook)
                    s3_key = save_report_to_S3(report_id, local_workbook)
        return fine_data, save_data

    def get(self) -> Any:
        # FIXME: this could use a clean-up
        _data = copy.deepcopy(self.data)
        _report = copy.deepcopy(self.report)
        # export report data
        for key in _report["average_processing_time"].keys():
            _report["average_processing_time"][key] = _report["average_processing_time"][key]()

        _ks = list(_report["file_average_processing_time"].keys())
        for key in _ks:
            _report["file_average_processing_time"][key + "_count"] = _report["file_average_processing_time"][key].count
            _report["file_average_processing_time"][key] = _report["file_average_processing_time"][key]()

        # avg_acc = 0
        # count_acc = 0
        for key in settings.FIELD:
            _report["average_accuracy_rate"][key] = _report["average_accuracy_rate"][key]()
            for accuracy_type in ["feedback_accuracy", "reviewed_accuracy"]:
                # if (_report[accuracy_type][key].count + count_acc) > 0:
                #     avg_acc = (avg_acc*count_acc + _report[accuracy_type][key].avg*_report[accuracy_type][key].count) / (_report[accuracy_type][key].count + count_acc)
                #     count_acc += _report[accuracy_type][key].count
                _report[accuracy_type][key] = _report[accuracy_type][key]()
        _report["average_accuracy_rate"]["avg"] = _report["average_accuracy_rate"]["avg"]()
        _report["review_progress"] = _report["review_progress"].count(1) / (_report["review_progress"].count(0) + _report["review_progress"].count(1)) if (_report["review_progress"].count(0) + _report["review_progress"].count(1)) > 0 else 1.0
        _report["images_quality"]["successful_percent"] = _report["images_quality"]["successful"] / _report["total_images"] if _report["total_images"] > 0 else 0
        _report["images_quality"]["bad_percent"] = _report["images_quality"]["bad"] / _report["total_images"] if _report["total_images"] > 0 else 0

        # export data for dashboard
        for month in _data.keys():
            _data[month][0]["images_quality"]["successful_percent"] = _data[month][0]["images_quality"]["successful"] / _data[month][0]["total_images"] if _data[month][0]["total_images"] > 0 else 0
            _data[month][0]["images_quality"]["bad_percent"] = _data[month][0]["images_quality"]["bad"] / _data[month][0]["total_images"] if _data[month][0]["total_images"] > 0 else 0
            num_transaction_imei = 0
            num_transaction_invoice = 0
            for day in _data[month][1].keys():
                num_transaction_imei += _data[month][1][day]["usage"].get("imei", 0)
                num_transaction_invoice += _data[month][1][day]["usage"].get("invoice", 0)
                for key in _data[month][1][day]["average_processing_time"].keys():
                    _data[month][1][day]["average_processing_time"][key] = _data[month][1][day]["average_processing_time"][key]()
                for key in _data[month][1][day]["file_average_processing_time"].keys():
                    _data[month][1][day]["file_average_processing_time"][key] = _data[month][1][day]["file_average_processing_time"][key]()

                for key in settings.FIELD:
                    _data[month][1][day]["average_accuracy_rate"][key] = _data[month][1][day]["average_accuracy_rate"][key]()
                    for accuracy_type in ["feedback_accuracy", "reviewed_accuracy"]:
                        _data[month][1][day][accuracy_type][key] = _data[month][1][day][accuracy_type][key]()
_data[month][1][day]["review_progress"].count(1)/(_data[month][1][day]["review_progress"].count(0)+ _data[month][1][day]["review_progress"].count(1)) if (_data[month][1][day]["review_progress"].count(0)+ _data[month][1][day]["review_progress"].count(1)) >0 else 0 _data[month][1][day].pop("report_files") _data[month][1][day]["images_quality"]["successful_percent"] = _data[month][1][day]["images_quality"]["successful"]/_data[month][1][day]["total_images"] if _data[month][1][day]["total_images"] > 0 else 0 _data[month][1][day]["images_quality"]["bad_percent"] = _data[month][1][day]["images_quality"]["bad"]/_data[month][1][day]["total_images"] if _data[month][1][day]["total_images"] > 0 else 0 _data[month][0]["usage"]["imei"] = num_transaction_imei _data[month][0]["usage"]["invoice"] = num_transaction_invoice _data[month][0]["usage"]["total_images"] = num_transaction_invoice + num_transaction_imei for key in _data[month][0]["average_processing_time"].keys(): _data[month][0]["average_processing_time"][key] = _data[month][0]["average_processing_time"][key]() for key in _data[month][0]["file_average_processing_time"].keys(): _data[month][0]["file_average_processing_time"][key] = _data[month][0]["file_average_processing_time"][key]() for key in settings.FIELD: _data[month][0]["average_accuracy_rate"][key] = _data[month][0]["average_accuracy_rate"][key]() for accuracy_type in ["feedback_accuracy", "reviewed_accuracy"]: _data[month][0][accuracy_type][key] = _data[month][0][accuracy_type][key]() _data[month][0]["review_progress"] = _data[month][0]["review_progress"].count(1)/(_data[month][0]["review_progress"].count(0)+ _data[month][0]["review_progress"].count(1)) if (_data[month][0]["review_progress"].count(0)+ _data[month][0]["review_progress"].count(1)) >0 else 0 return _data, _report class MonthReportAccumulate: def __init__(self): self.month = None self.total = { 'subs': "+", 'extraction_date': "Subtotal ()", 'total_images': 0, 'images_quality': { 'successful': 0, 'successful_percent': 0, 'bad': 0, 'bad_percent': 0 }, 'average_accuracy_rate': { 'imei': IterAvg(), 'purchase_date': IterAvg(), 'retailer_name': IterAvg(), 'invoice_no': IterAvg() }, 'average_processing_time': { 'imei': IterAvg(), 'invoice': IterAvg() }, 'usage': { 'imei':0, 'invoice': 0 } } self.data = [] self.data_format = { 'subs': "", 'extraction_date': "", 'num_imei': 0, 'num_invoice': 0, 'total_images': 0, 'images_quality': { 'successful': 0, 'successful_percent': 0, 'bad': 0, 'bad_percent': 0 }, 'average_accuracy_rate': { 'imei': 0, 'purchase_date': 0, 'retailer_name': 0, 'invoice_no': 0 }, 'average_processing_time': { 'imei': 0, 'invoice': 0 }, 'usage': { 'imei':0, 'invoice': 0 } }, def accumulate(self, report): self.total["total_images"] += report.number_images self.total["images_quality"]["successful"] += report.number_images - report.number_bad_images self.total["images_quality"]["bad"] += report.number_bad_images if sum([report.reviewed_accuracy[x] for x in report.reviewed_accuracy.keys() if "_count" not in x]) > 0 : self.total["average_accuracy_rate"]["imei"].add_avg(report.reviewed_accuracy.get("imei_number", 0), report.reviewed_accuracy.get("imei_number_count", 0)) self.total["average_accuracy_rate"]["purchase_date"].add_avg(report.reviewed_accuracy.get("purchase_date", 0), report.reviewed_accuracy.get("purchase_date_count", 0)) self.total["average_accuracy_rate"]["retailer_name"].add_avg(report.reviewed_accuracy.get("retailername", 0), report.reviewed_accuracy.get("retailername_count", 0)) 
self.total["average_accuracy_rate"]["invoice_no"].add_avg(report.reviewed_accuracy.get("invoice_no", 0), report.reviewed_accuracy.get("invoice_no_count", 0)) elif sum([ report.feedback_accuracy[x] for x in report.feedback_accuracy.keys() if "_count" not in x]) > 0: self.total["average_accuracy_rate"]["imei"].add_avg(report.feedback_accuracy.get("imei_number", 0), report.feedback_accuracy.get("imei_number_count", 0)) self.total["average_accuracy_rate"]["purchase_date"].add_avg(report.feedback_accuracy.get("purchase_date", 0), report.feedback_accuracy.get("purchase_date_count", 0)) self.total["average_accuracy_rate"]["retailer_name"].add_avg(report.feedback_accuracy.get("retailername", 0), report.feedback_accuracy.get("retailername_count", 0)) self.total["average_accuracy_rate"]["invoice_no"].add_avg(report.feedback_accuracy.get("invoice_no", 0), report.feedback_accuracy.get("invoice_no_count", 0)) self.total["average_processing_time"]["imei"].add_avg(report.average_OCR_time.get("imei", 0), report.average_OCR_time.get("imei_count", 0)) if report.average_OCR_time else 0 self.total["average_processing_time"]["invoice"].add_avg(report.average_OCR_time.get("invoice", 0), report.average_OCR_time.get("invoice_count", 0)) if report.average_OCR_time else 0 self.total["usage"]["imei"] += report.number_imei_transaction self.total["usage"]["invoice"] += report.number_invoice_transaction def add(self, report): report_month = report.start_at.month if self.month is None: self.month = report_month self.total["extraction_date"] = f"Subtotal ({self.month})" elif self.month != report_month: self.total["images_quality"]["successful_percent"] += self.total["images_quality"]["successful"]/self.total["total_images"] self.total["images_quality"]["bad_percent"] += self.total["images_quality"]["bad"]/self.total["total_images"] return False # Reports from a different month, stop accumulating # accumulate fields new_data = copy.deepcopy(self.data_format)[0] new_data["num_imei"] = report.number_imei new_data["subs"] = report.subsidiary new_data["extraction_date"] = report.start_at new_data["num_invoice"] = report.number_invoice new_data["total_images"] = report.number_images new_data["images_quality"]["successful"] = report.number_images - report.number_bad_images new_data["images_quality"]["bad"] = report.number_bad_images report.reviewed_accuracy = {} if report.reviewed_accuracy is None else report.reviewed_accuracy report.feedback_accuracy = {} if report.feedback_accuracy is None else report.feedback_accuracy if sum([ report.reviewed_accuracy[x] for x in report.reviewed_accuracy.keys() if "_count" not in x]): new_data["average_accuracy_rate"]["imei"] = report.reviewed_accuracy.get("imei_number", None) new_data["average_accuracy_rate"]["purchase_date"] = report.reviewed_accuracy.get("purchase_date", None) new_data["average_accuracy_rate"]["retailer_name"] = report.reviewed_accuracy.get("retailername", None) new_data["average_accuracy_rate"]["invoice_no"] = report.reviewed_accuracy.get("invoice_no", None) elif sum([ report.feedback_accuracy[x] for x in report.feedback_accuracy.keys() if "_count" not in x]): new_data["average_accuracy_rate"]["imei"] = report.feedback_accuracy.get("imei_number", None) new_data["average_accuracy_rate"]["purchase_date"] = report.feedback_accuracy.get("purchase_date", None) new_data["average_accuracy_rate"]["retailer_name"] = report.feedback_accuracy.get("retailername", None) new_data["average_accuracy_rate"]["invoice_no"] = report.feedback_accuracy.get("invoice_no", None) 
new_data["average_processing_time"]["imei"] = report.average_OCR_time.get("imei", 0) if report.average_OCR_time else 0 new_data["average_processing_time"]["invoice"] = report.average_OCR_time.get("invoice", 0) if report.average_OCR_time else 0 new_data["usage"]["imei"] = report.number_imei_transaction new_data["usage"]["invoice"] = report.number_invoice_transaction new_data["images_quality"]["successful_percent"] += new_data["images_quality"]["successful"]/new_data["total_images"] if new_data["total_images"] else 0 new_data["images_quality"]["bad_percent"] += new_data["images_quality"]["bad"]/new_data["total_images"] if new_data["total_images"] else 0 self.data.append(new_data) self.accumulate(report) return True def clear(self): self.month = None self.total = { 'subs': "+", 'extraction_date': "Subtotal ()", 'total_images': 0, 'images_quality': { 'successful': 0, 'successful_percent': 0, 'bad': 0, 'bad_percent': 0 }, 'average_accuracy_rate': { 'imei': IterAvg(), 'purchase_date': IterAvg(), 'retailer_name': IterAvg() }, 'average_processing_time': { 'imei': IterAvg(), 'invoice': IterAvg() }, 'usage': { 'imei':0, 'invoice': 0 } } self.data = [] def __call__(self): total = copy.deepcopy(self.total) total["images_quality"]["successful_percent"] = total["images_quality"]["successful"]/total["total_images"] if total["total_images"] else 0 total["images_quality"]["bad_percent"] = total["images_quality"]["bad"]/total["total_images"] if total["total_images"] else 0 total["average_accuracy_rate"]["imei"] = total["average_accuracy_rate"]["imei"]() total["average_accuracy_rate"]["purchase_date"] = total["average_accuracy_rate"]["purchase_date"]() total["average_accuracy_rate"]["retailer_name"] = total["average_accuracy_rate"]["retailer_name"]() total["average_processing_time"]["imei"] = total["average_processing_time"]["imei"]() total["average_processing_time"]["invoice"] = total["average_processing_time"]["invoice"]() return self.month, self.data, total class IterAvg: def __init__(self, name="default"): self.name = name self.avg = 0 self.count = 0 def add(self, values): """ Args: values (list[float]): """ values = [x for x in values if x is not None] if len(values) == 0: return self.avg = (self.avg*self.count + sum(values))/(self.count+len(values)) self.count += len(values) def add_avg(self, avg, count): if avg is None or count is None or count == 0: return self.count += count self.avg = (self.avg*(self.count-count) + avg*count)/(self.count) def __call__(self): if self.count == 0: return None return self.avg def validate_feedback_file(feedback, predict): if feedback: imei_feedback = feedback.get("imei_number", []) imei_feedback = [x for x in imei_feedback if x != ""] num_imei_feedback = len(imei_feedback) num_imei_predict = len(predict.get("imei_number", [])) if num_imei_feedback != num_imei_predict: return False feedback["imei_number"] = imei_feedback return True def first_of_list(the_list): if not the_list: return None return the_list[0] def _feedback_invoice_no_exist(feedback_result): if feedback_result is None: return True if not isinstance(feedback_result, dict): return True invoice_no = feedback_result.get("invoice_no", None) if invoice_no in ["", [], None]: return False else: return True def extract_report_detail_list(report_detail_list, lower=False, in_percent=True): data = [] for report_file in report_detail_list: data.append({ "Subs": report_file.subsidiary, "Request ID": report_file.correspond_request_id, "OCR Extraction Date": 
            "OCR Extraction Date": format_datetime_for_report_detail(report_file.correspond_request_created_at),
            "Redemption Number": report_file.correspond_redemption_id,
            "Image type": report_file.doc_type,
            "IMEI_user submitted": first_of_list(report_file.feedback_result.get("imei_number", [None])) if report_file.feedback_result else None,
            "IMEI_OCR retrieved": first_of_list(report_file.predict_result.get("imei_number", [None])),
            "IMEI Revised": first_of_list(report_file.reviewed_result.get("imei_number", [None])) if report_file.reviewed_result else None,
            "IMEI1 Accuracy": first_of_list(report_file.feedback_accuracy.get("imei_number", [None])),
            "Invoice_Number_User": report_file.feedback_result.get("invoice_no", None) if report_file.feedback_result else None,
            "Invoice_Number_OCR": report_file.predict_result.get("invoice_no", None),
            "Invoice_Number Revised": report_file.reviewed_result.get("invoice_no", None) if report_file.reviewed_result else None,
            "Invoice_Number_Accuracy": first_of_list(report_file.feedback_accuracy.get("invoice_no", [None])) if _feedback_invoice_no_exist(report_file.feedback_result) else None,
            "Invoice_Purchase Date_Consumer": report_file.feedback_result.get("purchase_date", None) if report_file.feedback_result else None,
            "Invoice_Purchase Date_OCR": format_purchase_date_ocr_for_report(report_file.predict_result.get("purchase_date", [])),
            "Invoice_Purchase Date Revised": report_file.reviewed_result.get("purchase_date", None) if report_file.reviewed_result else None,
            "Invoice_Purchase Date Accuracy": first_of_list(report_file.feedback_accuracy.get("purchase_date", [None])),
            "Invoice_Retailer_Consumer": report_file.feedback_result.get("retailername", None) if report_file.feedback_result else None,
            "Invoice_Retailer_OCR": report_file.predict_result.get("retailername", None),
            "Invoice_Retailer Revised": report_file.reviewed_result.get("retailername", None) if report_file.reviewed_result else None,
            "Invoice_Retailer Accuracy": first_of_list(report_file.feedback_accuracy.get("retailername", [None])),
            "OCR Image Accuracy": report_file.acc,
            "OCR Image Speed (seconds)": report_file.time_cost,
            "Is Reviewed": report_file.is_reviewed,
            "Bad Image Reasons": report_file.bad_image_reason,
            "Countermeasures": report_file.counter_measures,
            "IMEI_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("imei_number", [None])),
            "Purchase Date_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("purchase_date", [None])),
            "Retailer_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("retailername", [None])),
            "Invoice_Number_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("invoice_no", [None]))
        })

    if lower:
        for i, dat in enumerate(data):
            keys = list(dat.keys())
            for old_key in keys:
                data[i][old_key.lower().replace(" ", "_")] = data[i].pop(old_key)
    if in_percent:
        for i, dat in enumerate(data):
            keys = [x for x in list(dat.keys()) if "accuracy" in x.lower()]
            for key in keys:
                if data[i][key]:
                    data[i][key] = data[i][key] * 100
    return data


def format_datetime_for_report_detail(ocr_extraction_date):
    naived_time = timezone.make_naive(ocr_extraction_date)
    ocr_extraction_date = timezone.make_aware(value=naived_time, timezone=timezone.get_current_timezone())
    format_to_date = '%Y-%m-%d'
    return ocr_extraction_date.strftime(format_to_date)


def format_purchase_date_ocr_for_report(ocr_result):
    return ", ".join(ocr_result)


def count_transactions(start_date, end_date, subsidiary="all"):
    base_query = Q(created_at__range=(start_date, end_date))
base_query &= Q(is_test_request=False) if subsidiary and subsidiary.lower().replace(" ", "") not in settings.SUB_FOR_BILLING: base_query &= Q(redemption_id__startswith=subsidiary) transaction_att = {} total_transaction_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at') for request in total_transaction_requests: if not request.doc_type: continue doc_types = request.doc_type.split(",") for doc_type in doc_types: if transaction_att.get(doc_type, None) == None: transaction_att[doc_type] = 1 else: transaction_att[doc_type] += 1 if not transaction_att.get("request", None): transaction_att["request"] = 1 else: transaction_att["request"] += 1 return transaction_att def convert_datetime_format(date_string: str, is_gt=False) -> str: # pattern_date_string = "2023-02-28" input_format = "%Y-%m-%d" output_format = "%d/%m/%Y" # Validate the input date string format pattern = r"\d{4}-\d{2}-\d{2}" if re.match(pattern, date_string): # Convert the date string to a datetime object date_object = datetime.strptime(date_string, input_format) # Convert the datetime object to the desired output format formatted_date = date_object.strftime(output_format) return formatted_date return date_string def predict_result_to_ready(result): dict_result = {"retailername": "", "sold_to_party": "", "invoice_no": "", "purchase_date": [], "imei_number": [],} if not result: return dict_result dict_result["retailername"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}])[0].get("value", None) dict_result["sold_to_party"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}])[1].get("value", None) dict_result["purchase_date"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}])[2].get("value", []) dict_result["imei_number"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[3].get("value", []) dict_result["invoice_no"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}, {}])[4].get("value", None) return dict_result def update_temp_accuracy(accuracy, acc, keys): for key in keys: accuracy[key].add(acc[key]) return accuracy def _accuracy_calculate_formatter(inference, target): """_summary_ format type of inference, and target from str/None to List of str/None. Make both list inference and target to be the same length. 
""" if not isinstance(inference, list): # inference = [] if inference is None else [inference] inference = [inference] if not isinstance(target, list): # target = [] if target is None else [target] target = [target] length = max(len(target), len(inference)) target = target + (length - len(target))*[target[0]] if len(target) > 0 else target + (length - len(target))*[None] inference = inference + (length - len(inference))*[None] return inference, target def _acc_will_be_ignored(key_name, _target): is_optional_key = key_name in OPTIONAL_KEYS is_empty_target = _target in [[], None, ''] if is_optional_key and is_empty_target: return True else: return False def calculate_accuracy(key_name: str, inference: Dict[str, Union[str, List]], target: Dict[str, Union[str, List]], type: str, sub: str): """_summary_ NOTE: This has been changed to return accuracy = None if Args: key_name (string): key to calculate accuracy on, ex: retailername inference (dict): result from ocr, refined to align with the target down below target (dict): result of type is_optional_keyname: default is set to False (which mean this is not an optional keyname) currently we have invoice_no is an optional keyname. """ acc = [] data = [] if not target or not inference: return acc, data _inference = inference[key_name] _target = target[key_name] # _will_acc_be_ignored = _acc_will_be_ignored(key_name, _target) _inference, _target = _accuracy_calculate_formatter(_inference, _target) for i, v in enumerate(_inference): # TODO: target[i] is None, "" x = post_processing_str(key_name, _inference[i], is_gt=False, sub=sub) y = post_processing_str(key_name, _target[i], is_gt=True, sub=sub) score = eval_ocr_metric( [x], [y], metric=[ "one_minus_ned", # "line_acc_ignore_case_symbol", # "line_acc", # "one_minus_ned_word", ]) acc.append(list(score.values())[0]) data.append([x, y]) return acc, data def calculate_avg_accuracy(acc, type, keys=[]): acc_list = [] for key in keys: this_acc = acc.get(type, {}).get(key, []) if len(this_acc) > 0: this_acc = [max(this_acc)] acc_list += this_acc acc_list = [x for x in acc_list if x is not None] return sum(acc_list)/len(acc_list) if len(acc_list) > 0 else None def acc_maximize_list_values(acc): pos = {} for k in acc.keys(): pos[k] = 0 if isinstance(acc[k], list) and len(acc[k]) > 0: acc[k] = [max(acc[k])] pos[k] = acc[k].index(acc[k][0]) return acc, pos def create_billing_data(subscription_requests): billing_data = [] for request in subscription_requests: if request.status != 200: continue images = SubscriptionRequestFile.objects.filter(request=request, file_category=FileCategory.Origin.value) for image in images: if not image.doc_type: _doc_type = image.file_name.split("_")[1] if _doc_type in ["imei", "invoice"]: image.doc_type = _doc_type image.save() else: _doc_type = image.doc_type doc_type = "SN/IMEI" if _doc_type == "imei" else "Invoice" _sub = "" redemption_id = "" if request.redemption_id: _sub = map_subsidiary_short_to_long(request.redemption_id[:2]) redemption_id = request.redemption_id format_to_time = '%Y-%m-%d %H:%M' format_to_date = '%Y-%m-%d' format_to_month = '%B %Y' naive_created_time = timezone.make_naive(request.created_at) rq_created_at = timezone.make_aware(value=naive_created_time, timezone=timezone.get_current_timezone()) rq_month = rq_created_at.strftime(format_to_month) rq_date = rq_created_at.strftime(format_to_date) rq_time = rq_created_at.strftime(format_to_time) billing_data.append({ "request_month": rq_month, "subsidiary": _sub, "image_type": doc_type, "redemption_number": 
                "redemption_number": redemption_id,
                "request_id": request.request_id,
                "request_date": rq_date,
                "request_time_(utc)": rq_time
            })
    return billing_data


def avg_dict(data):
    values = []
    for k, v in data.items():
        if isinstance(v, list):
            values += v
    return sum(values) / len(values) if len(values) > 0 else -1


def calculate_a_request(report, request):
    def review_status_map(input):
        review_status = {-1: "Not Required", 0: "No", 1: "Yes"}
        return review_status.get(input, "N/A")

    atts = []
    request_att = {
        "acc": {
            "feedback": {"imei_number": [], "purchase_date": [], "retailername": [], "sold_to_party": [], "invoice_no": []},
            "reviewed": {"imei_number": [], "purchase_date": [], "retailername": [], "sold_to_party": [], "invoice_no": []},
            "acumulated": {"imei_number": [], "purchase_date": [], "retailername": [], "sold_to_party": [], "invoice_no": []},
        },
        "err": [],
        "time_cost": {"imei": [], "invoice": []},
        "total_images": 0,
        "bad_images": 0,
        "bad_image_list": [],
        "is_reviewed": [],  # -1: No need to review, 0: Not reviewed, 1: Reviewed
    }
    images = SubscriptionRequestFile.objects.filter(request=request, file_category=FileCategory.Origin.value)
    report_files = []
    for image in images:
        status, att = calculate_subcription_file(image, request.subsidiary)
        att["acc"]["feedback"], fb_max_indexes = acc_maximize_list_values(att["acc"]["feedback"])
        att["acc"]["reviewed"], rv_max_indexes = acc_maximize_list_values(att["acc"]["reviewed"])
        _att = copy.deepcopy(att)  # deep copy right here to avoid removing acc for bad images in the next steps
        fb_avg_acc = avg_dict(att["acc"]["feedback"])
        rv_avg_acc = avg_dict(att["acc"]["reviewed"])
        image.is_required = fb_avg_acc < settings.NEED_REVIEW

        if image.processing_time < 0:
            continue
        if status != 200:
            continue
        atts.append(att)
        image.feedback_accuracy = att["acc"]["feedback"]  # dict {key: [values]}
        image.is_bad_image_quality = att["is_bad_image"]  # is_bad_image means avg_acc < settings.BAD_THRESHOLD
        if len(att["normalized_data"]["feedback"].get("purchase_date", [])) > 0:
            image.predict_result["purchase_date"] = [value_pair[0] for value_pair in att["normalized_data"]["feedback"]["purchase_date"]]
            image.feedback_result["purchase_date"] = att["normalized_data"]["feedback"]["purchase_date"][fb_max_indexes["purchase_date"]][1]
        if len(att["normalized_data"]["reviewed"].get("purchase_date", [])) > 0:
            image.predict_result["purchase_date"] = [value_pair[0] for value_pair in att["normalized_data"]["reviewed"]["purchase_date"]]
            image.reviewed_result["purchase_date"] = att["normalized_data"]["reviewed"]["purchase_date"][rv_max_indexes["purchase_date"]][1]

        request_att["is_reviewed"].append(att["is_reviewed"])
        if att["is_reviewed"] != 1:
            att["acc"]["reviewed"] = {}
            reviewed_result = {}
            reason = None
            counter_measure = None
        else:
            reviewed_result = image.reviewed_result
            reason = image.reason
            counter_measure = image.counter_measures

        if att["is_bad_image"] or image.reason in settings.ACC_EXCLUDE_RESEASONS:
            if image.reason in settings.ACC_EXCLUDE_RESEASONS:
                _att["avg_acc"] = None
                for t in _att["acc"].keys():
                    for k in _att["acc"][t].keys():
                        _att["acc"][t][k] = []
        else:
            if request_att["time_cost"].get(image.doc_type, None):
                request_att["time_cost"][image.doc_type].append(image.processing_time)
            else:
                request_att["time_cost"][image.doc_type] = [image.processing_time]

        new_report_file = ReportFile(
            report=report,
            subsidiary=_sub,
            correspond_request_id=request.request_id,
            correspond_request_created_at=request.created_at,
            correspond_redemption_id=request.redemption_id,
            doc_type=image.doc_type,
            predict_result=image.predict_result,
            feedback_result=image.feedback_result,
            reviewed_result=reviewed_result,
feedback_accuracy=_att["acc"]["feedback"], reviewed_accuracy=_att["acc"]["reviewed"], acc=_att["avg_acc"], is_bad_image=att["is_bad_image"], is_reviewed= review_status_map(att["is_reviewed"]), time_cost=image.processing_time, bad_image_reason=reason, counter_measures=counter_measure, error="|".join(att["err"]), review_status=att["is_reviewed"], ) report_files.append(new_report_file) if att["is_bad_image"]: request_att["bad_image_list"].append(image.file_name) _att["avg_acc"] = None for t in _att["acc"].keys(): for k in _att["acc"][t].keys(): _att["acc"][t][k] = [] try: request_att["acc"]["feedback"]["imei_number"] += _att["acc"]["feedback"]["imei_number"] request_att["acc"]["feedback"]["purchase_date"] += _att["acc"]["feedback"]["purchase_date"] request_att["acc"]["feedback"]["retailername"] += _att["acc"]["feedback"]["retailername"] request_att["acc"]["feedback"]["sold_to_party"] += _att["acc"]["feedback"]["sold_to_party"] request_att["acc"]["feedback"]["invoice_no"] += _att["acc"]["feedback"]["invoice_no"] request_att["acc"]["reviewed"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if _att["is_reviewed"]==1 else [] request_att["acc"]["reviewed"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if _att["is_reviewed"]==1 else [] request_att["acc"]["reviewed"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if _att["is_reviewed"]==1 else [] request_att["acc"]["reviewed"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if _att["is_reviewed"]==1 else [] request_att["acc"]["reviewed"]["invoice_no"] += _att["acc"]["reviewed"]["invoice_no"] if _att["is_reviewed"]==1 else [] request_att["acc"]["acumulated"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if _att["acc"]["reviewed"]["imei_number"] and _att["is_reviewed"]==1 else _att["acc"]["feedback"]["imei_number"] request_att["acc"]["acumulated"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if _att["acc"]["reviewed"]["purchase_date"] and _att["is_reviewed"]==1 else _att["acc"]["feedback"]["purchase_date"] request_att["acc"]["acumulated"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if _att["acc"]["reviewed"]["retailername"] and _att["is_reviewed"]==1 else _att["acc"]["feedback"]["retailername"] request_att["acc"]["acumulated"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if _att["acc"]["reviewed"]["sold_to_party"] and _att["is_reviewed"]==1 else _att["acc"]["feedback"]["sold_to_party"] request_att["acc"]["acumulated"]["invoice_no"] += _att["acc"]["reviewed"]["invoice_no"] if _att["acc"]["reviewed"]["invoice_no"] and _att["is_reviewed"]==1 else _att["acc"]["feedback"]["invoice_no"] if image.reason not in settings.ACC_EXCLUDE_RESEASONS: request_att["bad_images"] += int(_att["is_bad_image"]) request_att["total_images"] += 1 request_att["err"] += _att["err"] except Exception as e: logger.error(f"Failed to calculate request: {request.request_id} - request_file: {image.file_name} because of {e}") continue return request_att, report_files, atts def calculate_subcription_file(subcription_request_file, subsidiary): att = {"acc": {"feedback": {}, "reviewed": {}}, "normalized_data": {"feedback": {}, "reviewed": {}}, "err": [], "is_bad_image": False, "avg_acc": None, "is_reviewed": -1, # -1: No need to review, 0: Not reviewed, 1: Reviewed } if not subcription_request_file.predict_result: return 400, att inference_result = copy.deepcopy(subcription_request_file.predict_result) feedback_result = copy.deepcopy(subcription_request_file.feedback_result) 
    reviewed_result = copy.deepcopy(subcription_request_file.reviewed_result)

    accuracy_keys_for_this_image = settings.FIELDS_BY_SUB.get(subsidiary, settings.FIELDS_BY_SUB["default"]).get(subcription_request_file.doc_type, [])
    for key_name in VALID_KEYS:
        att["acc"]["feedback"][key_name] = []
        att["normalized_data"]["feedback"][key_name] = []
        att["acc"]["reviewed"][key_name] = []
        att["normalized_data"]["reviewed"][key_name] = []

    for key_name in accuracy_keys_for_this_image:
        try:
            att["acc"]["feedback"][key_name], att["normalized_data"]["feedback"][key_name] = calculate_accuracy(key_name, inference_result, feedback_result, "feedback", sub=subcription_request_file.request.subsidiary)
            att["acc"]["reviewed"][key_name], att["normalized_data"]["reviewed"][key_name] = calculate_accuracy(key_name, inference_result, reviewed_result, "reviewed", sub=subcription_request_file.request.subsidiary)
        except Exception as e:
            att["err"].append(str(e))

    subcription_request_file.feedback_accuracy = att["acc"]["feedback"]
    subcription_request_file.reviewed_accuracy = att["acc"]["reviewed"]

    avg_reviewed = calculate_avg_accuracy(att["acc"], "reviewed", VALID_KEYS)
    avg_feedback = calculate_avg_accuracy(att["acc"], "feedback", VALID_KEYS)

    if avg_feedback is not None or avg_reviewed is not None:
        avg_acc = 0
        if avg_feedback is not None:
            avg_acc = avg_feedback
            if avg_feedback < settings.NEED_REVIEW:
                att["is_reviewed"] = 0
            else:
                att["is_reviewed"] = -1
        if avg_reviewed is not None and att["is_reviewed"] != -1:
            avg_acc = avg_reviewed
            att["is_reviewed"] = 1
        # Workaround: a recorded reason or counter-measure implies the image was manually reviewed
        if (subcription_request_file.reason or subcription_request_file.counter_measures) and att["is_reviewed"] != -1:
            att["is_reviewed"] = 1

        att["avg_acc"] = avg_acc
        if avg_acc < settings.BAD_THRESHOLD:
            att["is_bad_image"] = True
    return 200, att


def mean_list(l):
    l = [x for x in l if x is not None]
    if len(l) == 0:
        return 0
    return sum(l) / len(l)


def shadow_report(report_id, query):
    c_connector.make_a_report_2((report_id, query))
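

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the production report flow): it only
# illustrates how IterAvg folds accuracy values into a running average, how
# calculate_avg_accuracy keeps the best value per field before averaging, and
# what convert_datetime_format returns. It assumes the Django settings this
# module imports are available at import time; the sample numbers are made up.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    acc_imei = IterAvg("imei_number")
    acc_imei.add([1.0, 0.8])            # two IMEI crops from one request
    acc_imei.add_avg(0.9, 2)            # fold in a pre-computed average of 2 values
    print(acc_imei(), acc_imei.count)   # -> 0.9 4

    sample_acc = {"feedback": {"imei_number": [0.7, 1.0], "purchase_date": [0.5]}}
    # per key, only the best value is kept before averaging across keys
    print(calculate_avg_accuracy(sample_acc, "feedback", ["imei_number", "purchase_date"]))  # -> 0.75

    print(convert_datetime_format("2023-02-28"))  # -> 28/02/2023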