diff --git a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py index 9667d2f..cc0a5fb 100755 --- a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py +++ b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py @@ -164,8 +164,8 @@ def create_accuracy_report(report_id, **kwargs): report.number_invoice = time_cost["invoice"].count report.number_bad_images = number_bad_images # FIXME: refactor this data stream for endurability - report.average_OCR_time = {"invoice": time_cost["invoice"](), "imei": time_cost["imei"](), - "invoice_count": time_cost["invoice"].count, "imei_count": time_cost["imei"].count} + report.average_OCR_time = {"invoice": _save_data["report"]["file_average_processing_time"]["invoice"], "imei": _save_data["report"]["file_average_processing_time"]["imei"], + "invoice_count": _save_data["report"]["file_average_processing_time"]["invoice_count"], "imei_count": _save_data["report"]["file_average_processing_time"]["imei_count"]} report.average_OCR_time["invoice"] = 0 if report.average_OCR_time["invoice"] is None else report.average_OCR_time["invoice"] report.average_OCR_time["imei"] = 0 if report.average_OCR_time["imei"] is None else report.average_OCR_time["imei"] diff --git a/cope2n-api/fwd_api/utils/accuracy.py b/cope2n-api/fwd_api/utils/accuracy.py index 8329894..60f6425 100755 --- a/cope2n-api/fwd_api/utils/accuracy.py +++ b/cope2n-api/fwd_api/utils/accuracy.py @@ -10,12 +10,11 @@ from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile, ReportF from ..celery_worker.client_connector import c_connector from ..utils.file import dict2xlsx, save_workbook_file, save_report_to_S3 from ..utils.subsidiary import map_subsidiary_short_to_long +from ..utils.processing_time import backend_cost from django.db.models import Q from django.utils import timezone -import redis from fwd import settings from ..models import SubscriptionRequest, Report, ReportFile -import json from 
typing import Union, List, Dict VALID_KEYS = ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"] @@ -51,6 +50,11 @@ class ReportAccumulateByRequest: 'invoice': IterAvg(), 'avg': IterAvg(), }, + 'file_average_processing_time': { + 'imei': IterAvg(), + 'invoice': IterAvg(), + 'avg': IterAvg(), + }, 'usage': { 'imei':0, 'invoice': 0, @@ -98,6 +102,11 @@ class ReportAccumulateByRequest: 'invoice': IterAvg(), 'avg': IterAvg(), }, + 'file_average_processing_time': { + 'imei': IterAvg(), + 'invoice': IterAvg(), + 'avg': IterAvg(), + }, 'usage': { 'imei': 0, 'invoice': 0, @@ -126,7 +135,7 @@ class ReportAccumulateByRequest: self.report["average_accuracy_rate"]["avg"] = IterAvg() @staticmethod - def update_total(total, report_file): + def update_total(total, report_file, image_avg_cost): # Update total = update total if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS: total["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0 @@ -156,6 +165,12 @@ class ReportAccumulateByRequest: total["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0 total["average_processing_time"]["avg"].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0 + if not total["file_average_processing_time"].get(report_file.doc_type, None): + print(f"[WARM]: Weird doctype: {report_file.doc_type}") + total["file_average_processing_time"][report_file.doc_type] = IterAvg() + total["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0 + total["file_average_processing_time"]["avg"].add_avg(image_avg_cost, 1) if image_avg_cost else 0 + doc_type = "imei" if report_file.doc_type in ["imei", "invoice", "all"]: doc_type = report_file.doc_type @@ -168,7 +183,7 @@ class ReportAccumulateByRequest: return total @staticmethod - def update_month(month, report_file): + def update_month(month, report_file, image_avg_cost): # Update 
month = update month if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS: month["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0 @@ -196,6 +211,12 @@ class ReportAccumulateByRequest: month["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0 month["average_processing_time"]["avg"].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0 + if not month["file_average_processing_time"].get(report_file.doc_type, None): + print(f"[WARM]: Weird doctype: {report_file.doc_type}") + month["file_average_processing_time"][report_file.doc_type] = IterAvg() + month["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0 + month["file_average_processing_time"]["avg"].add_avg(image_avg_cost, 1) if image_avg_cost else 0 + doc_type = "imei" if report_file.doc_type in ["imei", "invoice", "all"]: doc_type = report_file.doc_type @@ -208,7 +229,7 @@ class ReportAccumulateByRequest: return month @staticmethod - def update_day(day_data, report_file): + def update_day(day_data, report_file, image_avg_cost): if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS: day_data["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0 day_data["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0 @@ -235,6 +256,13 @@ class ReportAccumulateByRequest: day_data["average_processing_time"][report_file.doc_type] = IterAvg() day_data["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0 day_data["average_processing_time"]['avg'].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0 + + if not day_data["file_average_processing_time"].get(report_file.doc_type, None): + print(f"[WARM]: Weird doctype: {report_file.doc_type}") + day_data["file_average_processing_time"][report_file.doc_type] = IterAvg() + 
day_data["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0 + day_data["file_average_processing_time"]['avg'].add_avg(image_avg_cost, 1) if image_avg_cost else 0 + day_data["review_progress"].append(report_file.review_status) return day_data @@ -256,7 +284,10 @@ class ReportAccumulateByRequest: self.data[this_month][1][this_day]['num_request'] += 1 self.data[this_month][0]['num_request'] += 1 - + _number_of_file = request.pages + _be_cost = backend_cost(request.created_at, request.ai_inference_start_time) + _ai_cost = request.ai_inference_time + processing_time_by_averaging_request_cost = (_be_cost + _ai_cost)/_number_of_file if _number_of_file > 0 else 0 for report_file in report_files: _report_file = copy.deepcopy(report_file) if _report_file.is_bad_image or _report_file.bad_image_reason in settings.ACC_EXCLUDE_RESEASONS: @@ -266,9 +297,9 @@ class ReportAccumulateByRequest: for t in _report_file.reviewed_accuracy.keys(): _report_file.reviewed_accuracy[t] = [] - self.report = self.update_total(self.report, _report_file) - self.data[this_month][0] = self.update_month(self.data[this_month][0], _report_file) # Update the subtotal within the month - self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], _report_file) # Update the subtotal of the day + self.report = self.update_total(self.report, _report_file, processing_time_by_averaging_request_cost) + self.data[this_month][0] = self.update_month(self.data[this_month][0], _report_file, processing_time_by_averaging_request_cost) # Update the subtotal within the month + self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], _report_file, processing_time_by_averaging_request_cost) # Update the subtotal of the day def count_transactions_within_day(self, date_string): start_date = datetime.strptime(date_string, "%Y%m%d") @@ -299,11 +330,18 @@ class ReportAccumulateByRequest: end_date_with_timezone = 
start_date_with_timezone + timezone.timedelta(days=1) _average_OCR_time = {"invoice": self.data[month][1][day]["average_processing_time"]["invoice"](), "imei": self.data[month][1][day]["average_processing_time"]["imei"](), "invoice_count": self.data[month][1][day]["average_processing_time"]["invoice"].count, "imei_count": self.data[month][1][day]["average_processing_time"]["imei"].count} - _average_OCR_time["invoice"] = 0 if _average_OCR_time["invoice"] is None else _average_OCR_time["invoice"] _average_OCR_time["imei"] = 0 if _average_OCR_time["imei"] is None else _average_OCR_time["imei"] if "avg" not in _average_OCR_time: _average_OCR_time["avg"] = (_average_OCR_time["invoice"]*_average_OCR_time["invoice_count"] + _average_OCR_time["imei"]*_average_OCR_time["imei_count"])/(_average_OCR_time["imei_count"] + _average_OCR_time["invoice_count"]) if (_average_OCR_time["imei_count"] + _average_OCR_time["invoice_count"]) > 0 else None + + _file_average_OCR_time = {"invoice": self.data[month][1][day]["file_average_processing_time"]["invoice"](), "imei": self.data[month][1][day]["file_average_processing_time"]["imei"](), + "invoice_count": self.data[month][1][day]["file_average_processing_time"]["invoice"].count, "imei_count": self.data[month][1][day]["file_average_processing_time"]["imei"].count} + _file_average_OCR_time["invoice"] = 0 if _file_average_OCR_time["invoice"] is None else _file_average_OCR_time["invoice"] + _file_average_OCR_time["imei"] = 0 if _file_average_OCR_time["imei"] is None else _file_average_OCR_time["imei"] + if "avg" not in _file_average_OCR_time: + _file_average_OCR_time["avg"] = (_file_average_OCR_time["invoice"]*_file_average_OCR_time["invoice_count"] + _file_average_OCR_time["imei"]*_file_average_OCR_time["imei_count"])/(_file_average_OCR_time["imei_count"] + _file_average_OCR_time["invoice_count"]) if (_file_average_OCR_time["imei_count"] + _file_average_OCR_time["invoice_count"]) > 0 else None + acumulated_acc = {"feedback_accuracy": {}, 
"reviewed_accuracy": {}} for acc_type in ["feedback_accuracy", "reviewed_accuracy"]: @@ -327,7 +365,7 @@ class ReportAccumulateByRequest: number_imei=report_data[month][1][day]["num_imei"], number_invoice=report_data[month][1][day]["num_invoice"], number_bad_images=report_data[month][1][day]["images_quality"]["bad"], - average_OCR_time=_average_OCR_time, + average_OCR_time=_file_average_OCR_time, number_imei_transaction=report_data[month][1][day]["usage"]["imei"], number_invoice_transaction=report_data[month][1][day]["usage"]["invoice"], feedback_accuracy=acumulated_acc["feedback_accuracy"], @@ -348,6 +386,11 @@ class ReportAccumulateByRequest: # export report data for key in _report["average_processing_time"].keys(): _report["average_processing_time"][key] = _report["average_processing_time"][key]() + _ks = list(_report["file_average_processing_time"].keys()) + for key in _ks: + _report["file_average_processing_time"][key+"_count"] = _report["file_average_processing_time"][key].count + _report["file_average_processing_time"][key] = _report["file_average_processing_time"][key]() + # avg_acc = 0 # count_acc = 0 @@ -375,6 +418,8 @@ class ReportAccumulateByRequest: num_transaction_invoice += _data[month][1][day]["usage"].get("invoice", 0) for key in _data[month][1][day]["average_processing_time"].keys(): _data[month][1][day]["average_processing_time"][key] = _data[month][1][day]["average_processing_time"][key]() + for key in _data[month][1][day]["file_average_processing_time"].keys(): + _data[month][1][day]["file_average_processing_time"][key] = _data[month][1][day]["file_average_processing_time"][key]() for key in settings.FIELD: _data[month][1][day]["average_accuracy_rate"][key] = _data[month][1][day]["average_accuracy_rate"][key]() @@ -391,6 +436,8 @@ class ReportAccumulateByRequest: _data[month][0]["usage"]["total_images"] = num_transaction_invoice + num_transaction_imei for key in _data[month][0]["average_processing_time"].keys(): 
_data[month][0]["average_processing_time"][key] = _data[month][0]["average_processing_time"][key]() + for key in _data[month][0]["file_average_processing_time"].keys(): + _data[month][0]["file_average_processing_time"][key] = _data[month][0]["file_average_processing_time"][key]() for key in settings.FIELD: _data[month][0]["average_accuracy_rate"][key] = _data[month][0]["average_accuracy_rate"][key]() for accuracy_type in ["feedback_accuracy", "reviewed_accuracy"]: diff --git a/cope2n-api/fwd_api/utils/file.py b/cope2n-api/fwd_api/utils/file.py index 619651b..cba080b 100755 --- a/cope2n-api/fwd_api/utils/file.py +++ b/cope2n-api/fwd_api/utils/file.py @@ -502,8 +502,8 @@ def dump_excel_report(input: json): 'N': 'average_accuracy_rate.purchase_date', 'O': 'average_accuracy_rate.retailername', 'P': 'average_accuracy_rate.invoice_no', - 'Q': 'average_processing_time.imei', - 'R': 'average_processing_time.invoice', + 'Q': 'file_average_processing_time.imei', + 'R': 'file_average_processing_time.invoice', 'S': 'review_progress' } start_index = 5 @@ -528,7 +528,7 @@ def dump_excel_report(input: json): ws[key + str(start_index)].fill = fill_yellow if 'average_accuracy_rate' in mapping[key] and type(value) in [int, float] and value < 98: ws[key + str(start_index)].font = font_red - elif 'average_processing_time' in mapping[key] and type(value) in [int, float] and value > 2.0: + elif 'file_average_processing_time' in mapping[key] and type(value) in [int, float] and value > 2.0: ws[key + str(start_index)].font = font_red elif 'bad_percent' in mapping[key] and type(value) in [int, float] and value > 10: ws[key + str(start_index)].font = font_red diff --git a/cope2n-api/fwd_api/utils/processing_time.py b/cope2n-api/fwd_api/utils/processing_time.py new file mode 100644 index 0000000..47ffd32 --- /dev/null +++ b/cope2n-api/fwd_api/utils/processing_time.py @@ -0,0 +1,27 @@ +import numpy as np +import time +from django.utils import timezone + +def queuing_time(start_time, 
ai_profile_individual): + return ai_profile_individual["inference"][0] - start_time + +def inference_cost(ai_profile_individual): + return ai_profile_individual["inference"][1][0] - ai_profile_individual["inference"][0] + +def postprocessing_cost(ai_profile_individual): + return ai_profile_individual["postprocess"][1] - ai_profile_individual["postprocess"][0] + +def cost_profile(start_time, ai_profile_individual): + result = {"queue": queuing_time(start_time, ai_profile_individual), + "inference": inference_cost(ai_profile_individual), + "postprocessing":postprocessing_cost(ai_profile_individual)} + return result + +def backend_cost(created_at, ai_start_time): + """ + Args: + created_at: datetime : timezone-aware django datetime, example: 2024-04-01 16:16:18.344 +0700 + ai_start_time: float : timestamp + """ + received = created_at.timestamp() + return ai_start_time - received \ No newline at end of file diff --git a/cope2n-fe/src/components/report-detail/report-overview-table.tsx b/cope2n-fe/src/components/report-detail/report-overview-table.tsx index b572872..8536d39 100644 --- a/cope2n-fe/src/components/report-detail/report-overview-table.tsx +++ b/cope2n-fe/src/components/report-detail/report-overview-table.tsx @@ -329,8 +329,8 @@ const ReportOverViewTable: React.FC = ({ purchaseDateAAR: item.average_accuracy_rate.purchase_date, retailerNameAAR: item.average_accuracy_rate.retailername, invoiceNumberAAR: item.average_accuracy_rate.invoice_no, - snImeiAPT: item.average_processing_time.imei, - invoiceAPT: item.average_processing_time.invoice, + snImeiAPT: item.file_average_processing_time.imei, + invoiceAPT: item.file_average_processing_time.invoice, snImeiTC: item.usage.imei, invoiceTC: item.usage.invoice, reviewProgress: item.review_progress,