Merge pull request #141 from SDSRV-IDP/BE/processing_time

Be/processing time
Phan Thành Trung 2024-06-24 16:57:25 +07:00 committed by GitHub Enterprise
commit b2cebf192f
5 changed files with 92 additions and 18 deletions

View File

@@ -164,8 +164,8 @@ def create_accuracy_report(report_id, **kwargs):
     report.number_invoice = time_cost["invoice"].count
     report.number_bad_images = number_bad_images
     # FIXME: refactor this data stream for endurability
-    report.average_OCR_time = {"invoice": time_cost["invoice"](), "imei": time_cost["imei"](),
-                               "invoice_count": time_cost["invoice"].count, "imei_count": time_cost["imei"].count}
+    report.average_OCR_time = {"invoice": _save_data["report"]["file_average_processing_time"]["invoice"], "imei": _save_data["report"]["file_average_processing_time"]["imei"],
+                               "invoice_count": _save_data["report"]["file_average_processing_time"]["invoice_count"], "imei_count": _save_data["report"]["file_average_processing_time"]["imei_count"]}
     report.average_OCR_time["invoice"] = 0 if report.average_OCR_time["invoice"] is None else report.average_OCR_time["invoice"]
     report.average_OCR_time["imei"] = 0 if report.average_OCR_time["imei"] is None else report.average_OCR_time["imei"]
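With this hunk the report keeps its old `average_OCR_time` field name but is now filled from the accumulator's exported per-file averages instead of the raw `time_cost` counters. A minimal sketch of the snapshot shape this code assumes; the `*_count` keys come from the `key + "_count"` export loop added to `get_data()` later in this PR, and the numbers are illustrative only:

```python
# Hypothetical accumulator export consumed by create_accuracy_report (values made up).
_save_data = {
    "report": {
        "file_average_processing_time": {
            "invoice": 1.8,        # mean per-file cost (seconds) for invoice files
            "imei": 0.9,           # mean per-file cost (seconds) for imei files
            "avg": 1.2,            # overall mean across doc types
            "invoice_count": 40,   # samples behind the invoice mean
            "imei_count": 93,      # samples behind the imei mean
            "avg_count": 133,
        }
    }
}
```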

View File

@@ -10,12 +10,11 @@ from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile, ReportF
 from ..celery_worker.client_connector import c_connector
 from ..utils.file import dict2xlsx, save_workbook_file, save_report_to_S3
 from ..utils.subsidiary import map_subsidiary_short_to_long
+from ..utils.processing_time import backend_cost
 from django.db.models import Q
 from django.utils import timezone
-import redis
 from fwd import settings
 from ..models import SubscriptionRequest, Report, ReportFile
-import json
 from typing import Union, List, Dict

 VALID_KEYS = ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"]
@@ -51,6 +50,11 @@ class ReportAccumulateByRequest:
                 'invoice': IterAvg(),
                 'avg': IterAvg(),
             },
+            'file_average_processing_time': {
+                'imei': IterAvg(),
+                'invoice': IterAvg(),
+                'avg': IterAvg(),
+            },
             'usage': {
                 'imei':0,
                 'invoice': 0,
@@ -98,6 +102,11 @@ class ReportAccumulateByRequest:
                 'invoice': IterAvg(),
                 'avg': IterAvg(),
             },
+            'file_average_processing_time': {
+                'imei': IterAvg(),
+                'invoice': IterAvg(),
+                'avg': IterAvg(),
+            },
             'usage': {
                 'imei': 0,
                 'invoice': 0,
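`IterAvg` itself is not part of this diff. A minimal sketch of an incremental averager with the interface the new `file_average_processing_time` buckets and the `update_*` methods rely on (`add_avg(value, count)`, a `.count` attribute, and calling the object to read the mean); this is inferred from usage, not the project's actual implementation:

```python
class IterAvg:
    """Sketch of an incremental weighted mean, inferred from how it is used in this diff."""
    def __init__(self):
        self.avg = None    # running mean, None until the first sample arrives
        self.count = 0     # number of samples folded in so far

    def add_avg(self, value, count=1):
        if value is None:          # defensive: callers already guard with `if value else 0`
            return
        if self.avg is None:
            self.avg, self.count = value, count
            return
        total = self.avg * self.count + value * count
        self.count += count
        self.avg = total / self.count

    def __call__(self):
        return self.avg            # may be None when nothing was accumulated
```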
@@ -126,7 +135,7 @@ class ReportAccumulateByRequest:
         self.report["average_accuracy_rate"]["avg"] = IterAvg()

     @staticmethod
-    def update_total(total, report_file):
+    def update_total(total, report_file, image_avg_cost):
         # Update total = update total
         if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS:
             total["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
@@ -156,6 +165,12 @@ class ReportAccumulateByRequest:
         total["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
         total["average_processing_time"]["avg"].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0

+        if not total["file_average_processing_time"].get(report_file.doc_type, None):
+            print(f"[WARM]: Weird doctype: {report_file.doc_type}")
+            total["file_average_processing_time"][report_file.doc_type] = IterAvg()
+        total["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0
+        total["file_average_processing_time"]["avg"].add_avg(image_avg_cost, 1) if image_avg_cost else 0
+
         doc_type = "imei"
         if report_file.doc_type in ["imei", "invoice", "all"]:
             doc_type = report_file.doc_type
@@ -168,7 +183,7 @@ class ReportAccumulateByRequest:
         return total

     @staticmethod
-    def update_month(month, report_file):
+    def update_month(month, report_file, image_avg_cost):
         # Update month = update month
         if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS:
             month["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
@@ -196,6 +211,12 @@ class ReportAccumulateByRequest:
         month["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
         month["average_processing_time"]["avg"].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0

+        if not month["file_average_processing_time"].get(report_file.doc_type, None):
+            print(f"[WARM]: Weird doctype: {report_file.doc_type}")
+            month["file_average_processing_time"][report_file.doc_type] = IterAvg()
+        month["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0
+        month["file_average_processing_time"]["avg"].add_avg(image_avg_cost, 1) if image_avg_cost else 0
+
         doc_type = "imei"
         if report_file.doc_type in ["imei", "invoice", "all"]:
             doc_type = report_file.doc_type
@@ -208,7 +229,7 @@ class ReportAccumulateByRequest:
         return month

     @staticmethod
-    def update_day(day_data, report_file):
+    def update_day(day_data, report_file, image_avg_cost):
         if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS:
             day_data["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
             day_data["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0
@@ -235,6 +256,13 @@ class ReportAccumulateByRequest:
             day_data["average_processing_time"][report_file.doc_type] = IterAvg()
         day_data["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
         day_data["average_processing_time"]['avg'].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0

+        if not day_data["file_average_processing_time"].get(report_file.doc_type, None):
+            print(f"[WARM]: Weird doctype: {report_file.doc_type}")
+            day_data["file_average_processing_time"][report_file.doc_type] = IterAvg()
+        day_data["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0
+        day_data["file_average_processing_time"]['avg'].add_avg(image_avg_cost, 1) if image_avg_cost else 0
+
         day_data["review_progress"].append(report_file.review_status)
         return day_data
@@ -256,7 +284,10 @@ class ReportAccumulateByRequest:
         self.data[this_month][1][this_day]['num_request'] += 1
         self.data[this_month][0]['num_request'] += 1

+        _number_of_file = request.pages
+        _be_cost = backend_cost(request.created_at, request.ai_inference_start_time)
+        _ai_cost = request.ai_inference_time
+        processing_time_by_averaging_request_cost = (_be_cost + _ai_cost)/_number_of_file if _number_of_file > 0 else 0
         for report_file in report_files:
             _report_file = copy.deepcopy(report_file)
             if _report_file.is_bad_image or _report_file.bad_image_reason in settings.ACC_EXCLUDE_RESEASONS:
@@ -266,9 +297,9 @@ class ReportAccumulateByRequest:
                 for t in _report_file.reviewed_accuracy.keys():
                     _report_file.reviewed_accuracy[t] = []

-            self.report = self.update_total(self.report, _report_file)
-            self.data[this_month][0] = self.update_month(self.data[this_month][0], _report_file) # Update the subtotal within the month
-            self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], _report_file) # Update the subtotal of the day
+            self.report = self.update_total(self.report, _report_file, processing_time_by_averaging_request_cost)
+            self.data[this_month][0] = self.update_month(self.data[this_month][0], _report_file, processing_time_by_averaging_request_cost) # Update the subtotal within the month
+            self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], _report_file, processing_time_by_averaging_request_cost) # Update the subtotal of the day

     def count_transactions_within_day(self, date_string):
         start_date = datetime.strptime(date_string, "%Y%m%d")
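The per-file figure attributed to every `report_file` of a request is the request's backend cost (from reception until AI inference starts) plus its AI inference time, spread evenly across the request's pages. A worked example with made-up numbers; the attribute names follow the diff (`request.pages`, `request.created_at`, `request.ai_inference_start_time`, `request.ai_inference_time`):

```python
# Illustrative timeline for one request.
request_received_ts = 1_719_216_000.0                     # request.created_at.timestamp()
ai_inference_start_time = request_received_ts + 1.5       # AI picked the request up 1.5 s later
ai_inference_time = 4.5                                   # seconds spent inside AI inference
pages = 3                                                 # number of files in the request

_be_cost = ai_inference_start_time - request_received_ts  # what backend_cost() returns: 1.5 s
per_file_cost = (_be_cost + ai_inference_time) / pages if pages > 0 else 0
print(per_file_cost)  # (1.5 + 4.5) / 3 = 2.0 seconds attributed to each file
```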
@@ -299,11 +330,18 @@ class ReportAccumulateByRequest:
         end_date_with_timezone = start_date_with_timezone + timezone.timedelta(days=1)

         _average_OCR_time = {"invoice": self.data[month][1][day]["average_processing_time"]["invoice"](), "imei": self.data[month][1][day]["average_processing_time"]["imei"](),
                              "invoice_count": self.data[month][1][day]["average_processing_time"]["invoice"].count, "imei_count": self.data[month][1][day]["average_processing_time"]["imei"].count}
         _average_OCR_time["invoice"] = 0 if _average_OCR_time["invoice"] is None else _average_OCR_time["invoice"]
         _average_OCR_time["imei"] = 0 if _average_OCR_time["imei"] is None else _average_OCR_time["imei"]
         if "avg" not in _average_OCR_time:
             _average_OCR_time["avg"] = (_average_OCR_time["invoice"]*_average_OCR_time["invoice_count"] + _average_OCR_time["imei"]*_average_OCR_time["imei_count"])/(_average_OCR_time["imei_count"] + _average_OCR_time["invoice_count"]) if (_average_OCR_time["imei_count"] + _average_OCR_time["invoice_count"]) > 0 else None

+        _file_average_OCR_time = {"invoice": self.data[month][1][day]["file_average_processing_time"]["invoice"](), "imei": self.data[month][1][day]["file_average_processing_time"]["imei"](),
+                                  "invoice_count": self.data[month][1][day]["file_average_processing_time"]["invoice"].count, "imei_count": self.data[month][1][day]["file_average_processing_time"]["imei"].count}
+        _file_average_OCR_time["invoice"] = 0 if _file_average_OCR_time["invoice"] is None else _file_average_OCR_time["invoice"]
+        _file_average_OCR_time["imei"] = 0 if _file_average_OCR_time["imei"] is None else _file_average_OCR_time["imei"]
+        if "avg" not in _file_average_OCR_time:
+            _file_average_OCR_time["avg"] = (_file_average_OCR_time["invoice"]*_file_average_OCR_time["invoice_count"] + _file_average_OCR_time["imei"]*_file_average_OCR_time["imei_count"])/(_file_average_OCR_time["imei_count"] + _file_average_OCR_time["invoice_count"]) if (_file_average_OCR_time["imei_count"] + _file_average_OCR_time["invoice_count"]) > 0 else None

         acumulated_acc = {"feedback_accuracy": {},
                           "reviewed_accuracy": {}}
         for acc_type in ["feedback_accuracy", "reviewed_accuracy"]:
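When no "avg" key exists yet, both dicts reconstruct the day-level average as a count-weighted mean of the invoice and imei means. A small numeric check of that formula with made-up values:

```python
invoice, invoice_count = 1.8, 40   # mean per-file cost and sample count for invoices
imei, imei_count = 0.9, 60         # mean per-file cost and sample count for imei images

total = imei_count + invoice_count
avg = (invoice * invoice_count + imei * imei_count) / total if total > 0 else None
print(avg)  # (1.8*40 + 0.9*60) / 100 = 1.26
```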
@@ -327,7 +365,7 @@ class ReportAccumulateByRequest:
                 number_imei=report_data[month][1][day]["num_imei"],
                 number_invoice=report_data[month][1][day]["num_invoice"],
                 number_bad_images=report_data[month][1][day]["images_quality"]["bad"],
-                average_OCR_time=_average_OCR_time,
+                average_OCR_time=_file_average_OCR_time,
                 number_imei_transaction=report_data[month][1][day]["usage"]["imei"],
                 number_invoice_transaction=report_data[month][1][day]["usage"]["invoice"],
                 feedback_accuracy=acumulated_acc["feedback_accuracy"],
@@ -348,6 +386,11 @@ class ReportAccumulateByRequest:
         # export report data
         for key in _report["average_processing_time"].keys():
             _report["average_processing_time"][key] = _report["average_processing_time"][key]()

+        _ks = list(_report["file_average_processing_time"].keys())
+        for key in _ks:
+            _report["file_average_processing_time"][key+"_count"] = _report["file_average_processing_time"][key].count
+            _report["file_average_processing_time"][key] = _report["file_average_processing_time"][key]()

         # avg_acc = 0
         # count_acc = 0
@@ -375,6 +418,8 @@ class ReportAccumulateByRequest:
                 num_transaction_invoice += _data[month][1][day]["usage"].get("invoice", 0)
                 for key in _data[month][1][day]["average_processing_time"].keys():
                     _data[month][1][day]["average_processing_time"][key] = _data[month][1][day]["average_processing_time"][key]()
+                for key in _data[month][1][day]["file_average_processing_time"].keys():
+                    _data[month][1][day]["file_average_processing_time"][key] = _data[month][1][day]["file_average_processing_time"][key]()

                 for key in settings.FIELD:
                     _data[month][1][day]["average_accuracy_rate"][key] = _data[month][1][day]["average_accuracy_rate"][key]()
@@ -391,6 +436,8 @@ class ReportAccumulateByRequest:
             _data[month][0]["usage"]["total_images"] = num_transaction_invoice + num_transaction_imei
             for key in _data[month][0]["average_processing_time"].keys():
                 _data[month][0]["average_processing_time"][key] = _data[month][0]["average_processing_time"][key]()
+            for key in _data[month][0]["file_average_processing_time"].keys():
+                _data[month][0]["file_average_processing_time"][key] = _data[month][0]["file_average_processing_time"][key]()
             for key in settings.FIELD:
                 _data[month][0]["average_accuracy_rate"][key] = _data[month][0]["average_accuracy_rate"][key]()
             for accuracy_type in ["feedback_accuracy", "reviewed_accuracy"]:

View File

@@ -502,8 +502,8 @@ def dump_excel_report(input: json):
         'N': 'average_accuracy_rate.purchase_date',
         'O': 'average_accuracy_rate.retailername',
         'P': 'average_accuracy_rate.invoice_no',
-        'Q': 'average_processing_time.imei',
-        'R': 'average_processing_time.invoice',
+        'Q': 'file_average_processing_time.imei',
+        'R': 'file_average_processing_time.invoice',
         'S': 'review_progress'
     }
     start_index = 5
@@ -528,7 +528,7 @@ def dump_excel_report(input: json):
                     ws[key + str(start_index)].fill = fill_yellow
                 if 'average_accuracy_rate' in mapping[key] and type(value) in [int, float] and value < 98:
                     ws[key + str(start_index)].font = font_red
-                elif 'average_processing_time' in mapping[key] and type(value) in [int, float] and value > 2.0:
+                elif 'file_average_processing_time' in mapping[key] and type(value) in [int, float] and value > 2.0:
                     ws[key + str(start_index)].font = font_red
                 elif 'bad_percent' in mapping[key] and type(value) in [int, float] and value > 10:
                     ws[key + str(start_index)].font = font_red
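Columns Q and R of the exported sheet now read the per-file averages, and the red-font rule flags values above 2.0 seconds against the same keys. The dotted strings in the mapping presumably name nested keys in each report row; a hedged sketch of how such a path could be resolved (the helper `resolve_path` is hypothetical and is not how `dict2xlsx` is necessarily implemented):

```python
def resolve_path(row: dict, dotted: str, default=None):
    """Walk a nested dict with a dotted key path, e.g. 'file_average_processing_time.imei'."""
    node = row
    for part in dotted.split("."):
        if not isinstance(node, dict) or part not in node:
            return default
        node = node[part]
    return node

row = {"file_average_processing_time": {"imei": 0.95, "invoice": 1.82}}
print(resolve_path(row, "file_average_processing_time.imei"))  # 0.95
```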

View File

@@ -0,0 +1,27 @@
+import numpy as np
+import time
+from django.utils import timezone
+
+def queuing_time(start_time, ai_profile_individual):
+    return ai_profile_individual["inference"][0] - start_time
+
+def inference_cost(ai_profile_individual):
+    return ai_profile_individual["inference"][1][0] - ai_profile_individual["inference"][0]
+
+def postprocessing_cost(ai_profile_individual):
+    return ai_profile_individual["postprocess"][1] - ai_profile_individual["postprocess"][0]
+
+def cost_profile(start_time, ai_profile_individual):
+    result = {"queue": queuing_time(start_time, ai_profile_individual),
+              "inference": inference_cost(ai_profile_individual),
+              "postprocessing": postprocessing_cost(ai_profile_individual)}
+    return result
+
+def backend_cost(created_at, ai_start_time):
+    """
+    Args:
+        created_at: str : django timezone format: example: 2024-04-01 16:16:18.344 +0700
+        ai_start_time: float : timestamp
+    """
+    received = created_at.timestamp()
+    return ai_start_time - received
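A short usage sketch of the new helpers, with the functions above in scope. The layout of `ai_profile_individual` (an "inference" entry whose second element is itself indexed, plus a "postprocess" start/end pair) is inferred from the arithmetic in the module and may not match the real profiler payload exactly; the timestamps are illustrative:

```python
from datetime import datetime, timezone as tz

start_time = 1_719_216_000.0  # request start, epoch seconds (illustrative)
ai_profile_individual = {
    "inference": [start_time + 0.4, [start_time + 1.9]],   # [inference start, [inference end, ...]] (assumed)
    "postprocess": [start_time + 1.9, start_time + 2.1],   # [postprocess start, postprocess end] (assumed)
}

print(cost_profile(start_time, ai_profile_individual))
# {'queue': ~0.4, 'inference': ~1.5, 'postprocessing': ~0.2}

created_at = datetime.fromtimestamp(start_time, tz=tz.utc)  # Django hands back an aware datetime
print(backend_cost(created_at, start_time + 0.4))            # ~0.4 seconds in the backend queue
```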

View File

@@ -329,8 +329,8 @@ const ReportOverViewTable: React.FC<ReportOverViewTableProps> = ({
         purchaseDateAAR: item.average_accuracy_rate.purchase_date,
         retailerNameAAR: item.average_accuracy_rate.retailername,
         invoiceNumberAAR: item.average_accuracy_rate.invoice_no,
-        snImeiAPT: item.average_processing_time.imei,
-        invoiceAPT: item.average_processing_time.invoice,
+        snImeiAPT: item.file_average_processing_time.imei,
+        invoiceAPT: item.file_average_processing_time.invoice,
         snImeiTC: item.usage.imei,
         invoiceTC: item.usage.invoice,
         reviewProgress: item.review_progress,