Merge pull request #141 from SDSRV-IDP/BE/processing_time

Be/processing time
Phan Thành Trung 2024-06-24 16:57:25 +07:00 committed by GitHub Enterprise
commit b2cebf192f
5 changed files with 92 additions and 18 deletions

View File

@@ -164,8 +164,8 @@ def create_accuracy_report(report_id, **kwargs):
report.number_invoice = time_cost["invoice"].count
report.number_bad_images = number_bad_images
# FIXME: refactor this data stream for durability
report.average_OCR_time = {"invoice": time_cost["invoice"](), "imei": time_cost["imei"](),
"invoice_count": time_cost["invoice"].count, "imei_count": time_cost["imei"].count}
report.average_OCR_time = {"invoice": _save_data["report"]["file_average_processing_time"]["invoice"], "imei": _save_data["report"]["file_average_processing_time"]["imei"],
"invoice_count": _save_data["report"]["file_average_processing_time"]["invoice_count"], "imei_count": _save_data["report"]["file_average_processing_time"]["imei_count"]}
report.average_OCR_time["invoice"] = 0 if report.average_OCR_time["invoice"] is None else report.average_OCR_time["invoice"]
report.average_OCR_time["imei"] = 0 if report.average_OCR_time["imei"] is None else report.average_OCR_time["imei"]

View File

@@ -10,12 +10,11 @@ from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile, ReportF
from ..celery_worker.client_connector import c_connector
from ..utils.file import dict2xlsx, save_workbook_file, save_report_to_S3
from ..utils.subsidiary import map_subsidiary_short_to_long
from ..utils.processing_time import backend_cost
from django.db.models import Q
from django.utils import timezone
import redis
from fwd import settings
from ..models import SubscriptionRequest, Report, ReportFile
import json
from typing import Union, List, Dict
VALID_KEYS = ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"]
@@ -51,6 +50,11 @@ class ReportAccumulateByRequest:
'invoice': IterAvg(),
'avg': IterAvg(),
},
'file_average_processing_time': {
'imei': IterAvg(),
'invoice': IterAvg(),
'avg': IterAvg(),
},
'usage': {
'imei':0,
'invoice': 0,
@@ -98,6 +102,11 @@ class ReportAccumulateByRequest:
'invoice': IterAvg(),
'avg': IterAvg(),
},
'file_average_processing_time': {
'imei': IterAvg(),
'invoice': IterAvg(),
'avg': IterAvg(),
},
'usage': {
'imei': 0,
'invoice': 0,
@@ -126,7 +135,7 @@ class ReportAccumulateByRequest:
self.report["average_accuracy_rate"]["avg"] = IterAvg()
@staticmethod
def update_total(total, report_file):
def update_total(total, report_file, image_avg_cost):
# Update total = update total
if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS:
total["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
@@ -156,6 +165,12 @@ class ReportAccumulateByRequest:
total["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
total["average_processing_time"]["avg"].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
if not total["file_average_processing_time"].get(report_file.doc_type, None):
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
total["file_average_processing_time"][report_file.doc_type] = IterAvg()
total["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0
total["file_average_processing_time"]["avg"].add_avg(image_avg_cost, 1) if image_avg_cost else 0
doc_type = "imei"
if report_file.doc_type in ["imei", "invoice", "all"]:
doc_type = report_file.doc_type
@@ -168,7 +183,7 @@ class ReportAccumulateByRequest:
return total
@staticmethod
def update_month(month, report_file):
def update_month(month, report_file, image_avg_cost):
# Update month = update month
if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS:
month["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
@@ -196,6 +211,12 @@ class ReportAccumulateByRequest:
month["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
month["average_processing_time"]["avg"].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
if not month["file_average_processing_time"].get(report_file.doc_type, None):
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
month["file_average_processing_time"][report_file.doc_type] = IterAvg()
month["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0
month["file_average_processing_time"]["avg"].add_avg(image_avg_cost, 1) if image_avg_cost else 0
doc_type = "imei"
if report_file.doc_type in ["imei", "invoice", "all"]:
doc_type = report_file.doc_type
@@ -208,7 +229,7 @@ class ReportAccumulateByRequest:
return month
@staticmethod
def update_day(day_data, report_file):
def update_day(day_data, report_file, image_avg_cost):
if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS:
day_data["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
day_data["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0
@@ -235,6 +256,13 @@ class ReportAccumulateByRequest:
day_data["average_processing_time"][report_file.doc_type] = IterAvg()
day_data["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
day_data["average_processing_time"]['avg'].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
if not day_data["file_average_processing_time"].get(report_file.doc_type, None):
print(f"[WARM]: Weird doctype: {report_file.doc_type}")
day_data["file_average_processing_time"][report_file.doc_type] = IterAvg()
day_data["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0
day_data["file_average_processing_time"]['avg'].add_avg(image_avg_cost, 1) if image_avg_cost else 0
day_data["review_progress"].append(report_file.review_status)
return day_data
@@ -256,7 +284,10 @@ class ReportAccumulateByRequest:
self.data[this_month][1][this_day]['num_request'] += 1
self.data[this_month][0]['num_request'] += 1
_number_of_file = request.pages
_be_cost = backend_cost(request.created_at, request.ai_inference_start_time)
_ai_cost = request.ai_inference_time
processing_time_by_averaging_request_cost = (_be_cost + _ai_cost)/_number_of_file if _number_of_file > 0 else 0
for report_file in report_files:
_report_file = copy.deepcopy(report_file)
if _report_file.is_bad_image or _report_file.bad_image_reason in settings.ACC_EXCLUDE_RESEASONS:
@@ -266,9 +297,9 @@ class ReportAccumulateByRequest:
for t in _report_file.reviewed_accuracy.keys():
_report_file.reviewed_accuracy[t] = []
self.report = self.update_total(self.report, _report_file)
self.data[this_month][0] = self.update_month(self.data[this_month][0], _report_file) # Update the subtotal within the month
self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], _report_file) # Update the subtotal of the day
self.report = self.update_total(self.report, _report_file, processing_time_by_averaging_request_cost)
self.data[this_month][0] = self.update_month(self.data[this_month][0], _report_file, processing_time_by_averaging_request_cost) # Update the subtotal within the month
self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], _report_file, processing_time_by_averaging_request_cost) # Update the subtotal of the day
def count_transactions_within_day(self, date_string):
start_date = datetime.strptime(date_string, "%Y%m%d")
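
Note: the per-file figure passed into update_total / update_month / update_day above is computed once per request: the backend time from request.created_at until AI inference starts, plus the request's AI inference time, spread evenly over its pages. A rough sketch of that arithmetic with purely illustrative numbers (not values from this PR):

be_cost = 1.2        # backend_cost(request.created_at, request.ai_inference_start_time)
ai_cost = 3.0        # request.ai_inference_time
number_of_files = 3  # request.pages
per_file_cost = (be_cost + ai_cost) / number_of_files if number_of_files > 0 else 0
# per_file_cost == 1.4 seconds, added once per report_file to the
# file_average_processing_time accumulators via add_avg(per_file_cost, 1)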
@@ -299,11 +330,18 @@ class ReportAccumulateByRequest:
end_date_with_timezone = start_date_with_timezone + timezone.timedelta(days=1)
_average_OCR_time = {"invoice": self.data[month][1][day]["average_processing_time"]["invoice"](), "imei": self.data[month][1][day]["average_processing_time"]["imei"](),
"invoice_count": self.data[month][1][day]["average_processing_time"]["invoice"].count, "imei_count": self.data[month][1][day]["average_processing_time"]["imei"].count}
_average_OCR_time["invoice"] = 0 if _average_OCR_time["invoice"] is None else _average_OCR_time["invoice"]
_average_OCR_time["imei"] = 0 if _average_OCR_time["imei"] is None else _average_OCR_time["imei"]
if "avg" not in _average_OCR_time:
_average_OCR_time["avg"] = (_average_OCR_time["invoice"]*_average_OCR_time["invoice_count"] + _average_OCR_time["imei"]*_average_OCR_time["imei_count"])/(_average_OCR_time["imei_count"] + _average_OCR_time["invoice_count"]) if (_average_OCR_time["imei_count"] + _average_OCR_time["invoice_count"]) > 0 else None
_file_average_OCR_time = {"invoice": self.data[month][1][day]["file_average_processing_time"]["invoice"](), "imei": self.data[month][1][day]["file_average_processing_time"]["imei"](),
"invoice_count": self.data[month][1][day]["file_average_processing_time"]["invoice"].count, "imei_count": self.data[month][1][day]["file_average_processing_time"]["imei"].count}
_file_average_OCR_time["invoice"] = 0 if _file_average_OCR_time["invoice"] is None else _file_average_OCR_time["invoice"]
_file_average_OCR_time["imei"] = 0 if _file_average_OCR_time["imei"] is None else _file_average_OCR_time["imei"]
if "avg" not in _file_average_OCR_time:
_file_average_OCR_time["avg"] = (_file_average_OCR_time["invoice"]*_file_average_OCR_time["invoice_count"] + _file_average_OCR_time["imei"]*_file_average_OCR_time["imei_count"])/(_file_average_OCR_time["imei_count"] + _file_average_OCR_time["invoice_count"]) if (_file_average_OCR_time["imei_count"] + _file_average_OCR_time["invoice_count"]) > 0 else None
acumulated_acc = {"feedback_accuracy": {},
"reviewed_accuracy": {}}
for acc_type in ["feedback_accuracy", "reviewed_accuracy"]:
@@ -327,7 +365,7 @@ class ReportAccumulateByRequest:
number_imei=report_data[month][1][day]["num_imei"],
number_invoice=report_data[month][1][day]["num_invoice"],
number_bad_images=report_data[month][1][day]["images_quality"]["bad"],
average_OCR_time=_average_OCR_time,
average_OCR_time=_file_average_OCR_time,
number_imei_transaction=report_data[month][1][day]["usage"]["imei"],
number_invoice_transaction=report_data[month][1][day]["usage"]["invoice"],
feedback_accuracy=acumulated_acc["feedback_accuracy"],
@@ -348,6 +386,11 @@ class ReportAccumulateByRequest:
# export report data
for key in _report["average_processing_time"].keys():
_report["average_processing_time"][key] = _report["average_processing_time"][key]()
_ks = list(_report["file_average_processing_time"].keys())
for key in _ks:
_report["file_average_processing_time"][key+"_count"] = _report["file_average_processing_time"][key].count
_report["file_average_processing_time"][key] = _report["file_average_processing_time"][key]()
# avg_acc = 0
# count_acc = 0
@@ -375,6 +418,8 @@ class ReportAccumulateByRequest:
num_transaction_invoice += _data[month][1][day]["usage"].get("invoice", 0)
for key in _data[month][1][day]["average_processing_time"].keys():
_data[month][1][day]["average_processing_time"][key] = _data[month][1][day]["average_processing_time"][key]()
for key in _data[month][1][day]["file_average_processing_time"].keys():
_data[month][1][day]["file_average_processing_time"][key] = _data[month][1][day]["file_average_processing_time"][key]()
for key in settings.FIELD:
_data[month][1][day]["average_accuracy_rate"][key] = _data[month][1][day]["average_accuracy_rate"][key]()
@@ -391,6 +436,8 @@ class ReportAccumulateByRequest:
_data[month][0]["usage"]["total_images"] = num_transaction_invoice + num_transaction_imei
for key in _data[month][0]["average_processing_time"].keys():
_data[month][0]["average_processing_time"][key] = _data[month][0]["average_processing_time"][key]()
for key in _data[month][0]["file_average_processing_time"].keys():
_data[month][0]["file_average_processing_time"][key] = _data[month][0]["file_average_processing_time"][key]()
for key in settings.FIELD:
_data[month][0]["average_accuracy_rate"][key] = _data[month][0]["average_accuracy_rate"][key]()
for accuracy_type in ["feedback_accuracy", "reviewed_accuracy"]:

View File

@@ -502,8 +502,8 @@ def dump_excel_report(input: json):
'N': 'average_accuracy_rate.purchase_date',
'O': 'average_accuracy_rate.retailername',
'P': 'average_accuracy_rate.invoice_no',
'Q': 'average_processing_time.imei',
'R': 'average_processing_time.invoice',
'Q': 'file_average_processing_time.imei',
'R': 'file_average_processing_time.invoice',
'S': 'review_progress'
}
start_index = 5
@@ -528,7 +528,7 @@ def dump_excel_report(input: json):
ws[key + str(start_index)].fill = fill_yellow
if 'average_accuracy_rate' in mapping[key] and type(value) in [int, float] and value < 98:
ws[key + str(start_index)].font = font_red
elif 'average_processing_time' in mapping[key] and type(value) in [int, float] and value > 2.0:
elif 'file_average_processing_time' in mapping[key] and type(value) in [int, float] and value > 2.0:
ws[key + str(start_index)].font = font_red
elif 'bad_percent' in mapping[key] and type(value) in [int, float] and value > 10:
ws[key + str(start_index)].font = font_red
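
Note: the column mapping above stores dot-separated paths such as "file_average_processing_time.imei"; how those paths are resolved against each report row is handled inside dump_excel_report / dict2xlsx and is not shown in this hunk. The helper below is purely hypothetical and only illustrates how such a path could resolve against the exported report dict:

def resolve_dot_path(row, dotted_key):
    # Hypothetical illustration only; not the project's implementation.
    value = row
    for part in dotted_key.split("."):
        value = value.get(part) if isinstance(value, dict) else None
    return value

# resolve_dot_path({"file_average_processing_time": {"imei": 1.4}},
#                  "file_average_processing_time.imei")  # -> 1.4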

View File

@@ -0,0 +1,27 @@
import numpy as np
import time
from django.utils import timezone

def queuing_time(start_time, ai_profile_individual):
    return ai_profile_individual["inference"][0] - start_time

def inference_cost(ai_profile_individual):
    return ai_profile_individual["inference"][1][0] - ai_profile_individual["inference"][0]

def postprocessing_cost(ai_profile_individual):
    return ai_profile_individual["postprocess"][1] - ai_profile_individual["postprocess"][0]

def cost_profile(start_time, ai_profile_individual):
    result = {"queue": queuing_time(start_time, ai_profile_individual),
              "inference": inference_cost(ai_profile_individual),
              "postprocessing": postprocessing_cost(ai_profile_individual)}
    return result

def backend_cost(created_at, ai_start_time):
    """
    Args:
        created_at: datetime : timezone-aware Django datetime, e.g. 2024-04-01 16:16:18.344 +0700
        ai_start_time: float : Unix timestamp
    """
    received = created_at.timestamp()
    return ai_start_time - received
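
Note: a usage sketch for the new helper, with illustrative values only. backend_cost expects a timezone-aware datetime for created_at (it calls .timestamp()) and a plain Unix-timestamp float for ai_start_time; the import path below is assumed from the diff context.

from datetime import datetime, timezone as dt_tz
from fwd_api.utils.processing_time import backend_cost  # assumed module path

created_at = datetime(2024, 4, 1, 9, 16, 18, 344000, tzinfo=dt_tz.utc)  # request reached the backend
ai_start_time = created_at.timestamp() + 1.2                            # AI inference began 1.2 s later
print(backend_cost(created_at, ai_start_time))                          # ~1.2 seconds of backend/queue time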

View File

@@ -329,8 +329,8 @@ const ReportOverViewTable: React.FC<ReportOverViewTableProps> = ({
purchaseDateAAR: item.average_accuracy_rate.purchase_date,
retailerNameAAR: item.average_accuracy_rate.retailername,
invoiceNumberAAR: item.average_accuracy_rate.invoice_no,
snImeiAPT: item.average_processing_time.imei,
invoiceAPT: item.average_processing_time.invoice,
snImeiAPT: item.file_average_processing_time.imei,
invoiceAPT: item.file_average_processing_time.invoice,
snImeiTC: item.usage.imei,
invoiceTC: item.usage.invoice,
reviewProgress: item.review_progress,