2024-01-31 03:00:18 +00:00
|
|
|
import re
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
|
|
import copy
|
2024-02-01 07:32:20 +00:00
|
|
|
from typing import Any
|
2024-01-31 03:00:18 +00:00
|
|
|
from .ocr_utils.ocr_metrics import eval_ocr_metric
|
|
|
|
from .ocr_utils.sbt_report import post_processing_str
|
2024-02-05 05:56:51 +00:00
|
|
|
import uuid
|
2024-01-31 03:00:18 +00:00
|
|
|
from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile, ReportFile
|
|
|
|
from ..celery_worker.client_connector import c_connector
|
2024-02-06 03:14:44 +00:00
|
|
|
from ..utils.file import dict2xlsx, save_workbook_file, save_report_to_S3
|
2024-02-01 07:32:20 +00:00
|
|
|
from django.db.models import Q
|
2024-02-05 05:56:51 +00:00
|
|
|
from django.utils import timezone
|
|
|
|
import redis
|
|
|
|
from fwd import settings
|
|
|
|
from ..models import SubscriptionRequest, Report, ReportFile
|
|
|
|
import json
|
2024-01-31 03:00:18 +00:00
|
|
|
|
|
|
|
# Quality threshold (0-1); presumably accuracies below this mark an image as
# "bad" — TODO confirm against the call sites that set is_bad_image.
BAD_THRESHOLD = 0.75

# OCR field keys that the reports aggregate accuracy over.
valid_keys = ["retailername", "sold_to_party", "purchase_date", "imei_number"]
|
|
|
|
|
2024-02-05 05:56:51 +00:00
|
|
|
class ReportAccumulateByRequest:
    """Accumulate per-request report statistics grouped by month and day.

    ``self.data`` maps "YYYYMM" -> [month_total, {"YYYYMMDD": day_data}],
    where both accumulators follow the shapes of ``total_format`` /
    ``day_format`` declared in ``__init__``. Accuracy / timing fields hold
    live ``IterAvg`` instances until ``get()`` collapses them to floats.
    """

    def __init__(self, sub):
        # self.redis_client = redis.Redis(host=settings.REDIS_HOST, port=settings.REDIS_PORT, decode_responses=True)
        self.sub = sub            # subsidiary name, used to filter transactions
        self.current_time = None
        self.data = {}            # {"month": [total, {"day": day_data}]}
        # Template for the month-level subtotal row.
        self.total_format = {
            'subs': "+",
            'extraction_date': "Subtotal ()",
            'total_images': 0,
            'images_quality': {
                'successful': 0,
                'successful_percent': 0,
                'bad': 0,
                'bad_percent': 0
            },
            'average_accuracy_rate': {
                'imei': IterAvg(),
                'purchase_date': IterAvg(),
                'retailer_name': IterAvg(),
                'sold_to_party': IterAvg()
            },
            'average_processing_time': {
                'imei': IterAvg(),
                'invoice': IterAvg()
            },
            'usage': {
                'imei': 0,
                'invoice': 0,
                'request': 0
            },
            'feedback_accuracy': {
                'imei_number': IterAvg(),
                'purchase_date': IterAvg(),
                'retailername': IterAvg(),
                'sold_to_party': IterAvg()
            },
            'reviewed_accuracy': {
                'imei_number': IterAvg(),
                'purchase_date': IterAvg(),
                'retailername': IterAvg(),
                'sold_to_party': IterAvg()
            },
            'num_request': 0
        }
        # Template for a single day's row.
        # BUG FIX: the original assignment ended with a stray trailing comma,
        # turning day_format into a 1-tuple that ``add`` had to unwrap with
        # ``[0]``; it is now a plain dict (and ``add`` no longer indexes it).
        self.day_format = {
            'subs': sub,
            'extraction_date': "",
            'num_imei': 0,
            'num_invoice': 0,
            'total_images': 0,
            'images_quality': {
                'successful': 0,
                'successful_percent': 0,
                'bad': 0,
                'bad_percent': 0
            },
            'average_accuracy_rate': {
                'imei': IterAvg(),
                'purchase_date': IterAvg(),
                'retailer_name': IterAvg(),
                'sold_to_party': IterAvg()
            },
            'average_processing_time': {
                'imei': IterAvg(),
                'invoice': IterAvg()
            },
            'usage': {
                'imei': 0,
                'invoice': 0,
                'request': 0
            },
            'feedback_accuracy': {
                'imei_number': IterAvg(),
                'purchase_date': IterAvg(),
                'retailername': IterAvg(),
                'sold_to_party': IterAvg()
            },
            'reviewed_accuracy': {
                'imei_number': IterAvg(),
                'purchase_date': IterAvg(),
                'retailername': IterAvg(),
                'sold_to_party': IterAvg()
            },
            "report_files": [],
            'num_request': 0
        }

    @staticmethod
    def update_total(total, report_file):
        """Fold one ReportFile into the month-level subtotal ``total``.

        Returns the (mutated) ``total`` dict for convenient reassignment.
        """
        total["total_images"] += 1
        total["images_quality"]["successful"] += 0 if report_file.is_bad_image else 1
        total["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0
        # total["report_files"].append(report_file)

        # Reviewed accuracy wins over feedback accuracy when any non-count
        # entry is present.
        if sum(len(report_file.reviewed_accuracy[x]) for x in report_file.reviewed_accuracy.keys() if "_count" not in x) > 0:
            total["average_accuracy_rate"]["imei"].add(report_file.reviewed_accuracy.get("imei_number", []))
            total["average_accuracy_rate"]["purchase_date"].add(report_file.reviewed_accuracy.get("purchase_date", []))
            total["average_accuracy_rate"]["retailer_name"].add(report_file.reviewed_accuracy.get("retailername", []))
            total["average_accuracy_rate"]["sold_to_party"].add(report_file.reviewed_accuracy.get("sold_to_party", []))
        elif sum(len(report_file.feedback_accuracy[x]) for x in report_file.feedback_accuracy.keys() if "_count" not in x) > 0:
            total["average_accuracy_rate"]["imei"].add(report_file.feedback_accuracy.get("imei_number", []))
            total["average_accuracy_rate"]["purchase_date"].add(report_file.feedback_accuracy.get("purchase_date", []))
            total["average_accuracy_rate"]["retailer_name"].add(report_file.feedback_accuracy.get("retailername", []))
            total["average_accuracy_rate"]["sold_to_party"].add(report_file.feedback_accuracy.get("sold_to_party", []))

        for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
            total["feedback_accuracy"][key].add(report_file.feedback_accuracy.get(key, []))
            total["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, []))

        if not total["average_processing_time"].get(report_file.doc_type, None):
            print(f"[WARN]: Weird doctype: {report_file.doc_type}")
            # BUG FIX: the original replaced the whole dict with a bare
            # IterAvg(), destroying the imei/invoice accumulators; only the
            # missing doc type is registered now.
            total["average_processing_time"][report_file.doc_type] = IterAvg()
        if report_file.time_cost:
            total["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1)

        total["usage"]["imei"] += 1 if report_file.doc_type == "imei" else 0
        total["usage"]["invoice"] += 1 if report_file.doc_type == "invoice" else 0

        return total

    @staticmethod
    def update_day(day_data, report_file):
        """Fold one ReportFile into the day-level accumulator ``day_data``.

        Returns the (mutated) ``day_data`` dict for convenient reassignment.
        """
        day_data["total_images"] += 1
        day_data["images_quality"]["successful"] += 0 if report_file.is_bad_image else 1
        day_data["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0
        day_data["num_imei"] += 1 if report_file.doc_type == "imei" else 0
        day_data["num_invoice"] += 1 if report_file.doc_type == "invoice" else 0
        day_data["report_files"].append(report_file)

        # BUG FIX: the original used ``.get(key, 0)`` here although
        # ``IterAvg.add`` iterates its argument; a missing key would raise
        # TypeError. Defaults are now ``[]``, consistent with update_total.
        if sum(len(report_file.reviewed_accuracy[x]) for x in report_file.reviewed_accuracy.keys() if "_count" not in x) > 0:
            day_data["average_accuracy_rate"]["imei"].add(report_file.reviewed_accuracy.get("imei_number", []))
            day_data["average_accuracy_rate"]["purchase_date"].add(report_file.reviewed_accuracy.get("purchase_date", []))
            day_data["average_accuracy_rate"]["retailer_name"].add(report_file.reviewed_accuracy.get("retailername", []))
            day_data["average_accuracy_rate"]["sold_to_party"].add(report_file.reviewed_accuracy.get("sold_to_party", []))
        elif sum(len(report_file.feedback_accuracy[x]) for x in report_file.feedback_accuracy.keys() if "_count" not in x) > 0:
            day_data["average_accuracy_rate"]["imei"].add(report_file.feedback_accuracy.get("imei_number", []))
            day_data["average_accuracy_rate"]["purchase_date"].add(report_file.feedback_accuracy.get("purchase_date", []))
            day_data["average_accuracy_rate"]["retailer_name"].add(report_file.feedback_accuracy.get("retailername", []))
            day_data["average_accuracy_rate"]["sold_to_party"].add(report_file.feedback_accuracy.get("sold_to_party", []))

        for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
            day_data["feedback_accuracy"][key].add(report_file.feedback_accuracy.get(key, []))
            day_data["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, []))

        if not day_data["average_processing_time"].get(report_file.doc_type, None):
            print(f"[WARN]: Weird doctype: {report_file.doc_type}")
            # BUG FIX: register only the missing doc type (see update_total).
            day_data["average_processing_time"][report_file.doc_type] = IterAvg()
        if report_file.time_cost:
            day_data["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1)

        return day_data

    def add(self, request, report_files):
        """Register one request and its ReportFiles under its creation date."""
        this_month = request.created_at.strftime("%Y%m")
        this_day = request.created_at.strftime("%Y%m%d")
        if not self.data.get(this_month, None):
            self.data[this_month] = [copy.deepcopy(self.total_format), {}]
            self.data[this_month][0]["extraction_date"] = "Subtotal (" + request.created_at.strftime("%Y-%m") + ")"
        if not self.data[this_month][1].get(this_day, None):
            self.data[this_month][1][this_day] = copy.deepcopy(self.day_format)
            self.data[this_month][1][this_day]['extraction_date'] = request.created_at.strftime("%Y-%m-%d")
            # Usage counters come from the DB once per day, not per request.
            usage = self.count_transactions_within_day(this_day)
            self.data[this_month][1][this_day]["usage"]["imei"] = usage.get("imei", 0)
            self.data[this_month][1][this_day]["usage"]["invoice"] = usage.get("invoice", 0)
            self.data[this_month][1][this_day]["usage"]["request"] = usage.get("request", 0)

        self.data[this_month][1][this_day]['num_request'] += 1
        self.data[this_month][0]['num_request'] += 1
        for report_file in report_files:
            self.data[this_month][0] = self.update_total(self.data[this_month][0], report_file)  # Update the subtotal within the month
            self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], report_file)  # Update the subtotal of the day

    def count_transactions_within_day(self, date_string):
        """Count per-doc-type transactions for a "YYYYMMDD" day (UTC-aware)."""
        # convert this day into timezone-aware range [day, day + 1)
        start_date = datetime.strptime(date_string, "%Y%m%d")
        start_date_with_timezone = timezone.make_aware(start_date)
        end_date_with_timezone = start_date_with_timezone + timezone.timedelta(days=1)
        return count_transactions(start_date_with_timezone, end_date_with_timezone, self.sub)

    def save(self, root_report_id, is_daily_report=False, include_test=False):
        """Persist one Report DB row (plus a detail .xlsx pushed to S3) per
        accumulated day, and return ``(fine_data, save_data)``.

        ``fine_data`` lists each month subtotal followed by its day rows,
        newest first; ``save_data`` wraps it with the overview file location.
        """
        report_data = self.get()
        fine_data = []
        save_data = {"file": {"overview": f"{root_report_id}/{root_report_id}.xlsx"},
                     "data": fine_data}  # {"sub_report_id": "S3 location", "data": fine_data}
        # extract data, newest month/day first
        month_keys = sorted(report_data.keys(), reverse=True)
        for month in month_keys:
            fine_data.append(report_data[month][0])
            day_keys = sorted(report_data[month][1].keys(), reverse=True)
            for day in day_keys:
                fine_data.append(report_data[month][1][day])
                # save daily reports
                report_id = root_report_id + "_" + day
                start_date = datetime.strptime(day, "%Y%m%d")
                start_date_with_timezone = timezone.make_aware(start_date)
                end_date_with_timezone = start_date_with_timezone + timezone.timedelta(days=1)
                # Processing-time averages come from the raw accumulators in
                # self.data (not the collapsed report_data copy) so the
                # sample counts are still available.
                _average_OCR_time = {"invoice": self.data[month][1][day]["average_processing_time"]["invoice"](),
                                     "imei": self.data[month][1][day]["average_processing_time"]["imei"](),
                                     "invoice_count": self.data[month][1][day]["average_processing_time"]["invoice"].count,
                                     "imei_count": self.data[month][1][day]["average_processing_time"]["imei"].count}
                _count = _average_OCR_time["imei_count"] + _average_OCR_time["invoice_count"]
                _average_OCR_time["avg"] = (_average_OCR_time["invoice"]*_average_OCR_time["invoice_count"] + _average_OCR_time["imei"]*_average_OCR_time["imei_count"])/_count if _count > 0 else None

                acumulated_acc = {"feedback_accuracy": {},
                                  "reviewed_accuracy": {}}
                for acc_type in ["feedback_accuracy", "reviewed_accuracy"]:
                    avg_acc = IterAvg()
                    for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
                        acumulated_acc[acc_type][key] = self.data[month][1][day][acc_type][key]()
                        acumulated_acc[acc_type][key+"_count"] = self.data[month][1][day][acc_type][key].count
                        avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key+"_count"])
                    acumulated_acc[acc_type]["avg"] = avg_acc()
                    acumulated_acc[acc_type]["avg_count"] = avg_acc.count

                new_report: Report = Report(
                    report_id=report_id,
                    is_daily_report=is_daily_report,
                    subsidiary=self.sub.lower().replace(" ", ""),
                    include_test=include_test,
                    start_at=start_date_with_timezone,
                    end_at=end_date_with_timezone,
                    status="Ready",
                    number_request=report_data[month][1][day]["num_request"],
                    number_images=report_data[month][1][day]["total_images"],
                    number_imei=report_data[month][1][day]["num_imei"],
                    number_invoice=report_data[month][1][day]["num_invoice"],
                    number_bad_images=report_data[month][1][day]["images_quality"]["bad"],
                    average_OCR_time=_average_OCR_time,
                    number_imei_transaction=report_data[month][1][day]["usage"]["imei"],
                    number_invoice_transaction=report_data[month][1][day]["usage"]["invoice"],
                    feedback_accuracy=acumulated_acc["feedback_accuracy"],
                    reviewed_accuracy=acumulated_acc["reviewed_accuracy"],
                )
                new_report.save()
                data = extract_report_detail_list(self.data[month][1][day]["report_files"], lower=True)
                data_workbook = dict2xlsx(data, _type='report_detail')
                local_workbook = save_workbook_file(report_id + ".xlsx", new_report, data_workbook)
                save_report_to_S3(report_id, local_workbook)
        return fine_data, save_data

    def get(self) -> Any:
        """Return a deep copy of the accumulated data with every IterAvg
        collapsed to its mean value, percentages filled in, and the raw
        ``report_files`` entries removed from the day rows.
        """
        acc_keys = ["imei_number", "purchase_date", "retailername", "sold_to_party"]
        rate_keys = ["imei", "purchase_date", "retailer_name", "sold_to_party"]
        _data = copy.deepcopy(self.data)
        for month in _data.keys():
            total = _data[month][0]
            n_images = total["total_images"]
            total["images_quality"]["successful_percent"] = total["images_quality"]["successful"]/n_images if n_images > 0 else 0
            total["images_quality"]["bad_percent"] = total["images_quality"]["bad"]/n_images if n_images > 0 else 0
            num_transaction_imei = 0
            num_transaction_invoice = 0
            for day in _data[month][1].keys():
                day_data = _data[month][1][day]
                num_transaction_imei += day_data["usage"].get("imei", 0)
                num_transaction_invoice += day_data["usage"].get("invoice", 0)
                for key in rate_keys:
                    day_data["average_accuracy_rate"][key] = day_data["average_accuracy_rate"][key]()
                for key in ["imei", "invoice"]:
                    day_data["average_processing_time"][key] = day_data["average_processing_time"][key]()
                for key in acc_keys:
                    day_data["feedback_accuracy"][key] = day_data["feedback_accuracy"][key]()
                    day_data["reviewed_accuracy"][key] = day_data["reviewed_accuracy"][key]()
                # Raw model objects are not serializable output.
                day_data.pop("report_files")
                d_images = day_data["total_images"]
                day_data["images_quality"]["successful_percent"] = day_data["images_quality"]["successful"]/d_images if d_images > 0 else 0
                day_data["images_quality"]["bad_percent"] = day_data["images_quality"]["bad"]/d_images if d_images > 0 else 0
            total["usage"]["imei"] = num_transaction_imei
            total["usage"]["invoice"] = num_transaction_invoice
            for key in rate_keys:
                total["average_accuracy_rate"][key] = total["average_accuracy_rate"][key]()
            for key in ["imei", "invoice"]:
                total["average_processing_time"][key] = total["average_processing_time"][key]()
            for key in acc_keys:
                total["feedback_accuracy"][key] = total["feedback_accuracy"][key]()
                total["reviewed_accuracy"][key] = total["reviewed_accuracy"][key]()
        return _data
|
|
|
|
|
|
|
|
|
2024-01-31 03:00:18 +00:00
|
|
|
class MonthReportAccumulate:
    """Accumulate already-computed daily Report rows into one month's view.

    ``add`` appends a display row per report and folds it into the month
    subtotal; it returns False (without accumulating) when the report
    belongs to a different month, so the caller can flush and ``clear``.
    """

    def __init__(self):
        self.month = None
        self.total = self._new_total()
        self.data = []
        # Template for a single report's display row.
        # BUG FIX: the original assignment ended with a stray trailing comma,
        # turning data_format into a 1-tuple that ``add`` had to unwrap with
        # ``[0]``; it is now a plain dict (and ``add`` no longer indexes it).
        self.data_format = {
            'subs': "",
            'extraction_date': "",
            'num_imei': 0,
            'num_invoice': 0,
            'total_images': 0,
            'images_quality': {
                'successful': 0,
                'successful_percent': 0,
                'bad': 0,
                'bad_percent': 0
            },
            'average_accuracy_rate': {
                'imei': 0,
                'purchase_date': 0,
                'retailer_name': 0
            },
            'average_processing_time': {
                'imei': 0,
                'invoice': 0
            },
            'usage': {
                'imei': 0,
                'invoice': 0
            }
        }

    @staticmethod
    def _new_total():
        """Fresh month-level subtotal accumulator (shared by __init__/clear)."""
        return {
            'subs': "+",
            'extraction_date': "Subtotal ()",
            'total_images': 0,
            'images_quality': {
                'successful': 0,
                'successful_percent': 0,
                'bad': 0,
                'bad_percent': 0
            },
            'average_accuracy_rate': {
                'imei': IterAvg(),
                'purchase_date': IterAvg(),
                'retailer_name': IterAvg()
            },
            'average_processing_time': {
                'imei': IterAvg(),
                'invoice': IterAvg()
            },
            'usage': {
                'imei': 0,
                'invoice': 0
            }
        }

    def accumulate(self, report):
        """Fold one Report row into the month subtotal."""
        self.total["total_images"] += report.number_images
        self.total["images_quality"]["successful"] += report.number_images - report.number_bad_images
        self.total["images_quality"]["bad"] += report.number_bad_images

        # Reviewed accuracy wins over feedback accuracy when present.
        if sum(report.reviewed_accuracy[x] for x in report.reviewed_accuracy.keys() if "_count" not in x) > 0:
            self.total["average_accuracy_rate"]["imei"].add_avg(report.reviewed_accuracy.get("imei_number", 0), report.reviewed_accuracy.get("imei_number_count", 0))
            self.total["average_accuracy_rate"]["purchase_date"].add_avg(report.reviewed_accuracy.get("purchase_date", 0), report.reviewed_accuracy.get("purchase_date_count", 0))
            self.total["average_accuracy_rate"]["retailer_name"].add_avg(report.reviewed_accuracy.get("retailername", 0), report.reviewed_accuracy.get("retailername_count", 0))
        elif sum(report.feedback_accuracy[x] for x in report.feedback_accuracy.keys() if "_count" not in x) > 0:
            self.total["average_accuracy_rate"]["imei"].add_avg(report.feedback_accuracy.get("imei_number", 0), report.feedback_accuracy.get("imei_number_count", 0))
            self.total["average_accuracy_rate"]["purchase_date"].add_avg(report.feedback_accuracy.get("purchase_date", 0), report.feedback_accuracy.get("purchase_date_count", 0))
            self.total["average_accuracy_rate"]["retailer_name"].add_avg(report.feedback_accuracy.get("retailername", 0), report.feedback_accuracy.get("retailername_count", 0))

        if report.average_OCR_time:
            self.total["average_processing_time"]["imei"].add_avg(report.average_OCR_time.get("imei", 0), report.average_OCR_time.get("imei_count", 0))
            self.total["average_processing_time"]["invoice"].add_avg(report.average_OCR_time.get("invoice", 0), report.average_OCR_time.get("invoice_count", 0))
        self.total["usage"]["imei"] += report.number_imei_transaction
        self.total["usage"]["invoice"] += report.number_invoice_transaction

    def add(self, report):
        """Append a report row; return False when it is from another month."""
        report_month = report.start_at.month

        if self.month is None:
            self.month = report_month
            self.total["extraction_date"] = f"Subtotal ({self.month})"
        elif self.month != report_month:
            # Finalize percentages before signalling the caller to flush.
            # BUG FIX: guarded the division (ZeroDivisionError when no images)
            # and use '=' rather than the original '+=' so repeated calls on a
            # month boundary stay idempotent.
            if self.total["total_images"]:
                self.total["images_quality"]["successful_percent"] = self.total["images_quality"]["successful"]/self.total["total_images"]
                self.total["images_quality"]["bad_percent"] = self.total["images_quality"]["bad"]/self.total["total_images"]
            return False  # Reports from a different month, stop accumulating

        # accumulate fields
        new_data = copy.deepcopy(self.data_format)
        new_data["num_imei"] = report.number_imei
        new_data["subs"] = report.subsidiary
        new_data["extraction_date"] = report.start_at
        new_data["num_invoice"] = report.number_invoice
        new_data["total_images"] = report.number_images
        new_data["images_quality"]["successful"] = report.number_images - report.number_bad_images
        new_data["images_quality"]["bad"] = report.number_bad_images

        report.reviewed_accuracy = {} if report.reviewed_accuracy is None else report.reviewed_accuracy
        report.feedback_accuracy = {} if report.feedback_accuracy is None else report.feedback_accuracy
        if sum(report.reviewed_accuracy[x] for x in report.reviewed_accuracy.keys() if "_count" not in x):
            new_data["average_accuracy_rate"]["imei"] = report.reviewed_accuracy.get("imei_number", None)
            new_data["average_accuracy_rate"]["purchase_date"] = report.reviewed_accuracy.get("purchase_date", None)
            new_data["average_accuracy_rate"]["retailer_name"] = report.reviewed_accuracy.get("retailername", None)
        elif sum(report.feedback_accuracy[x] for x in report.feedback_accuracy.keys() if "_count" not in x):
            new_data["average_accuracy_rate"]["imei"] = report.feedback_accuracy.get("imei_number", None)
            new_data["average_accuracy_rate"]["purchase_date"] = report.feedback_accuracy.get("purchase_date", None)
            new_data["average_accuracy_rate"]["retailer_name"] = report.feedback_accuracy.get("retailername", None)

        new_data["average_processing_time"]["imei"] = report.average_OCR_time.get("imei", 0) if report.average_OCR_time else 0
        new_data["average_processing_time"]["invoice"] = report.average_OCR_time.get("invoice", 0) if report.average_OCR_time else 0
        new_data["usage"]["imei"] = report.number_imei_transaction
        new_data["usage"]["invoice"] = report.number_invoice_transaction

        new_data["images_quality"]["successful_percent"] = new_data["images_quality"]["successful"]/new_data["total_images"] if new_data["total_images"] else 0
        new_data["images_quality"]["bad_percent"] = new_data["images_quality"]["bad"]/new_data["total_images"] if new_data["total_images"] else 0
        self.data.append(new_data)
        self.accumulate(report)
        return True

    def clear(self):
        """Reset to a pristine state so the next month can be accumulated."""
        self.month = None
        self.total = self._new_total()
        self.data = []

    def __call__(self):
        """Return ``(month, rows, finalized subtotal)``; IterAvg fields in the
        subtotal copy are collapsed to their mean values."""
        total = copy.deepcopy(self.total)
        total["images_quality"]["successful_percent"] = total["images_quality"]["successful"]/total["total_images"] if total["total_images"] else 0
        total["images_quality"]["bad_percent"] = total["images_quality"]["bad"]/total["total_images"] if total["total_images"] else 0
        total["average_accuracy_rate"]["imei"] = total["average_accuracy_rate"]["imei"]()
        total["average_accuracy_rate"]["purchase_date"] = total["average_accuracy_rate"]["purchase_date"]()
        total["average_accuracy_rate"]["retailer_name"] = total["average_accuracy_rate"]["retailer_name"]()
        total["average_processing_time"]["imei"] = total["average_processing_time"]["imei"]()
        total["average_processing_time"]["invoice"] = total["average_processing_time"]["invoice"]()
        return self.month, self.data, total
|
|
|
|
|
|
|
|
class IterAvg:
    """Incremental (streaming) arithmetic mean with a sample counter."""

    def __init__(self, name="default"):
        self.name = name   # label only; not used in the computation
        self.avg = 0       # running mean of all samples seen so far
        self.count = 0     # number of samples folded in

    def add(self, values):
        """
        Fold a batch of samples into the running mean.

        Args:
            values (list[float]): samples; ``None`` entries are skipped.
        """
        samples = [v for v in values if v is not None]
        if not samples:
            return
        new_count = self.count + len(samples)
        self.avg = (self.avg * self.count + sum(samples)) / new_count
        self.count = new_count

    def add_avg(self, avg, count):
        """Fold a pre-aggregated (mean, sample-count) pair into the mean."""
        if avg is None or count is None or count == 0:
            return
        previous = self.count
        self.count = previous + count
        self.avg = (self.avg * previous + avg * count) / self.count

    def __call__(self):
        """Return the current running mean (0 when nothing was added)."""
        return self.avg
|
|
|
|
|
2024-02-05 05:56:51 +00:00
|
|
|
def validate_feedback_file(feedback, predict):
    """Check that user feedback matches the prediction in IMEI count.

    Returns False when the number of non-empty feedback IMEIs differs from
    the number of predicted IMEIs; True otherwise (including when no
    feedback is present at all).
    """
    if not feedback:
        return True
    submitted = [imei for imei in feedback.get("imei_number", []) if imei != ""]
    predicted = predict.get("imei_number", [])
    return len(submitted) == len(predicted)
|
|
|
|
|
2024-02-01 07:32:20 +00:00
|
|
|
def first_of_list(the_list):
    """Return the first element, or None when the list is empty or None."""
    return the_list[0] if the_list else None
|
|
|
|
|
|
|
|
def extract_report_detail_list(report_detail_list, lower=False, in_percent=True):
    """Flatten ReportFile rows into spreadsheet-ready dicts.

    Args:
        report_detail_list: iterable of ReportFile-like objects.
        lower: lowercase every key and replace spaces with underscores.
        in_percent: scale every truthy *Accuracy* value by 100.
    """
    def _first(seq):
        # Inlined equivalent of the module-level first_of_list helper.
        return seq[0] if seq else None

    rows = []
    for rf in report_detail_list:
        rows.append({
            "Request ID": rf.correspond_request_id,
            "Redemption Number": rf.correspond_redemption_id,
            "Image type": rf.doc_type,
            "IMEI_user submitted": _first(rf.feedback_result.get("imei_number", [None])),
            "IMEI_OCR retrieved": _first(rf.predict_result.get("imei_number", [None])),
            "IMEI1 Accuracy": _first(rf.feedback_accuracy.get("imei_number", [None])),
            "Invoice_Purchase Date_Consumer": rf.feedback_result.get("purchase_date", None),
            "Invoice_Purchase Date_OCR": rf.predict_result.get("purchase_date", []),
            "Invoice_Purchase Date Accuracy": _first(rf.feedback_accuracy.get("purchase_date", [None])),
            "Invoice_Retailer_Consumer": rf.feedback_result.get("retailername", None),
            "Invoice_Retailer_OCR": rf.predict_result.get("retailername", None),
            "Invoice_Retailer Accuracy": _first(rf.feedback_accuracy.get("retailername", [None])),
            "OCR Image Accuracy": rf.acc,
            "OCR Image Speed (seconds)": rf.time_cost,
            "Reviewed?": "No",
            "Bad Image Reasons": rf.bad_image_reason,
            "Countermeasures": rf.counter_measures,
            "IMEI_Revised Accuracy": _first(rf.reviewed_accuracy.get("imei_number", [None])),
            "Purchase Date_Revised Accuracy": _first(rf.reviewed_accuracy.get("purchase_date", [None])),
            "Retailer_Revised Accuracy": _first(rf.reviewed_accuracy.get("retailername", [None])),
        })
    if lower:
        for row in rows:
            for old_key in list(row.keys()):
                row[old_key.lower().replace(" ", "_")] = row.pop(old_key)
    if in_percent:
        for row in rows:
            for key in [k for k in row.keys() if "accuracy" in k.lower()]:
                if row[key]:
                    row[key] = row[key] * 100
    return rows
|
|
|
|
|
2024-02-06 03:14:44 +00:00
|
|
|
def count_transactions(start_date, end_date, subsidiary="all"):
    """Count non-test transactions between two dates, grouped by doc type.

    Args:
        start_date: inclusive lower bound on ``created_at``.
        end_date: inclusive upper bound on ``created_at``.
        subsidiary: optional redemption-id prefix filter; any spelling of
            "all" (case/space-insensitive) disables the filter.

    Returns:
        dict mapping each doc type (e.g. "imei", "invoice") to its image
        count, plus a "request" key counting the requests themselves.
    """
    base_query = Q(created_at__range=(start_date, end_date))
    base_query &= Q(is_test_request=False)
    # Subsidiary is encoded upstream as a prefix of redemption_id.
    if subsidiary and subsidiary.lower().replace(" ", "") != "all":
        base_query &= Q(redemption_id__startswith=subsidiary)

    transaction_att = {}

    print("[DEBUG]: extracting transactions attribute...")
    total_transaction_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at')
    for request in total_transaction_requests:
        # Requests without a doc_type carry no image information; they are
        # excluded from the "request" count as well (original behavior).
        if not request.doc_type:
            continue
        for doc_type in request.doc_type.split(","):
            transaction_att[doc_type] = transaction_att.get(doc_type, 0) + 1
        transaction_att["request"] = transaction_att.get("request", 0) + 1

    return transaction_att
|
|
|
|
|
2024-01-31 03:00:18 +00:00
|
|
|
def convert_datetime_format(date_string: str, is_gt=False) -> str:
    """Convert an ISO "YYYY-MM-DD" date string to "DD/MM/YYYY".

    Strings that are not exactly an ISO-shaped, valid calendar date are
    returned unchanged, so callers can safely pass through free-form or
    already-formatted values.

    Args:
        date_string: candidate date string.
        is_gt: kept for interface compatibility; currently unused.

    Returns:
        The reformatted date, or the original string when it cannot be
        interpreted.
    """
    # pattern_date_string = "2023-02-28"
    input_format = "%Y-%m-%d"
    output_format = "%d/%m/%Y"
    # fullmatch (rather than match) rejects trailing garbage such as
    # "2023-02-28x", which previously slipped past the prefix match and
    # crashed strptime below.
    if re.fullmatch(r"\d{4}-\d{2}-\d{2}", date_string):
        try:
            date_object = datetime.strptime(date_string, input_format)
        except ValueError:
            # Pattern-shaped but not a real calendar date, e.g. "2023-13-45".
            return date_string
        return date_object.strftime(output_format)
    return date_string
|
|
|
|
|
|
|
|
def predict_result_to_ready(result):
    """Flatten a raw OCR prediction payload into a flat field dict.

    Values are read positionally from content -> document[0] -> content:
    slot 0 is the retailer, slot 1 the sold-to party, slot 2 the purchase
    dates and slot 3 the IMEI numbers.
    """
    document = result.get("content", {}).get("document", [{}])

    def _slot(idx, default):
        # Mirrors the fallback shape of chained .get() calls: when the
        # "content" key is absent, substitute a list of idx+1 empty dicts.
        entries = document[0].get("content", [{} for _ in range(idx + 1)])
        return entries[idx].get("value", default)

    return {
        "retailername": _slot(0, None),
        "sold_to_party": _slot(1, None),
        "purchase_date": _slot(2, []),
        "imei_number": _slot(3, []),
    }
|
|
|
|
|
|
|
|
def align_fine_result(ready_predict, fine_result):
    """Pad the prediction so ground-truth fields line up element-wise.

    Mutates both arguments in place: empty predicted slots get a
    one-element [None] placeholder when ground truth exists, and the
    ground-truth purchase date is replicated once per predicted date so
    accuracy can be computed pairwise downstream.

    Returns:
        The (possibly mutated) pair (ready_predict, fine_result).
    """
    if not fine_result:
        # No ground truth to align against; hand both back untouched.
        return ready_predict, fine_result

    if fine_result["purchase_date"] and not ready_predict["purchase_date"]:
        ready_predict["purchase_date"] = [None]
    if fine_result["retailername"] and not ready_predict["retailername"]:
        ready_predict["retailername"] = [None]

    replication = len(ready_predict["purchase_date"])
    fine_result["purchase_date"] = [fine_result["purchase_date"]] * replication
    return ready_predict, fine_result
|
|
|
|
|
|
|
|
def update_temp_accuracy(accuracy, acc, keys):
    """Feed per-key accuracy samples into their running accumulators.

    Each accuracy[key] must expose ``.add()`` (e.g. an IterAvg); acc[key]
    supplies the new sample. Returns the mutated accumulator mapping.
    """
    for field in keys:
        accumulator = accuracy[field]
        accumulator.add(acc[field])
    return accuracy
|
|
|
|
def calculate_accuracy(key_name, inference, target):
    """Score one field of an OCR prediction against its ground truth.

    Args:
        key_name (string): field to score, e.g. "retailername".
        inference (dict): OCR output, already aligned with ``target``.
        target (dict): ground-truth (feedback or reviewed) values.

    Returns:
        (acc, data): per-element one-minus-NED scores and the
        [processed_prediction, processed_target] string pairs scored.

    Note: both dicts are normalized in place so the field is always a list.
    """
    scores = []
    pairs = []
    if not target or not inference:
        return scores, pairs

    def _normalize(container):
        # In-place: None -> [], bare scalar -> one-element list.
        value = container[key_name]
        if not isinstance(value, list):
            container[key_name] = [] if value is None else [value]

    _normalize(inference)
    _normalize(target)

    for idx in range(len(inference[key_name])):
        # TODO: target[key_name][idx] is None, ""
        pred = post_processing_str(key_name, inference[key_name][idx], is_gt=False)
        gt = post_processing_str(key_name, target[key_name][idx], is_gt=True)

        score = eval_ocr_metric(
            [pred],
            [gt],
            metric=[
                "one_minus_ned",
                # "line_acc_ignore_case_symbol",
                # "line_acc",
                # "one_minus_ned_word",
            ])
        scores.append(list(score.values())[0])
        pairs.append([pred, gt])
    return scores, pairs
|
|
|
|
|
|
|
|
def calculate_avg_accuracy(acc, type, keys=None):
    """Average the accuracy samples collected for the given keys.

    Args:
        acc: nested mapping like {"feedback": {"imei_number": [...], ...}}.
        type: accuracy bucket to read ("feedback" or "reviewed"). The name
            shadows the builtin but is kept for caller compatibility.
        keys: field names whose sample lists are pooled; defaults to no
            keys (was a mutable ``[]`` default — replaced by None sentinel).

    Returns:
        Mean of all non-None samples, or None when there are none.
    """
    pooled = []
    for key in (keys or []):
        pooled += acc.get(type, {}).get(key, [])

    # None entries mark fields that could not be scored; exclude them.
    pooled = [sample for sample in pooled if sample is not None]
    return sum(pooled) / len(pooled) if pooled else None
|
|
|
|
|
|
|
|
|
|
|
|
def calculate_and_save_subcription_file(report, request):
    """Score every image of one request and persist the results.

    For each SubscriptionRequestFile of ``request`` this scores the OCR
    prediction against feedback/reviewed ground truth, writes the scores
    back onto the image row, saves one ReportFile row under ``report``,
    and pools the per-image numbers into a request-level summary.

    Args:
        report: Report row the new ReportFile entries are attached to.
        request: SubscriptionRequest whose image files are evaluated.

    Returns:
        dict with pooled per-field accuracies ("acc" -> feedback/reviewed),
        collected "err" messages, per-doc-type "time_cost" lists, and
        "total_images" / "bad_images" counters.
    """
    # Request-level accumulator; the per-field lists are extended image by image.
    request_att = {"acc": {"feedback": {"imei_number": [],
                                        "purchase_date": [],
                                        "retailername": [],
                                        "sold_to_party": [],
                                        },
                           "reviewed": {"imei_number": [],
                                        "purchase_date": [],
                                        "retailername": [],
                                        "sold_to_party": [],
                                        }},
                   "err": [],
                   "time_cost": {},
                   "total_images": 0,
                   "bad_images": 0}
    images = SubscriptionRequestFile.objects.filter(request=request)
    for image in images:
        status, att = calculate_subcription_file(image)
        # Skip images without a usable prediction (status 400).
        if status != 200:
            continue
        # Persist the per-image scoring back onto the image row.
        image.feedback_accuracy = att["acc"]["feedback"]
        image.reviewed_accuracy = att["acc"]["reviewed"]
        image.is_bad_image_quality = att["is_bad_image"]
        image.save()
        # One ReportFile row per image, denormalizing the request identifiers
        # so report rows can be queried without joining back to the request.
        new_report_file = ReportFile(report=report,
                                     correspond_request_id=request.request_id,
                                     correspond_redemption_id=request.redemption_id,
                                     doc_type=image.doc_type,
                                     predict_result=image.predict_result,
                                     feedback_result=image.feedback_result,
                                     reviewed_result=image.reviewed_result,
                                     feedback_accuracy=att["acc"]["feedback"],
                                     reviewed_accuracy=att["acc"]["reviewed"],
                                     acc=att["avg_acc"],
                                     time_cost=image.processing_time,
                                     is_bad_image=att["is_bad_image"],
                                     bad_image_reason=image.reason,
                                     counter_measures=image.counter_measures,
                                     error="|".join(att["err"])
                                     )
        new_report_file.save()
        # Group processing times by doc type.
        if request_att["time_cost"].get(image.doc_type, None):
            request_att["time_cost"][image.doc_type].append(image.processing_time)
        else:
            request_att["time_cost"][image.doc_type] = [image.processing_time]
        try:
            # Pool the per-image accuracy samples into the request summary.
            request_att["acc"]["feedback"]["imei_number"] += att["acc"]["feedback"]["imei_number"]
            request_att["acc"]["feedback"]["purchase_date"] += att["acc"]["feedback"]["purchase_date"]
            request_att["acc"]["feedback"]["retailername"] += att["acc"]["feedback"]["retailername"]
            request_att["acc"]["feedback"]["sold_to_party"] += att["acc"]["feedback"]["sold_to_party"]

            request_att["acc"]["reviewed"]["imei_number"] += att["acc"]["reviewed"]["imei_number"]
            request_att["acc"]["reviewed"]["purchase_date"] += att["acc"]["reviewed"]["purchase_date"]
            request_att["acc"]["reviewed"]["retailername"] += att["acc"]["reviewed"]["retailername"]
            request_att["acc"]["reviewed"]["sold_to_party"] += att["acc"]["reviewed"]["sold_to_party"]

            request_att["bad_images"] += int(att["is_bad_image"])
            request_att["total_images"] += 1
            request_att["err"] += att["err"]
        except Exception as e:
            # Best-effort pooling: a malformed att must not abort the report.
            print(e)
            continue

    return request_att
|
2024-02-05 05:56:51 +00:00
|
|
|
|
|
|
|
def calculate_a_request(report, request):
    """Score every image of one request without saving ReportFile rows.

    Same scoring and pooling as calculate_and_save_subcription_file, but
    the ReportFile rows are only constructed and returned (not saved), so
    the caller can bulk-insert them. Image rows ARE still updated/saved.

    Args:
        report: Report row the constructed ReportFile entries reference.
        request: SubscriptionRequest whose image files are evaluated.

    Returns:
        (request_att, report_files): the request-level summary dict and the
        list of unsaved ReportFile instances.
    """
    # Request-level accumulator; the per-field lists are extended image by image.
    request_att = {"acc": {"feedback": {"imei_number": [],
                                        "purchase_date": [],
                                        "retailername": [],
                                        "sold_to_party": [],
                                        },
                           "reviewed": {"imei_number": [],
                                        "purchase_date": [],
                                        "retailername": [],
                                        "sold_to_party": [],
                                        }},
                   "err": [],
                   "time_cost": {},
                   "total_images": 0,
                   "bad_images": 0}
    images = SubscriptionRequestFile.objects.filter(request=request)
    report_files = []
    for image in images:
        status, att = calculate_subcription_file(image)
        # Skip images without a usable prediction (status 400).
        if status != 200:
            continue
        # Persist the per-image scoring back onto the image row.
        image.feedback_accuracy = att["acc"]["feedback"]
        image.reviewed_accuracy = att["acc"]["reviewed"]
        image.is_bad_image_quality = att["is_bad_image"]
        image.save()
        # Construct (but do not save) one ReportFile row per image.
        new_report_file = ReportFile(report=report,
                                     correspond_request_id=request.request_id,
                                     correspond_redemption_id=request.redemption_id,
                                     doc_type=image.doc_type,
                                     predict_result=image.predict_result,
                                     feedback_result=image.feedback_result,
                                     reviewed_result=image.reviewed_result,
                                     feedback_accuracy=att["acc"]["feedback"],
                                     reviewed_accuracy=att["acc"]["reviewed"],
                                     acc=att["avg_acc"],
                                     is_bad_image=att["is_bad_image"],
                                     time_cost=image.processing_time,
                                     bad_image_reason=image.reason,
                                     counter_measures=image.counter_measures,
                                     error="|".join(att["err"])
                                     )
        report_files.append(new_report_file)
        # Group processing times by doc type.
        if request_att["time_cost"].get(image.doc_type, None):
            request_att["time_cost"][image.doc_type].append(image.processing_time)
        else:
            request_att["time_cost"][image.doc_type] = [image.processing_time]
        try:
            # Pool the per-image accuracy samples into the request summary.
            request_att["acc"]["feedback"]["imei_number"] += att["acc"]["feedback"]["imei_number"]
            request_att["acc"]["feedback"]["purchase_date"] += att["acc"]["feedback"]["purchase_date"]
            request_att["acc"]["feedback"]["retailername"] += att["acc"]["feedback"]["retailername"]
            request_att["acc"]["feedback"]["sold_to_party"] += att["acc"]["feedback"]["sold_to_party"]

            request_att["acc"]["reviewed"]["imei_number"] += att["acc"]["reviewed"]["imei_number"]
            request_att["acc"]["reviewed"]["purchase_date"] += att["acc"]["reviewed"]["purchase_date"]
            request_att["acc"]["reviewed"]["retailername"] += att["acc"]["reviewed"]["retailername"]
            request_att["acc"]["reviewed"]["sold_to_party"] += att["acc"]["reviewed"]["sold_to_party"]

            request_att["bad_images"] += int(att["is_bad_image"])
            request_att["total_images"] += 1
            request_att["err"] += att["err"]
        except Exception as e:
            # Best-effort pooling: a malformed att must not abort the report.
            print(e)
            continue

    return request_att, report_files
|
2024-01-31 03:00:18 +00:00
|
|
|
|
|
|
|
|
|
|
|
def calculate_subcription_file(subcription_request_file):
    """Score a single uploaded image file against feedback and review.

    Returns:
        (status, att): status is 400 when no prediction exists, else 200.
        att carries per-field accuracies for both ground-truth sources, any
        scoring errors, a bad-image flag and the overall average accuracy.
        Accuracies of bad images (average below BAD_THRESHOLD) are cleared
        so they do not enter report-level statistics.
    """
    att = {
        "acc": {"feedback": {}, "reviewed": {}},
        "err": [],
        "is_bad_image": False,
        "avg_acc": None,
    }
    if not subcription_request_file.predict_result:
        return 400, att

    # Deep-copy everything: align_fine_result / calculate_accuracy mutate
    # their inputs, and the model fields must stay pristine.
    predicted = copy.deepcopy(subcription_request_file.predict_result)
    predicted, feedback = align_fine_result(
        predicted, copy.deepcopy(subcription_request_file.feedback_result))
    predicted, reviewed = align_fine_result(
        predicted, copy.deepcopy(subcription_request_file.reviewed_result))

    for field in valid_keys:
        try:
            att["acc"]["feedback"][field], _ = calculate_accuracy(field, predicted, feedback)
            att["acc"]["reviewed"][field], _ = calculate_accuracy(field, predicted, reviewed)
        except Exception as e:
            att["err"].append(str(e))

    all_fields = ["retailername", "sold_to_party", "purchase_date", "imei_number"]
    avg_reviewed = calculate_avg_accuracy(att["acc"], "reviewed", all_fields)
    avg_feedback = calculate_avg_accuracy(att["acc"], "feedback", all_fields)

    candidates = [x for x in (avg_feedback, avg_reviewed) if x is not None]
    if candidates:
        best = max(candidates)
        if best < BAD_THRESHOLD:
            att["is_bad_image"] = True
            # exclude bad images from accuracy statistics
            for field in valid_keys:
                att["acc"]["feedback"][field] = []
                att["acc"]["reviewed"][field] = []
            att["avg_acc"] = None
        else:
            att["avg_acc"] = best

    return 200, att
|
|
|
|
|
|
|
|
def calculate_attributions(request): # for one request, return in order
    """Compute accuracy, comparison data, timing and image-quality counts
    for one request.

    Returns:
        (acc, data, time_cost, image_quality_num, error)
        acc:  {"feedback": {field: [scores]}, "reviewed": {...}}
        data: same structure holding [[ocr, ground_truth], ...] pairs
        time_cost: {"imei": [seconds], "invoice": [seconds]}
        image_quality_num: [good_count, bad_count]
        error: "" or a description of the first failure encountered
    """
    acc = {"feedback": {},
           "reviewed": {}}
    data = {"feedback": {},
            "reviewed": {}}
    time_cost = {}  # {"imei": [0.1], "invoice": [0.1]}
    image_quality_num = [0, 0]  # [good, bad]
    image_quality_num[0] = len(request.doc_type.split(","))
    error = ""

    inference_result = predict_result_to_ready(request.predict_result)
    # BUGFIX: align_fine_result returns a (prediction, ground_truth) tuple;
    # the previous code bound the whole tuple to the ground-truth name, so
    # the later .get() calls would have failed on a tuple.
    inference_result, reviewed_result = align_fine_result(inference_result, request.reviewed_result)
    inference_result, feedback_result = align_fine_result(inference_result, request.feedback_result)

    # accuracy calculation
    for key_name in valid_keys:
        if isinstance(inference_result[key_name], list):
            # Aligned fields must agree element-wise; bail out otherwise.
            if len(inference_result[key_name]) != len(reviewed_result.get(key_name, [])):
                error = f"Request {request.request_id} failed with different {key_name} in predict and reviewed_result"
                break
            if len(inference_result[key_name]) != len(feedback_result.get(key_name, [])):
                error = f"Request {request.request_id} failed with different {key_name} in predict and feedback_result"
                break
        else:
            # Normalize bare scalars to one-element lists before scoring.
            inference_result[key_name] = [inference_result[key_name]]
            feedback_result[key_name] = [feedback_result[key_name]]
            reviewed_result[key_name] = [reviewed_result[key_name]]

        acc["feedback"][key_name], data["feedback"][key_name] = calculate_accuracy(key_name, inference_result, feedback_result)
        acc["reviewed"][key_name], data["reviewed"][key_name] = calculate_accuracy(key_name, inference_result, reviewed_result)

    # A request has at most one purchase date; keep only the best match.
    # .get() guards against the early break above leaving the key unset.
    fb_dates = acc["feedback"].get("purchase_date", [])
    rv_dates = acc["reviewed"].get("purchase_date", [])
    acc["feedback"]["purchase_date"] = [max(fb_dates)] if fb_dates else []
    acc["reviewed"]["purchase_date"] = [max(rv_dates)] if rv_dates else []

    # Count for bad and total images: the invoice as a whole ...
    avg_invoice_feedback = calculate_avg_accuracy(acc, "feedback", ["retailername", "sold_to_party", "purchase_date"])
    avg_invoice_reviewed = calculate_avg_accuracy(acc, "reviewed", ["retailername", "sold_to_party", "purchase_date"])
    if avg_invoice_feedback is not None or avg_invoice_reviewed is not None:
        if max([x for x in [avg_invoice_feedback, avg_invoice_reviewed] if x is not None]) < BAD_THRESHOLD:
            image_quality_num[1] += 1
    # ... and each IMEI image individually.
    for i, _ in enumerate(acc["feedback"].get("imei_number", [])):
        if acc["feedback"]["imei_number"][i] is not None and acc["reviewed"]["imei_number"][i] is not None:
            if max(acc["feedback"]["imei_number"][i], acc["reviewed"]["imei_number"][i]) < BAD_THRESHOLD:
                image_quality_num[1] += 1

    # time cost and quality calculation
    # TODO: to be deprecated, doc_type would be in file level in the future
    try:
        for doc_type, doc_profile in request.ai_inference_profile.items():
            doc_type = doc_type.split("_")[0]
            inference_time = doc_profile["inference"][1][0] - doc_profile["inference"][0]
            postprocess_time = doc_profile["postprocess"][1] - doc_profile["postprocess"][0]
            # BUGFIX: initialize the per-doc-type list on first use; plain
            # indexing raised KeyError and aborted timing collection.
            time_cost.setdefault(doc_type, []).append(inference_time + postprocess_time)
    except Exception as e:
        error = f"Request id {request.request_id} failed with error: {e}"

    return acc, data, time_cost, image_quality_num, error
|
|
|
|
|
|
|
|
def shadow_report(report_id, query):
    """Kick off report generation asynchronously on the celery worker.

    Fire-and-forget: report_id and query are packed into one tuple as the
    task payload; no result is awaited here.
    """
    c_connector.make_a_report_2(
        (report_id, query))
|