Update: issues on 23 Feb

dx-tan 2024-02-23 03:58:10 +07:00
parent 51c67258e7
commit 3eb6ab61a9
7 changed files with 169 additions and 85 deletions

View File

@@ -224,7 +224,7 @@ OVERVIEW_REFRESH_INTERVAL = 2
 OVERVIEW_REPORT_ROOT = "overview"
 OVERVIEW_REPORT_DURATION = ["30d", "7d"]
-ACC_EXCLUDE_RESEASONS = ["Invalid Input", "Handwritten information", "handwritten"]
+ACC_EXCLUDE_RESEASONS = ["Invalid Input", "Handwritten information", "handwritten", "invalid_image", "missing_information", "too_blurry_text", "too_small_text"]
 SUBS = {
     "SEAU": "AU",
@@ -233,7 +233,8 @@ SUBS = {
     "SEPCO": "PH",
     "TSE": "TH",
     "SEIN": "ID",
-    "ALL": "all"
+    "ALL": "all", # all_detail
+    "ALL_SUMARY": "ALL_SUMARY"
 }
 CACHES = {
@@ -241,5 +242,3 @@ CACHES = {
         'BACKEND': 'django.core.cache.backends.dummy.DummyCache',
     }
 }
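
Note: the SUBS values double as the per-subsidiary prefixes of the overview cache keys read by the views below (keys have the form f"{sub}_{duration}"). A minimal sketch of the key space this settings block implies, using only the subset of SUBS visible in this diff; the helper name overview_cache_keys is illustrative and not part of the codebase:

SUBS = {"SEAU": "AU", "SEPCO": "PH", "TSE": "TH", "SEIN": "ID",
        "ALL": "all", "ALL_SUMARY": "ALL_SUMARY"}
OVERVIEW_REPORT_DURATION = ["30d", "7d"]

def overview_cache_keys():
    # one cached overview per (subsidiary, duration) pair, e.g. "AU_30d"
    return [f"{sub}_{duration}"
            for sub in SUBS.values()
            for duration in OVERVIEW_REPORT_DURATION]

print(overview_cache_keys())  # ['AU_30d', 'AU_7d', 'PH_30d', ...]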

View File

@@ -7,6 +7,7 @@ from django.utils import timezone
 from django.db.models import Q
 import uuid
 import os
+import copy
 import pytz
 from fwd import settings
 from drf_spectacular.utils import extend_schema, OpenApiParameter, OpenApiTypes
@@ -15,11 +16,12 @@ import json
 from ..exception.exceptions import InvalidException, RequiredFieldException, NotFoundException
 from ..models import SubscriptionRequest, Report, ReportFile, SubscriptionRequestFile
 from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list, extract_report_detail_list, IterAvg
-from ..utils.file import download_from_S3, convert_date_string, build_media_url_v2, build_url
+from ..utils.file import download_from_S3, convert_date_string, build_media_url_v2, build_url, dict2xlsx, save_report_to_S3
 from ..utils.redis import RedisUtils
 from ..utils.process import string_to_boolean
 from ..request.ReportCreationSerializer import ReportCreationSerializer
 from ..utils.subsidiary import map_subsidiary_long_to_short, map_subsidiary_short_to_long
+from ..utils.report import aggregate_overview

 redis_client = RedisUtils()
@@ -454,8 +456,8 @@ class AccuracyViewSet(viewsets.ViewSet):
         ],
         responses=None, tags=['Accuracy']
     )
-    @action(detail=False, url_path="overview", methods=["GET"])
-    def overview(self, request):
+    @action(detail=False, url_path="overview_sumary", methods=["GET"])
+    def overview_sumary(self, request):
         if request.method == 'GET':
             _subsidiary = request.GET.get('subsidiary', "ALL")
             duration = request.GET.get('duration', "")
@@ -472,6 +474,88 @@ class AccuracyViewSet(viewsets.ViewSet):
         return JsonResponse({'error': 'Invalid request method.'}, status=405)

+    @extend_schema(
+        parameters=[
+            OpenApiParameter(
+                name='duration',
+                location=OpenApiParameter.QUERY,
+                description='one of [30d, 7d]',
+                type=OpenApiTypes.STR,
+                default='30d',
+            ),
+            OpenApiParameter(
+                name='subsidiary',
+                location=OpenApiParameter.QUERY,
+                description='Subsidiary',
+                type=OpenApiTypes.STR,
+            )
+        ],
+        responses=None, tags=['Accuracy']
+    )
+    @action(detail=False, url_path="overview", methods=["GET"])
+    def overview(self, request):
+        if request.method == 'GET':
+            _subsidiary = request.GET.get('subsidiary', "ALL")
+            duration = request.GET.get('duration', "")
+            subsidiary = map_subsidiary_long_to_short(_subsidiary)
+            if _subsidiary == "ALL":
+                # aggregate_overview from subsidiaries
+                subsidiaries_to_include = list(settings.SUBS.values())
+                subsidiaries_to_include.remove("all")
+                subsidiary_overview_reports = []
+                for sub in subsidiaries_to_include:
+                    key = f"{sub}_{duration}"
+                    try:
+                        this_overview = json.loads(redis_client.get_specific_cache(settings.OVERVIEW_REPORT_ROOT, key)).get("data", [])
+                        if sub != "ALL_SUMARY":
+                            this_overview = [d for d in this_overview if d.get("subs") != "+"]
+                        else:
+                            for item in this_overview:
+                                if item.get("subs") == "+":
+                                    item["extraction_date"] = item["extraction_date"].replace("Subtotal ", "").replace("(", "").replace(")", "") + "-32"
+                        subsidiary_overview_reports += this_overview
+                    except Exception as e:
+                        print(f"[WARM]: Unable to retrive data {key} from Redis, skipping...")
+                data = aggregate_overview(subsidiary_overview_reports)
+                for item in data:
+                    if item.get("subs") == "+":
+                        item["extraction_date"] = "Subtotal (" + item["extraction_date"].replace("-32", "") + ")"
+                # Do the saving process
+                report_fine_data = copy.deepcopy(data)
+                for i, dat in enumerate(report_fine_data):
+                    keys = [x for x in list(dat.keys()) if "accuracy" in x.lower()]
+                    keys_percent = "images_quality"
+                    for x_key in report_fine_data[i][keys_percent].keys():
+                        if "percent" not in x_key:
+                            continue
+                        report_fine_data[i][keys_percent][x_key] = report_fine_data[i][keys_percent][x_key]*100
+                    for key in keys:
+                        if report_fine_data[i][key]:
+                            for x_key in report_fine_data[i][key].keys():
+                                report_fine_data[i][key][x_key] = report_fine_data[i][key][x_key]*100
+                overview_filename = _subsidiary + "_" + duration + ".xlsx"
+                data_workbook = dict2xlsx(report_fine_data, _type='report')
+                folder_path = os.path.join(settings.MEDIA_ROOT, "report", settings.OVERVIEW_REPORT_ROOT)
+                os.makedirs(folder_path, exist_ok=True)
+                file_path = os.path.join(folder_path, overview_filename)
+                data_workbook.save(file_path)
+                s3_key = save_report_to_S3(None, file_path)
+                # redis_client.set_cache(settings.OVERVIEW_REPORT_ROOT, overview_filename.replace(".xlsx", ""), json.dumps(save_data))
+            else:
+                # Retrieve data from Redis
+                key = f"{subsidiary}_{duration}"
+                data = json.loads(redis_client.get_specific_cache(settings.OVERVIEW_REPORT_ROOT, key)).get("data", [])
+            response = {
+                'overview_data': data,
+            }
+            return JsonResponse(response, status=200)
+        return JsonResponse({'error': 'Invalid request method.'}, status=405)
+
     @extend_schema(
         parameters=[
             OpenApiParameter(
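
Note: the new overview action merges the cached per-subsidiary reports. Subtotal rows (subs == "+") from the ALL_SUMARY cache get a "-32" day suffix appended to their month so that the lexicographic date sort in aggregate_overview places them above every real day (01-31) of that month; after sorting, the suffix is stripped and the "Subtotal (...)" label restored. A runnable sketch of just that trick, assuming day rows carry "YYYY-MM-DD" extraction_date strings (sample values are illustrative):

rows = [
    {"subs": "AU", "extraction_date": "2024-02-21"},
    {"subs": "+",  "extraction_date": "2024-02" + "-32"},  # tagged subtotal
    {"subs": "AU", "extraction_date": "2024-02-22"},
]
rows.sort(key=lambda x: x["extraction_date"], reverse=True)
# "2024-02-32" sorts above "2024-02-22", so the subtotal leads its month
print([r["extraction_date"] for r in rows])
# ['2024-02-32', '2024-02-22', '2024-02-21']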

View File

@@ -264,6 +264,7 @@ def upload_report_to_s3(local_file_path, s3_key, report_id, delay):
     try:
         time.sleep(delay)
         s3_client.upload_file(local_file_path, s3_key)
-        report = Report.objects.filter(report_id=report_id)[0]
-        report.S3_uploaded = True
-        report.S3_file_name = s3_key
+        if report_id:
+            report = Report.objects.filter(report_id=report_id)[0]
+            report.S3_uploaded = True
+            report.S3_file_name = s3_key
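
Note: the report_id guard lets this uploader be reused for workbooks that have no Report row, such as the overview export above, which calls save_report_to_S3(None, file_path). A minimal sketch of the guarded write-back with the S3 and ORM calls stubbed out (illustrative only, not the committed function):

def upload_report_to_s3(local_file_path, s3_key, report_id, delay=0):
    upload = lambda path, key: None  # stand-in for s3_client.upload_file
    upload(local_file_path, s3_key)
    if report_id:  # None means: no Report row to mark as uploaded
        print(f"would set S3_uploaded/S3_file_name on Report {report_id}")

upload_report_to_s3("/tmp/ALL_30d.xlsx", "report/overview/ALL_30d.xlsx", None)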

View File

@@ -304,6 +304,8 @@ def make_a_report_2(report_id, query_set):
                 for x_key in report_fine_data[i][key].keys():
                     report_fine_data[i][key][x_key] = report_fine_data[i][key][x_key]*100
     data_workbook = dict2xlsx(report_fine_data, _type='report')
+    if query_set["subsidiary"] == "ALL":
+        query_set["subsidiary"] = "ALL_SUMARY"
     overview_filename = query_set["subsidiary"] + "_" + query_set["report_overview_duration"] + ".xlsx"
     local_workbook = save_workbook_file(overview_filename, report, data_workbook, settings.OVERVIEW_REPORT_ROOT)
     s3_key = save_report_to_S3(report.report_id, local_workbook)
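
Note: with this rename, the scheduled ALL report is stored under the ALL_SUMARY cache key and filename, leaving the plain ALL_<duration> name to the on-the-fly aggregate built by the overview endpoint. A runnable illustration of the resulting filename:

query_set = {"subsidiary": "ALL", "report_overview_duration": "30d"}
if query_set["subsidiary"] == "ALL":
    query_set["subsidiary"] = "ALL_SUMARY"
print(query_set["subsidiary"] + "_" + query_set["report_overview_duration"] + ".xlsx")
# ALL_SUMARY_30d.xlsx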

View File

@@ -14,22 +14,12 @@ class Command(BaseCommand):
         # Add your command-line arguments here
         parser.add_argument('test', type=str, help='Value for the argument')

-    def process_request(self, request, predict_result, user_feedback, reviewed_result):
+    def process_request(self, request, predict_result, user_feedback, reviewed_result, reason):
         if len(request.request_id.split(".")[0].split("_")) < 2:
             return
-        request_feedback = copy.deepcopy(request.feedback_result)
         request_review = copy.deepcopy(request.reviewed_result)
-        if not request_feedback:
-            request_feedback = {
-                "request_id": request.request_id,
-                "imei_number": [],
-                "retailername": "",
-                "purchase_date": "",
-                "sold_to_party": ""
-            }
         if not request_review:
             request_review = {
                 "request_id": request.request_id,
@@ -53,74 +43,40 @@ class Command(BaseCommand):
             is_match = True
             if field == 'imei_number':
-                if not reviewed_result in request_review["imei_number"]:
+                if not (reviewed_result in request_review["imei_number"]):
                     request_review["imei_number"].append(reviewed_result)
-                if not user_feedback in request_feedback["imei_number"]:
-                    request_feedback["imei_number"].append(user_feedback)
             else:
                 if not reviewed_result == request_review[field]:
                     request_review[field] = reviewed_result
-                if not user_feedback == request_feedback[field]:
-                    request_feedback[field] = user_feedback

-            _predict_result = copy.deepcopy(predict_result_to_ready(request.predict_result))
-            _feedback_result = copy.deepcopy(request.feedback_result)
             _reviewed_result = copy.deepcopy(request.reviewed_result)
-            if not _feedback_result:
-                _feedback_result = {
-                    "imei_number": [],
-                    "retailername": "",
-                    "purchase_date": "",
-                    "sold_to_party": ""
-                }
             if not _reviewed_result:
                 _reviewed_result = {
-                    "request_id": image.request_id,
                     "imei_number": [],
                     "retailername": "",
                     "purchase_date": "",
                     "sold_to_party": ""
                 }
-            if image.doc_type == "invoice":
-                _predict_result[field] = predict_result
-                _predict_result["imei_number"] = []
-                if _feedback_result:
-                    _feedback_result[field] = user_feedback
-                    _feedback_result["imei_number"] = []
-                else:
-                    None
+            if image.doc_type == "invoice" and field in ['retailername', 'purchase_date']:
                 if _reviewed_result:
                     _reviewed_result[field] = reviewed_result
                     _reviewed_result["imei_number"] = []
                 else:
                     None
-            else:
-                _predict_result = {
-                    "retailername": None,
-                    "sold_to_party": None,
-                    "purchase_date": [],
-                    "imei_number": [predict_result]
-                }
-                _feedback_result = {
-                    "retailername": None,
-                    "sold_to_party": None,
-                    "purchase_date": None,
-                    "imei_number": [user_feedback]
-                } if _feedback_result else None
+            elif image.doc_type == "imei" and field == "imei_number":
                 _reviewed_result = {
                     "retailername": None,
                     "sold_to_party": None,
                     "purchase_date": None,
                     "imei_number": [reviewed_result]
                 } if _reviewed_result else None
-            image.predict_result = _predict_result
-            image.feedback_result = _feedback_result
             image.reviewed_result = _reviewed_result
+            image.reason = reason
             image.save()
-        request.feedback_result = request_feedback
         request.reviewed_result = request_review
-        request.feedback_result["request_id"] = request.request_id
         request.reviewed_result["request_id"] = request.request_id
         request.is_reviewed = True
         request.save()
@@ -144,7 +100,8 @@ class Command(BaseCommand):
                 if not request:
                     print("Not found ====>", row)
                 else:
-                    self.process_request(request, row[3], row[2], row[4])
+                    # request, predict_result, user_feedback, reviewed_result
+                    self.process_request(request, row[3], row[2], row[4], row[8])
                 index += 1
         self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!'))
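
Note: the command now updates only reviewed_result (the feedback_result rewriting was dropped) and stamps each image with a reason taken from column 8 of the input row. The column layout below is inferred from the call site and is an assumption, not documented anywhere in the commit:

# row[2] -> user_feedback, row[3] -> predict_result,
# row[4] -> reviewed_result, row[8] -> reason (assumed CSV layout)
row = ["", "", "fb", "pred", "rev", "", "", "", "too_blurry_text"]
user_feedback, predict_result, reviewed_result, reason = row[2], row[3], row[4], row[8]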

View File

@@ -212,7 +212,14 @@ class ReportAccumulateByRequest:
         self.data[this_month][1][this_day]['num_request'] += 1
         self.data[this_month][0]['num_request'] += 1
         for report_file in report_files:
+            if report_file.is_bad_image or report_file.bad_image_reason in settings.ACC_EXCLUDE_RESEASONS:
+                report_file.acc = None
+                for t in report_file.feedback_accuracy.keys():
+                    report_file.feedback_accuracy[t] = []
+                for t in report_file.reviewed_accuracy.keys():
+                    report_file.reviewed_accuracy[t] = []
             self.data[this_month][0] = self.update_total(self.data[this_month][0], report_file) # Update the subtotal within the month
             self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], report_file) # Update the subtotal of the day
@@ -770,11 +777,18 @@ def calculate_and_save_subcription_file(report, request):
     return request_att

+# def result_maximize_list_values(result, acc):
+#     for k in acc.keys():
+#         if isinstance(acc[k], list) and len(acc[k]) > 0:

 def acc_maximize_list_values(acc):
+    pos = {}
     for k in acc.keys():
+        pos[k] = 0
         if isinstance(acc[k], list) and len(acc[k]) > 0:
             acc[k] = [max(acc[k])]
-    return acc
+            pos[k] = acc[k].index(acc[k][0])
+    return acc, pos

 def calculate_a_request(report, request):
     request_att = {"acc": {"feedback": {"imei_number": [],
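
Note: as committed, pos[k] = acc[k].index(acc[k][0]) runs after acc[k] has already been collapsed to [max(acc[k])], so it always evaluates to 0. If the intent is the position of the maximum in the original list (calculate_a_request uses these indexes to pick the matching normalized purchase date), the index must be captured before the list is collapsed. A suggested sketch of that fix, not the committed code:

def acc_maximize_list_values(acc):
    pos = {}
    for k in acc.keys():
        pos[k] = 0
        if isinstance(acc[k], list) and len(acc[k]) > 0:
            pos[k] = acc[k].index(max(acc[k]))  # argmax of the original list
            acc[k] = [max(acc[k])]
    return acc, pos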
@@ -793,7 +807,8 @@ def calculate_a_request(report, request):
                                        "sold_to_party": [],
                                        }},
                   "err": [],
-                  "time_cost": {},
+                  "time_cost": {"imei": [],
+                                "invoice": []},
                   "total_images": 0,
                   "bad_images": 0,
                   "bad_image_list": [],
@@ -802,6 +817,13 @@ def calculate_a_request(report, request):
     report_files = []
     for image in images:
         status, att = calculate_subcription_file(image)
+        att["acc"]["feedback"], fb_max_indexes = acc_maximize_list_values(att["acc"]["feedback"])
+        att["acc"]["reviewed"], rv_max_indexes = acc_maximize_list_values(att["acc"]["reviewed"])
+        _att = copy.deepcopy(att)
         if status != 200:
             continue
         image.feedback_accuracy = att["acc"]["feedback"] # dict {key: [values]}
@@ -818,6 +840,14 @@ def calculate_a_request(report, request):
             _sub = map_subsidiary_short_to_long(request.redemption_id[:2])
         else:
             print(f"[WARM]: empty redemption_id, check request: {request.request_id}")
+        # Little trick: replace purchase dates with their normalized values
+        if len(att["normalized_data"]["feedback"].get("purchase_date", [])) > 0:
+            image.predict_result["purchase_date"] = [att["normalized_data"]["feedback"]["purchase_date"][i][0] for i in range(len(att["normalized_data"]["feedback"]["purchase_date"]))]
+            image.feedback_result["purchase_date"] = att["normalized_data"]["feedback"]["purchase_date"][fb_max_indexes["purchase_date"]][1]
+        if len(att["normalized_data"]["reviewed"].get("purchase_date", [])) > 0:
+            image.predict_result["purchase_date"] = [att["normalized_data"]["reviewed"]["purchase_date"][i][0] for i in range(len(att["normalized_data"]["reviewed"]["purchase_date"]))]
+            image.reviewed_result["purchase_date"] = att["normalized_data"]["reviewed"]["purchase_date"][rv_max_indexes["purchase_date"]][1]
         new_report_file = ReportFile(report=report,
                                      subsidiary=_sub,
                                      correspond_request_id=request.request_id,
@@ -826,8 +856,8 @@ def calculate_a_request(report, request):
                                      predict_result=image.predict_result,
                                      feedback_result=image.feedback_result,
                                      reviewed_result=image.reviewed_result,
-                                     feedback_accuracy=acc_maximize_list_values(att["acc"]["feedback"]),
-                                     reviewed_accuracy=acc_maximize_list_values(att["acc"]["reviewed"]),
+                                     feedback_accuracy=att["acc"]["feedback"],
+                                     reviewed_accuracy=att["acc"]["reviewed"],
                                      acc=att["avg_acc"],
                                      is_bad_image=att["is_bad_image"],
                                      is_reviewed="Yes" if request.is_reviewed else "No",
@@ -837,16 +867,18 @@ def calculate_a_request(report, request):
                                      error="|".join(att["err"])
                                      )
         report_files.append(new_report_file)
-        _att = copy.deepcopy(att)
         if att["is_bad_image"] or image.reason in settings.ACC_EXCLUDE_RESEASONS:
+            if att["is_bad_image"]:
                 request_att["bad_image_list"].append(image.file_name)
+            # if image.reason in settings.ACC_EXCLUDE_RESEASONS:
+            #     print(f"[DEBUG]: {image.reason}")
             # TODO: Exclude bad image accuracy from average accuracy
             _att["avg_acc"] = None
-            for t in ["feedback", "reviewed"]:
-                for k in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
+            for t in _att["acc"].keys():
+                for k in _att["acc"][t].keys():
                     _att["acc"][t][k] = []
+        else:
             if request_att["time_cost"].get(image.doc_type, None):
                 request_att["time_cost"][image.doc_type].append(image.processing_time)
             else:
@@ -863,11 +895,12 @@ def calculate_a_request(report, request):
         request_att["acc"]["reviewed"]["retailername"] += _att["acc"]["reviewed"]["retailername"]
         request_att["acc"]["reviewed"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"]

-        request_att["acc"]["acumulated"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if att["acc"]["reviewed"]["imei_number"] else att["acc"]["feedback"]["imei_number"]
-        request_att["acc"]["acumulated"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if att["acc"]["reviewed"]["purchase_date"] else att["acc"]["feedback"]["purchase_date"]
-        request_att["acc"]["acumulated"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if att["acc"]["reviewed"]["retailername"] else att["acc"]["feedback"]["retailername"]
-        request_att["acc"]["acumulated"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if att["acc"]["reviewed"]["sold_to_party"] else att["acc"]["feedback"]["sold_to_party"]
+        request_att["acc"]["acumulated"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if _att["acc"]["reviewed"]["imei_number"] else _att["acc"]["feedback"]["imei_number"]
+        request_att["acc"]["acumulated"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if _att["acc"]["reviewed"]["purchase_date"] else _att["acc"]["feedback"]["purchase_date"]
+        request_att["acc"]["acumulated"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if _att["acc"]["reviewed"]["retailername"] else _att["acc"]["feedback"]["retailername"]
+        request_att["acc"]["acumulated"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if _att["acc"]["reviewed"]["sold_to_party"] else _att["acc"]["feedback"]["sold_to_party"]
+        if image.reason not in settings.ACC_EXCLUDE_RESEASONS:
             request_att["bad_images"] += int(_att["is_bad_image"])
         request_att["total_images"] += 1
         request_att["err"] += _att["err"]
@@ -880,6 +913,8 @@ def calculate_a_request(report, request):
 def calculate_subcription_file(subcription_request_file):
     att = {"acc": {"feedback": {},
                    "reviewed": {}},
+           "normalized_data": {"feedback": {},
+                               "reviewed": {}},
            "err": [],
            "is_bad_image": False,
            "avg_acc": None}
@@ -896,8 +931,8 @@ def calculate_subcription_file(subcription_request_file):
     for key_name in valid_keys:
         try:
-            att["acc"]["feedback"][key_name], _ = calculate_accuracy(key_name, inference_result, feedback_result)
-            att["acc"]["reviewed"][key_name], _ = calculate_accuracy(key_name, inference_result, reviewed_result)
+            att["acc"]["feedback"][key_name], att["normalized_data"]["feedback"][key_name] = calculate_accuracy(key_name, inference_result, feedback_result)
+            att["acc"]["reviewed"][key_name], att["normalized_data"]["reviewed"][key_name] = calculate_accuracy(key_name, inference_result, reviewed_result)
         except Exception as e:
             att["err"].append(str(e))
     # print(f"[DEBUG]: predict_result: {subcription_request_file.predict_result}")
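
Note: calculate_accuracy's second return value, previously discarded, is now stored per key under att["normalized_data"], and calculate_a_request uses the max-accuracy indexes to swap purchase_date for its normalized form. A sketch of that consumption, assuming each normalized_data entry is a (normalized_predict, normalized_target) pair aligned with the accuracy list; the sample values are illustrative:

normalized = {"purchase_date": [("2024-02-01", "2024-02-01"),
                                ("01/02/2024", "2024-02-01")]}
fb_max_indexes = {"purchase_date": 1}   # from acc_maximize_list_values
predict_dates = [pair[0] for pair in normalized["purchase_date"]]
best_feedback_date = normalized["purchase_date"][fb_max_indexes["purchase_date"]][1]
print(predict_dates, best_feedback_date)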

View File

@@ -0,0 +1,6 @@
+CAT_VALUES = {
+    "ALL": "ZZZZZZZZ",
+}
+
+def aggregate_overview(overview_list):
+    overview_list = sorted(overview_list, key=lambda x: x["extraction_date"] + CAT_VALUES.get(x["subs"], x["subs"]), reverse=True)
+    return overview_list
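
Note: CAT_VALUES maps the cross-subsidiary "ALL" rows to "ZZZZZZZZ" so that, within one extraction_date, they outrank the per-subsidiary rows under the descending lexicographic sort, while the "-32"-tagged subtotal rows produced by the overview view outrank any real day of their month. A quick self-contained usage example:

CAT_VALUES = {"ALL": "ZZZZZZZZ"}
rows = [
    {"subs": "AU",  "extraction_date": "2024-02-22"},
    {"subs": "ALL", "extraction_date": "2024-02-22"},
    {"subs": "+",   "extraction_date": "2024-02-32"},  # tagged subtotal
]
rows = sorted(rows, key=lambda x: x["extraction_date"] + CAT_VALUES.get(x["subs"], x["subs"]), reverse=True)
print([r["subs"] for r in rows])  # ['+', 'ALL', 'AU']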