Update: fix issues reported on 23 Feb

This commit is contained in:
dx-tan 2024-02-23 03:58:10 +07:00
parent 51c67258e7
commit 3eb6ab61a9
7 changed files with 169 additions and 85 deletions

View File

@@ -224,7 +224,7 @@ OVERVIEW_REFRESH_INTERVAL = 2
OVERVIEW_REPORT_ROOT = "overview"
OVERVIEW_REPORT_DURATION = ["30d", "7d"]
-ACC_EXCLUDE_RESEASONS = ["Invalid Input", "Handwritten information", "handwritten"]
+ACC_EXCLUDE_RESEASONS = ["Invalid Input", "Handwritten information", "handwritten", "invalid_image", "missing_information", "too_blurry_text", "too_small_text"]
SUBS = {
"SEAU": "AU",
@@ -233,7 +233,8 @@ SUBS = {
"SEPCO": "PH",
"TSE": "TH",
"SEIN": "ID",
"ALL": "all"
"ALL": "all", # all_detail
"ALL_SUMARY": "ALL_SUMARY"
}
CACHES = {
@@ -241,5 +242,3 @@ CACHES = {
'BACKEND': 'django.core.cache.backends.dummy.DummyCache',
}
}
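The four new reasons added above feed the accuracy-exclusion checks in the report code further down (ReportAccumulateByRequest and calculate_a_request). A minimal sketch of the contract, with a hypothetical helper name (the real checks are inlined in those methods):

def blank_accuracy_if_excluded(reason, accuracy):
    # An image whose review reason is in ACC_EXCLUDE_RESEASONS keeps its report row
    # but contributes empty accuracy lists, so it no longer drags down the averages.
    if reason in ACC_EXCLUDE_RESEASONS:
        return {field: [] for field in accuracy}
    return accuracy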

View File

@@ -7,6 +7,7 @@ from django.utils import timezone
from django.db.models import Q
import uuid
import os
import copy
import pytz
from fwd import settings
from drf_spectacular.utils import extend_schema, OpenApiParameter, OpenApiTypes
@@ -15,11 +16,12 @@ import json
from ..exception.exceptions import InvalidException, RequiredFieldException, NotFoundException
from ..models import SubscriptionRequest, Report, ReportFile, SubscriptionRequestFile
from ..utils.accuracy import shadow_report, MonthReportAccumulate, first_of_list, extract_report_detail_list, IterAvg
-from ..utils.file import download_from_S3, convert_date_string, build_media_url_v2, build_url
+from ..utils.file import download_from_S3, convert_date_string, build_media_url_v2, build_url, dict2xlsx, save_report_to_S3
from ..utils.redis import RedisUtils
from ..utils.process import string_to_boolean
from ..request.ReportCreationSerializer import ReportCreationSerializer
from ..utils.subsidiary import map_subsidiary_long_to_short, map_subsidiary_short_to_long
from ..utils.report import aggregate_overview
redis_client = RedisUtils()
@@ -454,8 +456,8 @@ class AccuracyViewSet(viewsets.ViewSet):
],
responses=None, tags=['Accuracy']
)
@action(detail=False, url_path="overview", methods=["GET"])
def overview(self, request):
@action(detail=False, url_path="overview_sumary", methods=["GET"])
def overview_sumary(self, request):
if request.method == 'GET':
_subsidiary = request.GET.get('subsidiary', "ALL")
duration = request.GET.get('duration', "")
@@ -472,6 +474,88 @@
return JsonResponse({'error': 'Invalid request method.'}, status=405)
@extend_schema(
parameters=[
OpenApiParameter(
name='duration',
location=OpenApiParameter.QUERY,
description='one of [30d, 7d]',
type=OpenApiTypes.STR,
default='30d',
),
OpenApiParameter(
name='subsidiary',
location=OpenApiParameter.QUERY,
description='Subsidiary',
type=OpenApiTypes.STR,
)
],
responses=None, tags=['Accuracy']
)
@action(detail=False, url_path="overview", methods=["GET"])
def overview(self, request):
if request.method == 'GET':
_subsidiary = request.GET.get('subsidiary', "ALL")
duration = request.GET.get('duration', "")
subsidiary = map_subsidiary_long_to_short(_subsidiary)
if _subsidiary == "ALL":
# aggregate overview rows from the per-subsidiary caches (see aggregate_overview)
subsidiaries_to_include = list(settings.SUBS.values())
subsidiaries_to_include.remove("all")
subsidiary_overview_reports = []
for sub in subsidiaries_to_include:
key = f"{sub}_{duration}"
try:
this_overview = json.loads(redis_client.get_specific_cache(settings.OVERVIEW_REPORT_ROOT, key)).get("data", [])
if sub != "ALL_SUMARY":
this_overview = [d for d in this_overview if d.get("subs") != "+"]
else:
for item in this_overview:
if item.get("subs") == "+":
item["extraction_date"] = item["extraction_date"].replace("Subtotal ", "").replace("(", "").replace(")", "") + "-32"
subsidiary_overview_reports += this_overview
except Exception as e:
print(f"[WARM]: Unable to retrive data {key} from Redis, skipping...")
data = aggregate_overview(subsidiary_overview_reports)
for item in data:
if item.get("subs") == "+":
item["extraction_date"] = "Subtotal (" + item["extraction_date"].replace("-32", "") + ")"
# Save the aggregated overview as an xlsx workbook and upload it to S3
report_fine_data = copy.deepcopy(data)
for i, dat in enumerate(report_fine_data):
keys = [x for x in list(dat.keys()) if "accuracy" in x.lower()]
keys_percent = "images_quality"
for x_key in report_fine_data[i][keys_percent].keys():
if "percent" not in x_key:
continue
report_fine_data[i][keys_percent][x_key] = report_fine_data[i][keys_percent][x_key]*100
for key in keys:
if report_fine_data[i][key]:
for x_key in report_fine_data[i][key].keys():
report_fine_data[i][key][x_key] = report_fine_data[i][key][x_key]*100
overview_filename = _subsidiary + "_" + duration + ".xlsx"
data_workbook = dict2xlsx(report_fine_data, _type='report')
folder_path = os.path.join(settings.MEDIA_ROOT, "report", settings.OVERVIEW_REPORT_ROOT)
os.makedirs(folder_path, exist_ok = True)
file_path = os.path.join(folder_path, overview_filename)
data_workbook.save(file_path)
s3_key=save_report_to_S3(None, file_path)
# redis_client.set_cache(settings.OVERVIEW_REPORT_ROOT, overview_filename.replace(".xlsx", ""), json.dumps(save_data))
else:
# Retrieve data from Redis
key = f"{subsidiary}_{duration}"
data = json.loads(redis_client.get_specific_cache(settings.OVERVIEW_REPORT_ROOT, key)).get("data", [])
response = {
'overview_data': data,
}
return JsonResponse(response, status=200)
return JsonResponse({'error': 'Invalid request method.'}, status=405)
@extend_schema(
parameters=[
OpenApiParameter(
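Note on the "-32" suffix used above: subtotal rows carry subs == "+" and a display date like "Subtotal (2024-02)". Rewriting the date to "2024-02-32" before aggregation gives the row a key that sorts after every real day of its month (no month has a day 32); once aggregate_overview has sorted the rows, the display form is restored. A self-contained sketch of the round trip:

date = "Subtotal (2024-02)"
sort_form = date.replace("Subtotal ", "").replace("(", "").replace(")", "") + "-32"  # "2024-02-32"
display_form = "Subtotal (" + sort_form.replace("-32", "") + ")"                     # back to "Subtotal (2024-02)"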

View File

@@ -264,6 +264,7 @@ def upload_report_to_s3(local_file_path, s3_key, report_id, delay):
try:
time.sleep(delay)
s3_client.upload_file(local_file_path, s3_key)
if report_id:
report = Report.objects.filter(report_id=report_id)[0]
report.S3_uploaded = True
report.S3_file_name = s3_key
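The new guard lets callers upload a workbook that has no Report row, which is what the on-demand overview path above does via save_report_to_S3(None, file_path). A sketch of such a call, assuming save_report_to_S3 forwards its first argument here as report_id:

upload_report_to_s3(file_path, s3_key, None, 0)  # upload only; skip the Report bookkeeping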

View File

@@ -304,6 +304,8 @@ def make_a_report_2(report_id, query_set):
for x_key in report_fine_data[i][key].keys():
report_fine_data[i][key][x_key] = report_fine_data[i][key][x_key]*100
data_workbook = dict2xlsx(report_fine_data, _type='report')
if query_set["subsidiary"] == "ALL":
query_set["subsidiary"] = "ALL_SUMARY"
overview_filename = query_set["subsidiary"] + "_" + query_set["report_overview_duration"] + ".xlsx"
local_workbook = save_workbook_file(overview_filename, report, data_workbook, settings.OVERVIEW_REPORT_ROOT)
s3_key=save_report_to_S3(report.report_id, local_workbook)
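Remapping "ALL" to "ALL_SUMARY" here keeps the two all-subsidiary flavors from colliding in the cache and on disk. The resulting key convention (spellings kept as in the codebase):

# per-subsidiary caches:       "AU_30d", "PH_7d", ...   (scheduled reports)
# scheduled all-subs roll-up:  "ALL_SUMARY_30d"          (this code path)
# on-demand aggregate:         built by the new overview endpoint from the
#                              per-subsidiary keys and saved as ALL_30d.xlsx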

View File

@@ -14,22 +14,12 @@ class Command(BaseCommand):
# Add your command-line arguments here
parser.add_argument('test', type=str, help='Value for the argument')
-def process_request(self, request, predict_result, user_feedback, reviewed_result):
+def process_request(self, request, predict_result, user_feedback, reviewed_result, reason):
if len(request.request_id.split(".")[0].split("_")) < 2:
return
request_feedback = copy.deepcopy(request.feedback_result)
request_review = copy.deepcopy(request.reviewed_result)
-if not request_feedback:
-    request_feedback = {
-        "request_id": request.request_id,
-        "imei_number": [],
-        "retailername": "",
-        "purchase_date": "",
-        "sold_to_party": ""
-    }
if not request_review:
request_review = {
"request_id": request.request_id,
@@ -53,74 +43,40 @@ class Command(BaseCommand):
is_match = True
if field == 'imei_number':
-if not reviewed_result in request_review["imei_number"]:
+if not (reviewed_result in request_review["imei_number"]):
request_review["imei_number"].append(reviewed_result)
if not user_feedback in request_feedback["imei_number"]:
request_feedback["imei_number"].append(user_feedback)
else:
if not reviewed_result == request_review[field]:
request_review[field] = reviewed_result
if not user_feedback == request_feedback[field]:
request_feedback[field] = user_feedback
_predict_result = copy.deepcopy(predict_result_to_ready(request.predict_result))
_feedback_result = copy.deepcopy(request.feedback_result)
_reviewed_result = copy.deepcopy(request.reviewed_result)
if not _feedback_result:
_feedback_result = {
"imei_number": [],
"retailername": "",
"purchase_date": "",
"sold_to_party": ""
}
if not _reviewed_result:
_reviewed_result = {
"request_id": image.request_id,
"imei_number": [],
"retailername": "",
"purchase_date": "",
"sold_to_party": ""
}
if image.doc_type == "invoice":
_predict_result[field] = predict_result
_predict_result["imei_number"] = []
if _feedback_result:
_feedback_result[field] = user_feedback
_feedback_result["imei_number"] = []
else:
None
if image.doc_type == "invoice" and field in ['retailername', 'purchase_date']:
if _reviewed_result:
_reviewed_result[field] = reviewed_result
_reviewed_result["imei_number"] = []
else:
None
else:
_predict_result = {
"retailername": None,
"sold_to_party": None,
"purchase_date": [],
"imei_number": [predict_result]
}
_feedback_result = {
"retailername": None,
"sold_to_party": None,
"purchase_date": None,
"imei_number": [user_feedback]
} if _feedback_result else None
elif image.doc_type == "imei" and field == "imei_number":
_reviewed_result = {
"retailername": None,
"sold_to_party": None,
"purchase_date": None,
"imei_number": [reviewed_result]
} if _reviewed_result else None
image.predict_result = _predict_result
image.feedback_result = _feedback_result
image.reviewed_result = _reviewed_result
image.reason = reason
image.save()
request.feedback_result = request_feedback
request.reviewed_result = request_review
request.feedback_result["request_id"] = request.request_id
request.reviewed_result["request_id"] = request.request_id
request.is_reviewed = True
request.save()
@@ -144,7 +100,8 @@ class Command(BaseCommand):
if not request:
print("Not found ====>", row)
else:
-self.process_request(request, row[3], row[2], row[4])
+# args: request, predict_result, user_feedback, reviewed_result, reason
+self.process_request(request, row[3], row[2], row[4], row[8])
index += 1
self.stdout.write(self.style.SUCCESS('Command executed successfully!'))
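The command now also ingests a review-exclusion reason and stamps it on each image, so the report builders can skip it via ACC_EXCLUDE_RESEASONS. Column meanings implied by the call site above (only these indices are used; the rest of the row layout is not shown in this commit):

# row[2] -> user_feedback, row[3] -> predict_result, row[4] -> reviewed_result, row[8] -> reason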

View File

@@ -212,7 +212,14 @@ class ReportAccumulateByRequest:
self.data[this_month][1][this_day]['num_request'] += 1
self.data[this_month][0]['num_request'] += 1
for report_file in report_files:
if report_file.is_bad_image or report_file.bad_image_reason in settings.ACC_EXCLUDE_RESEASONS:
report_file.acc = None
for t in report_file.feedback_accuracy.keys():
report_file.feedback_accuracy[t] = []
for t in report_file.reviewed_accuracy.keys():
report_file.reviewed_accuracy[t] = []
self.data[this_month][0] = self.update_total(self.data[this_month][0], report_file) # Update the subtotal within the month
self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], report_file) # Update the subtotal of the day
@@ -770,11 +777,18 @@ def calculate_and_save_subcription_file(report, request):
return request_att
+# def result_maximize_list_values(result, acc):
+#     for k in acc.keys():
+#         if isinstance(acc[k], list) and len(acc[k]) > 0:
def acc_maximize_list_values(acc):
+    pos = {}
    for k in acc.keys():
+        pos[k] = 0
        if isinstance(acc[k], list) and len(acc[k]) > 0:
-            acc[k] = [max(acc[k])]
-    return acc
+            pos[k] = acc[k].index(max(acc[k]))  # position of the best score, taken before collapsing
+            acc[k] = [max(acc[k])]
+    return acc, pos
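With the index taken before the list is collapsed, a quick check of the new return value (illustrative numbers):

acc, pos = acc_maximize_list_values({"imei_number": [0.5, 0.9, 0.7], "retailername": []})
# acc == {"imei_number": [0.9], "retailername": []}
# pos == {"imei_number": 1, "retailername": 0}
# calculate_a_request below keeps these positions (fb_max_indexes / rv_max_indexes)
# to pick the normalized purchase_date entry that produced the best score.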
def calculate_a_request(report, request):
request_att = {"acc": {"feedback": {"imei_number": [],
@@ -793,7 +807,8 @@ def calculate_a_request(report, request):
"sold_to_party": [],
}},
"err": [],
"time_cost": {},
"time_cost": {"imei": [],
"invoice": []},
"total_images": 0,
"bad_images": 0,
"bad_image_list": [],
@@ -802,6 +817,13 @@ def calculate_a_request(report, request):
report_files = []
for image in images:
status, att = calculate_subcription_file(image)
att["acc"]["feedback"], fb_max_indexes = acc_maximize_list_values(att["acc"]["feedback"])
att["acc"]["reviewed"], rv_max_indexes = acc_maximize_list_values(att["acc"]["reviewed"])
_att = copy.deepcopy(att)
if status != 200:
continue
image.feedback_accuracy = att["acc"]["feedback"] # dict {key: [values]}
@@ -818,6 +840,14 @@ def calculate_a_request(report, request):
_sub = map_subsidiary_short_to_long(request.redemption_id[:2])
else:
print(f"[WARM]: empty redemption_id, check request: {request.request_id}")
# Replace purchase dates with their normalized forms, picking the pair behind the best accuracy score
if len(att["normalized_data"]["feedback"].get("purchase_date", [])) > 0:
image.predict_result["purchase_date"] = [att["normalized_data"]["feedback"]["purchase_date"][i][0] for i in range(len(att["normalized_data"]["feedback"]["purchase_date"]))]
image.feedback_result["purchase_date"] = att["normalized_data"]["feedback"]["purchase_date"][fb_max_indexes["purchase_date"]][1]
if len(att["normalized_data"]["reviewed"].get("purchase_date", [])) > 0:
image.predict_result["purchase_date"] = [att["normalized_data"]["reviewed"]["purchase_date"][i][0] for i in range(len(att["normalized_data"]["reviewed"]["purchase_date"]))]
image.reviewed_result["purchase_date"] = att["normalized_data"]["reviewed"]["purchase_date"][rv_max_indexes["purchase_date"]][1]
new_report_file = ReportFile(report=report,
subsidiary=_sub,
correspond_request_id=request.request_id,
@@ -826,8 +856,8 @@ def calculate_a_request(report, request):
predict_result=image.predict_result,
feedback_result=image.feedback_result,
reviewed_result=image.reviewed_result,
-feedback_accuracy=acc_maximize_list_values(att["acc"]["feedback"]),
-reviewed_accuracy=acc_maximize_list_values(att["acc"]["reviewed"]),
+feedback_accuracy=att["acc"]["feedback"],
+reviewed_accuracy=att["acc"]["reviewed"],
acc=att["avg_acc"],
is_bad_image=att["is_bad_image"],
is_reviewed="Yes" if request.is_reviewed else "No",
@@ -837,16 +867,18 @@ def calculate_a_request(report, request):
error="|".join(att["err"])
)
report_files.append(new_report_file)
-_att = copy.deepcopy(att)
-if att["is_bad_image"] or image.reason in settings.ACC_EXCLUDE_RESEASONS:
+if att["is_bad_image"]:
request_att["bad_image_list"].append(image.file_name)
# if image.reason in settings.ACC_EXCLUDE_RESEASONS:
# print(f"[DEBUG]: {image.reason}")
# TODO: Exclude bad image accuracy from average accuracy
_att["avg_acc"] = None
for t in ["feedback", "reviewed"]:
for k in ["imei_number", "purchase_date", "retailername", "sold_to_party"]:
for t in _att["acc"].keys():
for k in _att["acc"][t].keys():
_att["acc"][t][k] = []
else:
if request_att["time_cost"].get(image.doc_type, None):
request_att["time_cost"][image.doc_type].append(image.processing_time)
else:
@@ -863,11 +895,12 @@ def calculate_a_request(report, request):
request_att["acc"]["reviewed"]["retailername"] += _att["acc"]["reviewed"]["retailername"]
request_att["acc"]["reviewed"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"]
request_att["acc"]["acumulated"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if att["acc"]["reviewed"]["imei_number"] else att["acc"]["feedback"]["imei_number"]
request_att["acc"]["acumulated"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if att["acc"]["reviewed"]["purchase_date"] else att["acc"]["feedback"]["purchase_date"]
request_att["acc"]["acumulated"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if att["acc"]["reviewed"]["retailername"] else att["acc"]["feedback"]["retailername"]
request_att["acc"]["acumulated"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if att["acc"]["reviewed"]["sold_to_party"] else att["acc"]["feedback"]["sold_to_party"]
request_att["acc"]["acumulated"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if _att["acc"]["reviewed"]["imei_number"] else _att["acc"]["feedback"]["imei_number"]
request_att["acc"]["acumulated"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if _att["acc"]["reviewed"]["purchase_date"] else _att["acc"]["feedback"]["purchase_date"]
request_att["acc"]["acumulated"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if _att["acc"]["reviewed"]["retailername"] else _att["acc"]["feedback"]["retailername"]
request_att["acc"]["acumulated"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if _att["acc"]["reviewed"]["sold_to_party"] else _att["acc"]["feedback"]["sold_to_party"]
if image.reason not in settings.ACC_EXCLUDE_RESEASONS:
request_att["bad_images"] += int(_att["is_bad_image"])
request_att["total_images"] += 1
request_att["err"] += _att["err"]
@@ -880,6 +913,8 @@ def calculate_a_request(report, request):
def calculate_subcription_file(subcription_request_file):
att = {"acc": {"feedback": {},
"reviewed": {}},
"normalized_data": {"feedback": {},
"reviewed": {}},
"err": [],
"is_bad_image": False,
"avg_acc": None}
@@ -896,8 +931,8 @@ def calculate_subcription_file(subcription_request_file):
for key_name in valid_keys:
try:
att["acc"]["feedback"][key_name], _ = calculate_accuracy(key_name, inference_result, feedback_result)
att["acc"]["reviewed"][key_name], _ = calculate_accuracy(key_name, inference_result, reviewed_result)
att["acc"]["feedback"][key_name], att["normalized_data"]["feedback"][key_name] = calculate_accuracy(key_name, inference_result, feedback_result)
att["acc"]["reviewed"][key_name], att["normalized_data"]["reviewed"][key_name] = calculate_accuracy(key_name, inference_result, reviewed_result)
except Exception as e:
att["err"].append(str(e))
# print(f"[DEBUG]: predict_result: {subcription_request_file.predict_result}")

View File

@@ -0,0 +1,6 @@
CAT_VALUES = {
"ALL": "ZZZZZZZZ",
}
def aggregate_overview(overview_list):
overview_list = sorted(overview_list, key=lambda x: x["extraction_date"] + CAT_VALUES.get(x["subs"], x["subs"]), reverse=True)
return overview_list
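A small check of the ordering contract (field names taken from the callers above):

rows = [
    {"extraction_date": "2024-02-22", "subs": "AU"},
    {"extraction_date": "2024-02-22", "subs": "ALL"},
    {"extraction_date": "2024-02-23", "subs": "PH"},
]
aggregate_overview(rows)
# -> PH (2024-02-23), then ALL (2024-02-22), then AU (2024-02-22)
# Newest dates first; within a date, "ALL" maps to "ZZZZZZZZ" and so sorts ahead of the
# per-subsidiary rows under reverse=True, while subtotal rows dated "-32" lead their month.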