Update: Accuracy formula

This commit is contained in:
TannedCung 2024-06-20 15:02:10 +07:00
parent ef1523860b
commit 74431c3bc9
4 changed files with 87 additions and 15 deletions

View File

@ -238,6 +238,13 @@ SUBS = {
"SEAO": "seao"
}
# Fields that count toward accuracy, keyed first by subsidiary code and then
# by document type. Callers look up the subsidiary and fall back to the
# "default" entry when the subsidiary has no dedicated mapping.
FIELDS_BY_SUB = {
    "SG": {
        "imei": ["imei_number"],
        "invoice": ["retailername", "purchase_date"],
    },
    "default": {
        "imei": ["imei_number"],
        "invoice": ["retailername", "invoice_no", "purchase_date"],
    },
}

# NOTE(review): presumably the average-accuracy cut-off below which an image
# is flagged as bad quality — confirm against the accuracy utilities.
BAD_THRESHOLD = 0.75
# NOTE(review): presumably the accuracy below which a request is queued for
# manual review — confirm against usage.
NEED_REVIEW = 1.0

View File

@ -126,11 +126,13 @@ def create_accuracy_report(report_id, **kwargs):
"invoice_no": mean_list(request_att["acc"]["reviewed"].get("invoice_no", [None]))}
rq_accuracy = []
for rpf in _report_files:
if sum(len(value_list) for value_list in rpf.reviewed_accuracy.values()):
rq_accuracy += list(chain(*rpf.reviewed_accuracy.values()))
elif sum(len(value_list) for value_list in rpf.feedback_accuracy.values()):
rq_accuracy += list(chain(*rpf.feedback_accuracy.values()))
for i, _att in enumerate(_atts):
if _report_files[i].bad_image_reason in settings.ACC_EXCLUDE_RESEASONS:
continue
if sum(len(value_list) for value_list in _att["acc"]["reviewed"].values()):
rq_accuracy += list(chain(*_att["acc"]["reviewed"].values()))
elif sum(len(value_list) for value_list in _att["acc"]["feedback"].values()):
rq_accuracy += list(chain(*_att["acc"]["feedback"].values()))
request.is_required = False
if len(rq_accuracy) > 0:

View File

@ -0,0 +1,65 @@
from django.core.management.base import BaseCommand
from tqdm import tqdm
from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest
from fwd_api.exception.exceptions import InvalidException
from fwd_api.utils.accuracy import predict_result_to_ready
import traceback
import copy
from django.utils import timezone
# NOTE(review): KEY, VALUE and EXPECTED_VALUE are not referenced by the
# Command class in this module; presumably leftovers from an earlier
# command — confirm nothing imports them before removing.
KEY = "imei_number"
VALUE = "None"
EXPECTED_VALUE = []
class Command(BaseCommand):
    """Rewrite ``subsidiary`` on subscription requests from ``redemption_id``.

    Requests created between the ``start`` and ``end`` arguments are visited
    one by one. For each eligible request the first two characters of
    ``redemption_id`` are stored in ``subsidiary`` and the request is saved.
    A summary (updated / failed counts and the set of subsidiary codes
    written) is printed at the end.
    """

    help = 'Refactor database for image level'

    def add_arguments(self, parser):
        """Register the positional ``start`` and ``end`` date arguments."""
        parser.add_argument('start', type=str, help='start date, sample: 2023-01-02T00:00:00+0700')
        parser.add_argument('end', type=str, help='end date, sample: 2023-01-03T00:00:00+0700')

    def process_request(self, request, result):
        """Update ``request.subsidiary`` and record the outcome in ``result``.

        Args:
            request: the SubscriptionRequest row to process.
            result: mutable summary dict with keys ``total`` (int),
                ``failed`` (int) and ``subs`` (set); updated in place.
        """
        # Skip requests whose id (before the first ".") has fewer than two
        # "_"-separated parts.
        if len(request.request_id.split(".")[0].split("_")) < 2:
            return
        # Skip requests with no prediction, or whose prediction reports a
        # non-200 status (a missing status is treated as 200).
        if not request.predict_result:
            return
        if request.predict_result.get("status", 200) != 200:
            return
        # NOTE(review): only requests that ALREADY have a subsidiary are
        # rewritten. If the intent is to fill in missing values this check
        # should probably be ``is None`` — confirm before changing.
        if not isinstance(request.redemption_id, str) or request.subsidiary is None:
            return

        subsidiary = request.redemption_id[:2]
        request.subsidiary = subsidiary
        try:
            request.save()
        except Exception as e:
            # Best-effort per record: report the failure and keep going so a
            # single bad row does not abort the whole run.
            self.stderr.write(f"Failed to update subsidiary for {request.request_id}: {e}")
            result["failed"] += 1
        else:
            result['total'] += 1
            result['subs'].add(subsidiary)

    def handle(self, *args, **options):
        """Run the command over the requested creation-date window.

        Raises:
            InvalidException: if ``start`` or ``end`` does not match the
                ``%Y-%m-%dT%H:%M:%S%z`` format.
        """
        start = options['start']
        end = options['end']
        result = {'total': 0, 'failed': 0, 'subs': set()}

        if start or end:
            date_format = '%Y-%m-%dT%H:%M:%S%z'
            try:
                start_date = timezone.datetime.strptime(start, date_format)
                end_date = timezone.datetime.strptime(end, date_format)
            except (TypeError, ValueError) as e:
                # Echo the offending values so the operator can see which
                # argument was malformed, and keep the original cause chained.
                self.stderr.write(f"[INFO]: start: {start}")
                self.stderr.write(f"[INFO]: end: {end}")
                raise InvalidException(excArgs="Date format") from e
            subcription_iter = SubscriptionRequest.objects.filter(created_at__range=(start_date, end_date))
        else:
            subcription_iter = SubscriptionRequest.objects.all()

        # .iterator() streams rows instead of caching the whole queryset.
        for request in tqdm(subcription_iter.iterator()):
            self.process_request(request, result)

        self.stdout.write(self.style.SUCCESS(
            'Sample Django management command executed successfully! total/failed: {}/{} - subs: {}'.format(
                result['total'], result['failed'], result['subs'])))

View File

@ -19,8 +19,6 @@ import json
from typing import Union, List, Dict
VALID_KEYS = ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"]
KEYS_BY_FILE_TYPE = {"imei": ["imei_number"],
"invoice": ["retailername", "invoice_no", "purchase_date"]}
OPTIONAL_KEYS = ['invoice_no']
class ReportAccumulateByRequest:
@ -776,7 +774,7 @@ def calculate_accuracy(key_name: str, inference: Dict[str, Union[str, List]], ta
_inference = inference[key_name]
_target = target[key_name]
_will_acc_be_ignored = _acc_will_be_ignored(key_name, _target)
# _will_acc_be_ignored = _acc_will_be_ignored(key_name, _target)
_inference, _target = _accuracy_calculate_formatter(_inference, _target)
for i, v in enumerate(_inference):
@ -793,7 +791,6 @@ def calculate_accuracy(key_name: str, inference: Dict[str, Union[str, List]], ta
# "line_acc",
# "one_minus_ned_word",
])
if not _will_acc_be_ignored:
acc.append(list(score.values())[0])
data.append([x, y])
return acc, data
@ -908,12 +905,11 @@ def calculate_a_request(report, request):
images = SubscriptionRequestFile.objects.filter(request=request, file_category=FileCategory.Origin.value)
report_files = []
for image in images:
status, att = calculate_subcription_file(image)
atts.append(att)
status, att = calculate_subcription_file(image, request.subsidiary)
att["acc"]["feedback"], fb_max_indexes = acc_maximize_list_values(att["acc"]["feedback"])
att["acc"]["reviewed"], rv_max_indexes = acc_maximize_list_values(att["acc"]["reviewed"])
_att = copy.deepcopy(att)
_att = copy.deepcopy(att) # deep copy right here to advoid removing acc for bad images in the next steps
fb_avg_acc = avg_dict(att["acc"]["feedback"])
rv_avg_acc = avg_dict(att["acc"]["reviewed"])
@ -924,6 +920,8 @@ def calculate_a_request(report, request):
continue
if status != 200:
continue
atts.append(att)
image.feedback_accuracy = att["acc"]["feedback"] # dict {key: [values]}
image.is_bad_image_quality = att["is_bad_image"] # is_bad_image=avg_acc<threshold (avg_acc=feedback_acc)
@ -1033,7 +1031,7 @@ def calculate_a_request(report, request):
return request_att, report_files, atts
def calculate_subcription_file(subcription_request_file):
def calculate_subcription_file(subcription_request_file, subsidiary):
att = {"acc": {"feedback": {},
"reviewed": {}},
"normalized_data": {"feedback": {},
@ -1050,7 +1048,7 @@ def calculate_subcription_file(subcription_request_file):
feedback_result = copy.deepcopy(subcription_request_file.feedback_result)
reviewed_result = copy.deepcopy(subcription_request_file.reviewed_result)
accuracy_keys_for_this_image = KEYS_BY_FILE_TYPE.get(subcription_request_file.doc_type, [])
accuracy_keys_for_this_image = settings.FIELDS_BY_SUB.get(subsidiary, settings.FIELDS_BY_SUB["default"]).get(subcription_request_file.doc_type, [])
for key_name in VALID_KEYS:
att["acc"]["feedback"][key_name] = []