commit ff404e6598
@@ -238,6 +238,13 @@ SUBS = {
     "SEAO": "seao"
 }
 
+FIELDS_BY_SUB = {
+    "SG": {"imei": ["imei_number"],
+           "invoice": ["retailername", "purchase_date"]},
+    "default": {"imei": ["imei_number"],
+                "invoice": ["retailername", "invoice_no", "purchase_date"]},
+}
+
 BAD_THRESHOLD = 0.75
 NEED_REVIEW = 1.0
 
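The new FIELDS_BY_SUB table keys the accuracy fields to score by subsidiary and doc type; calculate_subcription_file (further down in this commit) resolves it with a two-level .get() that falls back to "default". A minimal sketch of that lookup, using the values above ("XX" is just a stand-in for any subsidiary not listed):

FIELDS_BY_SUB = {
    "SG": {"imei": ["imei_number"],
           "invoice": ["retailername", "purchase_date"]},
    "default": {"imei": ["imei_number"],
                "invoice": ["retailername", "invoice_no", "purchase_date"]},
}

def accuracy_keys(subsidiary, doc_type):
    # Unknown subsidiaries fall back to "default"; unknown doc types score nothing.
    return FIELDS_BY_SUB.get(subsidiary, FIELDS_BY_SUB["default"]).get(doc_type, [])

print(accuracy_keys("SG", "invoice"))  # ['retailername', 'purchase_date'] (no invoice_no for SG)
print(accuracy_keys("XX", "invoice"))  # ['retailername', 'invoice_no', 'purchase_date'] via default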
@@ -126,11 +126,13 @@ def create_accuracy_report(report_id, **kwargs):
                    "invoice_no": mean_list(request_att["acc"]["reviewed"].get("invoice_no", [None]))}
     rq_accuracy = []
 
-    for rpf in _report_files:
-        if sum(len(value_list) for value_list in rpf.reviewed_accuracy.values()):
-            rq_accuracy += list(chain(*rpf.reviewed_accuracy.values()))
-        elif sum(len(value_list) for value_list in rpf.feedback_accuracy.values()):
-            rq_accuracy += list(chain(*rpf.feedback_accuracy.values()))
+    for i, _att in enumerate(_atts):
+        if _report_files[i].bad_image_reason in settings.ACC_EXCLUDE_RESEASONS:
+            continue
+        if sum(len(value_list) for value_list in _att["acc"]["reviewed"].values()):
+            rq_accuracy += list(chain(*_att["acc"]["reviewed"].values()))
+        elif sum(len(value_list) for value_list in _att["acc"]["feedback"].values()):
+            rq_accuracy += list(chain(*_att["acc"]["feedback"].values()))
 
     request.is_required = False
     if len(rq_accuracy) > 0:
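Per attachment, reviewed accuracies are preferred over feedback ones, and the per-field lists are flattened into one request-level list via chain(*dict.values()). A small self-contained illustration of that idiom (the scores are made up):

from itertools import chain

reviewed = {"imei_number": [1.0, 0.8], "retailername": [0.9], "purchase_date": []}
if sum(len(v) for v in reviewed.values()):         # any reviewed score present at all?
    rq_accuracy = list(chain(*reviewed.values()))  # [1.0, 0.8, 0.9]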
@@ -0,0 +1,174 @@
+from django.core.management.base import BaseCommand
+from tqdm import tqdm
+from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest
+from fwd_api.exception.exceptions import InvalidException
+from fwd_api.utils.s3 import MinioS3Client
+
+import copy
+import os
+import glob
+import traceback
+import json
+from django.utils import timezone
+
+IMAGE_DIRS = ["/external_data/SGGE", "/external_data/zipsGwp1", "/external_data/zipsGwp2", "/external_data/zipsGwp3", "/external_data/zipsGwp4", "/external_data/zipsEvoucher"]
+# IMAGE_DIRS = ["/external_data/SGGE"]
+image_extensions = ['*.jpg', '*.jpeg', '*.png', '*.gif']
+pdf_extensions = ['*.pdf']
+
+
+class Command(BaseCommand):
+    help = 'Refactor database for image level'
+
+    def add_arguments(self, parser):
+        # Add your command-line arguments here
+        parser.add_argument('start', type=str, help='start date, sample: 2023-01-02T00:00:00+0700')
+        parser.add_argument('end', type=str, help='end date, sample: 2023-01-03T00:00:00+0700')
+
+    def _prepare_data(self, redemtion_dirs):
+        prepared_data = {}  # {"redemption_id": {"image_paths": [], "pages": 1}}
+        for redemtion_dir in redemtion_dirs:
+            redemptions = os.listdir(redemtion_dir)
+            for redemption in redemptions:
+                files_in_dir = []
+                for ext in image_extensions + pdf_extensions:
+                    files_in_dir.extend(glob.glob(os.path.join(redemtion_dir, redemption, ext)))
+                redemption = redemption.replace("Data", "")
+                if prepared_data.get(redemption, None):
+                    prepared_data[redemption]["image_paths"] += files_in_dir
+                    prepared_data[redemption]["pages"] += len(files_in_dir)
+                else:
+                    prepared_data[redemption] = {"image_paths": files_in_dir, "pages": len(files_in_dir)}
+
+        return prepared_data
+
+    def _add_error(self, result, error, redemption_id):
+        if not result.get("Error", None):
+            result["Error"] = {}
+        if result["Error"].get(error, None):
+            result["Error"][error].add(redemption_id)
+        else:
+            result["Error"][error] = set([redemption_id])
+
+    def _add_info(self, result, info, redemption_id):
+        if not result.get("Info", None):
+            result["Info"] = {}
+        if result["Info"].get(info, None):
+            result["Info"][info].add(redemption_id)
+        else:
+            result["Info"][info] = set([redemption_id])
+
+    def _add_warning(self, result, warn, redemption_id):
+        if not result.get("Warning", None):
+            result["Warning"] = {}
+        if result["Warning"].get(warn, None):
+            result["Warning"][warn].add(redemption_id)
+        else:
+            result["Warning"][warn] = set([redemption_id])
+
+    def _try_find_doc_type(self, file_paths):
+        doc_types = {"invoice": [],
+                     "imei": [],
+                     "undefined": []}
+        for file_path in file_paths:
+            if "invoice" in os.path.basename(file_path):
+                doc_types["invoice"].append(file_path)
+            elif "imei" in os.path.basename(file_path):
+                doc_types["imei"].append(file_path)
+            else:
+                doc_types["undefined"].append(file_path)
+        return doc_types
+
+    def process_request(self, request, data, result, s3_client):
+        if not request.predict_result:
+            # self.stdout.write(self.style.WARNING(f"Key predict_result not found in {request.request_id}"))
+            return
+        if request.predict_result.get("status", 200) != 200:
+            # self.stdout.write(self.style.WARNING(f"Not a success request {request.request_id}"))
+            return
+        # Find the corresponding redemption_ID
+        self._add_info(result, "[OCR]: redemptions", request.redemption_id)
+        if request.redemption_id not in list(data.keys()):
+            self._add_error(result, "[OCR]: Not found redemption_ID", request.redemption_id)
+            return
+        if request.pages != data[request.redemption_id]["pages"]:
+            self._add_error(result, "[SBT]: Mismatch files number in a request", request.redemption_id)
+            return
+
+        file_paths_by_doc_type = self._try_find_doc_type(data[request.redemption_id]["image_paths"])
+        if len(file_paths_by_doc_type["undefined"]) > 0:
+            self._add_warning(result, "[SBT]: Undefined doc type", request.redemption_id)
+
+        if len(request.request_id.split(".")[0].split("_")) < 2:
+            return
+        images = SubscriptionRequestFile.objects.filter(request=request, file_category="Origin")
+
+        for i, image in enumerate(images):
+            if image.doc_type not in ["imei", "invoice"]:
+                self._add_error(result, "[OCR]: Weird doc type", request.redemption_id)
+                continue
+            try:
+                if len(file_paths_by_doc_type[image.doc_type]) > 0:
+                    local_file_path = file_paths_by_doc_type[image.doc_type].pop(0)
+                else:
+                    local_file_path = file_paths_by_doc_type["undefined"].pop(0)
+                predir = "sbt_invoice"
+                s3_key = os.path.join(predir, request.request_id, image.file_name)
+                # s3_client.upload_file(local_file_path, s3_key)
+                result['total'] += 1
+                self._add_info(result, "[OCR]: Success", request.redemption_id)
+            except IndexError as e:
+                self._add_error(result, "[OCR]: Mismatch doc type", request.redemption_id)
+                continue
+            except Exception as e:
+                self.stdout.write(self.style.ERROR(f"Request: {request.request_id} failed with {e}"))
+                print(traceback.format_exc())
+                result['failed'] += 1
+                self._add_info(result, "[OCR]: Failed", request.redemption_id)
+                continue
+        data.pop(request.redemption_id, None)
+
+    def handle(self, *args, **options):
+        start = options['start']
+        end = options['end']
+        result = {'total': 0,
+                  'failed': 0}
+        # TODO: redemption ID is not null on filter
+        if start or end:
+            try:
+                start_date = timezone.datetime.strptime(start, '%Y-%m-%dT%H:%M:%S%z')  # We only care about day precision
+                end_date = timezone.datetime.strptime(end, '%Y-%m-%dT%H:%M:%S%z')
+            except Exception as e:
+                print(f"[INFO]: start: {start}")
+                print(f"[INFO]: end: {end}")
+                raise InvalidException(excArgs="Date format")
+            subcription_iter = SubscriptionRequest.objects.filter(created_at__range=(start_date, end_date), redemption_id__isnull=False)
+        else:
+            subcription_iter = SubscriptionRequest.objects.filter(redemption_id__isnull=False)
+        print("[INFO]: Preparing data for filling up...")
+        prepared_data = self._prepare_data(IMAGE_DIRS)
+        print(f"[INFO]: Prepared data, total: {len(list(prepared_data.keys()))}")
+        prepared_data_copy = copy.deepcopy(prepared_data)
+        s3_client = MinioS3Client(
+            # endpoint='http://107.120.133.27:9884',
+            access_key='AKIA3AFPFVWZHTZHB6FW',
+            secret_key='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',
+            bucket_name='ocr-sds'
+        )
+        # file = open("modified.txt", "w")
+        for request in tqdm(subcription_iter.iterator()):
+            self.process_request(request, prepared_data_copy, result, s3_client)
+        # file.close()
+        self.stdout.write(self.style.SUCCESS('Command executed successfully! total/failed: {}/{}'.format(result['total'], result['failed'])))
+        # print(f"[INFO]: result: {result}")
+        for err in result.get("Error", []):
+            print("[INFO]: Error: {}: {}".format(err, len(result["Error"][err])))
+            result["Error"][err] = list(result["Error"][err])
+        for info in result.get("Info", []):
+            print("[INFO]: Info: {}: {}".format(info, len(result["Info"][info])))
+            result["Info"][info] = list(result["Info"][info])
+        for warn in result.get("Warning", []):
+            print("[INFO]: Warning: {}: {}".format(warn, len(result["Warning"][warn])))
+            result["Warning"][warn] = list(result["Warning"][warn])
+        with open("result.json", "w") as outfile:
+            json.dump(result, outfile)
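The file above is a new Django management command, but its path (and therefore the command name) is not shown in this hunk. Assuming it lands under fwd_api/management/commands/, it could be driven from code as below; "fill_images" is a placeholder name, not the actual one:

from django.core.management import call_command

# Positional args map to the 'start' and 'end' arguments declared in add_arguments.
call_command("fill_images",              # hypothetical command name
             "2023-01-02T00:00:00+0700",
             "2023-01-03T00:00:00+0700")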
@@ -0,0 +1,65 @@
+from django.core.management.base import BaseCommand
+from tqdm import tqdm
+from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest
+from fwd_api.exception.exceptions import InvalidException
+from fwd_api.utils.accuracy import predict_result_to_ready
+import traceback
+import copy
+from django.utils import timezone
+
+KEY = "imei_number"
+VALUE = "None"
+EXPECTED_VALUE = []
+
+
+class Command(BaseCommand):
+    help = 'Refactor database for image level'
+
+    def add_arguments(self, parser):
+        # Add your command-line arguments here
+        parser.add_argument('start', type=str, help='start date, sample: 2023-01-02T00:00:00+0700')
+        parser.add_argument('end', type=str, help='end date, sample: 2023-01-03T00:00:00+0700')
+
+    def process_request(self, request, result):
+        if len(request.request_id.split(".")[0].split("_")) < 2:
+            return
+        images = SubscriptionRequestFile.objects.filter(request=request)
+        if not request.predict_result:
+            # self.stdout.write(self.style.WARNING(f"Key predict_result not found in {request.request_id}"))
+            return
+        if request.predict_result.get("status", 200) != 200:
+            # self.stdout.write(self.style.WARNING(f"Not a success request {request.request_id}"))
+            return
+
+        if isinstance(request.redemption_id, str) and request.subsidiary is not None:
+            try:
+                request.subsidiary = request.redemption_id[:2]
+                request.save()
+                result['total'] += 1
+                result['subs'].add(request.redemption_id[:2])
+            except Exception as e:
+                print(e)
+                result["failed"] += 1
+
+    def handle(self, *args, **options):
+        start = options['start']
+        end = options['end']
+        result = {'total': 0,
+                  'failed': 0,
+                  'subs': set()}
+        if start or end:
+            try:
+                start_date = timezone.datetime.strptime(start, '%Y-%m-%dT%H:%M:%S%z')  # We only care about day precision
+                end_date = timezone.datetime.strptime(end, '%Y-%m-%dT%H:%M:%S%z')
+            except Exception as e:
+                print(f"[INFO]: start: {start}")
+                print(f"[INFO]: end: {end}")
+                raise InvalidException(excArgs="Date format")
+            subcription_iter = SubscriptionRequest.objects.filter(created_at__range=(start_date, end_date))
+        else:
+            subcription_iter = SubscriptionRequest.objects.all()
+
+        # file = open("modified.txt", "w")
+        for request in tqdm(subcription_iter.iterator()):
+            self.process_request(request, result)
+        # file.close()
+        self.stdout.write(self.style.SUCCESS('Command executed successfully! total/failed: {}/{} - subs: {}'.format(result['total'], result['failed'], result['subs'])))
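This second command backfills SubscriptionRequest.subsidiary from the redemption ID, whose first two characters encode the subsidiary. Note that, as written, the `request.subsidiary is not None` guard only rewrites requests whose subsidiary is already set. A one-line illustration with made-up IDs:

for redemption_id in ["SG0412345", "AU0498765"]:  # hypothetical IDs
    print(redemption_id[:2])                      # -> "SG", then "AU"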
@@ -18,8 +18,8 @@ from ..models import SubscriptionRequest, Report, ReportFile
 import json
 from typing import Union, List, Dict
 
-valid_keys = ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"]
-optional_keys = ['invoice_no']
+VALID_KEYS = ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"]
+OPTIONAL_KEYS = ['invoice_no']
 
 class ReportAccumulateByRequest:
     def __init__(self, sub):
@@ -123,6 +123,7 @@ class ReportAccumulateByRequest:
             "review_progress": []
         },
         self.report = copy.deepcopy(self.month_format)
+        self.report["average_accuracy_rate"]["avg"] = IterAvg()
 
     @staticmethod
     def update_total(total, report_file):
@@ -142,8 +143,10 @@ class ReportAccumulateByRequest:
         for key in settings.FIELD:
             if sum([len(report_file.reviewed_accuracy[x]) for x in report_file.reviewed_accuracy.keys() if "_count" not in x]) > 0 :
                 total["average_accuracy_rate"][key].add(report_file.reviewed_accuracy.get(key, []))
+                total["average_accuracy_rate"]['avg'].add(report_file.reviewed_accuracy.get(key, []))
             elif sum([len(report_file.feedback_accuracy[x]) for x in report_file.feedback_accuracy.keys() if "_count" not in x]) > 0:
                 total["average_accuracy_rate"][key].add(report_file.feedback_accuracy.get(key, []))
+                total["average_accuracy_rate"]['avg'].add(report_file.feedback_accuracy.get(key, []))
             total["feedback_accuracy"][key].add(report_file.feedback_accuracy.get(key, []))
             total["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, []))
 
@@ -346,17 +349,17 @@ class ReportAccumulateByRequest:
         for key in _report["average_processing_time"].keys():
             _report["average_processing_time"][key] = _report["average_processing_time"][key]()
 
-        avg_acc = 0
-        count_acc = 0
+        # avg_acc = 0
+        # count_acc = 0
         for key in settings.FIELD:
             _report["average_accuracy_rate"][key] = _report["average_accuracy_rate"][key]()
             for accuracy_type in ["feedback_accuracy", "reviewed_accuracy"]:
-                if (_report[accuracy_type][key].count + count_acc) > 0:
-                    avg_acc = (avg_acc*count_acc + _report[accuracy_type][key].avg*_report[accuracy_type][key].count) / (_report[accuracy_type][key].count + count_acc)
-                    count_acc += _report[accuracy_type][key].count
+                # if (_report[accuracy_type][key].count + count_acc) > 0:
+                #     avg_acc = (avg_acc*count_acc + _report[accuracy_type][key].avg*_report[accuracy_type][key].count) / (_report[accuracy_type][key].count + count_acc)
+                #     count_acc += _report[accuracy_type][key].count
 
                 _report[accuracy_type][key] = _report[accuracy_type][key]()
-        _report["average_accuracy_rate"]["avg"] = avg_acc
+        _report["average_accuracy_rate"]["avg"] = _report["average_accuracy_rate"]["avg"]()
 
         _report["review_progress"] = _report["review_progress"].count(1)/(_report["review_progress"].count(0)+ _report["review_progress"].count(1)) if (_report["review_progress"].count(0)+ _report["review_progress"].count(1)) >0 else 0
         _report["images_quality"]["successful_percent"] = _report["images_quality"]["successful"]/_report["total_images"] if _report["total_images"] > 0 else 0
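The inline weighted-average bookkeeping (avg_acc/count_acc) is replaced by the IterAvg accumulator registered in __init__ and finalized here with a call. IterAvg's implementation is not part of this diff; the sketch below only captures the count-weighted behavior the call sites assume (.add(values), .avg, .count, and __call__ to finalize):

class IterAvg:
    """Count-weighted running average; a stand-in for fwd_api's real class."""
    def __init__(self):
        self.avg = 0.0
        self.count = 0

    def add(self, values):
        # Ignore missing scores, then fold the batch into the running mean.
        values = [v for v in values if v is not None]
        if not values:
            return
        self.avg = (self.avg * self.count + sum(values)) / (self.count + len(values))
        self.count += len(values)

    def __call__(self):
        return self.avg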
@@ -734,9 +737,11 @@ def _accuracy_calculate_formatter(inference, target):
     Make both list inference and target to be the same length.
     """
     if not isinstance(inference, list):
-        inference = [] if inference is None else [inference]
+        # inference = [] if inference is None else [inference]
+        inference = [inference]
     if not isinstance(target, list):
-        target = [] if target is None else [target]
+        # target = [] if target is None else [target]
+        target = [target]
 
     length = max(len(target), len(inference))
     target = target + (length - len(target))*[target[0]] if len(target) > 0 else target + (length - len(target))*[None]
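Behavior change worth noting: previously a None inference or target collapsed to an empty list and the field could drop out of scoring; now None is wrapped as [None], survives the length padding, and is compared like any other value. A quick illustration:

inference, target = None, "SG123456"
# old: inference -> [], so the pair could vanish from the accuracy lists
# new: inference -> [None], padded to length 1, compared against "SG123456",
#      so the field is scored as a mismatch instead of being skipped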
@@ -745,7 +750,7 @@ def _accuracy_calculate_formatter(inference, target):
     return inference, target
 
 def _acc_will_be_ignored(key_name, _target):
-    is_optional_key = key_name in optional_keys
+    is_optional_key = key_name in OPTIONAL_KEYS
     is_empty_target = _target in [[], None, '']
     if is_optional_key and is_empty_target:
         return True
@@ -769,7 +774,7 @@ def calculate_accuracy(key_name: str, inference: Dict[str, Union[str, List]], ta
 
     _inference = inference[key_name]
     _target = target[key_name]
-    _will_acc_be_ignored = _acc_will_be_ignored(key_name, _target)
+    # _will_acc_be_ignored = _acc_will_be_ignored(key_name, _target)
     _inference, _target = _accuracy_calculate_formatter(_inference, _target)
 
     for i, v in enumerate(_inference):
@@ -786,7 +791,6 @@ def calculate_accuracy(key_name: str, inference: Dict[str, Union[str, List]], ta
                             # "line_acc",
                             # "one_minus_ned_word",
                             ])
-        if not _will_acc_be_ignored:
-            acc.append(list(score.values())[0])
-            data.append([x, y])
+        acc.append(list(score.values())[0])
+        data.append([x, y])
     return acc, data
@@ -901,12 +905,11 @@ def calculate_a_request(report, request)
     images = SubscriptionRequestFile.objects.filter(request=request, file_category=FileCategory.Origin.value)
     report_files = []
     for image in images:
-        status, att = calculate_subcription_file(image)
-        atts.append(att)
+        status, att = calculate_subcription_file(image, request.subsidiary)
         att["acc"]["feedback"], fb_max_indexes = acc_maximize_list_values(att["acc"]["feedback"])
         att["acc"]["reviewed"], rv_max_indexes = acc_maximize_list_values(att["acc"]["reviewed"])
 
-        _att = copy.deepcopy(att)
+        _att = copy.deepcopy(att)  # deep copy right here to avoid removing acc for bad images in the next steps
 
         fb_avg_acc = avg_dict(att["acc"]["feedback"])
         rv_avg_acc = avg_dict(att["acc"]["reviewed"])
@@ -917,6 +920,8 @@ def calculate_a_request(report, request)
             continue
         if status != 200:
             continue
 
+        atts.append(att)
+
         image.feedback_accuracy = att["acc"]["feedback"] # dict {key: [values]}
         image.is_bad_image_quality = att["is_bad_image"] # is_bad_image=avg_acc<threshold (avg_acc=feedback_acc)
 
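Moving atts.append(att) below the guards changes what create_accuracy_report later iterates: only attachments that passed the status/skip checks end up in _atts, which that function now indexes in lockstep with _report_files. A compressed view of the control flow (the first guard's condition is not shown in this hunk, so it is a placeholder here):

for image in images:
    status, att = calculate_subcription_file(image, request.subsidiary)
    if should_skip(att):          # placeholder for the existing skip condition
        continue
    if status != 200:
        continue
    atts.append(att)              # only appended after the guards now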
@@ -1026,7 +1031,7 @@ def calculate_a_request(report, request)
 
     return request_att, report_files, atts
 
-def calculate_subcription_file(subcription_request_file):
+def calculate_subcription_file(subcription_request_file, subsidiary):
     att = {"acc": {"feedback": {},
                    "reviewed": {}},
            "normalized_data": {"feedback": {},
@@ -1043,7 +1048,15 @@ def calculate_subcription_file(subcription_request_file):
     feedback_result = copy.deepcopy(subcription_request_file.feedback_result)
     reviewed_result = copy.deepcopy(subcription_request_file.reviewed_result)
 
-    for key_name in valid_keys:
+    accuracy_keys_for_this_image = settings.FIELDS_BY_SUB.get(subsidiary, settings.FIELDS_BY_SUB["default"]).get(subcription_request_file.doc_type, [])
+
+    for key_name in VALID_KEYS:
+        att["acc"]["feedback"][key_name] = []
+        att["normalized_data"]["feedback"][key_name] = []
+        att["acc"]["reviewed"][key_name] = []
+        att["normalized_data"]["reviewed"][key_name] = []
+
+    for key_name in accuracy_keys_for_this_image:
         try:
             att["acc"]["feedback"][key_name], att["normalized_data"]["feedback"][key_name] = calculate_accuracy(key_name, inference_result, feedback_result, "feedback", sub=subcription_request_file.request.subsidiary)
             att["acc"]["reviewed"][key_name], att["normalized_data"]["reviewed"][key_name] = calculate_accuracy(key_name, inference_result, reviewed_result, "reviewed", sub=subcription_request_file.request.subsidiary)
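Two things happen in the rewritten loop: every key in VALID_KEYS is pre-initialized to empty lists so downstream consumers can index att["acc"][...] unconditionally, and only the subsidiary/doc-type keys from FIELDS_BY_SUB are actually scored. A sketch of the resulting shape for an SG invoice (the score is a placeholder):

VALID_KEYS = ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"]
sg_invoice_keys = ["retailername", "purchase_date"]  # FIELDS_BY_SUB["SG"]["invoice"]

att_acc = {k: [] for k in VALID_KEYS}                # every key exists, empty
for key in sg_invoice_keys:
    att_acc[key] = [0.95]                            # placeholder for calculate_accuracy(...)
# invoice_no stays [] for SG, so it neither drags averages down nor raises KeyError downstream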
@@ -1052,8 +1065,8 @@ def calculate_subcription_file(subcription_request_file):
     subcription_request_file.feedback_accuracy = att["acc"]["feedback"]
     subcription_request_file.reviewed_accuracy = att["acc"]["reviewed"]
 
-    avg_reviewed = calculate_avg_accuracy(att["acc"], "reviewed", valid_keys)
-    avg_feedback = calculate_avg_accuracy(att["acc"], "feedback", valid_keys)
+    avg_reviewed = calculate_avg_accuracy(att["acc"], "reviewed", VALID_KEYS)
+    avg_feedback = calculate_avg_accuracy(att["acc"], "feedback", VALID_KEYS)
 
     if avg_feedback is not None or avg_reviewed is not None:
         avg_acc = 0