Merge pull request #150 from SDSRV-IDP/fix/zeros_progress
Fix/zeros progress
This commit is contained in:
commit
d0e5c9e5be
@ -14,8 +14,12 @@ from django.utils import timezone
|
|||||||
|
|
||||||
IMAGE_DIRS = ["/external_data/SGGE", "/external_data/zipsGwp1", "/external_data/zipsGwp2", "/external_data/zipsGwp3", "/external_data/zipsGwp4", "/external_data/zipsEvoucher"]
|
IMAGE_DIRS = ["/external_data/SGGE", "/external_data/zipsGwp1", "/external_data/zipsGwp2", "/external_data/zipsGwp3", "/external_data/zipsGwp4", "/external_data/zipsEvoucher"]
|
||||||
# IMAGE_DIRS = ["/external_data/SGGE"]
|
# IMAGE_DIRS = ["/external_data/SGGE"]
|
||||||
image_extensions = ['*.jpg', '*.jpeg', '*.png', '*.gif']
|
image_extensions = ['*.jpg', '*.jpeg', '*.png', '*.gif', '*.JPG', '*.JPEG', '*.PNG', '*.GIF']
|
||||||
pdf_extensions = ['*.pdf']
|
pdf_extensions = ['*.pdf', '*.PDF']
|
||||||
|
|
||||||
|
IGNORE_MULTIPLE_IMAGE = True
|
||||||
|
BET_ON_FIRST_IMAGE = True # Try to upload the first image to monoimage-request
|
||||||
|
PROVIDED_MONO_REDEMPTION_MAKE_IT_INVOICE = ["SG"]
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(BaseCommand):
|
||||||
help = 'Refactor database for image level'
|
help = 'Refactor database for image level'
|
||||||
@ -30,10 +34,13 @@ class Command(BaseCommand):
|
|||||||
for redemtion_dir in redemtion_dirs:
|
for redemtion_dir in redemtion_dirs:
|
||||||
redemptions = os.listdir(redemtion_dir)
|
redemptions = os.listdir(redemtion_dir)
|
||||||
for redemption in redemptions:
|
for redemption in redemptions:
|
||||||
|
if "." + redemption.split(".")[-1] in image_extensions + pdf_extensions + [".csv", ".zip"]:
|
||||||
|
continue # ignore non-folder
|
||||||
files_in_dir = []
|
files_in_dir = []
|
||||||
for ext in image_extensions + pdf_extensions:
|
for ext in image_extensions + pdf_extensions:
|
||||||
files_in_dir.extend(glob.glob(os.path.join(redemtion_dir, redemption, ext)))
|
files_in_dir.extend(glob.glob(os.path.join(redemtion_dir, redemption, ext)))
|
||||||
redemption = redemption.replace("Data", "")
|
files_in_dir = sorted(files_in_dir)
|
||||||
|
redemption = redemption.replace("Data", "").replace("(pdf)", "")
|
||||||
if prepared_data.get(redemption, None):
|
if prepared_data.get(redemption, None):
|
||||||
prepared_data[redemption]["image_paths"] += files_in_dir
|
prepared_data[redemption]["image_paths"] += files_in_dir
|
||||||
prepared_data[redemption]["pages"] += len(files_in_dir)
|
prepared_data[redemption]["pages"] += len(files_in_dir)
|
||||||
@ -42,29 +49,23 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
return prepared_data
|
return prepared_data
|
||||||
|
|
||||||
def _add_error(self, result, error, redemption_id):
|
def _add_log(self, result, log, redemption_id, log_level):
|
||||||
if not result.get("Error", None):
|
if not result.get(log_level, None):
|
||||||
result["Error"] = {}
|
result[log_level] = {}
|
||||||
if result["Error"].get(error, None):
|
log = "[{}]".format(redemption_id[:2]) + log
|
||||||
result["Error"][error].add(redemption_id)
|
if result[log_level].get(log, None):
|
||||||
|
result[log_level][log].add(redemption_id)
|
||||||
else:
|
else:
|
||||||
result["Error"][error] = set([redemption_id])
|
result[log_level][log] = set([redemption_id])
|
||||||
|
|
||||||
|
def _add_error(self, result, error, redemption_id):
|
||||||
|
self._add_log(result, error, redemption_id, "Error")
|
||||||
|
|
||||||
def _add_info(self, result, info, redemption_id):
|
def _add_info(self, result, info, redemption_id):
|
||||||
if not result.get("Info", None):
|
self._add_log(result, info, redemption_id, "Info")
|
||||||
result["Info"] = {}
|
|
||||||
if result["Info"].get(info, None):
|
|
||||||
result["Info"][info].add(redemption_id)
|
|
||||||
else:
|
|
||||||
result["Info"][info] = set([redemption_id])
|
|
||||||
|
|
||||||
def _add_warning(self, result, warn, redemption_id):
|
def _add_warning(self, result, warn, redemption_id):
|
||||||
if not result.get("Warning", None):
|
self._add_log(result, warn, redemption_id, "Warning")
|
||||||
result["Warning"] = {}
|
|
||||||
if result["Warning"].get(warn, None):
|
|
||||||
result["Warning"][warn].add(redemption_id)
|
|
||||||
else:
|
|
||||||
result["Warning"][warn] = set([redemption_id])
|
|
||||||
|
|
||||||
def _try_find_doc_type(self, file_paths):
|
def _try_find_doc_type(self, file_paths):
|
||||||
doc_types = {"invoice": [],
|
doc_types = {"invoice": [],
|
||||||
@ -88,17 +89,59 @@ class Command(BaseCommand):
|
|||||||
return
|
return
|
||||||
# Find the corresponding redemption_ID
|
# Find the corresponding redemption_ID
|
||||||
self._add_info(result, "[OCR]: redemptions", request.redemption_id)
|
self._add_info(result, "[OCR]: redemptions", request.redemption_id)
|
||||||
|
self._add_info(result, "[OCR]: total {} images".format(request.pages), request.redemption_id)
|
||||||
if request.redemption_id not in list(data.keys()):
|
if request.redemption_id not in list(data.keys()):
|
||||||
self._add_error(result, "[OCR]: Not found redemption_ID", request.redemption_id)
|
self._add_error(result, "[OCR]: Not found redemption_ID", request.redemption_id)
|
||||||
return
|
return
|
||||||
|
|
||||||
if request.pages != data[request.redemption_id]["pages"]:
|
if request.pages != data[request.redemption_id]["pages"]:
|
||||||
self._add_error(result, "[SBT]: Mismatch files number in a request", request.redemption_id)
|
self._add_error(result, "[SBT]: Mismatch files number in a request", request.redemption_id)
|
||||||
|
if BET_ON_FIRST_IMAGE and request.pages == 1:
|
||||||
|
self._add_warning(result, "[SBT]: monoimage-request, bet on first one", request.redemption_id)
|
||||||
|
data[request.redemption_id]["image_paths"] = [data[request.redemption_id]["image_paths"][0]]
|
||||||
|
else:
|
||||||
return
|
return
|
||||||
|
|
||||||
file_paths_by_doc_type = self._try_find_doc_type(data[request.redemption_id]["image_paths"])
|
file_paths_by_doc_type = self._try_find_doc_type(data[request.redemption_id]["image_paths"])
|
||||||
|
if request.redemption_id in [
|
||||||
|
"SGE20240608115040-910",
|
||||||
|
"SGE20240607160017-644",
|
||||||
|
"SGE20240609095034-986",
|
||||||
|
"SGGE20240609145539-429",
|
||||||
|
"SGE20240607134340-431",
|
||||||
|
"SGE20240609073431-645",
|
||||||
|
"SGE20240608124611-070",
|
||||||
|
"SGE20240610120344-912",
|
||||||
|
"SGE20240610085917-775",
|
||||||
|
"SGGE20240609044518-869",
|
||||||
|
"SGE20240608093242-813",
|
||||||
|
"SGGE20240608175708-038",
|
||||||
|
"SGE20240607175952-926",
|
||||||
|
"SGE20240609060258-864",
|
||||||
|
"SGGE20240609144052-538",
|
||||||
|
"SGG20240607135057-187",
|
||||||
|
"SGE20240608133426-100",
|
||||||
|
"SGE20240607152408-300",
|
||||||
|
"SGG20240608162101-167",
|
||||||
|
"SGG20240608133730-021",
|
||||||
|
"SGE20240609103647-828"
|
||||||
|
]:
|
||||||
|
print("{} - {} - {}".format(request.redemption_id[:2] in PROVIDED_MONO_REDEMPTION_MAKE_IT_INVOICE, request.redemption_id[:2], data[request.redemption_id]["pages"]))
|
||||||
|
if request.redemption_id[:2] in PROVIDED_MONO_REDEMPTION_MAKE_IT_INVOICE and data[request.redemption_id]["pages"] == 1:
|
||||||
|
self._add_warning(result, "[SBT]: mono-redemption, make it invoice", request.redemption_id)
|
||||||
|
file_paths_by_doc_type["invoice"] = data[request.redemption_id]["image_paths"]
|
||||||
|
file_paths_by_doc_type["imei"] = []
|
||||||
|
file_paths_by_doc_type["undefined"] = []
|
||||||
|
|
||||||
if len(file_paths_by_doc_type["undefined"]) > 0:
|
if len(file_paths_by_doc_type["undefined"]) > 0:
|
||||||
self._add_warning(result, "[SBT]: Undefined doc type", request.redemption_id)
|
self._add_warning(result, "[SBT]: Undefined doc type", request.redemption_id)
|
||||||
|
|
||||||
|
|
||||||
|
if request.pages > 1 or data[request.redemption_id]["pages"] > 1:
|
||||||
|
self._add_error(result, "[SBT]: request with multiple images", request.redemption_id)
|
||||||
|
if IGNORE_MULTIPLE_IMAGE:
|
||||||
|
return
|
||||||
|
|
||||||
if len(request.request_id.split(".")[0].split("_")) < 2:
|
if len(request.request_id.split(".")[0].split("_")) < 2:
|
||||||
return
|
return
|
||||||
images = SubscriptionRequestFile.objects.filter(request=request, file_category="Origin")
|
images = SubscriptionRequestFile.objects.filter(request=request, file_category="Origin")
|
||||||
@ -147,12 +190,15 @@ class Command(BaseCommand):
|
|||||||
subcription_iter = SubscriptionRequest.objects.filter(redemption_id__isnull=False)
|
subcription_iter = SubscriptionRequest.objects.filter(redemption_id__isnull=False)
|
||||||
print(f"[INFO]: Preparing data for filling up...")
|
print(f"[INFO]: Preparing data for filling up...")
|
||||||
prepared_data = self._prepare_data(IMAGE_DIRS)
|
prepared_data = self._prepare_data(IMAGE_DIRS)
|
||||||
|
# Log out prepared information
|
||||||
|
for k,v in prepared_data.items():
|
||||||
|
self._add_info(result, "[Provided]: total {} images found".format(v["pages"]), k)
|
||||||
print(f"[INFO]: Prepared data, total: {len(list(prepared_data.keys()))}")
|
print(f"[INFO]: Prepared data, total: {len(list(prepared_data.keys()))}")
|
||||||
prepared_data_copy = copy.deepcopy(prepared_data)
|
prepared_data_copy = copy.deepcopy(prepared_data)
|
||||||
s3_client = MinioS3Client(
|
s3_client = MinioS3Client(
|
||||||
# endpoint='http://107.120.133.27:9884',
|
# endpoint='http://107.120.133.27:9884',
|
||||||
access_key='secret',
|
access_key='secret',
|
||||||
secret_key='secret+HRcfOsbXhx0YSNOLxdW',
|
secret_key='secret',
|
||||||
bucket_name='ocr-sds'
|
bucket_name='ocr-sds'
|
||||||
)
|
)
|
||||||
# file = open("modified.txt", "w")
|
# file = open("modified.txt", "w")
|
||||||
@ -164,11 +210,12 @@ class Command(BaseCommand):
|
|||||||
for err in result.get("Error", []):
|
for err in result.get("Error", []):
|
||||||
print("[INFO]: Error: {}: {}".format(err, len(result["Error"][err])))
|
print("[INFO]: Error: {}: {}".format(err, len(result["Error"][err])))
|
||||||
result["Error"][err] = list(result["Error"][err])
|
result["Error"][err] = list(result["Error"][err])
|
||||||
for info in result.get("Info", []):
|
|
||||||
print("[INFO]: Info: {}: {}".format(info, len(result["Info"][info])))
|
for log_level in ['Info', 'Error', 'Warning']:
|
||||||
result["Info"][info] = list(result["Info"][info])
|
errs = result.get(log_level, [])
|
||||||
for warn in result.get("Warning", []):
|
errs = sorted(errs)
|
||||||
print("[INFO]: Warning: {}: {}".format(warn, len(result["Warning"][warn])))
|
for err in errs:
|
||||||
result["Warning"][warn] = list(result["Warning"][warn])
|
print("[INFO]: {}: {}: {}".format(log_level, err, len(result[log_level][err])))
|
||||||
|
result[log_level][err] = list(result[log_level][err])
|
||||||
with open("result.json", "w") as outfile:
|
with open("result.json", "w") as outfile:
|
||||||
json.dump(result, outfile)
|
json.dump(result, outfile)
|
@ -408,7 +408,7 @@ class ReportAccumulateByRequest:
|
|||||||
_report[accuracy_type][key] = _report[accuracy_type][key]()
|
_report[accuracy_type][key] = _report[accuracy_type][key]()
|
||||||
_report["average_accuracy_rate"]["avg"] = _report["average_accuracy_rate"]["avg"]()
|
_report["average_accuracy_rate"]["avg"] = _report["average_accuracy_rate"]["avg"]()
|
||||||
|
|
||||||
_report["review_progress"] = _report["review_progress"].count(1)/(_report["review_progress"].count(0)+ _report["review_progress"].count(1)) if (_report["review_progress"].count(0)+ _report["review_progress"].count(1)) >0 else 0
|
_report["review_progress"] = _report["review_progress"].count(1)/(_report["review_progress"].count(0)+ _report["review_progress"].count(1)) if (_report["review_progress"].count(0)+ _report["review_progress"].count(1)) >0 else 1.0
|
||||||
_report["images_quality"]["successful_percent"] = _report["images_quality"]["successful"]/_report["total_images"] if _report["total_images"] > 0 else 0
|
_report["images_quality"]["successful_percent"] = _report["images_quality"]["successful"]/_report["total_images"] if _report["total_images"] > 0 else 0
|
||||||
_report["images_quality"]["bad_percent"] = _report["images_quality"]["bad"]/_report["total_images"] if _report["total_images"] > 0 else 0
|
_report["images_quality"]["bad_percent"] = _report["images_quality"]["bad"]/_report["total_images"] if _report["total_images"] > 0 else 0
|
||||||
# export data for dashboard
|
# export data for dashboard
|
||||||
|
Loading…
Reference in New Issue
Block a user