From b2ee0cad418f1145000bc7233dee6081131e7992 Mon Sep 17 00:00:00 2001 From: PhanThanhTrung Date: Tue, 12 Mar 2024 14:49:17 +0700 Subject: [PATCH 01/12] update process_img parameter --- cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py b/cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py index c03b041..022b51e 100755 --- a/cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py +++ b/cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py @@ -38,7 +38,7 @@ def sbt_predict(image_url, engine) -> None: os.makedirs(save_dir, exist_ok = True) tmp_image_path = os.path.join(save_dir, f"{uuid.uuid4()}.jpg") cv2.imwrite(tmp_image_path, img) - outputs = process_img(img_path=tmp_image_path, + outputs = process_img(img=tmp_image_path, save_dir=save_dir, engine=engine, export_all=False, # False @@ -71,7 +71,6 @@ def predict(page_numb, image_url): """ sbt_result = sbt_predict(image_url, engine=sbt_engine) - print(sbt_result) output_dict = { "document_type": "invoice", "document_class": " ", From df5f3b371b91cae7d2a303e05457218275035812 Mon Sep 17 00:00:00 2001 From: PhanThanhTrung Date: Tue, 12 Mar 2024 14:49:47 +0700 Subject: [PATCH 02/12] update dockerfile for building AI services --- cope2n-ai-fi/Dockerfile | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cope2n-ai-fi/Dockerfile b/cope2n-ai-fi/Dockerfile index 8968edf..ed5d957 100755 --- a/cope2n-ai-fi/Dockerfile +++ b/cope2n-ai-fi/Dockerfile @@ -24,10 +24,10 @@ RUN python -m pip install 'git+https://github.com/facebookresearch/detectron2.gi # Install SDSV packages COPY . /workspace/cope2n-ai-fi -RUN cd /workspace/cope2n-ai-fi/modules/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsv_dewarp && pip3 install -v -e . --no-cache-dir -RUN cd /workspace/cope2n-ai-fi/modules/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsvtd && pip3 install -v -e . --no-cache-dir -RUN cd /workspace/cope2n-ai-fi/modules/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsvtr && pip3 install -v -e . --no-cache-dir - +RUN cd /workspace/cope2n-ai-fi/modules/sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsv_dewarp && pip3 install -v -e . --no-cache-dir +RUN cd /workspace/cope2n-ai-fi/modules/sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsvtd && pip3 install -v -e . --no-cache-dir +RUN cd /workspace/cope2n-ai-fi/modules/sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsvtr && pip3 install -v -e . --no-cache-dir +RUN cd /workspace/cope2n-ai-fi/modules/sdsvkvu/sdsvkvu/externals/sdsvocr && pip3 install -v -e . --no-cache-dir # COPY ./modules/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsv_dewarp /tmp/sdsv_dewarp # RUN cd /tmp/sdsv_dewarp && pip install -v -e . --no-cache-dir @@ -64,4 +64,5 @@ WORKDIR /workspace ENV PYTHONPATH="." 
ENV TZ="Asia/Ho_Chi_Minh" -CMD [ "sh", "run.sh"] \ No newline at end of file +CMD [ "sh", "run.sh"] +# ENTRYPOINT [ "sleep", "infinity" ] \ No newline at end of file From 6b11f82342188e1a52c2ff7f02e2b64725ecce3d Mon Sep 17 00:00:00 2001 From: PhanThanhTrung Date: Tue, 12 Mar 2024 14:50:10 +0700 Subject: [PATCH 03/12] update version pdf2image to match with dependencies --- cope2n-ai-fi/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cope2n-ai-fi/requirements.txt b/cope2n-ai-fi/requirements.txt index faf39a9..fe251fb 100755 --- a/cope2n-ai-fi/requirements.txt +++ b/cope2n-ai-fi/requirements.txt @@ -9,6 +9,6 @@ pymupdf easydict imagesize==1.4.1 -pdf2image==1.16.3 +pdf2image==1.17.0 redis==5.0.1 celery==5.3.6 \ No newline at end of file From 5042a7081098679e015b68cc0acb65c53b0e1b2e Mon Sep 17 00:00:00 2001 From: PhanThanhTrung Date: Tue, 12 Mar 2024 16:45:50 +0700 Subject: [PATCH 04/12] process_sync update invoice_no --- cope2n-ai-fi/common/utils_kvu/split_docs.py | 3 ++- .../celery_worker/process_result_tasks.py | 21 ++++++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/cope2n-ai-fi/common/utils_kvu/split_docs.py b/cope2n-ai-fi/common/utils_kvu/split_docs.py index 52d1078..c20d10d 100755 --- a/cope2n-ai-fi/common/utils_kvu/split_docs.py +++ b/cope2n-ai-fi/common/utils_kvu/split_docs.py @@ -107,6 +107,7 @@ def merge_sbt_output(loutputs): merged_output = [] combined_output = {"retailername": None, "sold_to_party": None, + "invoice_no": None, "purchase_date": [], "imei_number": []} # place holder for the output for output in loutputs: @@ -123,7 +124,7 @@ def merge_sbt_output(loutputs): combined_output[field["label"]].append(field["value"]) if output['doc_type'] == "invoice": for field in fields: - if field["label"] in ["retailername", "sold_to_party", "purchase_date"] : + if field["label"] in ["retailername", "sold_to_party", "purchase_date", "invoice_no"] : if isinstance(combined_output[field["label"]], list): if field["value"] is not None: if isinstance(field["value"], list): diff --git a/cope2n-api/fwd_api/celery_worker/process_result_tasks.py b/cope2n-api/fwd_api/celery_worker/process_result_tasks.py index 728d8fb..da03539 100755 --- a/cope2n-api/fwd_api/celery_worker/process_result_tasks.py +++ b/cope2n-api/fwd_api/celery_worker/process_result_tasks.py @@ -29,8 +29,8 @@ def aggregate_result(results): des_result["content"]["total_pages"] = 0 des_result["content"]["ocr_num_pages"] = 0 des_result["content"]["document"][0]["end_page"] = 0 - des_result["content"]["document"][0]["content"][3]["value"] = [None for _ in range(doc_types.count("imei"))] - des_result["content"]["document"][0]["content"][2]["value"] = [] + des_result["content"]["document"][0]["content"][4]["value"] = [None for _ in range(doc_types.count("imei"))] + des_result["content"]["document"][0]["content"][3]["value"] = [] imei_count = 0 for doc_type, result in sorted_results: @@ -38,12 +38,14 @@ def aggregate_result(results): des_result["content"]["ocr_num_pages"] += 1 des_result["content"]["document"][0]["end_page"] += 1 if doc_type == "imei": - des_result["content"]["document"][0]["content"][3]["value"][imei_count] = result["content"]["document"][0]["content"][3]["value"][0] + if len(result["content"]["document"][0]["content"][4]["value"]): + des_result["content"]["document"][0]["content"][4]["value"][imei_count] = result["content"]["document"][0]["content"][4]["value"][0] imei_count += 1 elif doc_type == "invoice": 
des_result["content"]["document"][0]["content"][0]["value"] = result["content"]["document"][0]["content"][0]["value"] des_result["content"]["document"][0]["content"][1]["value"] = result["content"]["document"][0]["content"][1]["value"] - des_result["content"]["document"][0]["content"][2]["value"] += result["content"]["document"][0]["content"][2]["value"] + des_result["content"]["document"][0]["content"][2]["value"] = result["content"]["document"][0]["content"][2]["value"] + des_result["content"]["document"][0]["content"][3]["value"] += result["content"]["document"][0]["content"][3]["value"] elif doc_type == "all": des_result.update(result) else: @@ -151,7 +153,6 @@ def process_invoice_sbt_result(rq_id, result, metadata): index_in_request = metadata.pop("index_to_image_type", 0) result["metadata"] = metadata _update_subscription_rq_file(request_id=rq, index_in_request=index_in_request, doc_type=image_type, result=result) - status = result.get("status", 200) redis_client.set_cache(rq_id, page_index, result) done = rq.pages == redis_client.get_size(rq_id) @@ -194,9 +195,9 @@ def process_invoice_sbt_result(rq_id, result, metadata): def _update_subscription_rq_file(request_id, index_in_request, doc_type, result): image = SubscriptionRequestFile.objects.filter(request=request_id, index_in_request=index_in_request, doc_type=doc_type).first() - retailer_name = None sold_to_party = None + invoice_no = None purchase_date = [] imei_number = [] predicted_res = __get_actual_predict_result(result=result) @@ -208,19 +209,23 @@ def _update_subscription_rq_file(request_id, index_in_request, doc_type, result) sold_to_party = elem['value'] elif elem["label"] == "purchase_date": purchase_date = elem['value'] + elif elem["label"] == "invoice_no": + invoice_no = elem['value'] else: imei_number = elem['value'] if doc_type=='invoice': _predict_result = { "retailername": retailer_name, - "sold_to_party": sold_to_party, + "sold_to_party": sold_to_party, + "invoice_no": invoice_no, "purchase_date": purchase_date, "imei_number": [] } else: _predict_result = { "retailername": None, - "sold_to_party": None, + "sold_to_party": None, + "invoice_no": None, "purchase_date": [], "imei_number": imei_number } From cbc83ba8c8f32f6b99b72dfcc69387eba1f3546e Mon Sep 17 00:00:00 2001 From: PhanThanhTrung Date: Tue, 12 Mar 2024 17:02:36 +0700 Subject: [PATCH 05/12] add invoice_no to get_subscription_request --- cope2n-api/fwd_api/api/accuracy_view.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cope2n-api/fwd_api/api/accuracy_view.py b/cope2n-api/fwd_api/api/accuracy_view.py index fc60961..e7b2bff 100755 --- a/cope2n-api/fwd_api/api/accuracy_view.py +++ b/cope2n-api/fwd_api/api/accuracy_view.py @@ -669,6 +669,7 @@ class AccuracyViewSet(viewsets.ViewSet): "request_id": subscription_request.request_id, "retailername": None, "sold_to_party": None, + "invoice_no": None, "purchase_date": None, "imei_number": [] } From 5708fa575aad97f02e11ce41accb331b0162aeab Mon Sep 17 00:00:00 2001 From: PhanThanhTrung Date: Tue, 12 Mar 2024 17:07:31 +0700 Subject: [PATCH 06/12] update invoice_no to update review process --- cope2n-api/fwd_api/api/accuracy_view.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cope2n-api/fwd_api/api/accuracy_view.py b/cope2n-api/fwd_api/api/accuracy_view.py index e7b2bff..d6f19dc 100755 --- a/cope2n-api/fwd_api/api/accuracy_view.py +++ b/cope2n-api/fwd_api/api/accuracy_view.py @@ -771,7 +771,7 @@ class AccuracyViewSet(viewsets.ViewSet): raise InvalidException(excArgs=f'reviewed_result') 
reviewed_result = data["reviewed_result"] - for field in ['retailername', 'sold_to_party', 'purchase_date', 'imei_number']: + for field in ['retailername', 'sold_to_party', 'invoice_no', 'purchase_date', 'imei_number']: if not field in reviewed_result.keys(): raise RequiredFieldException(excArgs=f'reviewed_result.{field}') reviewed_result['request_id'] = request_id @@ -784,6 +784,7 @@ class AccuracyViewSet(viewsets.ViewSet): subscription_request_file.reviewed_result = { "retailername": None, "sold_to_party": None, + "invoice_no": None, "purchase_date": [], "imei_number": []} if len(reviewed_result["imei_number"]) - 1 >= subscription_request_file.index_in_request: From 7764f9726f52acd6bbc4a08478599f36a81a04cc Mon Sep 17 00:00:00 2001 From: PhanThanhTrung Date: Tue, 12 Mar 2024 17:10:07 +0700 Subject: [PATCH 07/12] update default value get_subscription_request --- cope2n-api/fwd_api/api/accuracy_view.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cope2n-api/fwd_api/api/accuracy_view.py b/cope2n-api/fwd_api/api/accuracy_view.py index d6f19dc..b8fa200 100755 --- a/cope2n-api/fwd_api/api/accuracy_view.py +++ b/cope2n-api/fwd_api/api/accuracy_view.py @@ -645,7 +645,7 @@ class AccuracyViewSet(viewsets.ViewSet): 'properties': { 'reviewed_result': { 'type': 'string', - 'default': '''{"request_id": "Sample request_id", "imei_number": ["sample_imei1", "sample_imei2"], "retailername": "Sample Retailer", "purchase_date": "01/01/1970", "sold_to_party": "Sample party"}''', + 'default': '''{"request_id": "Sample request_id", "imei_number": ["sample_imei1", "sample_imei2"], "retailername": "Sample Retailer", "purchase_date": "01/01/1970", "sold_to_party": "Sample party", "invoice_no": "Sample Invoice no"}''', }, }, }, From 19c182c54359086a87e341ceab581c830a24d930 Mon Sep 17 00:00:00 2001 From: PhanThanhTrung Date: Wed, 13 Mar 2024 13:17:47 +0700 Subject: [PATCH 08/12] update --- cope2n-api/fwd_api/api/accuracy_view.py | 2 +- cope2n-api/fwd_api/api/ctel_view.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/cope2n-api/fwd_api/api/accuracy_view.py b/cope2n-api/fwd_api/api/accuracy_view.py index b8fa200..787abae 100755 --- a/cope2n-api/fwd_api/api/accuracy_view.py +++ b/cope2n-api/fwd_api/api/accuracy_view.py @@ -408,7 +408,7 @@ class AccuracyViewSet(viewsets.ViewSet): data = [] for report in page: - acc_keys = ["purchase_date", "retailername", "imei_number", "avg"] + acc_keys = ["purchase_date", "retailername", "invoice_no", "imei_number", "avg"] acc = {} for key in acc_keys: fb = report.feedback_accuracy.get(key, 0) if report.feedback_accuracy else 0 diff --git a/cope2n-api/fwd_api/api/ctel_view.py b/cope2n-api/fwd_api/api/ctel_view.py index 91816fe..593613c 100755 --- a/cope2n-api/fwd_api/api/ctel_view.py +++ b/cope2n-api/fwd_api/api/ctel_view.py @@ -320,6 +320,9 @@ class CtelViewSet(viewsets.ViewSet): 'retailername': { 'type': 'string', }, + 'invoice_no': { + 'type': 'string', + }, 'sold_to_party': { 'type': 'string', }, @@ -336,7 +339,7 @@ class CtelViewSet(viewsets.ViewSet): } }, }, - 'required': ['request_id', 'retailername', 'sold_to_party', 'purchase_date', 'imei_number'] + 'required': ['request_id', 'retailername', 'invoice_no', 'sold_to_party', 'purchase_date', 'imei_number'] } }, responses=None, tags=['OCR']) @action(detail=False, url_path="images/feedback", methods=["POST"]) From 6eca045f0ebe1dc7a5a40cb9ceae07daf4afb1c6 Mon Sep 17 00:00:00 2001 From: PhanThanhTrung Date: Wed, 13 Mar 2024 13:18:04 +0700 Subject: [PATCH 09/12] remove make_a_report 
function --- .../celery_worker/process_report_tasks.py | 129 +----------------- 1 file changed, 1 insertion(+), 128 deletions(-) diff --git a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py index e450a25..c62559b 100755 --- a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py +++ b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py @@ -35,133 +35,6 @@ def mean_list(l): return 0 return sum(l)/len(l) -@app.task(name='make_a_report') -def make_a_report(report_id, query_set): - # TODO: to be deprecated - try: - start_date = timezone.datetime.strptime(query_set["start_date_str"], '%Y-%m-%dT%H:%M:%S%z') - end_date = timezone.datetime.strptime(query_set["end_date_str"], '%Y-%m-%dT%H:%M:%S%z') - base_query = Q(created_at__range=(start_date, end_date)) - if query_set["request_id"]: - base_query &= Q(request_id=query_set["request_id"]) - if query_set["redemption_id"]: - base_query &= Q(redemption_id=query_set["redemption_id"]) - base_query &= Q(is_test_request=False) - if isinstance(query_set["include_test"], str): - query_set["include_test"] = True if query_set["include_test"].lower() in ["true", "yes", "1"] else False - if query_set["include_test"]: - # base_query = ~base_query - base_query.children = base_query.children[:-1] - - elif isinstance(query_set["include_test"], bool): - if query_set["include_test"]: - base_query = ~base_query - if isinstance(query_set["subsidiary"], str): - if query_set["subsidiary"] and query_set["subsidiary"].lower().replace(" ", "")!="all": - base_query &= Q(redemption_id__startswith=query_set["subsidiary"]) - if isinstance(query_set["is_reviewed"], str): - if query_set["is_reviewed"] == "reviewed": - base_query &= Q(is_reviewed=True) - elif query_set["is_reviewed"] == "not reviewed": - base_query &= Q(is_reviewed=False) - # elif query_set["is_reviewed"] == "all": - # pass - - errors = [] - # Create a placeholder to fill - accuracy = {"feedback" :{"imei_number": IterAvg(), - "purchase_date": IterAvg(), - "retailername": IterAvg(), - "sold_to_party": IterAvg(),}, - "reviewed" :{"imei_number": IterAvg(), - "purchase_date": IterAvg(), - "retailername": IterAvg(), - "sold_to_party": IterAvg(),} - } # {"imei": {"acc": 0.1, count: 1}, ...} - time_cost = {"invoice": IterAvg(), - "imei": IterAvg()} - number_images = 0 - number_bad_images = 0 - # TODO: Multithreading - # Calculate accuracy, processing time, ....Then save. 
- subscription_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at') - report: Report = \ - Report.objects.filter(report_id=report_id).first() - # TODO: number of transaction by doc type - num_request = 0 - for request in subscription_requests: - if request.status != 200 or not (request.reviewed_result or request.feedback_result): - # Failed requests or lack of reviewed_result/feedback_result - continue - request_att = calculate_and_save_subcription_file(report, request) - - request.feedback_accuracy = {"imei_number" : mean_list(request_att["acc"]["feedback"].get("imei_number", [None])), - "purchase_date" : mean_list(request_att["acc"]["feedback"].get("purchase_date", [None])), - "retailername" : mean_list(request_att["acc"]["feedback"].get("retailername", [None])), - "sold_to_party" : mean_list(request_att["acc"]["feedback"].get("sold_to_party", [None]))} - request.reviewed_accuracy = {"imei_number" : mean_list(request_att["acc"]["reviewed"].get("imei_number", [None])), - "purchase_date" : mean_list(request_att["acc"]["reviewed"].get("purchase_date", [None])), - "retailername" : mean_list(request_att["acc"]["reviewed"].get("retailername", [None])), - "sold_to_party" : mean_list(request_att["acc"]["reviewed"].get("sold_to_party", [None]))} - request.save() - number_images += request_att["total_images"] - number_bad_images += request_att["bad_images"] - update_temp_accuracy(accuracy["feedback"], request_att["acc"]["feedback"], keys=["imei_number", "purchase_date", "retailername", "sold_to_party"]) - update_temp_accuracy(accuracy["reviewed"], request_att["acc"]["reviewed"], keys=["imei_number", "purchase_date", "retailername", "sold_to_party"]) - - time_cost["imei"].add(request_att["time_cost"].get("imei", [])) - time_cost["invoice"].add(request_att["time_cost"].get("invoice", [])) - - errors += request_att["err"] - num_request += 1 - transaction_att = count_transactions(start_date, end_date, report.subsidiary) - # Do saving process - report.number_request = num_request - report.number_images = number_images - report.number_imei = time_cost["imei"].count - report.number_invoice = time_cost["invoice"].count - report.number_bad_images = number_bad_images - # FIXME: refactor this data stream for endurability - report.average_OCR_time = {"invoice": time_cost["invoice"](), "imei": time_cost["imei"](), - "invoice_count": time_cost["invoice"].count, "imei_count": time_cost["imei"].count} - - report.average_OCR_time["avg"] = (report.average_OCR_time["invoice"]*report.average_OCR_time["invoice_count"] + report.average_OCR_time["imei"]*report.average_OCR_time["imei_count"])/(report.average_OCR_time["imei_count"] + report.average_OCR_time["invoice_count"]) if (report.average_OCR_time["imei_count"] + report.average_OCR_time["invoice_count"]) > 0 else None - - report.number_imei_transaction = transaction_att.get("imei", 0) - report.number_invoice_transaction = transaction_att.get("invoice", 0) - - acumulated_acc = {"feedback": {}, - "reviewed": {}} - for acc_type in ["feedback", "reviewed"]: - avg_acc = IterAvg() - for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]: - acumulated_acc[acc_type][key] = accuracy[acc_type][key]() - acumulated_acc[acc_type][key+"_count"] = accuracy[acc_type][key].count - avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key+"_count"]) - acumulated_acc[acc_type]["avg"] = avg_acc() - - report.feedback_accuracy = acumulated_acc["feedback"] - report.reviewed_accuracy = acumulated_acc["reviewed"] - - 
report.errors = "|".join(errors) - report.status = "Ready" - report.save() - # Saving a xlsx file - report_files = ReportFile.objects.filter(report=report) - data = extract_report_detail_list(report_files, lower=True) - data_workbook = dict2xlsx(data, _type='report_detail') - local_workbook = save_workbook_file(report.report_id + ".xlsx", report, data_workbook) - s3_key=save_report_to_S3(report.report_id, local_workbook) - - except IndexError as e: - print(e) - traceback.print_exc() - print("NotFound request by report id, %d", report_id) - except Exception as e: - print("[ERROR]: an error occured while processing report: ", report_id) - traceback.print_exc() - return 400 - @app.task(name='make_a_report_2') def make_a_report_2(report_id, query_set): report_type = query_set.pop("report_type", "accuracy") @@ -281,7 +154,7 @@ def create_accuracy_report(report_id, **kwargs): "acumulated": {}} for acc_type in ["feedback", "reviewed", "acumulated"]: avg_acc = IterAvg() - for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]: + for key in ["imei_number", "purchase_date", "invoice_no", "retailername", "sold_to_party"]: acumulated_acc[acc_type][key] = accuracy[acc_type][key]() acumulated_acc[acc_type][key + "_count"] = accuracy[acc_type][key].count avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key+"_count"]) From a8999bb9c86d609432596f5e7a0b7e3a5abd4cd7 Mon Sep 17 00:00:00 2001 From: PhanThanhTrung Date: Wed, 13 Mar 2024 13:18:26 +0700 Subject: [PATCH 10/12] update internal task + remove junk code --- cope2n-api/fwd_api/celery_worker/internal_task.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cope2n-api/fwd_api/celery_worker/internal_task.py b/cope2n-api/fwd_api/celery_worker/internal_task.py index f098d1a..ca69595 100755 --- a/cope2n-api/fwd_api/celery_worker/internal_task.py +++ b/cope2n-api/fwd_api/celery_worker/internal_task.py @@ -90,11 +90,13 @@ def process_csv_feedback(csv_file_path, feedback_id): imei2 = row.get('imeiNumber2') purchase_date = row.get('Purchase Date') retailer = row.get('retailer') + invoice_no = row.get('invoice_no') sold_to_party = row.get('Sold to party') server_time = float(row.get('timetakenmilli')) fb['request_id'] = request_id fb['retailername'] = retailer fb['sold_to_party'] = sold_to_party + fb["invoice_no"] = invoice_no fb['purchase_date'] = purchase_date fb['imei_number'] = [imei1, imei2] sub_rq.feedback_result = fb @@ -119,7 +121,6 @@ def process_csv_feedback(csv_file_path, feedback_id): continue _predict_result = copy.deepcopy(predict_result_to_ready(sub_rq.predict_result)) _feedback_result = copy.deepcopy(sub_rq.feedback_result) - # _reviewed_result = copy.deepcopy(sub_rq.reviewed_result) try: image.processing_time = time_cost.get(image.doc_type, [0 for _ in range(image.index_in_request)])[image.index_in_request] except Exception as e: @@ -132,9 +133,6 @@ def process_csv_feedback(csv_file_path, feedback_id): if _feedback_result: _feedback_result["imei_number"] = [] - # if _reviewed_result: - # _reviewed_result["imei_number"] = [] - else: try: _predict_result = {"retailername": None, "sold_to_party": None, "purchase_date": [], "imei_number": [_predict_result["imei_number"][image.index_in_request]]} From d35a5955af87a0bfb96bf2d5683fffd3e654b3e2 Mon Sep 17 00:00:00 2001 From: PhanThanhTrung Date: Wed, 13 Mar 2024 13:18:57 +0700 Subject: [PATCH 11/12] update accuracy fns for adding invoice_no --- cope2n-api/fwd_api/utils/accuracy.py | 220 ++++++++------------------- 1 file changed, 61 
insertions(+), 159 deletions(-) diff --git a/cope2n-api/fwd_api/utils/accuracy.py b/cope2n-api/fwd_api/utils/accuracy.py index ee258c0..fa671ab 100755 --- a/cope2n-api/fwd_api/utils/accuracy.py +++ b/cope2n-api/fwd_api/utils/accuracy.py @@ -17,7 +17,7 @@ from fwd import settings from ..models import SubscriptionRequest, Report, ReportFile import json -valid_keys = ["retailername", "sold_to_party", "purchase_date", "imei_number"] +valid_keys = ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"] class ReportAccumulateByRequest: def __init__(self, sub): @@ -41,7 +41,8 @@ class ReportAccumulateByRequest: 'imei': IterAvg(), 'purchase_date': IterAvg(), 'retailer_name': IterAvg(), - 'sold_to_party': IterAvg() + 'sold_to_party': IterAvg(), + 'invoice_no': IterAvg() }, 'average_processing_time': { 'imei': IterAvg(), @@ -57,13 +58,15 @@ class ReportAccumulateByRequest: 'imei_number': IterAvg(), 'purchase_date': IterAvg(), 'retailername': IterAvg(), - 'sold_to_party': IterAvg() + 'sold_to_party': IterAvg(), + 'invoice_no': IterAvg() }, 'reviewed_accuracy': { 'imei_number': IterAvg(), 'purchase_date': IterAvg(), 'retailername': IterAvg(), - 'sold_to_party': IterAvg() + 'sold_to_party': IterAvg(), + 'invoice_no': IterAvg() }, 'num_request': 0, "review_progress": [] @@ -84,7 +87,8 @@ class ReportAccumulateByRequest: 'imei': IterAvg(), 'purchase_date': IterAvg(), 'retailer_name': IterAvg(), - 'sold_to_party': IterAvg() + 'sold_to_party': IterAvg(), + 'invoice_no': IterAvg() }, 'average_processing_time': { 'imei': IterAvg(), @@ -100,13 +104,15 @@ class ReportAccumulateByRequest: 'imei_number': IterAvg(), 'purchase_date': IterAvg(), 'retailername': IterAvg(), - 'sold_to_party': IterAvg() + 'sold_to_party': IterAvg(), + 'invoice_no': IterAvg() }, 'reviewed_accuracy': { 'imei_number': IterAvg(), 'purchase_date': IterAvg(), 'retailername': IterAvg(), - 'sold_to_party': IterAvg() + 'sold_to_party': IterAvg(), + 'invoice_no': IterAvg() }, "report_files": [], "num_request": 0, @@ -132,15 +138,17 @@ class ReportAccumulateByRequest: total["average_accuracy_rate"]["purchase_date"].add(report_file.reviewed_accuracy.get("purchase_date", [])) total["average_accuracy_rate"]["retailer_name"].add(report_file.reviewed_accuracy.get("retailername", [])) total["average_accuracy_rate"]["sold_to_party"].add(report_file.reviewed_accuracy.get("sold_to_party", [])) + total["average_accuracy_rate"]["invoice_no"].add(report_file.reviewed_accuracy.get("invoice_no", [])) elif sum([len(report_file.feedback_accuracy[x]) for x in report_file.feedback_accuracy.keys() if "_count" not in x]) > 0: total["average_accuracy_rate"]["imei"].add(report_file.feedback_accuracy.get("imei_number", [])) total["average_accuracy_rate"]["purchase_date"].add(report_file.feedback_accuracy.get("purchase_date", [])) total["average_accuracy_rate"]["retailer_name"].add(report_file.feedback_accuracy.get("retailername", [])) total["average_accuracy_rate"]["sold_to_party"].add(report_file.feedback_accuracy.get("sold_to_party", [])) + total["average_accuracy_rate"]["invoice_no"].add(report_file.feedback_accuracy.get("invoice_no", [])) - for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]: + for key in ["imei_number", "purchase_date", "invoice_no", "retailername", "sold_to_party"]: total["feedback_accuracy"][key].add(report_file.feedback_accuracy.get(key, [])) - for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]: + for key in ["imei_number", "purchase_date", "invoice_no", "retailername", 
"sold_to_party"]: total["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, [])) if not total["average_processing_time"].get(report_file.doc_type, None): @@ -179,15 +187,17 @@ class ReportAccumulateByRequest: day_data["average_accuracy_rate"]["purchase_date"].add(report_file.reviewed_accuracy.get("purchase_date", [])) day_data["average_accuracy_rate"]["retailer_name"].add(report_file.reviewed_accuracy.get("retailername", [])) day_data["average_accuracy_rate"]["sold_to_party"].add(report_file.reviewed_accuracy.get("sold_to_party", [])) + day_data["average_accuracy_rate"]["invoice_no"].add(report_file.reviewed_accuracy.get("invoice_no", [])) elif sum([len(report_file.feedback_accuracy[x]) for x in report_file.feedback_accuracy.keys() if "_count" not in x]) > 0: day_data["average_accuracy_rate"]["imei"].add(report_file.feedback_accuracy.get("imei_number", [])) day_data["average_accuracy_rate"]["purchase_date"].add(report_file.feedback_accuracy.get("purchase_date", [])) day_data["average_accuracy_rate"]["retailer_name"].add(report_file.feedback_accuracy.get("retailername", [])) day_data["average_accuracy_rate"]["sold_to_party"].add(report_file.feedback_accuracy.get("sold_to_party", [])) + day_data["average_accuracy_rate"]["invoice_no"].add(report_file.feedback_accuracy.get("invoice_no", [])) - for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]: + for key in ["imei_number", "purchase_date", "invoice_no", "retailername", "sold_to_party"]: day_data["feedback_accuracy"][key].add(report_file.feedback_accuracy.get(key, [])) - for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]: + for key in ["imei_number", "purchase_date", "invoice_no", "retailername", "sold_to_party"]: day_data["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, [])) if not day_data["average_processing_time"].get(report_file.doc_type, None): @@ -264,7 +274,7 @@ class ReportAccumulateByRequest: "reviewed_accuracy": {}} for acc_type in ["feedback_accuracy", "reviewed_accuracy"]: avg_acc = IterAvg() - for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]: + for key in ["imei_number", "purchase_date", "invoice_no", "retailername", "sold_to_party"]: acumulated_acc[acc_type][key] = self.data[month][1][day][acc_type][key]() acumulated_acc[acc_type][key+"_count"] = self.data[month][1][day][acc_type][key].count avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key+"_count"]) @@ -312,6 +322,7 @@ class ReportAccumulateByRequest: _data[month][1][day]["average_accuracy_rate"]["purchase_date"] = _data[month][1][day]["average_accuracy_rate"]["purchase_date"]() _data[month][1][day]["average_accuracy_rate"]["retailer_name"] = _data[month][1][day]["average_accuracy_rate"]["retailer_name"]() _data[month][1][day]["average_accuracy_rate"]["sold_to_party"] = _data[month][1][day]["average_accuracy_rate"]["sold_to_party"]() + _data[month][1][day]["average_accuracy_rate"]["invoice_no"] = _data[month][1][day]["average_accuracy_rate"]["invoice_no"]() for key in _data[month][1][day]["average_processing_time"].keys(): _data[month][1][day]["average_processing_time"][key] = _data[month][1][day]["average_processing_time"][key]() @@ -319,10 +330,14 @@ class ReportAccumulateByRequest: _data[month][1][day]["feedback_accuracy"]["purchase_date"] = _data[month][1][day]["feedback_accuracy"]["purchase_date"]() _data[month][1][day]["feedback_accuracy"]["retailername"] = _data[month][1][day]["feedback_accuracy"]["retailername"]() 
_data[month][1][day]["feedback_accuracy"]["sold_to_party"] = _data[month][1][day]["feedback_accuracy"]["sold_to_party"]() + _data[month][1][day]["feedback_accuracy"]["invoice_no"] = _data[month][1][day]["feedback_accuracy"]["invoice_no"]() + _data[month][1][day]["reviewed_accuracy"]["imei_number"] = _data[month][1][day]["reviewed_accuracy"]["imei_number"]() _data[month][1][day]["reviewed_accuracy"]["purchase_date"] = _data[month][1][day]["reviewed_accuracy"]["purchase_date"]() _data[month][1][day]["reviewed_accuracy"]["retailername"] = _data[month][1][day]["reviewed_accuracy"]["retailername"]() _data[month][1][day]["reviewed_accuracy"]["sold_to_party"] = _data[month][1][day]["reviewed_accuracy"]["sold_to_party"]() + _data[month][1][day]["reviewed_accuracy"]["invoice_no"] = _data[month][1][day]["reviewed_accuracy"]["invoice_no"]() + _data[month][1][day]["review_progress"] = _data[month][1][day]["review_progress"].count(1)/(_data[month][1][day]["review_progress"].count(0)+ _data[month][1][day]["review_progress"].count(1)) if (_data[month][1][day]["review_progress"].count(0)+ _data[month][1][day]["review_progress"].count(1)) >0 else 0 _data[month][1][day].pop("report_files") @@ -336,6 +351,7 @@ class ReportAccumulateByRequest: _data[month][0]["average_accuracy_rate"]["purchase_date"] = _data[month][0]["average_accuracy_rate"]["purchase_date"]() _data[month][0]["average_accuracy_rate"]["retailer_name"] = _data[month][0]["average_accuracy_rate"]["retailer_name"]() _data[month][0]["average_accuracy_rate"]["sold_to_party"] = _data[month][0]["average_accuracy_rate"]["sold_to_party"]() + _data[month][0]["average_accuracy_rate"]["invoice_no"] = _data[month][0]["average_accuracy_rate"]["invoice_no"]() for key in _data[month][0]["average_processing_time"].keys(): _data[month][0]["average_processing_time"][key] = _data[month][0]["average_processing_time"][key]() @@ -343,10 +359,14 @@ class ReportAccumulateByRequest: _data[month][0]["feedback_accuracy"]["purchase_date"] = _data[month][0]["feedback_accuracy"]["purchase_date"]() _data[month][0]["feedback_accuracy"]["retailername"] = _data[month][0]["feedback_accuracy"]["retailername"]() _data[month][0]["feedback_accuracy"]["sold_to_party"] = _data[month][0]["feedback_accuracy"]["sold_to_party"]() + _data[month][0]["feedback_accuracy"]["invoice_no"] = _data[month][0]["feedback_accuracy"]["invoice_no"]() + _data[month][0]["reviewed_accuracy"]["imei_number"] = _data[month][0]["reviewed_accuracy"]["imei_number"]() _data[month][0]["reviewed_accuracy"]["purchase_date"] = _data[month][0]["reviewed_accuracy"]["purchase_date"]() _data[month][0]["reviewed_accuracy"]["retailername"] = _data[month][0]["reviewed_accuracy"]["retailername"]() _data[month][0]["reviewed_accuracy"]["sold_to_party"] = _data[month][0]["reviewed_accuracy"]["sold_to_party"]() + _data[month][0]["reviewed_accuracy"]["invoice_no"] = _data[month][0]["reviewed_accuracy"]["invoice_no"]() + _data[month][0]["review_progress"] = _data[month][0]["review_progress"].count(1)/(_data[month][0]["review_progress"].count(0)+ _data[month][0]["review_progress"].count(1)) if (_data[month][0]["review_progress"].count(0)+ _data[month][0]["review_progress"].count(1)) >0 else 0 return _data @@ -367,7 +387,8 @@ class MonthReportAccumulate: 'average_accuracy_rate': { 'imei': IterAvg(), 'purchase_date': IterAvg(), - 'retailer_name': IterAvg() + 'retailer_name': IterAvg(), + 'invoice_no': IterAvg() }, 'average_processing_time': { 'imei': IterAvg(), @@ -394,7 +415,8 @@ class MonthReportAccumulate: 'average_accuracy_rate': 
{ 'imei': 0, 'purchase_date': 0, - 'retailer_name': 0 + 'retailer_name': 0, + 'invoice_no': 0 }, 'average_processing_time': { 'imei': 0, @@ -416,10 +438,12 @@ class MonthReportAccumulate: self.total["average_accuracy_rate"]["imei"].add_avg(report.reviewed_accuracy.get("imei_number", 0), report.reviewed_accuracy.get("imei_number_count", 0)) self.total["average_accuracy_rate"]["purchase_date"].add_avg(report.reviewed_accuracy.get("purchase_date", 0), report.reviewed_accuracy.get("purchase_date_count", 0)) self.total["average_accuracy_rate"]["retailer_name"].add_avg(report.reviewed_accuracy.get("retailername", 0), report.reviewed_accuracy.get("retailername_count", 0)) + self.total["average_accuracy_rate"]["invoice_no"].add_avg(report.reviewed_accuracy.get("invoice_no", 0), report.reviewed_accuracy.get("invoice_no_count", 0)) elif sum([ report.feedback_accuracy[x] for x in report.feedback_accuracy.keys() if "_count" not in x]) > 0: self.total["average_accuracy_rate"]["imei"].add_avg(report.feedback_accuracy.get("imei_number", 0), report.feedback_accuracy.get("imei_number_count", 0)) self.total["average_accuracy_rate"]["purchase_date"].add_avg(report.feedback_accuracy.get("purchase_date", 0), report.feedback_accuracy.get("purchase_date_count", 0)) self.total["average_accuracy_rate"]["retailer_name"].add_avg(report.feedback_accuracy.get("retailername", 0), report.feedback_accuracy.get("retailername_count", 0)) + self.total["average_accuracy_rate"]["invoice_no"].add_avg(report.feedback_accuracy.get("invoice_no", 0), report.feedback_accuracy.get("invoice_no_count", 0)) self.total["average_processing_time"]["imei"].add_avg(report.average_OCR_time.get("imei", 0), report.average_OCR_time.get("imei_count", 0)) if report.average_OCR_time else 0 self.total["average_processing_time"]["invoice"].add_avg(report.average_OCR_time.get("invoice", 0), report.average_OCR_time.get("invoice_count", 0)) if report.average_OCR_time else 0 @@ -453,10 +477,13 @@ class MonthReportAccumulate: new_data["average_accuracy_rate"]["imei"] = report.reviewed_accuracy.get("imei_number", None) new_data["average_accuracy_rate"]["purchase_date"] = report.reviewed_accuracy.get("purchase_date", None) new_data["average_accuracy_rate"]["retailer_name"] = report.reviewed_accuracy.get("retailername", None) + new_data["average_accuracy_rate"]["invoice_no"] = report.reviewed_accuracy.get("invoice_no", None) elif sum([ report.feedback_accuracy[x] for x in report.feedback_accuracy.keys() if "_count" not in x]): new_data["average_accuracy_rate"]["imei"] = report.feedback_accuracy.get("imei_number", None) new_data["average_accuracy_rate"]["purchase_date"] = report.feedback_accuracy.get("purchase_date", None) new_data["average_accuracy_rate"]["retailer_name"] = report.feedback_accuracy.get("retailername", None) + new_data["average_accuracy_rate"]["invoice_no"] = report.feedback_accuracy.get("invoice_no", None) + new_data["average_processing_time"]["imei"] = report.average_OCR_time.get("imei", 0) if report.average_OCR_time else 0 new_data["average_processing_time"]["invoice"] = report.average_OCR_time.get("invoice", 0) if report.average_OCR_time else 0 new_data["usage"]["imei"] = report.number_imei_transaction @@ -544,7 +571,7 @@ def validate_feedback_file(feedback, predict): if num_imei_feedback != num_imei_predict: return False return True - + def first_of_list(the_list): if not the_list: return None @@ -567,6 +594,9 @@ def extract_report_detail_list(report_detail_list, lower=False, in_percent=True) "Invoice_Retailer_Consumer": 
report_file.feedback_result.get("retailername", None), "Invoice_Retailer_OCR": report_file.predict_result.get("retailername", None), "Invoice_Retailer Accuracy": first_of_list(report_file.feedback_accuracy.get("retailername", [None])), + "Invoice_No_Consumer": report_file.feedback_result.get("invoice_no", None), + "Invoice_No_OCR": report_file.predict_result.get("invoice_no", None), + "Invoice_No Accuracy": first_of_list(report_file.feedback_accuracy.get("invoice_no", [None])), "OCR Image Accuracy": report_file.acc, "OCR Image Speed (seconds)": report_file.time_cost, "Is Reviewed": report_file.is_reviewed, @@ -575,6 +605,7 @@ def extract_report_detail_list(report_detail_list, lower=False, in_percent=True) "IMEI_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("imei_number", [None])), "Purchase Date_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("purchase_date", [None])), "Retailer_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("retailername", [None])), + "Invoice_No_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("invoice_no", [None])) }) if lower: for i, dat in enumerate(data): @@ -630,14 +661,16 @@ def convert_datetime_format(date_string: str, is_gt=False) -> str: def predict_result_to_ready(result): dict_result = {"retailername": "", "sold_to_party": "", + "invoice_no": "", "purchase_date": [], "imei_number": [],} if not result: return dict_result dict_result["retailername"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}])[0].get("value", None) dict_result["sold_to_party"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}])[1].get("value", None) - dict_result["purchase_date"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}])[2].get("value", []) - dict_result["imei_number"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[3].get("value", []) + dict_result["invoice_no"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}])[2].get("value", None) + dict_result["purchase_date"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}])[3].get("value", []) + dict_result["imei_number"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[4].get("value", []) return dict_result def align_fine_result(ready_predict, fine_result): @@ -648,8 +681,8 @@ def align_fine_result(ready_predict, fine_result): ready_predict["purchase_date"] = [None] if fine_result["retailername"] and not ready_predict["retailername"]: ready_predict["retailername"] = [None] - # if ready_predict["retailername"] and not fine_result["retailername"]: - # fine_result["retailername"] = [None] + if ready_predict["invoice_no"] and not fine_result["invoice_no"]: + fine_result["invoice_no"] = [None] fine_result["purchase_date"] = [fine_result["purchase_date"] for _ in range(len(ready_predict["purchase_date"]))] # fine_result["retailername"] = None if len(ready_predict["purchase_date"]))] # else: @@ -724,76 +757,6 @@ def calculate_avg_accuracy(acc, type, keys=[]): return sum(acc_list)/len(acc_list) if len(acc_list) > 0 else None -# Deprecated -def calculate_and_save_subcription_file(report, request): - request_att = {"acc": {"feedback": {"imei_number": [], - "purchase_date": [], - "retailername": [], - "sold_to_party": [], - }, - "reviewed": {"imei_number": [], - "purchase_date": [], - "retailername": [], - "sold_to_party": [], - }}, - "err": [], - "time_cost": {}, 
- "total_images": 0, - "bad_images": 0} - images = SubscriptionRequestFile.objects.filter(request=request) - for image in images: - status, att = calculate_subcription_file(image) - if status != 200: - continue - image.feedback_accuracy = att["acc"]["feedback"] - image.reviewed_accuracy = att["acc"]["reviewed"] - image.is_bad_image_quality = att["is_bad_image"] - image.save() - new_report_file = ReportFile(report=report, - correspond_request_id=request.request_id, - correspond_redemption_id=request.redemption_id, - doc_type=image.doc_type, - predict_result=image.predict_result, - feedback_result=image.feedback_result, - reviewed_result=image.reviewed_result, - feedback_accuracy=att["acc"]["feedback"], - reviewed_accuracy=att["acc"]["reviewed"], - acc=att["avg_acc"], - time_cost=image.processing_time, - is_bad_image=att["is_bad_image"], - bad_image_reason=image.reason, - counter_measures=image.counter_measures, - error="|".join(att["err"]) - ) - new_report_file.save() - if request_att["time_cost"].get(image.doc_type, None): - request_att["time_cost"][image.doc_type].append(image.processing_time) - else: - request_att["time_cost"][image.doc_type] = [image.processing_time] - try: - request_att["acc"]["feedback"]["imei_number"] += att["acc"]["feedback"]["imei_number"] - request_att["acc"]["feedback"]["purchase_date"] += att["acc"]["feedback"]["purchase_date"] - request_att["acc"]["feedback"]["retailername"] += att["acc"]["feedback"]["retailername"] - request_att["acc"]["feedback"]["sold_to_party"] += att["acc"]["feedback"]["sold_to_party"] - - request_att["acc"]["reviewed"]["imei_number"] += att["acc"]["reviewed"]["imei_number"] - request_att["acc"]["reviewed"]["purchase_date"] += att["acc"]["reviewed"]["purchase_date"] - request_att["acc"]["reviewed"]["retailername"] += att["acc"]["reviewed"]["retailername"] - request_att["acc"]["reviewed"]["sold_to_party"] += att["acc"]["reviewed"]["sold_to_party"] - - request_att["bad_images"] += int(att["is_bad_image"]) - request_att["total_images"] += 1 - request_att["err"] += att["err"] - except Exception as e: - print(e) - continue - - return request_att - -# def result_maximize_list_values(result, acc): -# for k in acc.keys(): -# if isinstance(acc[k], list) and len(acc[k]) > 0: - def acc_maximize_list_values(acc): pos = {} for k in acc.keys(): @@ -852,16 +815,19 @@ def calculate_a_request(report, request): "purchase_date": [], "retailername": [], "sold_to_party": [], + "invoice_no": [], }, "reviewed": {"imei_number": [], "purchase_date": [], "retailername": [], "sold_to_party": [], + "invoice_no": [], }, "acumulated":{"imei_number": [], "purchase_date": [], "retailername": [], "sold_to_party": [], + "invoice_no": [], }}, "err": [], "time_cost": {"imei": [], @@ -950,16 +916,19 @@ def calculate_a_request(report, request): request_att["acc"]["feedback"]["purchase_date"] += _att["acc"]["feedback"]["purchase_date"] request_att["acc"]["feedback"]["retailername"] += _att["acc"]["feedback"]["retailername"] request_att["acc"]["feedback"]["sold_to_party"] += _att["acc"]["feedback"]["sold_to_party"] + request_att["acc"]["feedback"]["invoice_no"] += _att["acc"]["feedback"]["invoice_no"] request_att["acc"]["reviewed"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] request_att["acc"]["reviewed"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] request_att["acc"]["reviewed"]["retailername"] += _att["acc"]["reviewed"]["retailername"] request_att["acc"]["reviewed"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] + 
request_att["acc"]["reviewed"]["invoice_no"] += _att["acc"]["reviewed"]["invoice_no"] request_att["acc"]["acumulated"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if _att["acc"]["reviewed"]["imei_number"] else _att["acc"]["feedback"]["imei_number"] request_att["acc"]["acumulated"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if _att["acc"]["reviewed"]["purchase_date"] else _att["acc"]["feedback"]["purchase_date"] request_att["acc"]["acumulated"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if _att["acc"]["reviewed"]["retailername"] else _att["acc"]["feedback"]["retailername"] request_att["acc"]["acumulated"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if _att["acc"]["reviewed"]["sold_to_party"] else _att["acc"]["feedback"]["sold_to_party"] + request_att["acc"]["acumulated"]["invoice_no"] += _att["acc"]["reviewed"]["invoice_no"] if _att["acc"]["reviewed"]["invoice_no"] else _att["acc"]["feedback"]["invoice_no"] if image.reason not in settings.ACC_EXCLUDE_RESEASONS: request_att["bad_images"] += int(_att["is_bad_image"]) @@ -987,10 +956,6 @@ def calculate_subcription_file(subcription_request_file): inference_result = copy.deepcopy(subcription_request_file.predict_result) inference_result, feedback_result = align_fine_result(inference_result, copy.deepcopy(subcription_request_file.feedback_result)) inference_result, reviewed_result = align_fine_result(inference_result, copy.deepcopy(subcription_request_file.reviewed_result)) - # print(f"[DEBUG]: predict_result: {subcription_request_file.predict_result}") - # print(f"[DEBUG]: inference_result: {inference_result}") - # print(f"[DEBUG]: feedback_result: {feedback_result}") - # print(f"[DEBUG]: reviewed_result: {reviewed_result}") for key_name in valid_keys: try: @@ -1002,8 +967,8 @@ def calculate_subcription_file(subcription_request_file): # print(f"[DEBUG]: e: {e} -key_name: {key_name}") subcription_request_file.feedback_accuracy = att["acc"]["feedback"] subcription_request_file.reviewed_accuracy = att["acc"]["reviewed"] - avg_reviewed = calculate_avg_accuracy(att["acc"], "reviewed", ["retailername", "sold_to_party", "purchase_date", "imei_number"]) - avg_feedback = calculate_avg_accuracy(att["acc"], "feedback", ["retailername", "sold_to_party", "purchase_date", "imei_number"]) + avg_reviewed = calculate_avg_accuracy(att["acc"], "reviewed", ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"]) + avg_feedback = calculate_avg_accuracy(att["acc"], "feedback", ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"]) if avg_feedback is not None or avg_reviewed is not None: avg_acc = 0 if avg_feedback is not None: @@ -1019,68 +984,6 @@ def calculate_subcription_file(subcription_request_file): att["is_bad_image"] = True return 200, att -def calculate_attributions(request): # for one request, return in order - # Deprecated - acc = {"feedback": {}, - "reviewed": {}} # {"feedback": {"retailername": [0.1], "sold_to_party":[0.9], "purchase_date":[0.6], "imei_number":[0.8]}, - # "reviewed": {"retailername": [0.1], "sold_to_party":[0.9], "purchase_date":[0.6], "imei_number":[0.8]}} - data = {"feedback": {}, - "reviewed": {}} # {"feedback": {"retailername": [[ocr, feedback], ...], "sold_to_party":[[ocr, feedback], ...], "purchase_date":[[ocr, feedback], ...], "imei_number":[[ocr, feedback], ...]}} - # {"reviewed": {"retailername": [[ocr, reviewed], ...], "sold_to_party":[[ocr, reviewed], ...], "purchase_date":[[ocr, reviewed], ...], 
"imei_number":[[ocr, reviewed], ...]}} - time_cost = {} # {"imei": [0.1], "invoice": [0.1]} - image_quality_num = [0, 0] # [good, bad] - image_quality_num[0] = len(request.doc_type.split(",")) - error = "" - - inference_result = predict_result_to_ready(request.predict_result) - reviewed_result = align_fine_result(inference_result, request.reviewed_result) - feedback_result = align_fine_result(inference_result, request.feedback_result) - - # accuracy calculation - for key_name in valid_keys: - if isinstance(inference_result[key_name], list): - if len(inference_result[key_name]) != len(reviewed_result.get(key_name, [])): - error = f"Request {request.request_id} failed with different {key_name} in predict and reviewed_result" - break - if len(inference_result[key_name]) != len(feedback_result.get(key_name, [])): - error = f"Request {request.request_id} failed with different {key_name} in predict and feedback_result" - break - # calculate accuracy for feedback result - acc["feedback"][key_name], data["feedback"][key_name] = calculate_accuracy(key_name, inference_result, feedback_result) - acc["reviewed"][key_name], data["reviewed"][key_name] = calculate_accuracy(key_name, inference_result, reviewed_result) - else: - inference_result[key_name] = [inference_result[key_name]] - feedback_result[key_name] = [feedback_result[key_name]] - reviewed_result[key_name] = [reviewed_result[key_name]] - - acc["feedback"][key_name], data["feedback"][key_name] = calculate_accuracy(key_name, inference_result, feedback_result) - acc["reviewed"][key_name], data["reviewed"][key_name] = calculate_accuracy(key_name, inference_result, reviewed_result) - - acc["feedback"]["purchase_date"] = [max(acc["feedback"]["purchase_date"])] if len(acc["feedback"]["purchase_date"]) > 0 else [] - acc["reviewed"]["purchase_date"] = [max(acc["reviewed"]["purchase_date"])] if len(acc["reviewed"]["purchase_date"]) > 0 else [] - # Count for bad and total images - avg_invoice_feedback = calculate_avg_accuracy(acc, "feedback", ["retailername", "sold_to_party", "purchase_date"]) - avg_invoice_reviewed = calculate_avg_accuracy(acc, "reviewed", ["retailername", "sold_to_party", "purchase_date"]) - if avg_invoice_feedback is not None or avg_invoice_reviewed is not None: - if max([x for x in [avg_invoice_feedback, avg_invoice_reviewed] if x is not None]) < settings.BAD_THRESHOLD: - image_quality_num[1] += 1 - for i, _ in enumerate(acc["feedback"]["imei_number"]): - if acc["feedback"]["imei_number"][i] is not None and acc["reviewed"]["imei_number"][i] is not None: - if max([x for x in [acc["feedback"]["imei_number"][i], acc["reviewed"]["imei_number"][i]] if x is not None]) < settings.BAD_THRESHOLD: - image_quality_num[1] += 1 - # time cost and quality calculation - # TODO: to be deprecated, doc_type would be in file level in the future - try: - for doc_type, doc_profile in request.ai_inference_profile.items(): - doc_type = doc_type.split("_")[0] - inference_time = doc_profile["inference"][1][0] - doc_profile["inference"][0] - postprocess_time = doc_profile["postprocess"][1] - doc_profile["postprocess"][0] - time_cost[doc_type].append(inference_time + postprocess_time) - except Exception as e: - error = f"Request id {request.request_id} failed with error: {e}" - - return acc, data, time_cost, image_quality_num, error - def mean_list(l): l = [x for x in l if x is not None] if len(l) == 0: @@ -1088,5 +991,4 @@ def mean_list(l): return sum(l)/len(l) def shadow_report(report_id, query): - c_connector.make_a_report_2( - (report_id, query)) + 
c_connector.make_a_report_2((report_id, query)) From b278350cf27114c7b68d4aa794872cf470c24e6b Mon Sep 17 00:00:00 2001 From: PhanThanhTrung Date: Wed, 13 Mar 2024 14:20:27 +0700 Subject: [PATCH 12/12] done adding invoice_no --- .../fwd_api/celery_worker/internal_task.py | 5 ++-- .../celery_worker/process_report_tasks.py | 14 ++++++----- cope2n-api/fwd_api/utils/accuracy.py | 25 ++++++------------- 3 files changed, 18 insertions(+), 26 deletions(-) diff --git a/cope2n-api/fwd_api/celery_worker/internal_task.py b/cope2n-api/fwd_api/celery_worker/internal_task.py index ca69595..ac13d84 100755 --- a/cope2n-api/fwd_api/celery_worker/internal_task.py +++ b/cope2n-api/fwd_api/celery_worker/internal_task.py @@ -135,9 +135,8 @@ def process_csv_feedback(csv_file_path, feedback_id): else: try: - _predict_result = {"retailername": None, "sold_to_party": None, "purchase_date": [], "imei_number": [_predict_result["imei_number"][image.index_in_request]]} - _feedback_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_feedback_result["imei_number"][image.index_in_request]]} if _feedback_result else None - # _reviewed_result = {"retailername": None, "sold_to_party": None, "purchase_date": None, "imei_number": [_reviewed_result["imei_number"][image.index_in_request]]} if _reviewed_result else None + _predict_result = {"retailername": None, "sold_to_party": None, "invoice_no": None, "purchase_date": [], "imei_number": [_predict_result["imei_number"][image.index_in_request]]} + _feedback_result = {"retailername": None, "sold_to_party": None, "invoice_no": None, "purchase_date": None, "imei_number": [_feedback_result["imei_number"][image.index_in_request]]} if _feedback_result else None except Exception as e: print (f"[ERROR]: {request_id} - {e}") image.predict_result = _predict_result diff --git a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py index 097e9fd..84f0dec 100755 --- a/cope2n-api/fwd_api/celery_worker/process_report_tasks.py +++ b/cope2n-api/fwd_api/celery_worker/process_report_tasks.py @@ -112,18 +112,20 @@ def create_accuracy_report(report_id, **kwargs): request.feedback_accuracy = {"imei_number": mean_list(request_att["acc"]["feedback"].get("imei_number", [None])), "purchase_date": mean_list(request_att["acc"]["feedback"].get("purchase_date", [None])), "retailername": mean_list(request_att["acc"]["feedback"].get("retailername", [None])), - "sold_to_party": mean_list(request_att["acc"]["feedback"].get("sold_to_party", [None]))} + "sold_to_party": mean_list(request_att["acc"]["feedback"].get("sold_to_party", [None])), + "invoice_no": mean_list(request_att["acc"]["feedback"].get("invoice_no", [None]))} request.reviewed_accuracy = {"imei_number": mean_list(request_att["acc"]["reviewed"].get("imei_number", [None])), "purchase_date": mean_list(request_att["acc"]["reviewed"].get("purchase_date", [None])), "retailername": mean_list(request_att["acc"]["reviewed"].get("retailername", [None])), - "sold_to_party": mean_list(request_att["acc"]["reviewed"].get("sold_to_party", [None]))} + "sold_to_party": mean_list(request_att["acc"]["reviewed"].get("sold_to_party", [None])), + "invoice_no": mean_list(request_att["acc"]["reviewed"].get("invoice_no", [None]))} request.save() number_images += request_att["total_images"] number_bad_images += request_att["bad_images"] bad_image_list += request_att["bad_image_list"] - update_temp_accuracy(accuracy["feedback"], request_att["acc"]["feedback"], 
keys=["imei_number", "purchase_date", "retailername", "sold_to_party"]) - update_temp_accuracy(accuracy["reviewed"], request_att["acc"]["reviewed"], keys=["imei_number", "purchase_date", "retailername", "sold_to_party"]) - update_temp_accuracy(accuracy["acumulated"], request_att["acc"]["acumulated"], keys=["imei_number", "purchase_date", "retailername", "sold_to_party"]) + update_temp_accuracy(accuracy["feedback"], request_att["acc"]["feedback"], keys=["imei_number", "purchase_date", "invoice_no", "retailername", "sold_to_party"]) + update_temp_accuracy(accuracy["reviewed"], request_att["acc"]["reviewed"], keys=["imei_number", "purchase_date", "invoice_no", "retailername", "sold_to_party"]) + update_temp_accuracy(accuracy["acumulated"], request_att["acc"]["acumulated"], keys=["imei_number", "purchase_date", "invoice_no", "retailername", "sold_to_party"]) time_cost["imei"].add(request_att["time_cost"].get("imei", [])) time_cost["invoice"].add(request_att["time_cost"].get("invoice", [])) @@ -233,7 +235,7 @@ def create_billing_report(report_id, **kwargs): "reviewed": {}, "acumulated": {}} for acc_type in ["feedback", "reviewed", "acumulated"]: - for key in ["imei_number", "purchase_date", "retailername", "sold_to_party"]: + for key in ["imei_number", "purchase_date", "invoice_no", "retailername", "sold_to_party"]: acumulated_acc[acc_type][key] = None acumulated_acc[acc_type][key + "_count"] = None acumulated_acc[acc_type]["avg"] = None diff --git a/cope2n-api/fwd_api/utils/accuracy.py b/cope2n-api/fwd_api/utils/accuracy.py index bd32c70..6ae00d2 100755 --- a/cope2n-api/fwd_api/utils/accuracy.py +++ b/cope2n-api/fwd_api/utils/accuracy.py @@ -540,23 +540,20 @@ def extract_report_detail_list(report_detail_list, lower=False, in_percent=True) "Image type": report_file.doc_type, "IMEI_user submitted": first_of_list(report_file.feedback_result.get("imei_number", [None])), "IMEI_OCR retrieved": first_of_list(report_file.predict_result.get("imei_number", [None])), - "IMEI Revised": None, + "IMEI Revised": first_of_list(report_file.reviewed_result.get("imei_number", [None])), "IMEI1 Accuracy": first_of_list(report_file.feedback_accuracy.get("imei_number", [None])), - "Invoice_Number_User": None, - "Invoice_Number_OCR": None, - "Invoice_Number Revised": None, - "Invoice_Number_Accuracy": None, + "Invoice_Number_User": report_file.feedback_result.get("invoice_no", None), + "Invoice_Number_OCR": report_file.predict_result.get("invoice_no", None), + "Invoice_Number Revised": report_file.reviewed_result.get("invoice_no", None), + "Invoice_Number_Accuracy": first_of_list(report_file.feedback_accuracy.get("invoice_no", [None])), "Invoice_Purchase Date_Consumer": report_file.feedback_result.get("purchase_date", None), "Invoice_Purchase Date_OCR": report_file.predict_result.get("purchase_date", []), - "Invoice_Purchase Date Revised": None, + "Invoice_Purchase Date Revised": report_file.reviewed_result.get("purchase_date", None), "Invoice_Purchase Date Accuracy": first_of_list(report_file.feedback_accuracy.get("purchase_date", [None])), "Invoice_Retailer_Consumer": report_file.feedback_result.get("retailername", None), "Invoice_Retailer_OCR": report_file.predict_result.get("retailername", None), - "Invoice_Purchase Date Revised": None, + "Invoice_Retailer Revised": report_file.reviewed_result.get("retailername", None), "Invoice_Retailer Accuracy": first_of_list(report_file.feedback_accuracy.get("retailername", [None])), - "Invoice_No_Consumer": report_file.feedback_result.get("invoice_no", None), - 
"Invoice_No_OCR": report_file.predict_result.get("invoice_no", None), - "Invoice_No Accuracy": first_of_list(report_file.feedback_accuracy.get("invoice_no", [None])), "OCR Image Accuracy": report_file.acc, "OCR Image Speed (seconds)": report_file.time_cost, "Is Reviewed": report_file.is_reviewed, @@ -565,7 +562,7 @@ def extract_report_detail_list(report_detail_list, lower=False, in_percent=True) "IMEI_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("imei_number", [None])), "Purchase Date_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("purchase_date", [None])), "Retailer_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("retailername", [None])), - "Invoice_No_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("invoice_no", [None])) + "Invoice_Number_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("invoice_no", [None])) }) if lower: for i, dat in enumerate(data): @@ -644,12 +641,6 @@ def align_fine_result(ready_predict, fine_result): if ready_predict["invoice_no"] and not fine_result["invoice_no"]: fine_result["invoice_no"] = [None] fine_result["purchase_date"] = [fine_result["purchase_date"] for _ in range(len(ready_predict["purchase_date"]))] - # fine_result["retailername"] = None if len(ready_predict["purchase_date"]))] - # else: - # fine_result = {} - # for key in ready_predict.keys(): - # fine_result[key] = [] - # fine_result["purchase_date"] = [None for _ in range(len(ready_predict["purchase_date"]))] return ready_predict, fine_result def update_temp_accuracy(accuracy, acc, keys):