From 056052f1a26488818fe809299d7db46b2d6a2825 Mon Sep 17 00:00:00 2001 From: PhanThanhTrung Date: Mon, 11 Mar 2024 15:48:18 +0700 Subject: [PATCH 1/3] update --- .../celery_worker/process_result_tasks.py | 48 +++++++++++++++++-- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/cope2n-api/fwd_api/celery_worker/process_result_tasks.py b/cope2n-api/fwd_api/celery_worker/process_result_tasks.py index 9277961..7cb8e72 100755 --- a/cope2n-api/fwd_api/celery_worker/process_result_tasks.py +++ b/cope2n-api/fwd_api/celery_worker/process_result_tasks.py @@ -8,7 +8,7 @@ from copy import deepcopy from fwd_api.celery_worker.worker import app from fwd_api.models import SubscriptionRequest from fwd_api.exception.exceptions import InvalidException -from fwd_api.models import SubscriptionRequest +from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile from fwd_api.constant.common import ProcessType from fwd_api.utils.redis import RedisUtils from fwd_api.utils import process as ProcessUtil @@ -147,7 +147,6 @@ def process_invoice_sbt_result(rq_id, result, metadata): rq_id = rq_id.split("_sub_")[0] rq: SubscriptionRequest = SubscriptionRequest.objects.filter(request_id=rq_id).first() - result["metadata"] = metadata # status = to_status(result) status = result.get("status", 200) @@ -171,23 +170,64 @@ def process_invoice_sbt_result(rq_id, result, metadata): rq.preprocessing_time = result.get("metadata", {}).get("preprocessing_time", 0) # advancing the last result rq.ai_inference_time = time.time() - rq.ai_inference_start_time rq.save() - else: rq.status = 404 # stop waiting rq.predict_result = result rq.save() - + _update_subscription_rq_file(request_id=rq_id) update_user(rq) except IndexError as e: print(e) print("NotFound request by requestId, %d", rq_id) rq.ai_inference_time = 0 rq.save() + _update_subscription_rq_file(request_id=rq_id) except Exception as e: print(e) print("Fail Invoice %d", rq_id) traceback.print_exc() rq.ai_inference_time = 0 rq.save() + _update_subscription_rq_file(request_id=rq_id) return "FailInvoice" + +def _update_subscription_rq_file(request_id): + sub_rqs = SubscriptionRequest.objects.filter(request_id=request_id).first() + result = sub_rqs.predict_result + if result is None: + return + + files = SubscriptionRequestFile.objects.filter(request=sub_rqs) + for image in files: + retailer_name = None + sold_to_party = None + purchase_date = [] + imei_number = [] + predicted_res = __get_actual_predict_result(result=result) + if len(predicted_res)!=0: + for elem in predicted_res: + if elem["label"] == "retailername": + retailer_name = elem['value'] + elif elem["label"] == "sold_to_party": + sold_to_party = elem['value'] + elif elem["label"] == "purchase_date": + purchase_date=elem['value'] + else: + imei_number=elem['value'] + + _predict_result = { + "retailername": retailer_name, + "sold_to_party": sold_to_party, + "purchase_date": purchase_date, + "imei_number": imei_number + } + image.predict_result = _predict_result + image.save() + +def __get_actual_predict_result(result: dict): + predicted_res = result.get('content', {}).get('document', []) + if len(predicted_res)==0: + return [] + predicted_res = predicted_res[0].get('content', []) + return predicted_res \ No newline at end of file From b8e37068f60f40b22a2fabb01c5864aaab569212 Mon Sep 17 00:00:00 2001 From: PhanThanhTrung Date: Mon, 11 Mar 2024 16:35:27 +0700 Subject: [PATCH 2/3] update index_in_request with doc_type --- .../fwd_api/celery_worker/internal_task.py | 5 +- .../celery_worker/process_result_tasks.py | 63 +++++++++---------- 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/cope2n-api/fwd_api/celery_worker/internal_task.py b/cope2n-api/fwd_api/celery_worker/internal_task.py index 36523f5..f098d1a 100755 --- a/cope2n-api/fwd_api/celery_worker/internal_task.py +++ b/cope2n-api/fwd_api/celery_worker/internal_task.py @@ -193,11 +193,13 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files): raise FileContentInvalidException for j in range(len(_b_urls)): _b_urls[j]["doc_type"] = file["file_type"] - _b_urls[j]["page_number"] = idx + _b_urls[j]["page_number"] = idx + _b_urls[j]["index_to_image_type"] = file["index_in_request"] return idx, _b_urls[0] elif extension in image_extensions: this_url = ProcessUtil.process_image_local_file(file["file_name"], file["file_path"], new_request, user, file["file_type"], file["index_in_request"])[0] this_url["page_number"] = idx + this_url["index_to_image_type"] = file["index_in_request"] if file["file_type"]: this_url["doc_type"] = file["file_type"] return idx, this_url @@ -222,6 +224,7 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files): file_meta["ai_inference_profile"] = {} file_meta["index_in_request"] = i file_meta["preprocessing_time"] = preprocessing_time + file_meta["index_to_image_type"] = b_url["index_to_image_type"] to_queue.append((fractorized_request_id, sub_id, [b_url], user_id, p_type, file_meta)) # Send to next queue diff --git a/cope2n-api/fwd_api/celery_worker/process_result_tasks.py b/cope2n-api/fwd_api/celery_worker/process_result_tasks.py index 7cb8e72..28bd789 100755 --- a/cope2n-api/fwd_api/celery_worker/process_result_tasks.py +++ b/cope2n-api/fwd_api/celery_worker/process_result_tasks.py @@ -147,8 +147,11 @@ def process_invoice_sbt_result(rq_id, result, metadata): rq_id = rq_id.split("_sub_")[0] rq: SubscriptionRequest = SubscriptionRequest.objects.filter(request_id=rq_id).first() + image_type = metadata["doc_type"] + index_in_request = metadata.pop("index_to_image_type", 0) result["metadata"] = metadata - # status = to_status(result) + _update_subscription_rq_file(request_id=rq, index_in_request=index_in_request, doc_type=image_type, result=result) + status = result.get("status", 200) redis_client.set_cache(rq_id, page_index, result) done = rq.pages == redis_client.get_size(rq_id) @@ -174,56 +177,48 @@ def process_invoice_sbt_result(rq_id, result, metadata): rq.status = 404 # stop waiting rq.predict_result = result rq.save() - _update_subscription_rq_file(request_id=rq_id) update_user(rq) except IndexError as e: print(e) print("NotFound request by requestId, %d", rq_id) rq.ai_inference_time = 0 rq.save() - _update_subscription_rq_file(request_id=rq_id) except Exception as e: print(e) print("Fail Invoice %d", rq_id) traceback.print_exc() rq.ai_inference_time = 0 rq.save() - _update_subscription_rq_file(request_id=rq_id) return "FailInvoice" -def _update_subscription_rq_file(request_id): - sub_rqs = SubscriptionRequest.objects.filter(request_id=request_id).first() - result = sub_rqs.predict_result - if result is None: - return +def _update_subscription_rq_file(request_id, index_in_request, doc_type, result): + image = SubscriptionRequestFile.objects.filter(request=request_id, index_in_request=index_in_request, doc_type=doc_type).first() - files = SubscriptionRequestFile.objects.filter(request=sub_rqs) - for image in files: - retailer_name = None - sold_to_party = None - purchase_date = [] - imei_number = [] - predicted_res = __get_actual_predict_result(result=result) - if len(predicted_res)!=0: - for elem in predicted_res: - if elem["label"] == "retailername": - retailer_name = elem['value'] - elif elem["label"] == "sold_to_party": - sold_to_party = elem['value'] - elif elem["label"] == "purchase_date": - purchase_date=elem['value'] - else: - imei_number=elem['value'] + retailer_name = None + sold_to_party = None + purchase_date = [] + imei_number = [] + predicted_res = __get_actual_predict_result(result=result) + if len(predicted_res)!=0: + for elem in predicted_res: + if elem["label"] == "retailername": + retailer_name = elem['value'] + elif elem["label"] == "sold_to_party": + sold_to_party = elem['value'] + elif elem["label"] == "purchase_date": + purchase_date = elem['value'] + else: + imei_number = elem['value'] - _predict_result = { - "retailername": retailer_name, - "sold_to_party": sold_to_party, - "purchase_date": purchase_date, - "imei_number": imei_number - } - image.predict_result = _predict_result - image.save() + _predict_result = { + "retailername": retailer_name, + "sold_to_party": sold_to_party, + "purchase_date": purchase_date, + "imei_number": imei_number + } + image.predict_result = _predict_result + image.save() def __get_actual_predict_result(result: dict): predicted_res = result.get('content', {}).get('document', []) From c3d177dc0ca3e32a3aa8c178a1ecba2289fe6a20 Mon Sep 17 00:00:00 2001 From: PhanThanhTrung Date: Mon, 11 Mar 2024 16:42:51 +0700 Subject: [PATCH 3/3] force predicted values to None or empty base on doc type --- .../celery_worker/process_result_tasks.py | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/cope2n-api/fwd_api/celery_worker/process_result_tasks.py b/cope2n-api/fwd_api/celery_worker/process_result_tasks.py index 28bd789..728d8fb 100755 --- a/cope2n-api/fwd_api/celery_worker/process_result_tasks.py +++ b/cope2n-api/fwd_api/celery_worker/process_result_tasks.py @@ -210,13 +210,20 @@ def _update_subscription_rq_file(request_id, index_in_request, doc_type, result) purchase_date = elem['value'] else: imei_number = elem['value'] - - _predict_result = { - "retailername": retailer_name, - "sold_to_party": sold_to_party, - "purchase_date": purchase_date, - "imei_number": imei_number - } + if doc_type=='invoice': + _predict_result = { + "retailername": retailer_name, + "sold_to_party": sold_to_party, + "purchase_date": purchase_date, + "imei_number": [] + } + else: + _predict_result = { + "retailername": None, + "sold_to_party": None, + "purchase_date": [], + "imei_number": imei_number + } image.predict_result = _predict_result image.save()