From 074dd2047debc25c92fa000d9da4535615ad5496 Mon Sep 17 00:00:00 2001 From: dx-tan Date: Tue, 26 Dec 2023 18:44:03 +0700 Subject: [PATCH 1/3] Fix: false aggregate results --- cope2n-api/fwd/settings.py | 3 ++- .../celery_worker/process_result_tasks.py | 18 ++++++++++++++++-- .../fwd_api/models/SubscriptionRequest.py | 1 - cope2n-api/fwd_api/utils/file.py | 2 +- cope2n-api/fwd_api/utils/redis.py | 2 ++ cope2n-api/locale/vi_VN/LC_MESSAGES/django.mo | Bin 4094 -> 4244 bytes cope2n-api/locale/vi_VN/LC_MESSAGES/django.po | 6 ++++++ cope2n-fe/nginx.conf | 2 +- 8 files changed, 28 insertions(+), 6 deletions(-) diff --git a/cope2n-api/fwd/settings.py b/cope2n-api/fwd/settings.py index 514ff37..4b6a13f 100755 --- a/cope2n-api/fwd/settings.py +++ b/cope2n-api/fwd/settings.py @@ -111,6 +111,7 @@ DATABASES = { 'PASSWORD': env.str("DB_PASSWORD", None), 'HOST': env.str("DB_HOST", None), 'PORT': env.str("DB_PORT", None), + 'CONN_MAX_AGE': None, } } @@ -207,7 +208,7 @@ BROKER_URL = env.str("BROKER_URL", default="amqp://test:test@107.120.70.226:5672 CELERY_TASK_TRACK_STARTED = True CELERY_TASK_TIME_LIMIT = 30 * 60 -MAX_UPLOAD_SIZE_OF_A_FILE = 100 * 1024 * 1024 # 100 MB +MAX_UPLOAD_SIZE_OF_A_FILE = 5 * 1024 * 1024 # 5 MB MAX_UPLOAD_FILE_SIZE_OF_A_REQUEST = 100 * 1024 * 1024 # 100 MB MAX_UPLOAD_FILES_IN_A_REQUEST = 5 MAX_PIXEL_IN_A_FILE = 5000 diff --git a/cope2n-api/fwd_api/celery_worker/process_result_tasks.py b/cope2n-api/fwd_api/celery_worker/process_result_tasks.py index 4121dc3..5fe7466 100755 --- a/cope2n-api/fwd_api/celery_worker/process_result_tasks.py +++ b/cope2n-api/fwd_api/celery_worker/process_result_tasks.py @@ -1,6 +1,7 @@ import traceback import time import uuid +import logging from copy import deepcopy @@ -146,6 +147,21 @@ def process_invoice_sbt_result(rq_id, result): rq_id = rq_id.split("_sub_")[0] rq: SubscriptionRequest = \ SubscriptionRequest.objects.filter(request_id=rq_id, process_type=ProcessType.SBT_INVOICE.value)[0] + for i in range(10): + if rq.ai_inference_start_time == 0: + logging.warn(f"ai_inference_start_time = 0, looks like database is lagging, attemp {i} in 0.2 second ...") + rq.refresh_from_db() + time.sleep(0.2) + if i == 9: # return an error + logging.warn("Unable to retrieve rq, exiting") + rq.status = 404 # stop waiting + rq.predict_result = result + rq.save() + update_user(rq) + return "FailInvoice" + else: + break + # status = to_status(result) status = result.get("status", 200) redis_client.set_cache(rq_id, page_index, result) @@ -156,13 +172,11 @@ def process_invoice_sbt_result(rq_id, result): results = redis_client.get_all_cache(rq_id) rq.predict_result = aggregate_result(results, rq.doc_type) # print(f"[DEBUG]: rq.predict_result: {rq.predict_result}") - redis_client.remove_cache(rq_id) rq.save() else: rq.status = 404 # stop waiting rq.predict_result = result - redis_client.remove_cache(rq_id) rq.save() rq.ai_inference_time = time.time() - rq.ai_inference_start_time diff --git a/cope2n-api/fwd_api/models/SubscriptionRequest.py b/cope2n-api/fwd_api/models/SubscriptionRequest.py index 780df18..0cbda44 100755 --- a/cope2n-api/fwd_api/models/SubscriptionRequest.py +++ b/cope2n-api/fwd_api/models/SubscriptionRequest.py @@ -18,7 +18,6 @@ class SubscriptionRequest(models.Model): subscription = models.ForeignKey(Subscription, on_delete=models.CASCADE) created_at = models.DateTimeField(default=timezone.now, db_index=True) updated_at = models.DateTimeField(auto_now=True) - S3_uploaded = models.BooleanField(default=False) is_test_request = models.BooleanField(default=False) S3_uploaded = models.BooleanField(default=False) diff --git a/cope2n-api/fwd_api/utils/file.py b/cope2n-api/fwd_api/utils/file.py index 4a94b39..f3af27e 100644 --- a/cope2n-api/fwd_api/utils/file.py +++ b/cope2n-api/fwd_api/utils/file.py @@ -31,7 +31,7 @@ def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUES raise InvalidException(excArgs="files") extension = f.name.split(".")[-1].lower() in allowed_file_extensions if not extension or "." not in f.name: - raise FileFormatInvalidException(excArgs=allowed_file_extensions) + raise FileFormatInvalidException(excArgs=list(allowed_file_extensions)) if f.size > settings.MAX_UPLOAD_SIZE_OF_A_FILE: raise LimitReachedException(excArgs=('A file', str(settings.MAX_UPLOAD_SIZE_OF_A_FILE / 1024 / 1024), 'MB')) total_file_size += f.size diff --git a/cope2n-api/fwd_api/utils/redis.py b/cope2n-api/fwd_api/utils/redis.py index da4c7ec..7a9b54e 100644 --- a/cope2n-api/fwd_api/utils/redis.py +++ b/cope2n-api/fwd_api/utils/redis.py @@ -1,5 +1,6 @@ import redis import json +from datetime import datetime, timedelta from django.conf import settings @@ -14,6 +15,7 @@ class RedisUtils: image_index: int """ self.redis_client.hset(request_id, image_index, json.dumps(data)) + self.redis_client.expire(request_id, 3600) def get_all_cache(self, request_id): resutlt = {} diff --git a/cope2n-api/locale/vi_VN/LC_MESSAGES/django.mo b/cope2n-api/locale/vi_VN/LC_MESSAGES/django.mo index 46499c0a63c864ad4d763617a051f95c97b2b292..71bafc73dea62caf96254432277b2e0aa36a6a86 100755 GIT binary patch delta 1505 zcmZA1OGs2v9LMp$kMTKbsbyy7b+p%H_R`AKBop{G6 zb;K$nH{R?x`V+ZO&iTxC<9STRAxyzB^y5p+#0k$yTta;o({SE1G07~QdKRjG0WLL* zSdg3LG}K@!HhG>xhk6(@@H%QugUHA3anXPey!vCzq5c##-Wy~B`-s^%hkD+}TWA4k zn8N&)&&^gE0+@i!s5dxQHv0-mKsIegJhQqF(zj=23rwD{%sqsTtH+ z_=#-4W%Cg4WcjH1N;1g*L2k-vppW&U9_+(ico+HDJ1()qggw;fQ2oN3B(+~gWu)J$ z52Mb+D^vzPpeCHcEd1=X&qui7W4}E!`Bp0~L`_hEN^Jw`EVxMatPk}@{iuF}$T_qj z&oNXcUZ7s!J!)%ba0C9pQjC;xR5d_7R$vRV^|qPVLa69hu~_sLT9DpWMQ^{B(7qNE z+D;9;od^;;2^DQ$5y2U?9mFDtquQ%7~wb1p1erDPp6{S}h zR4G{?w#T|vMpSf`_+PVHRsJgL^!sVh^FT)(|S%s&b-| z(D_l(za>euO@vbYe<@pR*xTCUj@J4b;vBal6XK(Z*UNw3BFI+#sZtMOM(t zD59{HFwu5s6;T8gRI4Z?g49Jpl#8P8@A{v^obx&N-19zX?yc5;+3-_c{I(Ge5H}Iy zG zG^@l$w;fw}&S4E6#WtF~<1zgN$bU z3mY)WGSzVwRo;UqFu=7qjjHz$HBb>%?=7ywdDOsvP&@D!HE;z9Ls%O1z5y#4-gTb)FQVGL#Af`6n$T}AFK4?$HAs=k zl$o{Rah?Neso@P&gBg-0oOPe0I{b>=7-u$`P>w_s97G+(5NfOQsCtvwj&n#;dtb%= zEAxed&A5oVgegYX%6d`n`%rfv@bY2Q&Wxb$&RNvfPN4446;!(^JdAgcpM54#yCv+$ zSPlChbxfie592tVLREa@<=;>{u5vEsA> Date: Tue, 26 Dec 2023 18:44:25 +0700 Subject: [PATCH 2/3] Remove: key --- deploy_images.py | 10 ++++++---- deploy_images.sh | 24 ++++++++++++------------ 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/deploy_images.py b/deploy_images.py index 2de1774..242016e 100644 --- a/deploy_images.py +++ b/deploy_images.py @@ -5,11 +5,13 @@ import string import os import boto3 from datetime import datetime +from dotenv import load_dotenv +load_dotenv(".env_prod") BASH_FILE = './deploy_images.sh' S3_ENDPOINT = "" -S3_ACCESS_KEY = "secret" -S3_SECRET_KEY = "secret" +S3_ACCESS_KEY = os.getenv('S3_ACCESS_KEY') +S3_SECRET_KEY = os.getenv('S3_SECRET_KEY') S3_BUCKET = "ocr-deployment-config" class MinioS3Client: @@ -77,8 +79,8 @@ def deploy(): # Define the variable tag = str(random_hash()[:8]) now = datetime.now() - # tag = tag + "_" + str(now.strftime("%d%m%y%H%M%S")) - tag = "4cae5134_261223123256" + tag = tag + "_" + str(now.strftime("%d%m%y%H%M%S")) + # tag = "4cae5134_261223123256" print(tag) # Execute the Bash script with the variable as a command-line argument diff --git a/deploy_images.sh b/deploy_images.sh index b0b8195..07a1c02 100755 --- a/deploy_images.sh +++ b/deploy_images.sh @@ -5,20 +5,20 @@ tag=$1 echo "[INFO] Tag received from Python: $tag" -# echo "[INFO] Pushing AI image with tag: $tag..." -# docker compose -f docker-compose-dev.yml build cope2n-fi-sbt -# docker tag sidp/cope2n-ai-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-ai-fi-sbt:${tag} -# docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-ai-fi-sbt:${tag} +echo "[INFO] Pushing AI image with tag: $tag..." +docker compose -f docker-compose-dev.yml build cope2n-fi-sbt +docker tag sidp/cope2n-ai-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-ai-fi-sbt:${tag} +docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-ai-fi-sbt:${tag} -# echo "[INFO] Pushing BE image with tag: $tag..." -# docker compose -f docker-compose-dev.yml build be-ctel-sbt -# docker tag sidp/cope2n-be-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-be-fi-sbt:${tag} -# docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-be-fi-sbt:${tag} +echo "[INFO] Pushing BE image with tag: $tag..." +docker compose -f docker-compose-dev.yml build be-ctel-sbt +docker tag sidp/cope2n-be-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-be-fi-sbt:${tag} +docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-be-fi-sbt:${tag} -# echo "[INFO] Pushing FE image with tag: $tag..." -# docker compose -f docker-compose-dev.yml build fe-sbt -# docker tag sidp/cope2n-fe-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-fe-fi-sbt:${tag} -# docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-fe-fi-sbt:${tag} +echo "[INFO] Pushing FE image with tag: $tag..." +docker compose -f docker-compose-dev.yml build fe-sbt +docker tag sidp/cope2n-fe-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-fe-fi-sbt:${tag} +docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-fe-fi-sbt:${tag} cp ./docker-compose-prod.yml ./docker-compose_${tag}.yml sed -i "s/{{tag}}/$tag/g" ./docker-compose_${tag}.yml From a131ed7580a4e6188a310fd4e4ebb4e72f571da2 Mon Sep 17 00:00:00 2001 From: dx-tan Date: Tue, 26 Dec 2023 21:05:35 +0700 Subject: [PATCH 3/3] Workaround to fix missing db fields --- cope2n-api/fwd_api/api/ctel_view.py | 17 ++++++++-------- .../fwd_api/celery_worker/internal_task.py | 20 +++++++++++++++++++ .../celery_worker/process_result_tasks.py | 3 +-- cope2n-api/fwd_api/utils/redis.py | 2 +- 4 files changed, 31 insertions(+), 11 deletions(-) diff --git a/cope2n-api/fwd_api/api/ctel_view.py b/cope2n-api/fwd_api/api/ctel_view.py index e2664e0..c10af80 100755 --- a/cope2n-api/fwd_api/api/ctel_view.py +++ b/cope2n-api/fwd_api/api/ctel_view.py @@ -214,14 +214,6 @@ class CtelViewSet(viewsets.ViewSet): "imei": imei_file_objs, "invoice": invoice_file_objs } - total_page = len(files.keys()) - p_type = validated_data['type'] - new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page, - pages_left=total_page, - process_type=p_type, status=1, request_id=rq_id, - provider_code=provider_code, - subscription=sub) - new_request.save() count = 0 doc_files_with_type = [] @@ -236,6 +228,15 @@ class CtelViewSet(viewsets.ViewSet): )) count += 1 + total_page = len(doc_files_with_type) + p_type = validated_data['type'] + new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page, + pages_left=total_page, + process_type=p_type, status=1, request_id=rq_id, + provider_code=provider_code, + subscription=sub) + new_request.save() + # Run file processing in a pool of 2 threads. TODO: Convert to Celery worker when possible compact_files = [None] * len(doc_files_with_type) pool = ThreadPool(processes=2) diff --git a/cope2n-api/fwd_api/celery_worker/internal_task.py b/cope2n-api/fwd_api/celery_worker/internal_task.py index d1376d5..eeefc5c 100755 --- a/cope2n-api/fwd_api/celery_worker/internal_task.py +++ b/cope2n-api/fwd_api/celery_worker/internal_task.py @@ -13,6 +13,8 @@ from fwd_api.models import SubscriptionRequestFile from ..utils import file as FileUtils from ..utils import process as ProcessUtil from ..utils import s3 as S3Util +from fwd_api.constant.common import ProcessType + from celery.utils.log import get_task_logger from fwd import settings @@ -113,6 +115,24 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files): new_request.ai_inference_start_time = time.time() new_request.save() + trials = 0 + while True: + rq: SubscriptionRequest = \ + SubscriptionRequest.objects.filter(request_id=rq_id).first() + if rq.ai_inference_start_time != 0: + break + time.sleep(0.1) + trials += 1 + if trials > 5: + rq.preprocessing_time = time.time() - start_time + rq.doc_type = doc_type_string + rq.ai_inference_start_time = time.time() + rq.save() + if trials > 10: + rq.status = 404 + rq.save() + return + # Send to next queue for sub_rq_id, sub_id, urls, user_id, p_type in to_queue: ProcessUtil.send_to_queue2(sub_rq_id, sub_id, urls, user_id, p_type) diff --git a/cope2n-api/fwd_api/celery_worker/process_result_tasks.py b/cope2n-api/fwd_api/celery_worker/process_result_tasks.py index 5fe7466..b86d2f5 100755 --- a/cope2n-api/fwd_api/celery_worker/process_result_tasks.py +++ b/cope2n-api/fwd_api/celery_worker/process_result_tasks.py @@ -145,8 +145,7 @@ def process_invoice_sbt_result(rq_id, result): try: page_index = int(rq_id.split("_sub_")[1]) rq_id = rq_id.split("_sub_")[0] - rq: SubscriptionRequest = \ - SubscriptionRequest.objects.filter(request_id=rq_id, process_type=ProcessType.SBT_INVOICE.value)[0] + rq: SubscriptionRequest = SubscriptionRequest.objects.filter(request_id=rq_id).first() for i in range(10): if rq.ai_inference_start_time == 0: logging.warn(f"ai_inference_start_time = 0, looks like database is lagging, attemp {i} in 0.2 second ...") diff --git a/cope2n-api/fwd_api/utils/redis.py b/cope2n-api/fwd_api/utils/redis.py index 7a9b54e..ff65035 100644 --- a/cope2n-api/fwd_api/utils/redis.py +++ b/cope2n-api/fwd_api/utils/redis.py @@ -24,7 +24,7 @@ class RedisUtils: return resutlt def get_size(self, request_id): - return self.redis_client.hlen(request_id) + return self.redis_client.hlen(request_id) def remove_cache(self, request_id): self.redis_client.delete(request_id)