Merge pull request #21 from dx-tan/fix/localfiles_resistant

Fix/localfiles resistant
Đỗ Xuân Tân 2023-12-26 21:08:05 +07:00 committed by GitHub Enterprise
commit df48297c99
12 changed files with 77 additions and 33 deletions

View File

@@ -111,6 +111,7 @@ DATABASES = {
         'PASSWORD': env.str("DB_PASSWORD", None),
         'HOST': env.str("DB_HOST", None),
         'PORT': env.str("DB_PORT", None),
+        'CONN_MAX_AGE': None,
     }
 }
@@ -207,7 +208,7 @@ BROKER_URL = env.str("BROKER_URL", default="amqp://test:test@107.120.70.226:5672
 CELERY_TASK_TRACK_STARTED = True
 CELERY_TASK_TIME_LIMIT = 30 * 60
-MAX_UPLOAD_SIZE_OF_A_FILE = 100 * 1024 * 1024  # 100 MB
+MAX_UPLOAD_SIZE_OF_A_FILE = 5 * 1024 * 1024  # 5 MB
 MAX_UPLOAD_FILE_SIZE_OF_A_REQUEST = 100 * 1024 * 1024  # 100 MB
 MAX_UPLOAD_FILES_IN_A_REQUEST = 5
 MAX_PIXEL_IN_A_FILE = 5000
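
One note on the CONN_MAX_AGE line added above: in Django, 0 (the default) opens and closes a database connection for every request, a positive number keeps connections alive for that many seconds, and None keeps them open indefinitely. A minimal sketch of the options (values other than None are shown only for comparison, not part of this change):

```python
# Illustrative values for Django's CONN_MAX_AGE; the other keys of the
# "default" database entry are omitted here (see the diff above).
DATABASES = {
    "default": {
        # ... ENGINE / NAME / USER / PASSWORD / HOST / PORT as in the diff above ...
        "CONN_MAX_AGE": None,    # None -> keep the connection open indefinitely
        # "CONN_MAX_AGE": 0,     # 0    -> open/close a connection per request (Django default)
        # "CONN_MAX_AGE": 600,   # 600  -> reuse the connection for up to 10 minutes
    }
}
```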

View File

@@ -214,14 +214,6 @@ class CtelViewSet(viewsets.ViewSet):
             "imei": imei_file_objs,
             "invoice": invoice_file_objs
         }
-        total_page = len(files.keys())
-        p_type = validated_data['type']
-        new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page,
-                                                               pages_left=total_page,
-                                                               process_type=p_type, status=1, request_id=rq_id,
-                                                               provider_code=provider_code,
-                                                               subscription=sub)
-        new_request.save()
         count = 0
         doc_files_with_type = []
@@ -236,6 +228,15 @@ class CtelViewSet(viewsets.ViewSet):
             ))
             count += 1
+        total_page = len(doc_files_with_type)
+        p_type = validated_data['type']
+        new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page,
+                                                               pages_left=total_page,
+                                                               process_type=p_type, status=1, request_id=rq_id,
+                                                               provider_code=provider_code,
+                                                               subscription=sub)
+        new_request.save()
         # Run file processing in a pool of 2 threads. TODO: Convert to Celery worker when possible
         compact_files = [None] * len(doc_files_with_type)
         pool = ThreadPool(processes=2)
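
A self-contained sketch of the two-thread fan-out referenced by the comment above; process_file and the sample file list are placeholders, not the view's real helpers:

```python
# Illustrative sketch: fan work out to a small thread pool and collect
# results back in their original order (helper names are made up).
from multiprocessing.pool import ThreadPool

def process_file(index, doc_file):
    # Stand-in for per-file work (validation, upload, conversion, ...).
    return index, {"name": doc_file, "ok": True}

doc_files_with_type = ["invoice_1.jpg", "imei_1.jpg", "imei_2.jpg"]
compact_files = [None] * len(doc_files_with_type)

pool = ThreadPool(processes=2)
async_results = [pool.apply_async(process_file, (i, f))
                 for i, f in enumerate(doc_files_with_type)]
pool.close()
pool.join()

for res in async_results:
    idx, compact = res.get()
    compact_files[idx] = compact  # each result lands at its original index

print(compact_files)
```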

View File

@@ -13,6 +13,8 @@ from fwd_api.models import SubscriptionRequestFile
 from ..utils import file as FileUtils
 from ..utils import process as ProcessUtil
 from ..utils import s3 as S3Util
+from fwd_api.constant.common import ProcessType
 from celery.utils.log import get_task_logger
 from fwd import settings
@@ -113,6 +115,24 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files):
     new_request.ai_inference_start_time = time.time()
     new_request.save()
+    trials = 0
+    while True:
+        rq: SubscriptionRequest = \
+            SubscriptionRequest.objects.filter(request_id=rq_id).first()
+        if rq.ai_inference_start_time != 0:
+            break
+        time.sleep(0.1)
+        trials += 1
+        if trials > 5:
+            rq.preprocessing_time = time.time() - start_time
+            rq.doc_type = doc_type_string
+            rq.ai_inference_start_time = time.time()
+            rq.save()
+        if trials > 10:
+            rq.status = 404
+            rq.save()
+            return
     # Send to next queue
     for sub_rq_id, sub_id, urls, user_id, p_type in to_queue:
         ProcessUtil.send_to_queue2(sub_rq_id, sub_id, urls, user_id, p_type)

View File

@ -1,6 +1,7 @@
import traceback import traceback
import time import time
import uuid import uuid
import logging
from copy import deepcopy from copy import deepcopy
@ -144,8 +145,22 @@ def process_invoice_sbt_result(rq_id, result):
try: try:
page_index = int(rq_id.split("_sub_")[1]) page_index = int(rq_id.split("_sub_")[1])
rq_id = rq_id.split("_sub_")[0] rq_id = rq_id.split("_sub_")[0]
rq: SubscriptionRequest = \ rq: SubscriptionRequest = SubscriptionRequest.objects.filter(request_id=rq_id).first()
SubscriptionRequest.objects.filter(request_id=rq_id, process_type=ProcessType.SBT_INVOICE.value)[0] for i in range(10):
if rq.ai_inference_start_time == 0:
logging.warn(f"ai_inference_start_time = 0, looks like database is lagging, attemp {i} in 0.2 second ...")
rq.refresh_from_db()
time.sleep(0.2)
if i == 9: # return an error
logging.warn("Unable to retrieve rq, exiting")
rq.status = 404 # stop waiting
rq.predict_result = result
rq.save()
update_user(rq)
return "FailInvoice"
else:
break
# status = to_status(result) # status = to_status(result)
status = result.get("status", 200) status = result.get("status", 200)
redis_client.set_cache(rq_id, page_index, result) redis_client.set_cache(rq_id, page_index, result)
@@ -156,13 +171,11 @@ def process_invoice_sbt_result(rq_id, result):
             results = redis_client.get_all_cache(rq_id)
             rq.predict_result = aggregate_result(results, rq.doc_type)
             # print(f"[DEBUG]: rq.predict_result: {rq.predict_result}")
-            redis_client.remove_cache(rq_id)
             rq.save()
         else:
             rq.status = 404  # stop waiting
             rq.predict_result = result
-            redis_client.remove_cache(rq_id)
             rq.save()
         rq.ai_inference_time = time.time() - rq.ai_inference_start_time
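
The retry loop added in this file guards against reading the request row before another process's write has become visible. A small sketch of that pattern in isolation (the model instance and field name are illustrative stand-ins, not the project's real objects):

```python
# Illustrative sketch: poll a Django model row until a write made elsewhere
# becomes visible, re-reading the row between attempts.
import time
import logging

def wait_for_inference_start(rq, attempts=10, delay=0.2):
    """Return True once rq.ai_inference_start_time is set, False if we give up."""
    for i in range(attempts):
        if rq.ai_inference_start_time != 0:
            return True
        logging.warning("ai_inference_start_time still 0, attempt %d, retrying in %.1fs", i, delay)
        rq.refresh_from_db()  # Django's refresh_from_db() re-reads the row
        time.sleep(delay)
    return False
```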

View File

@@ -18,7 +18,6 @@ class SubscriptionRequest(models.Model):
     subscription = models.ForeignKey(Subscription, on_delete=models.CASCADE)
     created_at = models.DateTimeField(default=timezone.now, db_index=True)
     updated_at = models.DateTimeField(auto_now=True)
-    S3_uploaded = models.BooleanField(default=False)
     is_test_request = models.BooleanField(default=False)
     S3_uploaded = models.BooleanField(default=False)

View File

@@ -31,7 +31,7 @@ def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUES
             raise InvalidException(excArgs="files")
         extension = f.name.split(".")[-1].lower() in allowed_file_extensions
         if not extension or "." not in f.name:
-            raise FileFormatInvalidException(excArgs=allowed_file_extensions)
+            raise FileFormatInvalidException(excArgs=list(allowed_file_extensions))
         if f.size > settings.MAX_UPLOAD_SIZE_OF_A_FILE:
             raise LimitReachedException(excArgs=('A file', str(settings.MAX_UPLOAD_SIZE_OF_A_FILE / 1024 / 1024), 'MB'))
         total_file_size += f.size

View File

@ -1,5 +1,6 @@
import redis import redis
import json import json
from datetime import datetime, timedelta
from django.conf import settings from django.conf import settings
@@ -14,6 +15,7 @@ class RedisUtils:
         image_index: int
         """
         self.redis_client.hset(request_id, image_index, json.dumps(data))
+        self.redis_client.expire(request_id, 3600)
     def get_all_cache(self, request_id):
         resutlt = {}
@@ -22,7 +24,7 @@ class RedisUtils:
         return resutlt
     def get_size(self, request_id):
         return self.redis_client.hlen(request_id)
     def remove_cache(self, request_id):
         self.redis_client.delete(request_id)
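
A self-contained sketch of the per-request result cache shown above: each page's result is stored as one field of a Redis hash keyed by request_id, and the expire() call gives the whole hash a one-hour TTL so stale entries clean themselves up even if remove_cache() is never reached. Connection parameters and the class name here are illustrative.

```python
# Illustrative sketch of a per-request Redis hash cache with a TTL.
import json
import redis

class RequestResultCache:
    def __init__(self, host="localhost", port=6379, ttl_seconds=3600):
        self.client = redis.Redis(host=host, port=port, decode_responses=True)
        self.ttl_seconds = ttl_seconds

    def set_cache(self, request_id, image_index, data):
        # One hash per request; one field per page/image index.
        self.client.hset(request_id, image_index, json.dumps(data))
        # Refresh the TTL on every write so the hash expires on its own
        # after the request goes quiet.
        self.client.expire(request_id, self.ttl_seconds)

    def get_all_cache(self, request_id):
        return {k: json.loads(v) for k, v in self.client.hgetall(request_id).items()}

    def get_size(self, request_id):
        return self.client.hlen(request_id)

    def remove_cache(self, request_id):
        self.client.delete(request_id)
```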

View File

@@ -94,6 +94,12 @@ msgstr "Số lần yêu cầu"
 msgid "Number of template"
 msgstr "Số mẫu tài liệu"
+msgid "Number of imei_file"
+msgstr "Số lượng file IMEI"
+msgid "Number of invoice_file"
+msgstr "Số lượng file Invoice"
 msgid "times"
 msgstr "lượt"

View File

@@ -2,7 +2,7 @@ server {
     # listen {{port}};
     # listen [::]:{{port}};
     server_name localhost;
-    client_max_body_size 10M;
+    client_max_body_size 100M;
     #access_log /var/log/nginx/host.access.log main;

View File

@@ -5,11 +5,13 @@ import string
 import os
 import boto3
 from datetime import datetime
+from dotenv import load_dotenv
+load_dotenv(".env_prod")
 BASH_FILE = './deploy_images.sh'
 S3_ENDPOINT = ""
-S3_ACCESS_KEY = "secret"
-S3_SECRET_KEY = "secret"
+S3_ACCESS_KEY = os.getenv('S3_ACCESS_KEY')
+S3_SECRET_KEY = os.getenv('S3_SECRET_KEY')
 S3_BUCKET = "ocr-deployment-config"
 class MinioS3Client:
@@ -77,8 +79,8 @@ def deploy():
     # Define the variable
     tag = str(random_hash()[:8])
     now = datetime.now()
-    # tag = tag + "_" + str(now.strftime("%d%m%y%H%M%S"))
-    tag = "4cae5134_261223123256"
+    tag = tag + "_" + str(now.strftime("%d%m%y%H%M%S"))
+    # tag = "4cae5134_261223123256"
     print(tag)
     # Execute the Bash script with the variable as a command-line argument
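
A self-contained sketch of the credential-loading pattern introduced above: the S3 keys move out of the source file into a .env file read at startup. The file name and variable names follow the diff; python-dotenv is the assumed dependency.

```python
# Illustrative sketch: read deployment credentials from a .env file instead of
# hardcoding them (requires the python-dotenv package).
import os
from dotenv import load_dotenv

# .env_prod would contain lines such as:
#   S3_ACCESS_KEY=AKIA...
#   S3_SECRET_KEY=...
load_dotenv(".env_prod")

S3_ACCESS_KEY = os.getenv("S3_ACCESS_KEY")
S3_SECRET_KEY = os.getenv("S3_SECRET_KEY")

if not S3_ACCESS_KEY or not S3_SECRET_KEY:
    raise SystemExit("S3_ACCESS_KEY / S3_SECRET_KEY are not set; check .env_prod")
```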

View File

@@ -5,20 +5,20 @@ tag=$1
 echo "[INFO] Tag received from Python: $tag"
-# echo "[INFO] Pushing AI image with tag: $tag..."
-# docker compose -f docker-compose-dev.yml build cope2n-fi-sbt
-# docker tag sidp/cope2n-ai-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-ai-fi-sbt:${tag}
-# docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-ai-fi-sbt:${tag}
-# echo "[INFO] Pushing BE image with tag: $tag..."
-# docker compose -f docker-compose-dev.yml build be-ctel-sbt
-# docker tag sidp/cope2n-be-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-be-fi-sbt:${tag}
-# docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-be-fi-sbt:${tag}
-# echo "[INFO] Pushing FE image with tag: $tag..."
-# docker compose -f docker-compose-dev.yml build fe-sbt
-# docker tag sidp/cope2n-fe-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-fe-fi-sbt:${tag}
-# docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-fe-fi-sbt:${tag}
+echo "[INFO] Pushing AI image with tag: $tag..."
+docker compose -f docker-compose-dev.yml build cope2n-fi-sbt
+docker tag sidp/cope2n-ai-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-ai-fi-sbt:${tag}
+docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-ai-fi-sbt:${tag}
+echo "[INFO] Pushing BE image with tag: $tag..."
+docker compose -f docker-compose-dev.yml build be-ctel-sbt
+docker tag sidp/cope2n-be-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-be-fi-sbt:${tag}
+docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-be-fi-sbt:${tag}
+echo "[INFO] Pushing FE image with tag: $tag..."
+docker compose -f docker-compose-dev.yml build fe-sbt
+docker tag sidp/cope2n-fe-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-fe-fi-sbt:${tag}
+docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-fe-fi-sbt:${tag}
 cp ./docker-compose-prod.yml ./docker-compose_${tag}.yml
 sed -i "s/{{tag}}/$tag/g" ./docker-compose_${tag}.yml