Merge pull request #21 from dx-tan/fix/localfiles_resistant

Fix/localfiles resistant
This commit is contained in:
Đỗ Xuân Tân 2023-12-26 21:08:05 +07:00 committed by GitHub Enterprise
commit df48297c99
12 changed files with 77 additions and 33 deletions

View File

@ -111,6 +111,7 @@ DATABASES = {
'PASSWORD': env.str("DB_PASSWORD", None),
'HOST': env.str("DB_HOST", None),
'PORT': env.str("DB_PORT", None),
'CONN_MAX_AGE': None,
}
}
@ -207,7 +208,7 @@ BROKER_URL = env.str("BROKER_URL", default="amqp://test:test@107.120.70.226:5672
CELERY_TASK_TRACK_STARTED = True
CELERY_TASK_TIME_LIMIT = 30 * 60
MAX_UPLOAD_SIZE_OF_A_FILE = 100 * 1024 * 1024 # 100 MB
MAX_UPLOAD_SIZE_OF_A_FILE = 5 * 1024 * 1024 # 5 MB
MAX_UPLOAD_FILE_SIZE_OF_A_REQUEST = 100 * 1024 * 1024 # 100 MB
MAX_UPLOAD_FILES_IN_A_REQUEST = 5
MAX_PIXEL_IN_A_FILE = 5000

View File

@ -214,14 +214,6 @@ class CtelViewSet(viewsets.ViewSet):
"imei": imei_file_objs,
"invoice": invoice_file_objs
}
total_page = len(files.keys())
p_type = validated_data['type']
new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page,
pages_left=total_page,
process_type=p_type, status=1, request_id=rq_id,
provider_code=provider_code,
subscription=sub)
new_request.save()
count = 0
doc_files_with_type = []
@ -236,6 +228,15 @@ class CtelViewSet(viewsets.ViewSet):
))
count += 1
total_page = len(doc_files_with_type)
p_type = validated_data['type']
new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page,
pages_left=total_page,
process_type=p_type, status=1, request_id=rq_id,
provider_code=provider_code,
subscription=sub)
new_request.save()
# Run file processing in a pool of 2 threads. TODO: Convert to Celery worker when possible
compact_files = [None] * len(doc_files_with_type)
pool = ThreadPool(processes=2)

View File

@ -13,6 +13,8 @@ from fwd_api.models import SubscriptionRequestFile
from ..utils import file as FileUtils
from ..utils import process as ProcessUtil
from ..utils import s3 as S3Util
from fwd_api.constant.common import ProcessType
from celery.utils.log import get_task_logger
from fwd import settings
@ -113,6 +115,24 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files):
new_request.ai_inference_start_time = time.time()
new_request.save()
trials = 0
while True:
rq: SubscriptionRequest = \
SubscriptionRequest.objects.filter(request_id=rq_id).first()
if rq.ai_inference_start_time != 0:
break
time.sleep(0.1)
trials += 1
if trials > 5:
rq.preprocessing_time = time.time() - start_time
rq.doc_type = doc_type_string
rq.ai_inference_start_time = time.time()
rq.save()
if trials > 10:
rq.status = 404
rq.save()
return
# Send to next queue
for sub_rq_id, sub_id, urls, user_id, p_type in to_queue:
ProcessUtil.send_to_queue2(sub_rq_id, sub_id, urls, user_id, p_type)

View File

@ -1,6 +1,7 @@
import traceback
import time
import uuid
import logging
from copy import deepcopy
@ -144,8 +145,22 @@ def process_invoice_sbt_result(rq_id, result):
try:
page_index = int(rq_id.split("_sub_")[1])
rq_id = rq_id.split("_sub_")[0]
rq: SubscriptionRequest = \
SubscriptionRequest.objects.filter(request_id=rq_id, process_type=ProcessType.SBT_INVOICE.value)[0]
rq: SubscriptionRequest = SubscriptionRequest.objects.filter(request_id=rq_id).first()
for i in range(10):
if rq.ai_inference_start_time == 0:
logging.warn(f"ai_inference_start_time = 0, looks like database is lagging, attemp {i} in 0.2 second ...")
rq.refresh_from_db()
time.sleep(0.2)
if i == 9: # return an error
logging.warn("Unable to retrieve rq, exiting")
rq.status = 404 # stop waiting
rq.predict_result = result
rq.save()
update_user(rq)
return "FailInvoice"
else:
break
# status = to_status(result)
status = result.get("status", 200)
redis_client.set_cache(rq_id, page_index, result)
@ -156,13 +171,11 @@ def process_invoice_sbt_result(rq_id, result):
results = redis_client.get_all_cache(rq_id)
rq.predict_result = aggregate_result(results, rq.doc_type)
# print(f"[DEBUG]: rq.predict_result: {rq.predict_result}")
redis_client.remove_cache(rq_id)
rq.save()
else:
rq.status = 404 # stop waiting
rq.predict_result = result
redis_client.remove_cache(rq_id)
rq.save()
rq.ai_inference_time = time.time() - rq.ai_inference_start_time

View File

@ -18,7 +18,6 @@ class SubscriptionRequest(models.Model):
subscription = models.ForeignKey(Subscription, on_delete=models.CASCADE)
created_at = models.DateTimeField(default=timezone.now, db_index=True)
updated_at = models.DateTimeField(auto_now=True)
S3_uploaded = models.BooleanField(default=False)
is_test_request = models.BooleanField(default=False)
S3_uploaded = models.BooleanField(default=False)

View File

@ -31,7 +31,7 @@ def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUES
raise InvalidException(excArgs="files")
extension = f.name.split(".")[-1].lower() in allowed_file_extensions
if not extension or "." not in f.name:
raise FileFormatInvalidException(excArgs=allowed_file_extensions)
raise FileFormatInvalidException(excArgs=list(allowed_file_extensions))
if f.size > settings.MAX_UPLOAD_SIZE_OF_A_FILE:
raise LimitReachedException(excArgs=('A file', str(settings.MAX_UPLOAD_SIZE_OF_A_FILE / 1024 / 1024), 'MB'))
total_file_size += f.size

View File

@ -1,5 +1,6 @@
import redis
import json
from datetime import datetime, timedelta
from django.conf import settings
@ -14,6 +15,7 @@ class RedisUtils:
image_index: int
"""
self.redis_client.hset(request_id, image_index, json.dumps(data))
self.redis_client.expire(request_id, 3600)
def get_all_cache(self, request_id):
resutlt = {}
@ -22,7 +24,7 @@ class RedisUtils:
return resutlt
def get_size(self, request_id):
return self.redis_client.hlen(request_id)
return self.redis_client.hlen(request_id)
def remove_cache(self, request_id):
self.redis_client.delete(request_id)

View File

@ -94,6 +94,12 @@ msgstr "Số lần yêu cầu"
msgid "Number of template"
msgstr "Số mẫu tài liệu"
msgid "Number of imei_file"
msgstr "Số lượng file IMEI"
msgid "Number of invoice_file"
msgstr "Số lượng file Invoice"
msgid "times"
msgstr "lượt"

View File

@ -2,7 +2,7 @@ server {
# listen {{port}};
# listen [::]:{{port}};
server_name localhost;
client_max_body_size 10M;
client_max_body_size 100M;
#access_log /var/log/nginx/host.access.log main;

View File

@ -5,11 +5,13 @@ import string
import os
import boto3
from datetime import datetime
from dotenv import load_dotenv
load_dotenv(".env_prod")
BASH_FILE = './deploy_images.sh'
S3_ENDPOINT = ""
S3_ACCESS_KEY = "secret"
S3_SECRET_KEY = "secret"
S3_ACCESS_KEY = os.getenv('S3_ACCESS_KEY')
S3_SECRET_KEY = os.getenv('S3_SECRET_KEY')
S3_BUCKET = "ocr-deployment-config"
class MinioS3Client:
@ -77,8 +79,8 @@ def deploy():
# Define the variable
tag = str(random_hash()[:8])
now = datetime.now()
# tag = tag + "_" + str(now.strftime("%d%m%y%H%M%S"))
tag = "4cae5134_261223123256"
tag = tag + "_" + str(now.strftime("%d%m%y%H%M%S"))
# tag = "4cae5134_261223123256"
print(tag)
# Execute the Bash script with the variable as a command-line argument

View File

@ -5,20 +5,20 @@ tag=$1
echo "[INFO] Tag received from Python: $tag"
# echo "[INFO] Pushing AI image with tag: $tag..."
# docker compose -f docker-compose-dev.yml build cope2n-fi-sbt
# docker tag sidp/cope2n-ai-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-ai-fi-sbt:${tag}
# docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-ai-fi-sbt:${tag}
echo "[INFO] Pushing AI image with tag: $tag..."
docker compose -f docker-compose-dev.yml build cope2n-fi-sbt
docker tag sidp/cope2n-ai-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-ai-fi-sbt:${tag}
docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-ai-fi-sbt:${tag}
# echo "[INFO] Pushing BE image with tag: $tag..."
# docker compose -f docker-compose-dev.yml build be-ctel-sbt
# docker tag sidp/cope2n-be-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-be-fi-sbt:${tag}
# docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-be-fi-sbt:${tag}
echo "[INFO] Pushing BE image with tag: $tag..."
docker compose -f docker-compose-dev.yml build be-ctel-sbt
docker tag sidp/cope2n-be-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-be-fi-sbt:${tag}
docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-be-fi-sbt:${tag}
# echo "[INFO] Pushing FE image with tag: $tag..."
# docker compose -f docker-compose-dev.yml build fe-sbt
# docker tag sidp/cope2n-fe-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-fe-fi-sbt:${tag}
# docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-fe-fi-sbt:${tag}
echo "[INFO] Pushing FE image with tag: $tag..."
docker compose -f docker-compose-dev.yml build fe-sbt
docker tag sidp/cope2n-fe-fi-sbt:latest public.ecr.aws/v4n9y6r8/sidp/cope2n-fe-fi-sbt:${tag}
docker push public.ecr.aws/v4n9y6r8/sidp/cope2n-fe-fi-sbt:${tag}
cp ./docker-compose-prod.yml ./docker-compose_${tag}.yml
sed -i "s/{{tag}}/$tag/g" ./docker-compose_${tag}.yml