Clean up code

This commit is contained in:
Viet Anh Nguyen 2023-12-12 12:54:34 +07:00
parent 0665d707a5
commit d2ea7f2e66
19 changed files with 283 additions and 481 deletions

View File

@ -0,0 +1,2 @@
# SBT Project

View File

@ -203,6 +203,7 @@ MAX_UPLOAD_SIZE_OF_A_FILE = 100 * 1024 * 1024 # 100 MB
MAX_UPLOAD_FILE_SIZE_OF_A_REQUEST = 100 * 1024 * 1024 # 100 MB
MAX_UPLOAD_FILES_IN_A_REQUEST = 5
MAX_PIXEL_IN_A_FILE = 5000
TARGET_MAX_IMAGE_SIZE = (2048, 2048)
SIZE_TO_COMPRESS = 2 * 1024 * 1024
MAX_NUMBER_OF_TEMPLATE = 3
MAX_PAGES_OF_PDF_FILE = 50
@ -212,3 +213,5 @@ CACHES = {
'BACKEND': 'django.core.cache.backends.dummy.DummyCache',
}
}

View File

@ -29,19 +29,6 @@ from fwd import settings
class CtelUserViewSet(viewsets.ViewSet):
lookup_field = "username"
# @extend_schema(request=LoginRequest, responses=None, tags=['users'], examples=[
# OpenApiExample(
# 'ex1',
# summary='Sample Login',
# description='Sample Login',
# value={
# 'username': 'admin',
# 'password': 'admin'
# }
# ),
# ])
@extend_schema(request={
'multipart/form-data': {
'type': 'object',

View File

@ -1,10 +1,8 @@
import time
import uuid
from wsgiref.util import FileWrapper
import base64
from django.core.files.uploadedfile import TemporaryUploadedFile
from django.db import transaction
from django.http import HttpResponse, JsonResponse
from django.utils.crypto import get_random_string
from drf_spectacular.utils import extend_schema
@ -12,12 +10,14 @@ from rest_framework import status, viewsets
from rest_framework.decorators import action
from rest_framework.response import Response
from typing import List
from rest_framework.renderers import JSONRenderer
from rest_framework_xml.renderers import XMLRenderer
from fwd import settings
from ..celery_worker.client_connector import c_connector
from ..annotation.api import throw_on_failure
from ..constant.common import allowed_p_type, ProcessType, REQUEST_ID, FOLDER_TYPE, \
FolderFileType, TEMPLATE_ID, EntityStatus, standard_ocr_list, pdf_extensions, image_extensions, allowed_file_extensions
FolderFileType, TEMPLATE_ID, EntityStatus, pdf_extensions, allowed_file_extensions
from ..exception.exceptions import RequiredFieldException, InvalidException, NotFoundException, \
PermissionDeniedException, LimitReachedException, LockedEntityException, FileContentInvalidException, ServiceTimeoutException
from ..models import SubscriptionRequest, UserProfile, SubscriptionRequestFile, OcrTemplate, Subscription
@ -28,79 +28,6 @@ class CtelViewSet(viewsets.ViewSet):
lookup_field = "username"
size_to_compress = settings.SIZE_TO_COMPRESS
@extend_schema(request={
'multipart/form-data': {
'type': 'object',
'properties': {
'file': {
'type': 'string',
'format': 'binary'
},
'processType': {
'type': 'string'
},
},
'required': {'file', 'processType'}
}
}, responses=None, tags=['ocr'])
@action(detail=False, url_path="image/process", methods=["POST"])
# @transaction.atomic
def process(self, request):
s_time = time.time()
# print(30*"=")
# print(f"[DEBUG]: request: {request}")
# print(30*"=")
user_info = ProcessUtil.get_user(request)
user = user_info.user
sub = user_info.current_sub
validated_data = ProcessUtil.validate_ocr_request_and_get(request, sub)
provider_code = 'SAP'
rq_id = provider_code + uuid.uuid4().hex
file_obj: TemporaryUploadedFile = validated_data['file']
file_extension = file_obj.name.split(".")[-1].lower()
p_type = validated_data['type']
file_name = f"temp_{rq_id}.{file_extension}"
total_page = 1
new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page,
pages_left=total_page,
doc_type="all",
process_type=p_type, status=1, request_id=rq_id,
provider_code=provider_code,
subscription=sub)
new_request.save()
from ..celery_worker.client_connector import c_connector
file_obj.seek(0)
file_path = FileUtils.resize_and_save_file(file_name, new_request, file_obj, 100)
S3_path = FileUtils.save_to_S3(file_name, new_request, file_path)
files: [{
"file_name": file_name,
"file_path": file_path, # local path to file
"file_type": ""
},]
if file_extension in pdf_extensions:
c_connector.do_pdf((rq_id, sub.id, p_type, user.id, files))
# b_url = ProcessUtil.process_pdf_file(file_name, file_obj, new_request, user)
elif file_extension in image_extensions:
b_url = ProcessUtil.process_image_file(file_name, file_obj, new_request, user)
j_time = time.time()
print(f"[INFO]: Duration of Pre-processing: {j_time - s_time}s")
print(f"[INFO]: b_url: {b_url}")
if p_type in standard_ocr_list:
ProcessUtil.send_to_queue2(rq_id + "_sub_0", sub.id, b_url, user.id, p_type)
if p_type == ProcessType.TEMPLATE_MATCHING.value:
ProcessUtil.send_template_queue(rq_id, b_url, validated_data['template'], user.id)
else:
return JsonResponse(status=status.HTTP_406_NOT_ACCEPTABLE, data={"request_id": rq_id, "message": f"File {file_extension} is now allowed"})
return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id})
@extend_schema(request={
'multipart/form-data': {
'type': 'object',
@ -124,12 +51,7 @@ class CtelViewSet(viewsets.ViewSet):
}
}, responses=None, tags=['ocr'])
@action(detail=False, url_path="images/process", methods=["POST"])
# @transaction.atomic
def processes(self, request):
s_time = time.time()
# print(30*"=")
# print(f"[DEBUG]: request: {request}")
# print(30*"=")
user_info = ProcessUtil.get_user(request)
user = user_info.user
sub = user_info.current_sub
@ -147,8 +69,7 @@ class CtelViewSet(viewsets.ViewSet):
"invoice": invoice_file_objs
}
total_page = len(files.keys())
# file_paths = []
list_urls = []
p_type = validated_data['type']
new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page,
pages_left=total_page,
@ -176,7 +97,6 @@ class CtelViewSet(viewsets.ViewSet):
compact_files.append(this_file)
c_connector.do_pdf((rq_id, sub.id, p_type, user.id, compact_files))
j_time = time.time()
return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id})
@extend_schema(request={
@ -202,12 +122,7 @@ class CtelViewSet(viewsets.ViewSet):
}
}, responses=None, tags=['ocr'])
@action(detail=False, url_path="images/process_sync", methods=["POST"])
# @transaction.atomic
def processes_sync(self, request):
s_time = time.time()
# print(30*"=")
# print(f"[DEBUG]: request: {request}")
# print(30*"=")
user_info = ProcessUtil.get_user(request)
user = user_info.user
sub = user_info.current_sub
@ -225,8 +140,6 @@ class CtelViewSet(viewsets.ViewSet):
"invoice": invoice_file_objs
}
total_page = len(files.keys())
# file_paths = []
list_urls = []
p_type = validated_data['type']
new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page,
pages_left=total_page,
@ -244,7 +157,7 @@ class CtelViewSet(viewsets.ViewSet):
_name = f"temp_{doc_type}_{rq_id}_{i}.{_ext}"
doc_file.seek(0)
file_path = FileUtils.resize_and_save_file(_name, new_request, doc_file, 100)
S3_path = FileUtils.save_to_S3(_name, new_request, file_path)
_ = FileUtils.save_to_S3(_name, new_request, file_path)
count += 1
this_file = {
"file_name": _name,
@ -254,8 +167,6 @@ class CtelViewSet(viewsets.ViewSet):
compact_files.append(this_file)
c_connector.do_pdf((rq_id, sub.id, p_type, user.id, compact_files))
j_time = time.time()
time_out = 120
start = time.time()
while time.time() - start < time_out:
@ -322,13 +233,7 @@ class CtelViewSet(viewsets.ViewSet):
}
}, responses=None, tags=['ocr'])
@action(detail=False, url_path="images/feedback", methods=["POST"])
# @transaction.atomic
def feedback(self, request):
# s_time = time.time()
# user_info = ProcessUtil.get_user(request)
# user = user_info.user
# sub = user_info.current_sub
validated_data = ProcessUtil.sbt_validate_feedback(request)
rq_id = validated_data['request_id']
@ -426,9 +331,6 @@ class CtelViewSet(viewsets.ViewSet):
headers={'Content-Disposition': 'filename={fn}'.format(fn=file_name)},
content_type=content_type)
from rest_framework.renderers import JSONRenderer
from rest_framework_xml.renderers import XMLRenderer
@extend_schema(request=None, responses=None, tags=['data'])
@throw_on_failure(InvalidException(excArgs='data'))
@action(detail=False, url_path=r"result/(?P<request_id>\w+)", methods=["GET"], renderer_classes=[JSONRenderer, XMLRenderer])
@ -499,76 +401,3 @@ class CtelViewSet(viewsets.ViewSet):
serializer.is_valid()
return Response(status=status.HTTP_200_OK, data=serializer.data[0])
@action(detail=False, url_path="image/process/app", methods=["POST"])
# @transaction.atomic
def process_app(self, request):
app_id = "THIS_IS_OUR_APP_TEST_ACCOUNT_9123"
users = UserProfile.objects.filter(sync_id=app_id)
if len(users) > 1:
raise InvalidException(excArgs='user')
if len(users) == 0:
user = UserProfile(sync_id=app_id, limit_total_pages=1000, status=EntityStatus.ACTIVE.value)
user.save()
else:
user = users[0]
subs = Subscription.objects.filter(user=user)
if len(subs) > 1:
raise InvalidException(excArgs='sub')
if len(subs) == 0:
sub = Subscription(user=user, limit_token=10000, current_token=0, status=EntityStatus.ACTIVE.value)
sub.save()
else:
sub = subs[0]
cur = sub.current_token
lim = sub.limit_token
list_file = request.data.getlist('file')
s_time = time.time()
if "processType" not in request.data or int(request.data['processType']) not in allowed_p_type:
raise InvalidException(excArgs='processType')
p_type: int = int(request.data['processType'])
if cur + ProcessUtil.token_value(p_type) >= lim:
raise LimitReachedException(excArgs=('Number of request', str(sub.limit_token), 'times'))
FileUtils.validate_list_file(list_file)
if ("templateId" not in request.data) and p_type == ProcessType.TEMPLATE_MATCHING.value:
raise InvalidException(excArgs=TEMPLATE_ID)
provider_code = 'Ctel'
rq_id = provider_code + str(p_type) + get_random_string(5) + str(round(time.time() * 1000))
file_obj: TemporaryUploadedFile = list_file[0]
file_name = "temp_file_" + rq_id + get_random_string(2) + ".jpg"
total_page = 1
new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page,
process_type=p_type, status=1, request_id=rq_id,
provider_code=provider_code, subscription=sub)
new_request.save()
if p_type == ProcessType.ID_CARD.value or p_type == ProcessType.INVOICE.value or p_type == ProcessType.OCR_WITH_BOX.value or p_type == ProcessType.DRIVER_LICENSE.value:
if file_obj.size > self.size_to_compress:
quality = 90
else:
quality = 100
file_path = FileUtils.resize_and_save_file(file_name, new_request, file_obj, quality)
new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path,
request=new_request,
file_name=file_name)
new_request_file.save()
b_url = FileUtils.build_url(FolderFileType.REQUESTS.value, new_request.request_id, user.id, file_name)
j_time = time.time()
print("Json {}".format(j_time - s_time))
ProcessUtil.send_to_queue2(rq_id, sub.id, b_url, user.id, p_type)
return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id})
return JsonResponse(status=status.HTTP_502_BAD_GATEWAY, data={"message": "unknown_error"})

View File

@ -1,14 +1,12 @@
from celery import shared_task
import time
import fitz
import uuid
import os
import base64
from fwd_api.models import SubscriptionRequest, UserProfile
from fwd_api.celery_worker.worker import app
from ..constant.common import ProcessType, \
FolderFileType, standard_ocr_list, image_extensions
from django.core.files.uploadedfile import TemporaryUploadedFile
from ..constant.common import FolderFileType, image_extensions
from ..exception.exceptions import FileContentInvalidException
from ..utils import FileUtils, ProcessUtil, S3_process
from celery.utils.log import get_task_logger
@ -64,7 +62,7 @@ def process_pdf_byte(file_name: str, file_path: str, request, user, file_obj) ->
def process_image_file(file_name: str, file_path, request, user) -> list:
from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile
from fwd_api.models import SubscriptionRequestFile
new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path,
request=request,
@ -87,9 +85,7 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files):
"file_type": ""
},]
"""
from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile,UserProfile
start = time.time()
from django.conf import settings
new_request = SubscriptionRequest.objects.filter(request_id=rq_id)[0]
user = UserProfile.objects.filter(id=user_id).first()
b_urls = []
@ -128,10 +124,6 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files):
new_request.doc_type = doc_type_string
new_request.save()
# if p_type in standard_ocr_list:
# ProcessUtil.send_to_queue2(rq_id, sub_id, b_urls, user_id, p_type)
# if p_type == ProcessType.TEMPLATE_MATCHING.value:
# ProcessUtil.send_template_queue(rq_id, b_urls, '', user_id)
@app.task(name='upload_file_to_s3')
def upload_file_to_s3(local_file_path, s3_key):

View File

@ -1,9 +1,5 @@
import traceback
import time
import uuid
from fwd_api.celery_worker.worker import app
from fwd_api.models import SubscriptionRequest
from django.utils.crypto import get_random_string
from fwd_api.exception.exceptions import InvalidException

View File

@ -1,7 +1,7 @@
import io
import os
import traceback
import base64
import pathlib
import json
from PIL import Image, ExifTags
@ -14,6 +14,7 @@ from fwd_api.exception.exceptions import GeneralException, RequiredFieldExceptio
from fwd_api.models import SubscriptionRequest, OcrTemplate
from fwd_api.utils import ProcessUtil
from fwd_api.utils.CryptoUtils import image_authenticator
from fwd_api.utils.image import resize
from ..celery_worker.client_connector import c_connector
import imagesize
@ -126,15 +127,26 @@ def save_template_file(file_name: str, rq: OcrTemplate, file: TemporaryUploadedF
print(e)
raise ServiceUnavailableException()
def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path):
    """Persist *file* into *folder_path* under *file_name* and return the full path.

    PDF uploads are written verbatim via ``save_pdf``; every other extension is
    routed through ``save_img`` with the given JPEG *quality*.
    Re-raises ``InvalidDecompressedSizeException`` untouched (it carries its own
    user-facing message); any other failure is logged and surfaced as
    ``ServiceUnavailableException``.
    """
    try:
        file_path = os.path.join(folder_path, file_name)
        if file_name.split(".")[-1].lower() == "pdf":
            save_pdf(file_path, file)
        else:
            save_img(file_path, file, quality)
    except InvalidDecompressedSizeException:
        # size-limit violations must propagate as-is
        raise
    except Exception as e:
        print(e)
        raise ServiceUnavailableException()
    return file_path
def resize_and_save_file(file_name: str, rq: SubscriptionRequest, file: TemporaryUploadedFile, quality):
try:
folder_path = get_folder_path(rq)
# print(f"[DEBUG]: folder_path: {folder_path}")
is_exist = os.path.exists(folder_path)
if not is_exist:
# Create a new directory because it does not exist
os.makedirs(folder_path)
pathlib.Path(folder_path).mkdir(exist_ok=True, parents=True)
return save_file_with_path(file_name, file, quality, folder_path)
except InvalidDecompressedSizeException as e:
raise e
@ -155,22 +167,6 @@ def save_to_S3(file_name, rq, local_file_path):
print(f"[ERROR]: {e}")
raise ServiceUnavailableException()
def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path):
try:
file_path = os.path.join(folder_path, file_name)
extension = file_name.split(".")[-1]
if extension in ['pdf', 'PDF']:
save_pdf(file_path, file)
else:
save_img(file_path, file, quality)
except InvalidDecompressedSizeException as e:
raise e
except Exception as e:
print(e)
raise ServiceUnavailableException()
return file_path
def save_pdf(file_path: str, file: TemporaryUploadedFile):
f = open(file_path, 'wb+')
for chunk in file.chunks():
@ -209,7 +205,10 @@ def save_img(file_path: str, file: TemporaryUploadedFile, quality):
print(ex)
print("Rotation Error")
traceback.print_exc()
image.convert('RGB').save(file_path, optimize=True, quality=quality)
image = resize(image, max_w=settings.TARGET_MAX_IMAGE_SIZE[0], max_h=settings.TARGET_MAX_IMAGE_SIZE[1])
image = image.convert('RGB')
image.save(file_path, optimize=True, quality=quality)
def build_media_url(folder: str, uid: str, file_name: str = None) -> str:
token = image_authenticator.generate_img_token()

View File

@ -320,13 +320,6 @@ def send_to_queue2(rq_id, sub_id, file_url, user_id, typez):
c_connector.process_invoice_manulife((rq_id, file_url))
elif typez == ProcessType.SBT_INVOICE.value:
c_connector.process_invoice_sbt((rq_id, file_url))
# elif typez == ProcessType.DRIVER_LICENSE.value:
# c_connector.process_driver_license(
# (rq_id, sub_id, map_process_type_to_folder_name(typez), file_url, user_id))
# elif typez == ProcessType.OCR_WITH_BOX.value:
# c_connector.process_ocr_with_box((rq_id, file_url))
# elif typez == ProcessType.TEMPLATE_MATCHING.value:
# c_connector.process_template_matching((rq_id, file_url))
except Exception as e:
print(e)
raise BadGatewayException()
@ -418,36 +411,16 @@ def process_image_local_file(file_name: str, file_path: str, request: Subscripti
}]
def pdf_to_images_urls(doc: fitz.Document, request: SubscriptionRequest, user, dpi: int = 300) -> list:
def resize(image, max_w=2048, max_h=2048):
logger.info(f"[DEBUG]: image.size: {image.size}, type(image): {type(image)}")
cur_w, cur_h = image.width, image.height
image_bytes = image.samples
image = Image.frombytes("RGB", [cur_w, cur_h], image_bytes)
if cur_h > max_w or cur_h > max_h:
ratio_w = max_w/cur_w
ratio_h = max_h/cur_h
ratio = min([ratio_h, ratio_w])
new_w = int(ratio*cur_w)
new_h = int(ratio*cur_h)
image = image.resize((new_w, new_h))
return image
zoom = dpi // 72
magnify = fitz.Matrix(zoom, zoom)
pdf_extracted = []
for idx, page in enumerate(doc):
saving_path = FileUtils.get_folder_path(request)
# saving_path = r'C:\Users\mrdra\PycharmProjects\Ctel\test_data'
break_file_name = f'break_{idx}.jpg'
saving_path = os.path.join(saving_path, break_file_name)
page = doc.load_page(idx)
pix = page.get_pixmap(dpi=250) # render page to an image
# pix = resize(pix)
# print(f"[DEBUG]: pix.size: {pix.size}")
if pix.size > 8*3*settings.MAX_PIXEL_IN_A_FILE*settings.MAX_PIXEL_IN_A_FILE:
raise InvalidDecompressedSizeException(excArgs=(str(width), str(height), str(settings.MAX_PIXEL_IN_A_FILE)))
raise InvalidDecompressedSizeException(excArgs=(str(pix.width), str(pix.height), str(settings.MAX_PIXEL_IN_A_FILE)))
pix.save(saving_path)
print(f"Saving {saving_path}")
new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=saving_path,

View File

@ -0,0 +1,15 @@
from PIL import Image
def resize(image, max_w=2048, max_h=2048):
    """Convert a pixmap-like object into a PIL RGB image, downscaling to fit.

    *image* is expected to expose ``width``, ``height`` and raw RGB bytes in
    ``samples`` (the fitz ``Pixmap`` interface — TODO confirm all callers).
    If either dimension exceeds its bound, the image is shrunk by a single
    ratio so the aspect ratio is preserved and both dimensions fit within
    ``max_w`` x ``max_h``. Returns the (possibly resized) PIL image.
    """
    cur_w, cur_h = image.width, image.height
    image = Image.frombytes("RGB", [cur_w, cur_h], image.samples)
    # Fix: the original tested `cur_h > max_w`, so an over-wide but short
    # image was never downscaled; compare width against max_w instead.
    if cur_w > max_w or cur_h > max_h:
        ratio = min(max_w / cur_w, max_h / cur_h)
        new_w = int(ratio * cur_w)
        new_h = int(ratio * cur_h)
        image = image.resize((new_w, new_h))
    return image

View File

@ -1,195 +0,0 @@
version: '3.0'
# TODO: use docker-compose extend: for compact purpose
networks:
ctel:
driver: bridge
services:
cope2n-fi-manulife-sbt:
build:
context: cope2n-ai-fi
shm_size: 10gb
dockerfile: Dockerfile
shm_size: 10gb
image: tannedcung/cope2n-ai-fi-sbt
container_name: "TannedCung-cope2n-ai-fi-manulife-sbt-dev"
networks:
- ctel
privileged: true
environment:
- CELERY_BROKER=amqp://${RABBITMQ_DEFAULT_USER}:${RABBITMQ_DEFAULT_PASS}@rabbitmq-manulife-sbt:5672
- CUDA_VISIBLE_DEVICES=1
volumes:
- ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only
- ./cope2n-api:/workspace/cope2n-api
- ./cope2n-fe:/workspace/cope2n-fe
- ./cope2n-ai-fi/models:/models
working_dir: /workspace/cope2n-ai-fi
# deploy:
# resources:
# reservations:
# devices:
# - driver: nvidia
# count: 1
# capabilities: [gpu]
# command: bash -c "tail -f > /dev/null"
command: bash run.sh
# Back-end services
be-ctel-manulife-sbt:
build:
context: cope2n-api
dockerfile: Dockerfile-dev
# ports:
# - 9800:9000
image: tannedcung/cope2n-be
container_name: "TannedCung-cope2n-be-ctel-manulife-sbt-dev"
environment:
- MEDIA_ROOT=${MEDIA_ROOT}
- DB_ENGINE=${DB_ENGINE}
- DB_SCHEMA=${DB_SCHEMA}
- DB_USER=${DB_USER}
- DB_PASSWORD=${DB_PASSWORD}
- DB_HOST=${DB_HOST}
- DB_PORT=${DB_PUBLIC_PORT}
- DEBUG=${DEBUG}
- CORS_ALLOWED_ORIGINS=${CORS_ALLOWED_ORIGINS}
- BASE_PORT=${BASE_PORT}
- CTEL_KEY=${CTEL_KEY}
- SECRET_KEY=${SECRET_KEY}
- ALLOWED_HOSTS=${ALLOWED_HOSTS}
- BROKER_URL=amqp://${RABBITMQ_DEFAULT_USER}:${RABBITMQ_DEFAULT_PASS}@rabbitmq-manulife-sbt:5672
- BASE_URL=http://be-ctel-manulife-sbt:${BASE_PORT}
- BASE_UI_URL=http://fe:${VITE_PORT}
- AUTH_TOKEN_LIFE_TIME=${AUTH_TOKEN_LIFE_TIME}
- IMAGE_TOKEN_LIFE_TIME=${IMAGE_TOKEN_LIFE_TIME}
- INTERNAL_SDS_KEY=${INTERNAL_SDS_KEY}
- FI_USER_NAME=${FI_USER_NAME}
- FI_PASSWORD=${FI_PASSWORD}
# restart: always
networks:
- ctel
volumes:
- ${HOST_MEDIA_FOLDER}:${MEDIA_ROOT}
- ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only
- ./cope2n-api:/workspace/cope2n-api
- ./cope2n-fe:/workspace/cope2n-fe
working_dir: /workspace/cope2n-api
# depends_on:
# db:
# condition: service_started
# rabbitmq:
# condition: service_started
# command: sh -c "python manage.py collectstatic --no-input &&
# python manage.py makemigrations &&
# python manage.py compilemessages &&
command: "gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker -b 0.0.0.0:9000" # pre-makemigrations on prod
# command: sh -c "tail -f > /dev/null"
be-celery-manulife-sbt:
# build:
# context: cope2n-api
# dockerfile: Dockerfile-dev
# args:
# - "UID=${UID:-1000}"
# - "GID=${GID:-1000}"
image: tannedcung/cope2n-be
container_name: "TannedCung-cope2n-be-celery-manulife-sbt-dev"
environment:
- MEDIA_ROOT=${MEDIA_ROOT}
- PYTHONPATH=${PYTHONPATH}:/app # For import module
- PYTHONUNBUFFERED=1 # For show print log
- DB_SCHEMA=${DB_SCHEMA}
- DB_USER=${DB_USER}
- DB_PASSWORD=${DB_PASSWORD}
- DB_HOST=${DB_HOST}
- DB_PORT=${DB_INTERNAL_PORT}
- BROKER_URL=${BROKER_URL}
- DB_ENGINE=${DB_ENGINE}
- DEBUG=${DEBUG}
networks:
- ctel
# restart: always
depends_on:
db-manulife-sbt:
condition: service_started
rabbitmq-manulife-sbt:
condition: service_started
volumes:
- ${HOST_MEDIA_FOLDER}:${MEDIA_ROOT}
- ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only
- ./cope2n-api:/workspace/cope2n-api
- ./cope2n-fe:/workspace/cope2n-fe
working_dir: /workspace/cope2n-api
command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO"
# Back-end persistent
db-manulife-sbt:
mem_reservation: 500m
mem_limit: 1g
container_name: TannedCung-cope2n-be-manulife-sbt-db
image: postgres:14.7-alpine
volumes:
- db_data:/var/lib/postgresql/data
- ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only
- ./cope2n-api:/workspace/cope2n-api
- ./cope2n-fe:/workspace/cope2n-fe
working_dir: /workspace/cope2n-api
networks:
- ctel
environment:
- POSTGRES_USER=${DB_USER}
- POSTGRES_PASSWORD=${DB_PASSWORD}
- POSTGRES_DB=${DB_SCHEMA}
rabbitmq-manulife-sbt:
mem_reservation: 600m
mem_limit: 4g
container_name: TannedCung-cope2n-be-rabbitmq-manulife-sbt
restart: always
image: rabbitmq:3.10-alpine
ports:
- 5672:5672
volumes:
- rabbitmq_data:/var/lib/rabbitmq
- ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only
- ./cope2n-api:/workspace/cope2n-api
- ./cope2n-fe:/workspace/cope2n-fe
working_dir: /workspace/cope2n-api
networks:
- ctel
environment:
- RABBITMQ_DEFAULT_USER=${RABBITMQ_DEFAULT_USER}
- RABBITMQ_DEFAULT_PASS=${RABBITMQ_DEFAULT_PASS}
# Front-end services
fe:
# build:
# args:
# - PORT=${PORT}
# context: cope2n-fe
# shm_size: 10gb
# dockerfile: Dockerfile-dev
shm_size: 10gb
image: tannedcung/cope2n-be
container_name: "TannedCung-cope2n-fe-ctel-manulife-sbt-dev"
privileged: true
ports:
- 9801:9001
networks:
- ctel
volumes:
- ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only
- ./cope2n-api:/workspace/cope2n-api
- ./cope2n-fe:/workspace/cope2n-fe
working_dir: /workspace/cope2n-fe
# deploy:
# resources:
# reservations:
# devices:
# - driver: nvidia
# count: 1
# capabilities: [gpu]
command: bash -c "source /root/.bashrc && ldconfig && npm start"
# command: sh -c "tail -f > /dev/null"
volumes:
db_data:
rabbitmq_data:

View File

@ -41,8 +41,8 @@ services:
build:
context: cope2n-api
dockerfile: Dockerfile
ports:
- 9880:9000
# ports:
# - 9880:9000
image: sidp/cope2n-be-fi-sbt
# container_name: "sidp-cope2n-be-ctel-sbt"
environment:
@ -67,7 +67,7 @@ services:
- INTERNAL_SDS_KEY=${INTERNAL_SDS_KEY}
- FI_USER_NAME=${FI_USER_NAME}
- FI_PASSWORD=${FI_PASSWORD}
# - S3_ENDPOINT=http://minio:9884
- S3_ENDPOINT=http://minio:9884
- S3_ACCESS_KEY=${S3_ACCESS_KEY}
- S3_SECRET_KEY=${S3_SECRET_KEY}
- S3_BUCKET_NAME=${S3_BUCKET_NAME}
@ -79,9 +79,9 @@ services:
- BE_static:/app/static
# - ./cope2n-api:/app
working_dir: /app
# depends_on:
# db:
# condition: service_started
depends_on:
db-sbt:
condition: service_started
# rabbitmq:
# condition: service_started
command: sh -c "python manage.py collectstatic --no-input &&
@ -90,19 +90,19 @@ services:
gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker -b 0.0.0.0:9000" # pre-makemigrations on prod
# command: sh -c "tail -f > /dev/null"
# minio:
# image: minio/minio
minio:
image: minio/minio
# ports:
# - 9884:9884
# - 9885:9885
#environment:
# - MINIO_ACCESS_KEY=${S3_ACCESS_KEY}
# - MINIO_SECRET_KEY=${S3_SECRET_KEY}
#volumes:
# - ./minio_data:/data
#networks:
# - ctel-sbt
#command: server --address :9884 --console-address :9885 /data
environment:
- MINIO_ACCESS_KEY=${S3_ACCESS_KEY}
- MINIO_SECRET_KEY=${S3_SECRET_KEY}
volumes:
- ./data/minio_data:/data
networks:
- ctel-sbt
command: server --address :9884 --console-address :9885 /data
be-celery-sbt:
# build:
@ -130,7 +130,7 @@ services:
- CTEL_KEY=${CTEL_KEY}
- SECRET_KEY=${SECRET_KEY}
- ALLOWED_HOSTS=${ALLOWED_HOSTS}
# - S3_ENDPOINT=http://minio:9884
- S3_ENDPOINT=http://minio:9884
- S3_ACCESS_KEY=${S3_ACCESS_KEY}
- S3_SECRET_KEY=${S3_SECRET_KEY}
- S3_BUCKET_NAME=${S3_BUCKET_NAME}
@ -146,11 +146,10 @@ services:
condition: service_started
volumes:
- ${HOST_MEDIA_FOLDER}:${MEDIA_ROOT}
# - sqlite_data:/app/
# - ./cope2n-api:/app
working_dir: /app
command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO"
# Back-end persistent
db-sbt:
mem_reservation: 500m
@ -158,7 +157,7 @@ services:
# container_name: sidp-cope2n-be-sbt-db
image: postgres:14.7-alpine
volumes:
- ./postgres_data:/var/lib/postgresql/data
- ./data/postgres_data:/var/lib/postgresql/data
working_dir: /workspace/cope2n-api
networks:
- ctel-sbt
@ -191,10 +190,10 @@ services:
dockerfile: Dockerfile
shm_size: 10gb
image: sidp/cope2n-fe-fi-sbt
container_name: "sidp-cope2n-fe-ctel-sbt"
# container_name: "sidp-cope2n-fe-ctel-sbt"
privileged: true
ports:
- 9881:80
- 9889:80
depends_on:
be-ctel-sbt:
condition: service_started
@ -202,10 +201,9 @@ services:
condition: service_started
environment:
- VITE_PROXY=http://be-ctel-sbt:${BASE_PORT}
# - VITE_PROXY=http://42.96.42.13:9880
- VITE_API_BASE_URL=http://fe-sbt:80
volumes:
- BE_static:/backend-static l-sbt
- BE_static:/backend-static
networks:
- ctel-sbt

203
speedtest.py Normal file
View File

@ -0,0 +1,203 @@
import requests
import time
import argparse
import multiprocessing
import tqdm
import random
import traceback
parser = argparse.ArgumentParser()
parser.add_argument("--host", dest="host", default="https://sbt.idp.sdsrv.ai", required=False)
parser.add_argument("-u", "--username", help="Username to connect to server", required=True)
parser.add_argument("-p", "--password", help="Password to connect to server", required=True)
parser.add_argument("--num_requests", type=int, help="Number of requests", required=False, default=100)
parser.add_argument("--num_workers", type=int, help="Number of workers", required=False, default=3)
parser.add_argument("--checking_interval", type=float, help="Interval result checking time", required=False, default=0.5)
args = parser.parse_args()
PROCESSING_TIMEOUT = 60
# =================================================================
# GET THE TOKEN
# Authenticate once up front; every later request reuses this token.
response = requests.post(f'{args.host}/api/ctel/login/', json={
    'username': args.username,
    'password': args.password
})
try:
    token = response.json()['token']
except (ValueError, KeyError):
    # ValueError: body was not JSON; KeyError: no 'token' field.
    # Fix: the original bare `except:` fell through, and the script later
    # crashed with a NameError on `token` — abort with a clear message instead.
    print("Failed to login")
    print(response.content)
    raise SystemExit(1)
# After the login, store the token in the memory (RAM) or DB
# Re-login to issue a new token after 6 days.
# =================================================================
def _failure(status):
    """Build the uniform failure record consumed by the report aggregation."""
    return {
        "success": False,
        "status": status,
        "upload_time": 0,
        "process_time": 0,
        "num_files": 0,
    }


def process_file(data):
    """Upload one batch of files and poll until the OCR result is ready.

    *data* is a ``(files, token)`` tuple: ``files`` is a list of
    ``('field_name', (filename, bytes))`` pairs as accepted by ``requests``,
    and ``token`` is the auth token obtained at login.

    Returns a stats dict with a success flag, a status, upload/process
    durations in seconds and the number of files uploaded.
    """
    files, token = data
    num_files = len(files)
    # processType 12 selects the SBT invoice pipeline server-side.
    # NOTE(review): this mutates the passed-in list — harmless here because
    # gen_input builds a fresh list per request via get_files().
    files.append(('processType', (None, 12)))

    # =================================================================
    # UPLOAD THE FILE
    start_time = time.time()
    try:
        response = requests.post(f'{args.host}/api/ctel/images/process/', headers={
            'Authorization': token,
        }, files=files, timeout=100)
    except requests.exceptions.Timeout:
        print("Timeout occurred while uploading")
        return _failure("timeout")
    except Exception as e:
        print(e)
        traceback.print_exc()
        print("Unknown exception occurred while uploading")
        return _failure("unknown error")
    data = response.json()
    if "request_id" not in data:
        print("Missing request_id")
        print(data)
        return _failure("unknown error")
    # Fix: reuse the already-parsed body instead of calling response.json() twice.
    request_id = data['request_id']
    upload_time = time.time() - start_time

    # =================================================================
    # CHECK THE RESULT
    start_time = time.time()
    while True:
        try:
            response = requests.get(f'{args.host}/api/ctel/result/{request_id}/', headers={
                'Authorization': token,
            }, timeout=100)
        except requests.exceptions.Timeout:
            print("Timeout occurred while requerying result")
            return _failure("timeout")
        except Exception as e:
            print(e)
            traceback.print_exc()
            # Fix: the original copy-pasted the "while uploading" message here.
            print("Unknown exception occurred while requerying result")
            return _failure("unknown error")
        data = response.json()
        payload = data.get("data", None)
        if payload:
            print(payload)  # Got the response
            if payload.get("status", 200) != 200:
                return _failure(data.get("status", -1))
            break
        if time.time() - start_time > PROCESSING_TIMEOUT:
            print("Timeout!")
            return _failure("timeout")
        time.sleep(args.checking_interval)
    process_time = time.time() - start_time

    # =================================================================
    return {
        "success": True,
        "status": 200,
        "upload_time": upload_time,
        "process_time": process_time,
        "num_files": num_files,
    }
invoice_files = [
('invoice_file', ('invoice.jpg', open("test_samples/sbt/big_image.jpg", "rb").read())),
]
imei_files = [
('imei_files', ("test_samples/sbt/imei1.jpg", open("test_samples/sbt/big_image.jpg", "rb").read())),
('imei_files', ("test_samples/sbt/imei2.jpg", open("test_samples/sbt/imei2.jpg", "rb").read())),
('imei_files', ("test_samples/sbt/imei3.jpg", open("test_samples/sbt/imei3.jpg", "rb").read())),
('imei_files', ("test_samples/sbt/imei4.jpeg", open("test_samples/sbt/imei4.jpeg", "rb").read())),
('imei_files', ("test_samples/sbt/imei5.jpg", open("test_samples/sbt/imei5.jpg", "rb").read())),
]
def get_imei_files():
    """Return a uniformly random prefix of 1..len(imei_files) IMEI uploads.

    Fix: the original drew ``random.randint(1, len(imei_files) + 1)``;
    ``randint`` is inclusive on both ends, so the out-of-range value was
    silently capped by the slice and the maximum batch size was drawn
    twice as often as intended.
    """
    num_files = random.randint(1, len(imei_files))
    return imei_files[:num_files]
def get_files():
    """Assemble one request's uploads: the invoice plus a random IMEI batch."""
    batch = list(invoice_files)
    batch.extend(get_imei_files())
    return batch
def gen_input(num_input):
    """Lazily yield *num_input* ``(files, token)`` work items for the pool."""
    produced = 0
    while produced < num_input:
        yield (get_files(), token)
        produced += 1
# Fan the requests out over a small worker pool and collect per-request stats.
pool = multiprocessing.Pool(processes=args.num_workers)
results = []
for result in tqdm.tqdm(pool.imap_unordered(process_file, gen_input(num_input=args.num_requests)),
                        total=args.num_requests):
    results.append(result)

print("## TEST REPORT #################################")
print("Number of requests: {}".format(args.num_requests))
print("Number of concurrent requests: {}".format(args.num_workers))
print("Number of files: 1 invoice, 1-5 imei files (random)")
print("Query time interval for result: {:.3f}s ".format(args.checking_interval))
print("--------------------------------------")
print("SUCCESS RATE")
counter = {}
for result in results:
    counter[result["status"]] = counter.get(result["status"], 0) + 1
total_requests = sum(counter.values())
print("Success rate: {}".format(counter.get(200, 0) / total_requests if total_requests > 0 else -1))
print("Statuses:", counter)
print("--------------------------------------")
print("TIME BY REQUEST")
uploading_time = [x["upload_time"] for x in results if x["success"]]
processing_time = [x["process_time"] for x in results if x["success"]]
if len(uploading_time) == 0:
    # Fix: the original printed this warning but still divided by
    # len(uploading_time) == 0 below, crashing with ZeroDivisionError.
    print("No valid uploading time")
    print("Check the results!")
else:
    print("Uploading time (Avg / Min / Max): {:.3f}s {:.3f}s {:.3f}s".format(sum(uploading_time) / len(uploading_time), min(uploading_time), max(uploading_time)))
    print("Processing time (Avg / Min / Max): {:.3f}s {:.3f}s {:.3f}s".format(sum(processing_time) / len(processing_time), min(processing_time), max(processing_time)))
print("--------------------------------------")
print("TIME BY IMAGE")
num_images = sum(x["num_files"] for x in results if x["success"])
print("Total images:", num_images)
if num_images > 0:
    # Fix: guard the per-image averages the same way — num_images is 0
    # whenever every request failed.
    print("Uploading time: {:.3f}s".format(sum(uploading_time) / num_images))
    print("Processing time: {:.3f}s".format(sum(processing_time) / num_images))
print("--------------------------------------")

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 MiB

BIN
test_samples/sbt/imei1.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 MiB

BIN
test_samples/sbt/imei2.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 253 KiB

BIN
test_samples/sbt/imei3.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 MiB

BIN
test_samples/sbt/imei4.jpeg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

BIN
test_samples/sbt/imei5.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 153 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 MiB