Clean up code

This commit is contained in:
Viet Anh Nguyen 2023-12-12 12:54:34 +07:00
parent 0665d707a5
commit d2ea7f2e66
19 changed files with 283 additions and 481 deletions

View File

@ -0,0 +1,2 @@
# SBT Project

View File

@ -203,6 +203,7 @@ MAX_UPLOAD_SIZE_OF_A_FILE = 100 * 1024 * 1024 # 100 MB
MAX_UPLOAD_FILE_SIZE_OF_A_REQUEST = 100 * 1024 * 1024 # 100 MB MAX_UPLOAD_FILE_SIZE_OF_A_REQUEST = 100 * 1024 * 1024 # 100 MB
MAX_UPLOAD_FILES_IN_A_REQUEST = 5 MAX_UPLOAD_FILES_IN_A_REQUEST = 5
MAX_PIXEL_IN_A_FILE = 5000 MAX_PIXEL_IN_A_FILE = 5000
TARGET_MAX_IMAGE_SIZE = (2048, 2048)
SIZE_TO_COMPRESS = 2 * 1024 * 1024 SIZE_TO_COMPRESS = 2 * 1024 * 1024
MAX_NUMBER_OF_TEMPLATE = 3 MAX_NUMBER_OF_TEMPLATE = 3
MAX_PAGES_OF_PDF_FILE = 50 MAX_PAGES_OF_PDF_FILE = 50
@ -211,4 +212,6 @@ CACHES = {
'default': { 'default': {
'BACKEND': 'django.core.cache.backends.dummy.DummyCache', 'BACKEND': 'django.core.cache.backends.dummy.DummyCache',
} }
} }

View File

@ -29,19 +29,6 @@ from fwd import settings
class CtelUserViewSet(viewsets.ViewSet): class CtelUserViewSet(viewsets.ViewSet):
lookup_field = "username" lookup_field = "username"
# @extend_schema(request=LoginRequest, responses=None, tags=['users'], examples=[
# OpenApiExample(
# 'ex1',
# summary='Sample Login',
# description='Sample Login',
# value={
# 'username': 'admin',
# 'password': 'admin'
# }
# ),
# ])
@extend_schema(request={ @extend_schema(request={
'multipart/form-data': { 'multipart/form-data': {
'type': 'object', 'type': 'object',

View File

@ -1,10 +1,8 @@
import time import time
import uuid import uuid
from wsgiref.util import FileWrapper from wsgiref.util import FileWrapper
import base64
from django.core.files.uploadedfile import TemporaryUploadedFile from django.core.files.uploadedfile import TemporaryUploadedFile
from django.db import transaction
from django.http import HttpResponse, JsonResponse from django.http import HttpResponse, JsonResponse
from django.utils.crypto import get_random_string from django.utils.crypto import get_random_string
from drf_spectacular.utils import extend_schema from drf_spectacular.utils import extend_schema
@ -12,12 +10,14 @@ from rest_framework import status, viewsets
from rest_framework.decorators import action from rest_framework.decorators import action
from rest_framework.response import Response from rest_framework.response import Response
from typing import List from typing import List
from rest_framework.renderers import JSONRenderer
from rest_framework_xml.renderers import XMLRenderer
from fwd import settings from fwd import settings
from ..celery_worker.client_connector import c_connector from ..celery_worker.client_connector import c_connector
from ..annotation.api import throw_on_failure from ..annotation.api import throw_on_failure
from ..constant.common import allowed_p_type, ProcessType, REQUEST_ID, FOLDER_TYPE, \ from ..constant.common import allowed_p_type, ProcessType, REQUEST_ID, FOLDER_TYPE, \
FolderFileType, TEMPLATE_ID, EntityStatus, standard_ocr_list, pdf_extensions, image_extensions, allowed_file_extensions FolderFileType, TEMPLATE_ID, EntityStatus, pdf_extensions, allowed_file_extensions
from ..exception.exceptions import RequiredFieldException, InvalidException, NotFoundException, \ from ..exception.exceptions import RequiredFieldException, InvalidException, NotFoundException, \
PermissionDeniedException, LimitReachedException, LockedEntityException, FileContentInvalidException, ServiceTimeoutException PermissionDeniedException, LimitReachedException, LockedEntityException, FileContentInvalidException, ServiceTimeoutException
from ..models import SubscriptionRequest, UserProfile, SubscriptionRequestFile, OcrTemplate, Subscription from ..models import SubscriptionRequest, UserProfile, SubscriptionRequestFile, OcrTemplate, Subscription
@ -27,79 +27,6 @@ from ..utils import FileUtils, ProcessUtil
class CtelViewSet(viewsets.ViewSet): class CtelViewSet(viewsets.ViewSet):
lookup_field = "username" lookup_field = "username"
size_to_compress = settings.SIZE_TO_COMPRESS size_to_compress = settings.SIZE_TO_COMPRESS
@extend_schema(request={
    'multipart/form-data': {
        'type': 'object',
        'properties': {
            'file': {
                'type': 'string',
                'format': 'binary'
            },
            'processType': {
                'type': 'string'
            },
        },
        'required': {'file', 'processType'}
    }
}, responses=None, tags=['ocr'])
@action(detail=False, url_path="image/process", methods=["POST"])
def process(self, request):
    """Accept one uploaded file and dispatch OCR processing for it.

    PDF uploads are handed to the celery worker via ``c_connector.do_pdf``;
    image uploads are processed inline and queued; any other extension is
    rejected with HTTP 406. Returns a JSON body carrying the request id.
    """
    s_time = time.time()
    user_info = ProcessUtil.get_user(request)
    user = user_info.user
    sub = user_info.current_sub

    validated_data = ProcessUtil.validate_ocr_request_and_get(request, sub)

    provider_code = 'SAP'
    rq_id = provider_code + uuid.uuid4().hex

    file_obj: TemporaryUploadedFile = validated_data['file']
    file_extension = file_obj.name.split(".")[-1].lower()
    p_type = validated_data['type']
    file_name = f"temp_{rq_id}.{file_extension}"

    total_page = 1

    new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page,
                                                           pages_left=total_page,
                                                           doc_type="all",
                                                           process_type=p_type, status=1, request_id=rq_id,
                                                           provider_code=provider_code,
                                                           subscription=sub)
    new_request.save()

    # Rewind before reading: validation may already have consumed the stream.
    file_obj.seek(0)
    file_path = FileUtils.resize_and_save_file(file_name, new_request, file_obj, 100)
    _ = FileUtils.save_to_S3(file_name, new_request, file_path)

    # BUG FIX: this used to be a bare annotation (`files: [{...},]`), which is
    # never evaluated, so `files` was undefined at the do_pdf call below and
    # the PDF branch raised NameError. It is now a real assignment.
    files = [{
        "file_name": file_name,
        "file_path": file_path,  # local path to file
        "file_type": ""
    }]

    if file_extension in pdf_extensions:
        # c_connector is already imported at module level; the former local
        # re-import has been dropped.
        c_connector.do_pdf((rq_id, sub.id, p_type, user.id, files))
    elif file_extension in image_extensions:
        b_url = ProcessUtil.process_image_file(file_name, file_obj, new_request, user)
        j_time = time.time()
        print(f"[INFO]: Duration of Pre-processing: {j_time - s_time}s")
        print(f"[INFO]: b_url: {b_url}")
        if p_type in standard_ocr_list:
            ProcessUtil.send_to_queue2(rq_id + "_sub_0", sub.id, b_url, user.id, p_type)
        if p_type == ProcessType.TEMPLATE_MATCHING.value:
            ProcessUtil.send_template_queue(rq_id, b_url, validated_data['template'], user.id)
    else:
        # BUG FIX: message previously read "is now allowed".
        return JsonResponse(status=status.HTTP_406_NOT_ACCEPTABLE,
                            data={"request_id": rq_id, "message": f"File {file_extension} is not allowed"})

    return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id})
@extend_schema(request={ @extend_schema(request={
'multipart/form-data': { 'multipart/form-data': {
@ -124,12 +51,7 @@ class CtelViewSet(viewsets.ViewSet):
} }
}, responses=None, tags=['ocr']) }, responses=None, tags=['ocr'])
@action(detail=False, url_path="images/process", methods=["POST"]) @action(detail=False, url_path="images/process", methods=["POST"])
# @transaction.atomic
def processes(self, request): def processes(self, request):
s_time = time.time()
# print(30*"=")
# print(f"[DEBUG]: request: {request}")
# print(30*"=")
user_info = ProcessUtil.get_user(request) user_info = ProcessUtil.get_user(request)
user = user_info.user user = user_info.user
sub = user_info.current_sub sub = user_info.current_sub
@ -147,8 +69,7 @@ class CtelViewSet(viewsets.ViewSet):
"invoice": invoice_file_objs "invoice": invoice_file_objs
} }
total_page = len(files.keys()) total_page = len(files.keys())
# file_paths = []
list_urls = []
p_type = validated_data['type'] p_type = validated_data['type']
new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page, new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page,
pages_left=total_page, pages_left=total_page,
@ -176,7 +97,6 @@ class CtelViewSet(viewsets.ViewSet):
compact_files.append(this_file) compact_files.append(this_file)
c_connector.do_pdf((rq_id, sub.id, p_type, user.id, compact_files)) c_connector.do_pdf((rq_id, sub.id, p_type, user.id, compact_files))
j_time = time.time()
return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id}) return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id})
@extend_schema(request={ @extend_schema(request={
@ -202,12 +122,7 @@ class CtelViewSet(viewsets.ViewSet):
} }
}, responses=None, tags=['ocr']) }, responses=None, tags=['ocr'])
@action(detail=False, url_path="images/process_sync", methods=["POST"]) @action(detail=False, url_path="images/process_sync", methods=["POST"])
# @transaction.atomic
def processes_sync(self, request): def processes_sync(self, request):
s_time = time.time()
# print(30*"=")
# print(f"[DEBUG]: request: {request}")
# print(30*"=")
user_info = ProcessUtil.get_user(request) user_info = ProcessUtil.get_user(request)
user = user_info.user user = user_info.user
sub = user_info.current_sub sub = user_info.current_sub
@ -225,8 +140,6 @@ class CtelViewSet(viewsets.ViewSet):
"invoice": invoice_file_objs "invoice": invoice_file_objs
} }
total_page = len(files.keys()) total_page = len(files.keys())
# file_paths = []
list_urls = []
p_type = validated_data['type'] p_type = validated_data['type']
new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page, new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page,
pages_left=total_page, pages_left=total_page,
@ -244,18 +157,16 @@ class CtelViewSet(viewsets.ViewSet):
_name = f"temp_{doc_type}_{rq_id}_{i}.{_ext}" _name = f"temp_{doc_type}_{rq_id}_{i}.{_ext}"
doc_file.seek(0) doc_file.seek(0)
file_path = FileUtils.resize_and_save_file(_name, new_request, doc_file, 100) file_path = FileUtils.resize_and_save_file(_name, new_request, doc_file, 100)
S3_path = FileUtils.save_to_S3(_name, new_request, file_path) _ = FileUtils.save_to_S3(_name, new_request, file_path)
count += 1 count += 1
this_file = { this_file = {
"file_name": _name, "file_name": _name,
"file_path": file_path, "file_path": file_path,
"file_type": doc_type "file_type": doc_type
} }
compact_files.append(this_file) compact_files.append(this_file)
c_connector.do_pdf((rq_id, sub.id, p_type, user.id, compact_files)) c_connector.do_pdf((rq_id, sub.id, p_type, user.id, compact_files))
j_time = time.time()
time_out = 120 time_out = 120
start = time.time() start = time.time()
while time.time() - start < time_out: while time.time() - start < time_out:
@ -322,13 +233,7 @@ class CtelViewSet(viewsets.ViewSet):
} }
}, responses=None, tags=['ocr']) }, responses=None, tags=['ocr'])
@action(detail=False, url_path="images/feedback", methods=["POST"]) @action(detail=False, url_path="images/feedback", methods=["POST"])
# @transaction.atomic
def feedback(self, request): def feedback(self, request):
# s_time = time.time()
# user_info = ProcessUtil.get_user(request)
# user = user_info.user
# sub = user_info.current_sub
validated_data = ProcessUtil.sbt_validate_feedback(request) validated_data = ProcessUtil.sbt_validate_feedback(request)
rq_id = validated_data['request_id'] rq_id = validated_data['request_id']
@ -425,9 +330,6 @@ class CtelViewSet(viewsets.ViewSet):
return HttpResponse(FileWrapper(FileUtils.get_file(media_data.file_path)), status=status.HTTP_200_OK, return HttpResponse(FileWrapper(FileUtils.get_file(media_data.file_path)), status=status.HTTP_200_OK,
headers={'Content-Disposition': 'filename={fn}'.format(fn=file_name)}, headers={'Content-Disposition': 'filename={fn}'.format(fn=file_name)},
content_type=content_type) content_type=content_type)
from rest_framework.renderers import JSONRenderer
from rest_framework_xml.renderers import XMLRenderer
@extend_schema(request=None, responses=None, tags=['data']) @extend_schema(request=None, responses=None, tags=['data'])
@throw_on_failure(InvalidException(excArgs='data')) @throw_on_failure(InvalidException(excArgs='data'))
@ -499,76 +401,3 @@ class CtelViewSet(viewsets.ViewSet):
serializer.is_valid() serializer.is_valid()
return Response(status=status.HTTP_200_OK, data=serializer.data[0]) return Response(status=status.HTTP_200_OK, data=serializer.data[0])
@action(detail=False, url_path="image/process/app", methods=["POST"])
# @transaction.atomic
def process_app(self, request):
    """Process an OCR request for the hard-coded internal app test account.

    Looks up (or lazily creates) the test-account user and its subscription,
    validates the uploaded file list and process type against the token
    limit, then saves the file and queues it for OCR.
    """
    # Fixed sync_id of the internal test account; user/subscription are
    # auto-provisioned on first use.
    app_id = "THIS_IS_OUR_APP_TEST_ACCOUNT_9123"
    users = UserProfile.objects.filter(sync_id=app_id)
    if len(users) > 1:
        # More than one profile with the same sync_id indicates corrupt data.
        raise InvalidException(excArgs='user')
    if len(users) == 0:
        user = UserProfile(sync_id=app_id, limit_total_pages=1000, status=EntityStatus.ACTIVE.value)
        user.save()
    else:
        user = users[0]
    subs = Subscription.objects.filter(user=user)
    if len(subs) > 1:
        raise InvalidException(excArgs='sub')
    if len(subs) == 0:
        sub = Subscription(user=user, limit_token=10000, current_token=0, status=EntityStatus.ACTIVE.value)
        sub.save()
    else:
        sub = subs[0]
    cur = sub.current_token
    lim = sub.limit_token
    list_file = request.data.getlist('file')
    s_time = time.time()
    if "processType" not in request.data or int(request.data['processType']) not in allowed_p_type:
        raise InvalidException(excArgs='processType')
    p_type: int = int(request.data['processType'])
    # Reject before doing any work if this request would exceed the token quota.
    if cur + ProcessUtil.token_value(p_type) >= lim:
        raise LimitReachedException(excArgs=('Number of request', str(sub.limit_token), 'times'))
    FileUtils.validate_list_file(list_file)
    if ("templateId" not in request.data) and p_type == ProcessType.TEMPLATE_MATCHING.value:
        raise InvalidException(excArgs=TEMPLATE_ID)
    provider_code = 'Ctel'
    # Request id: provider + type + random suffix + millisecond timestamp.
    rq_id = provider_code + str(p_type) + get_random_string(5) + str(round(time.time() * 1000))
    # NOTE(review): only the first uploaded file is processed; the rest of
    # list_file is ignored — confirm this is intentional.
    file_obj: TemporaryUploadedFile = list_file[0]
    file_name = "temp_file_" + rq_id + get_random_string(2) + ".jpg"
    total_page = 1
    new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page,
                                                           process_type=p_type, status=1, request_id=rq_id,
                                                           provider_code=provider_code, subscription=sub)
    new_request.save()
    if p_type == ProcessType.ID_CARD.value or p_type == ProcessType.INVOICE.value or p_type == ProcessType.OCR_WITH_BOX.value or p_type == ProcessType.DRIVER_LICENSE.value:
        # Recompress large uploads; small ones are kept at full quality.
        if file_obj.size > self.size_to_compress:
            quality = 90
        else:
            quality = 100
        file_path = FileUtils.resize_and_save_file(file_name, new_request, file_obj, quality)
        new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path,
                                                                            request=new_request,
                                                                            file_name=file_name)
        new_request_file.save()
        b_url = FileUtils.build_url(FolderFileType.REQUESTS.value, new_request.request_id, user.id, file_name)
        j_time = time.time()
        print("Json {}".format(j_time - s_time))
        ProcessUtil.send_to_queue2(rq_id, sub.id, b_url, user.id, p_type)
        return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id})
    # NOTE(review): allowed_p_type values outside the four handled above fall
    # through to a 502 even though the request passed validation and a
    # SubscriptionRequest row was already saved — confirm this is intended.
    return JsonResponse(status=status.HTTP_502_BAD_GATEWAY, data={"message": "unknown_error"})

View File

@ -1,14 +1,12 @@
from celery import shared_task
import time import time
import fitz import fitz
import uuid import uuid
import os import os
import base64 import base64
from fwd_api.models import SubscriptionRequest, UserProfile
from fwd_api.celery_worker.worker import app from fwd_api.celery_worker.worker import app
from ..constant.common import ProcessType, \ from ..constant.common import FolderFileType, image_extensions
FolderFileType, standard_ocr_list, image_extensions
from django.core.files.uploadedfile import TemporaryUploadedFile
from ..exception.exceptions import FileContentInvalidException from ..exception.exceptions import FileContentInvalidException
from ..utils import FileUtils, ProcessUtil, S3_process from ..utils import FileUtils, ProcessUtil, S3_process
from celery.utils.log import get_task_logger from celery.utils.log import get_task_logger
@ -64,7 +62,7 @@ def process_pdf_byte(file_name: str, file_path: str, request, user, file_obj) ->
def process_image_file(file_name: str, file_path, request, user) -> list: def process_image_file(file_name: str, file_path, request, user) -> list:
from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile from fwd_api.models import SubscriptionRequestFile
new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path, new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path,
request=request, request=request,
@ -87,9 +85,7 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files):
"file_type": "" "file_type": ""
},] },]
""" """
from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile,UserProfile
start = time.time() start = time.time()
from django.conf import settings
new_request = SubscriptionRequest.objects.filter(request_id=rq_id)[0] new_request = SubscriptionRequest.objects.filter(request_id=rq_id)[0]
user = UserProfile.objects.filter(id=user_id).first() user = UserProfile.objects.filter(id=user_id).first()
b_urls = [] b_urls = []
@ -128,10 +124,6 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files):
new_request.doc_type = doc_type_string new_request.doc_type = doc_type_string
new_request.save() new_request.save()
# if p_type in standard_ocr_list:
# ProcessUtil.send_to_queue2(rq_id, sub_id, b_urls, user_id, p_type)
# if p_type == ProcessType.TEMPLATE_MATCHING.value:
# ProcessUtil.send_template_queue(rq_id, b_urls, '', user_id)
@app.task(name='upload_file_to_s3') @app.task(name='upload_file_to_s3')
def upload_file_to_s3(local_file_path, s3_key): def upload_file_to_s3(local_file_path, s3_key):

View File

@ -1,9 +1,5 @@
import traceback
import time
import uuid
from fwd_api.celery_worker.worker import app from fwd_api.celery_worker.worker import app
from fwd_api.models import SubscriptionRequest from fwd_api.models import SubscriptionRequest
from django.utils.crypto import get_random_string
from fwd_api.exception.exceptions import InvalidException from fwd_api.exception.exceptions import InvalidException

View File

@ -1,7 +1,7 @@
import io import io
import os import os
import traceback import traceback
import base64 import pathlib
import json import json
from PIL import Image, ExifTags from PIL import Image, ExifTags
@ -14,6 +14,7 @@ from fwd_api.exception.exceptions import GeneralException, RequiredFieldExceptio
from fwd_api.models import SubscriptionRequest, OcrTemplate from fwd_api.models import SubscriptionRequest, OcrTemplate
from fwd_api.utils import ProcessUtil from fwd_api.utils import ProcessUtil
from fwd_api.utils.CryptoUtils import image_authenticator from fwd_api.utils.CryptoUtils import image_authenticator
from fwd_api.utils.image import resize
from ..celery_worker.client_connector import c_connector from ..celery_worker.client_connector import c_connector
import imagesize import imagesize
@ -126,15 +127,26 @@ def save_template_file(file_name: str, rq: OcrTemplate, file: TemporaryUploadedF
print(e) print(e)
raise ServiceUnavailableException() raise ServiceUnavailableException()
def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path):
    """Persist an uploaded file into *folder_path* and return its full path.

    A ``.pdf`` upload (case-insensitive) is written verbatim via ``save_pdf``;
    every other extension is treated as an image and recompressed by
    ``save_img`` at the given *quality*.
    """
    try:
        file_path = os.path.join(folder_path, file_name)
        is_pdf = file_name.split(".")[-1].lower() == "pdf"
        if is_pdf:
            save_pdf(file_path, file)
        else:
            save_img(file_path, file, quality)
    except InvalidDecompressedSizeException:
        # Size-limit violations must surface to the caller unchanged.
        raise
    except Exception as e:
        print(e)
        raise ServiceUnavailableException()
    return file_path
def resize_and_save_file(file_name: str, rq: SubscriptionRequest, file: TemporaryUploadedFile, quality): def resize_and_save_file(file_name: str, rq: SubscriptionRequest, file: TemporaryUploadedFile, quality):
try: try:
folder_path = get_folder_path(rq) folder_path = get_folder_path(rq)
# print(f"[DEBUG]: folder_path: {folder_path}") pathlib.Path(folder_path).mkdir(exist_ok=True, parents=True)
is_exist = os.path.exists(folder_path)
if not is_exist:
# Create a new directory because it does not exist
os.makedirs(folder_path)
return save_file_with_path(file_name, file, quality, folder_path) return save_file_with_path(file_name, file, quality, folder_path)
except InvalidDecompressedSizeException as e: except InvalidDecompressedSizeException as e:
raise e raise e
@ -155,22 +167,6 @@ def save_to_S3(file_name, rq, local_file_path):
print(f"[ERROR]: {e}") print(f"[ERROR]: {e}")
raise ServiceUnavailableException() raise ServiceUnavailableException()
def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path):
try:
file_path = os.path.join(folder_path, file_name)
extension = file_name.split(".")[-1]
if extension in ['pdf', 'PDF']:
save_pdf(file_path, file)
else:
save_img(file_path, file, quality)
except InvalidDecompressedSizeException as e:
raise e
except Exception as e:
print(e)
raise ServiceUnavailableException()
return file_path
def save_pdf(file_path: str, file: TemporaryUploadedFile): def save_pdf(file_path: str, file: TemporaryUploadedFile):
f = open(file_path, 'wb+') f = open(file_path, 'wb+')
for chunk in file.chunks(): for chunk in file.chunks():
@ -209,7 +205,10 @@ def save_img(file_path: str, file: TemporaryUploadedFile, quality):
print(ex) print(ex)
print("Rotation Error") print("Rotation Error")
traceback.print_exc() traceback.print_exc()
image.convert('RGB').save(file_path, optimize=True, quality=quality)
image = resize(image, max_w=settings.TARGET_MAX_IMAGE_SIZE[0], max_h=settings.TARGET_MAX_IMAGE_SIZE[1])
image = image.convert('RGB')
image.save(file_path, optimize=True, quality=quality)
def build_media_url(folder: str, uid: str, file_name: str = None) -> str: def build_media_url(folder: str, uid: str, file_name: str = None) -> str:
token = image_authenticator.generate_img_token() token = image_authenticator.generate_img_token()

View File

@ -320,13 +320,6 @@ def send_to_queue2(rq_id, sub_id, file_url, user_id, typez):
c_connector.process_invoice_manulife((rq_id, file_url)) c_connector.process_invoice_manulife((rq_id, file_url))
elif typez == ProcessType.SBT_INVOICE.value: elif typez == ProcessType.SBT_INVOICE.value:
c_connector.process_invoice_sbt((rq_id, file_url)) c_connector.process_invoice_sbt((rq_id, file_url))
# elif typez == ProcessType.DRIVER_LICENSE.value:
# c_connector.process_driver_license(
# (rq_id, sub_id, map_process_type_to_folder_name(typez), file_url, user_id))
# elif typez == ProcessType.OCR_WITH_BOX.value:
# c_connector.process_ocr_with_box((rq_id, file_url))
# elif typez == ProcessType.TEMPLATE_MATCHING.value:
# c_connector.process_template_matching((rq_id, file_url))
except Exception as e: except Exception as e:
print(e) print(e)
raise BadGatewayException() raise BadGatewayException()
@ -418,36 +411,16 @@ def process_image_local_file(file_name: str, file_path: str, request: Subscripti
}] }]
def pdf_to_images_urls(doc: fitz.Document, request: SubscriptionRequest, user, dpi: int = 300) -> list: def pdf_to_images_urls(doc: fitz.Document, request: SubscriptionRequest, user, dpi: int = 300) -> list:
def resize(image, max_w=2048, max_h=2048):
    """Convert a rendered pixmap to a PIL RGB image, downscaled to fit.

    The input exposes ``width``/``height``/``samples`` (fitz Pixmap shape —
    see Image.frombytes below); output is a PIL image no larger than
    max_w x max_h, aspect ratio preserved.
    """
    logger.info(f"[DEBUG]: image.size: {image.size}, type(image): {type(image)}")
    cur_w, cur_h = image.width, image.height
    image_bytes = image.samples
    image = Image.frombytes("RGB", [cur_w, cur_h], image_bytes)
    # BUG FIX: the condition used to test cur_h twice (`cur_h > max_w or
    # cur_h > max_h`), so the width was never checked and over-wide images
    # skipped the downscale entirely.
    if cur_w > max_w or cur_h > max_h:
        ratio_w = max_w / cur_w
        ratio_h = max_h / cur_h
        # Use the tighter ratio so both dimensions fit the bounds.
        ratio = min([ratio_h, ratio_w])
        new_w = int(ratio * cur_w)
        new_h = int(ratio * cur_h)
        image = image.resize((new_w, new_h))
    return image
zoom = dpi // 72
magnify = fitz.Matrix(zoom, zoom)
pdf_extracted = [] pdf_extracted = []
for idx, page in enumerate(doc): for idx, page in enumerate(doc):
saving_path = FileUtils.get_folder_path(request) saving_path = FileUtils.get_folder_path(request)
# saving_path = r'C:\Users\mrdra\PycharmProjects\Ctel\test_data'
break_file_name = f'break_{idx}.jpg' break_file_name = f'break_{idx}.jpg'
saving_path = os.path.join(saving_path, break_file_name) saving_path = os.path.join(saving_path, break_file_name)
page = doc.load_page(idx) page = doc.load_page(idx)
pix = page.get_pixmap(dpi=250) # render page to an image pix = page.get_pixmap(dpi=250) # render page to an image
# pix = resize(pix)
# print(f"[DEBUG]: pix.size: {pix.size}")
if pix.size > 8*3*settings.MAX_PIXEL_IN_A_FILE*settings.MAX_PIXEL_IN_A_FILE: if pix.size > 8*3*settings.MAX_PIXEL_IN_A_FILE*settings.MAX_PIXEL_IN_A_FILE:
raise InvalidDecompressedSizeException(excArgs=(str(width), str(height), str(settings.MAX_PIXEL_IN_A_FILE))) raise InvalidDecompressedSizeException(excArgs=(str(pix.width), str(pix.height), str(settings.MAX_PIXEL_IN_A_FILE)))
pix.save(saving_path) pix.save(saving_path)
print(f"Saving {saving_path}") print(f"Saving {saving_path}")
new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=saving_path, new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=saving_path,

View File

@ -0,0 +1,15 @@
from PIL import Image
def resize(image, max_w=2048, max_h=2048):
    """Convert a rendered pixmap to a PIL RGB image, downscaled to fit.

    Parameters
    ----------
    image : object exposing ``width``, ``height`` and raw ``samples`` bytes
        (fitz Pixmap shape — see the Image.frombytes call below).
    max_w, max_h : int
        Bounding box; the result never exceeds either dimension and keeps
        the original aspect ratio.
    """
    cur_w, cur_h = image.width, image.height
    image_bytes = image.samples
    image = Image.frombytes("RGB", [cur_w, cur_h], image_bytes)
    # BUG FIX: the condition used to test cur_h twice (`cur_h > max_w or
    # cur_h > max_h`), so width was never checked and over-wide images were
    # returned at full size.
    if cur_w > max_w or cur_h > max_h:
        ratio_w = max_w / cur_w
        ratio_h = max_h / cur_h
        # The tighter ratio guarantees both dimensions fit.
        ratio = min([ratio_h, ratio_w])
        new_w = int(ratio * cur_w)
        new_h = int(ratio * cur_h)
        image = image.resize((new_w, new_h))
    return image

View File

@ -1,195 +0,0 @@
version: '3.0'
# TODO: use docker-compose extend: for compact purpose
networks:
ctel:
driver: bridge
services:
cope2n-fi-manulife-sbt:
build:
context: cope2n-ai-fi
shm_size: 10gb
dockerfile: Dockerfile
shm_size: 10gb
image: tannedcung/cope2n-ai-fi-sbt
container_name: "TannedCung-cope2n-ai-fi-manulife-sbt-dev"
networks:
- ctel
privileged: true
environment:
- CELERY_BROKER=amqp://${RABBITMQ_DEFAULT_USER}:${RABBITMQ_DEFAULT_PASS}@rabbitmq-manulife-sbt:5672
- CUDA_VISIBLE_DEVICES=1
volumes:
- ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only
- ./cope2n-api:/workspace/cope2n-api
- ./cope2n-fe:/workspace/cope2n-fe
- ./cope2n-ai-fi/models:/models
working_dir: /workspace/cope2n-ai-fi
# deploy:
# resources:
# reservations:
# devices:
# - driver: nvidia
# count: 1
# capabilities: [gpu]
# command: bash -c "tail -f > /dev/null"
command: bash run.sh
# Back-end services
be-ctel-manulife-sbt:
build:
context: cope2n-api
dockerfile: Dockerfile-dev
# ports:
# - 9800:9000
image: tannedcung/cope2n-be
container_name: "TannedCung-cope2n-be-ctel-manulife-sbt-dev"
environment:
- MEDIA_ROOT=${MEDIA_ROOT}
- DB_ENGINE=${DB_ENGINE}
- DB_SCHEMA=${DB_SCHEMA}
- DB_USER=${DB_USER}
- DB_PASSWORD=${DB_PASSWORD}
- DB_HOST=${DB_HOST}
- DB_PORT=${DB_PUBLIC_PORT}
- DEBUG=${DEBUG}
- CORS_ALLOWED_ORIGINS=${CORS_ALLOWED_ORIGINS}
- BASE_PORT=${BASE_PORT}
- CTEL_KEY=${CTEL_KEY}
- SECRET_KEY=${SECRET_KEY}
- ALLOWED_HOSTS=${ALLOWED_HOSTS}
- BROKER_URL=amqp://${RABBITMQ_DEFAULT_USER}:${RABBITMQ_DEFAULT_PASS}@rabbitmq-manulife-sbt:5672
- BASE_URL=http://be-ctel-manulife-sbt:${BASE_PORT}
- BASE_UI_URL=http://fe:${VITE_PORT}
- AUTH_TOKEN_LIFE_TIME=${AUTH_TOKEN_LIFE_TIME}
- IMAGE_TOKEN_LIFE_TIME=${IMAGE_TOKEN_LIFE_TIME}
- INTERNAL_SDS_KEY=${INTERNAL_SDS_KEY}
- FI_USER_NAME=${FI_USER_NAME}
- FI_PASSWORD=${FI_PASSWORD}
# restart: always
networks:
- ctel
volumes:
- ${HOST_MEDIA_FOLDER}:${MEDIA_ROOT}
- ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only
- ./cope2n-api:/workspace/cope2n-api
- ./cope2n-fe:/workspace/cope2n-fe
working_dir: /workspace/cope2n-api
# depends_on:
# db:
# condition: service_started
# rabbitmq:
# condition: service_started
# command: sh -c "python manage.py collectstatic --no-input &&
# python manage.py makemigrations &&
# python manage.py compilemessages &&
command: "gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker -b 0.0.0.0:9000" # pre-makemigrations on prod
# command: sh -c "tail -f > /dev/null"
be-celery-manulife-sbt:
# build:
# context: cope2n-api
# dockerfile: Dockerfile-dev
# args:
# - "UID=${UID:-1000}"
# - "GID=${GID:-1000}"
image: tannedcung/cope2n-be
container_name: "TannedCung-cope2n-be-celery-manulife-sbt-dev"
environment:
- MEDIA_ROOT=${MEDIA_ROOT}
- PYTHONPATH=${PYTHONPATH}:/app # For import module
- PYTHONUNBUFFERED=1 # For show print log
- DB_SCHEMA=${DB_SCHEMA}
- DB_USER=${DB_USER}
- DB_PASSWORD=${DB_PASSWORD}
- DB_HOST=${DB_HOST}
- DB_PORT=${DB_INTERNAL_PORT}
- BROKER_URL=${BROKER_URL}
- DB_ENGINE=${DB_ENGINE}
- DEBUG=${DEBUG}
networks:
- ctel
# restart: always
depends_on:
db-manulife-sbt:
condition: service_started
rabbitmq-manulife-sbt:
condition: service_started
volumes:
- ${HOST_MEDIA_FOLDER}:${MEDIA_ROOT}
- ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only
- ./cope2n-api:/workspace/cope2n-api
- ./cope2n-fe:/workspace/cope2n-fe
working_dir: /workspace/cope2n-api
command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO"
# Back-end persistent
db-manulife-sbt:
mem_reservation: 500m
mem_limit: 1g
container_name: TannedCung-cope2n-be-manulife-sbt-db
image: postgres:14.7-alpine
volumes:
- db_data:/var/lib/postgresql/data
- ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only
- ./cope2n-api:/workspace/cope2n-api
- ./cope2n-fe:/workspace/cope2n-fe
working_dir: /workspace/cope2n-api
networks:
- ctel
environment:
- POSTGRES_USER=${DB_USER}
- POSTGRES_PASSWORD=${DB_PASSWORD}
- POSTGRES_DB=${DB_SCHEMA}
rabbitmq-manulife-sbt:
mem_reservation: 600m
mem_limit: 4g
container_name: TannedCung-cope2n-be-rabbitmq-manulife-sbt
restart: always
image: rabbitmq:3.10-alpine
ports:
- 5672:5672
volumes:
- rabbitmq_data:/var/lib/rabbitmq
- ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only
- ./cope2n-api:/workspace/cope2n-api
- ./cope2n-fe:/workspace/cope2n-fe
working_dir: /workspace/cope2n-api
networks:
- ctel
environment:
- RABBITMQ_DEFAULT_USER=${RABBITMQ_DEFAULT_USER}
- RABBITMQ_DEFAULT_PASS=${RABBITMQ_DEFAULT_PASS}
# Front-end services
fe:
# build:
# args:
# - PORT=${PORT}
# context: cope2n-fe
# shm_size: 10gb
# dockerfile: Dockerfile-dev
shm_size: 10gb
image: tannedcung/cope2n-be
container_name: "TannedCung-cope2n-fe-ctel-manulife-sbt-dev"
privileged: true
ports:
- 9801:9001
networks:
- ctel
volumes:
- ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only
- ./cope2n-api:/workspace/cope2n-api
- ./cope2n-fe:/workspace/cope2n-fe
working_dir: /workspace/cope2n-fe
# deploy:
# resources:
# reservations:
# devices:
# - driver: nvidia
# count: 1
# capabilities: [gpu]
command: bash -c "source /root/.bashrc && ldconfig && npm start"
# command: sh -c "tail -f > /dev/null"
volumes:
db_data:
rabbitmq_data:

View File

@ -41,8 +41,8 @@ services:
build: build:
context: cope2n-api context: cope2n-api
dockerfile: Dockerfile dockerfile: Dockerfile
ports: # ports:
- 9880:9000 # - 9880:9000
image: sidp/cope2n-be-fi-sbt image: sidp/cope2n-be-fi-sbt
# container_name: "sidp-cope2n-be-ctel-sbt" # container_name: "sidp-cope2n-be-ctel-sbt"
environment: environment:
@ -67,7 +67,7 @@ services:
- INTERNAL_SDS_KEY=${INTERNAL_SDS_KEY} - INTERNAL_SDS_KEY=${INTERNAL_SDS_KEY}
- FI_USER_NAME=${FI_USER_NAME} - FI_USER_NAME=${FI_USER_NAME}
- FI_PASSWORD=${FI_PASSWORD} - FI_PASSWORD=${FI_PASSWORD}
# - S3_ENDPOINT=http://minio:9884 - S3_ENDPOINT=http://minio:9884
- S3_ACCESS_KEY=${S3_ACCESS_KEY} - S3_ACCESS_KEY=${S3_ACCESS_KEY}
- S3_SECRET_KEY=${S3_SECRET_KEY} - S3_SECRET_KEY=${S3_SECRET_KEY}
- S3_BUCKET_NAME=${S3_BUCKET_NAME} - S3_BUCKET_NAME=${S3_BUCKET_NAME}
@ -79,30 +79,30 @@ services:
- BE_static:/app/static - BE_static:/app/static
# - ./cope2n-api:/app # - ./cope2n-api:/app
working_dir: /app working_dir: /app
# depends_on: depends_on:
# db: db-sbt:
# condition: service_started condition: service_started
# rabbitmq: # rabbitmq:
# condition: service_started # condition: service_started
command: sh -c "python manage.py collectstatic --no-input && command: sh -c "python manage.py collectstatic --no-input &&
python manage.py migrate && python manage.py migrate &&
python manage.py compilemessages && python manage.py compilemessages &&
gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker -b 0.0.0.0:9000" # pre-makemigrations on prod gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker -b 0.0.0.0:9000" # pre-makemigrations on prod
# command: sh -c "tail -f > /dev/null" # command: sh -c "tail -f > /dev/null"
# minio: minio:
# image: minio/minio image: minio/minio
#ports: # ports:
# - 9884:9884 # - 9884:9884
# - 9885:9885 # - 9885:9885
#environment: environment:
# - MINIO_ACCESS_KEY=${S3_ACCESS_KEY} - MINIO_ACCESS_KEY=${S3_ACCESS_KEY}
# - MINIO_SECRET_KEY=${S3_SECRET_KEY} - MINIO_SECRET_KEY=${S3_SECRET_KEY}
#volumes: volumes:
# - ./minio_data:/data - ./data/minio_data:/data
#networks: networks:
# - ctel-sbt - ctel-sbt
#command: server --address :9884 --console-address :9885 /data command: server --address :9884 --console-address :9885 /data
be-celery-sbt: be-celery-sbt:
# build: # build:
@ -130,7 +130,7 @@ services:
- CTEL_KEY=${CTEL_KEY} - CTEL_KEY=${CTEL_KEY}
- SECRET_KEY=${SECRET_KEY} - SECRET_KEY=${SECRET_KEY}
- ALLOWED_HOSTS=${ALLOWED_HOSTS} - ALLOWED_HOSTS=${ALLOWED_HOSTS}
# - S3_ENDPOINT=http://minio:9884 - S3_ENDPOINT=http://minio:9884
- S3_ACCESS_KEY=${S3_ACCESS_KEY} - S3_ACCESS_KEY=${S3_ACCESS_KEY}
- S3_SECRET_KEY=${S3_SECRET_KEY} - S3_SECRET_KEY=${S3_SECRET_KEY}
- S3_BUCKET_NAME=${S3_BUCKET_NAME} - S3_BUCKET_NAME=${S3_BUCKET_NAME}
@ -146,11 +146,10 @@ services:
condition: service_started condition: service_started
volumes: volumes:
- ${HOST_MEDIA_FOLDER}:${MEDIA_ROOT} - ${HOST_MEDIA_FOLDER}:${MEDIA_ROOT}
# - sqlite_data:/app/
# - ./cope2n-api:/app
working_dir: /app working_dir: /app
command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO" command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO"
# Back-end persistent # Back-end persistent
db-sbt: db-sbt:
mem_reservation: 500m mem_reservation: 500m
@ -158,7 +157,7 @@ services:
# container_name: sidp-cope2n-be-sbt-db # container_name: sidp-cope2n-be-sbt-db
image: postgres:14.7-alpine image: postgres:14.7-alpine
volumes: volumes:
- ./postgres_data:/var/lib/postgresql/data - ./data/postgres_data:/var/lib/postgresql/data
working_dir: /workspace/cope2n-api working_dir: /workspace/cope2n-api
networks: networks:
- ctel-sbt - ctel-sbt
@ -191,10 +190,10 @@ services:
dockerfile: Dockerfile dockerfile: Dockerfile
shm_size: 10gb shm_size: 10gb
image: sidp/cope2n-fe-fi-sbt image: sidp/cope2n-fe-fi-sbt
container_name: "sidp-cope2n-fe-ctel-sbt" # container_name: "sidp-cope2n-fe-ctel-sbt"
privileged: true privileged: true
ports: ports:
- 9881:80 - 9889:80
depends_on: depends_on:
be-ctel-sbt: be-ctel-sbt:
condition: service_started condition: service_started
@ -202,10 +201,9 @@ services:
condition: service_started condition: service_started
environment: environment:
- VITE_PROXY=http://be-ctel-sbt:${BASE_PORT} - VITE_PROXY=http://be-ctel-sbt:${BASE_PORT}
# - VITE_PROXY=http://42.96.42.13:9880
- VITE_API_BASE_URL=http://fe-sbt:80 - VITE_API_BASE_URL=http://fe-sbt:80
volumes: volumes:
- BE_static:/backend-static l-sbt - BE_static:/backend-static
networks: networks:
- ctel-sbt - ctel-sbt

203
speedtest.py Normal file
View File

@ -0,0 +1,203 @@
# Standard library
import argparse
import multiprocessing
import random
import time
import traceback

# Third-party
import requests
import tqdm

# Command-line configuration for the load test run.
parser = argparse.ArgumentParser()
parser.add_argument("--host", dest="host", default="https://sbt.idp.sdsrv.ai", required=False)
parser.add_argument("-u", "--username", help="Username to connect to server", required=True)
parser.add_argument("-p", "--password", help="Password to connect to server", required=True)
parser.add_argument("--num_requests", type=int, help="Number of requests", required=False, default=100)
parser.add_argument("--num_workers", type=int, help="Number of workers", required=False, default=3)
parser.add_argument("--checking_interval", type=float, help="Interval result checking time", required=False, default=0.5)
args = parser.parse_args()

# Hard cap (seconds) on how long a single request's result is polled for.
PROCESSING_TIMEOUT = 60
# =================================================================
# GET THE TOKEN
# Authenticate once up front; every worker process reuses this token.
response = requests.post(f'{args.host}/api/ctel/login/', json={
    'username': args.username,
    'password': args.password
})
try:
    token = response.json()['token']
except (ValueError, KeyError):
    # ValueError: response body was not JSON; KeyError: no 'token' field.
    # The original bare `except` printed a message and fell through with
    # `token` undefined, crashing later with a NameError — abort here instead.
    print("Failed to login")
    print(response.content)
    raise SystemExit(1)
# After the login, store the token in the memory (RAM) or DB
# Re-login to issue a new token after 6 days.
# =================================================================
def process_file(data):
    """Upload one batch of files, then poll until the OCR result is ready.

    Parameters
    ----------
    data : tuple
        ``(files, token)`` — *files* is a list of ``requests``-style
        multipart tuples, *token* is the auth header value from login.

    Returns
    -------
    dict
        Keys: ``success`` (bool), ``status`` (200, an error status, or a
        short reason string), ``upload_time`` / ``process_time`` (seconds),
        ``num_files`` (int, 0 on failure).
    """
    files, token = data
    num_files = len(files)
    # Work on a copy so the caller's list is not mutated by the extra field.
    files = files + [('processType', (None, 12))]

    def _failure(status):
        # Uniform result shape for every error path.
        return {
            "success": False,
            "status": status,
            "upload_time": 0,
            "process_time": 0,
            "num_files": 0,
        }

    # =================================================================
    # UPLOAD THE FILE
    start_time = time.time()
    try:
        response = requests.post(f'{args.host}/api/ctel/images/process/', headers={
            'Authorization': token,
        }, files=files, timeout=100)
    except requests.exceptions.Timeout:
        print("Timeout occurred while uploading")
        return _failure("timeout")
    except Exception as e:
        print(e)
        traceback.print_exc()
        print("Unknown exception occurred while uploading")
        return _failure("unknown error")
    data = response.json()
    if "request_id" not in data:
        print("Missing request_id")
        print(data)
        return _failure("unknown error")
    # Reuse the already-parsed body; the original re-parsed response.json().
    request_id = data['request_id']
    upload_time = time.time() - start_time
    # =================================================================

    # =================================================================
    # CHECK THE RESULT
    start_time = time.time()
    while True:
        try:
            response = requests.get(f'{args.host}/api/ctel/result/{request_id}/', headers={
                'Authorization': token,
            }, timeout=100)
        except requests.exceptions.Timeout:
            print("Timeout occurred while requerying result")
            return _failure("timeout")
        except Exception as e:
            print(e)
            traceback.print_exc()
            # Original reused the "while uploading" message here by copy-paste.
            print("Unknown exception occurred while checking result")
            return _failure("unknown error")
        data = response.json()
        payload = data.get("data", None)
        if payload:
            print(payload)  # Got the response
            status = payload.get("status", 200)
            if status != 200:
                # Report the payload's own status; the original read it from
                # the envelope, which does not carry "status" on this path.
                return _failure(status)
            break
        if time.time() - start_time > PROCESSING_TIMEOUT:
            print("Timeout!")
            return _failure("timeout")
        time.sleep(args.checking_interval)
    process_time = time.time() - start_time
    # =================================================================

    return {
        "success": True,
        "status": 200,
        "upload_time": upload_time,
        "process_time": process_time,
        "num_files": num_files,
    }
def _read_sample(path):
    """Read a test-sample file fully, closing the handle afterwards.

    The original used bare ``open(...).read()``, which leaks file handles.
    """
    with open(path, "rb") as fh:
        return fh.read()


invoice_files = [
    ('invoice_file', ('invoice.jpg', _read_sample("test_samples/sbt/big_image.jpg"))),
]
# NOTE(review): the first entry is labelled imei1.jpg but reads big_image.jpg —
# presumably an intentional stress payload; confirm before relying on it.
imei_files = [
    ('imei_files', ("test_samples/sbt/imei1.jpg", _read_sample("test_samples/sbt/big_image.jpg"))),
    ('imei_files', ("test_samples/sbt/imei2.jpg", _read_sample("test_samples/sbt/imei2.jpg"))),
    ('imei_files', ("test_samples/sbt/imei3.jpg", _read_sample("test_samples/sbt/imei3.jpg"))),
    ('imei_files', ("test_samples/sbt/imei4.jpeg", _read_sample("test_samples/sbt/imei4.jpeg"))),
    ('imei_files', ("test_samples/sbt/imei5.jpg", _read_sample("test_samples/sbt/imei5.jpg"))),
]
def get_imei_files():
    """Return a random prefix of 1..len(imei_files) IMEI sample files.

    The original used ``random.randint(1, len(imei_files) + 1)``: the
    inclusive upper bound is one past the list length, and the slice
    silently clamped it, doubling the probability of "all files".
    """
    num_files = random.randint(1, len(imei_files))
    return imei_files[:num_files]
def get_files():
    """Build one request payload: the invoice plus a random IMEI selection."""
    batch = list(invoice_files)
    batch.extend(get_imei_files())
    return batch
def gen_input(num_input):
    """Lazily yield ``num_input`` payloads of ``(files, token)`` for the pool."""
    produced = 0
    while produced < num_input:
        yield get_files(), token
        produced += 1
# Fan the requests out over a worker pool and collect per-request stats.
results = []
# `with` closes and joins the pool; the original leaked it.
with multiprocessing.Pool(processes=args.num_workers) as pool:
    for result in tqdm.tqdm(pool.imap_unordered(process_file, gen_input(num_input=args.num_requests)), total=args.num_requests):
        results.append(result)

print("## TEST REPORT #################################")
print("Number of requests: {}".format(args.num_requests))
print("Number of concurrent requests: {}".format(args.num_workers))
print("Number of files: 1 invoice, 1-5 imei files (random)")
print("Query time interval for result: {:.3f}s ".format(args.checking_interval))
print("--------------------------------------")
print("SUCCESS RATE")
counter = {}
for result in results:
    counter[result["status"]] = counter.get(result["status"], 0) + 1
total_requests = sum(counter.values())
print("Success rate: {}".format(counter.get(200, 0) / total_requests if total_requests > 0 else -1))
print("Statuses:", counter)
print("--------------------------------------")
print("TIME BY REQUEST")
uploading_time = [x["upload_time"] for x in results if x["success"]]
processing_time = [x["process_time"] for x in results if x["success"]]
if uploading_time:
    print("Uploading time (Avg / Min / Max): {:.3f}s {:.3f}s {:.3f}s".format(sum(uploading_time) / len(uploading_time), min(uploading_time), max(uploading_time)))
    print("Processing time (Avg / Min / Max): {:.3f}s {:.3f}s {:.3f}s".format(sum(processing_time) / len(processing_time), min(processing_time), max(processing_time)))
else:
    # The original printed this warning and then divided by zero anyway.
    print("No valid uploading time")
    print("Check the results!")
print("--------------------------------------")
print("TIME BY IMAGE")
num_images = sum(x["num_files"] for x in results if x["success"])
print("Total images:", num_images)
if num_images > 0:
    print("Uploading time: {:.3f}s".format(sum(uploading_time) / num_images))
    print("Processing time: {:.3f}s".format(sum(processing_time) / num_images))
else:
    # Guard against ZeroDivisionError when every request failed.
    print("No successful requests - per-image stats unavailable")
print("--------------------------------------")

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 MiB

BIN
test_samples/sbt/imei1.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 MiB

BIN
test_samples/sbt/imei2.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 253 KiB

BIN
test_samples/sbt/imei3.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 MiB

BIN
test_samples/sbt/imei4.jpeg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

BIN
test_samples/sbt/imei5.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 153 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 MiB