diff --git a/README.md b/README.md index e69de29..5cf8954 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,2 @@ +# SBT Project + diff --git a/cope2n-api/fwd/settings.py b/cope2n-api/fwd/settings.py index b3eab12..ba84598 100755 --- a/cope2n-api/fwd/settings.py +++ b/cope2n-api/fwd/settings.py @@ -203,6 +203,7 @@ MAX_UPLOAD_SIZE_OF_A_FILE = 100 * 1024 * 1024 # 100 MB MAX_UPLOAD_FILE_SIZE_OF_A_REQUEST = 100 * 1024 * 1024 # 100 MB MAX_UPLOAD_FILES_IN_A_REQUEST = 5 MAX_PIXEL_IN_A_FILE = 5000 +TARGET_MAX_IMAGE_SIZE = (2048, 2048) SIZE_TO_COMPRESS = 2 * 1024 * 1024 MAX_NUMBER_OF_TEMPLATE = 3 MAX_PAGES_OF_PDF_FILE = 50 @@ -211,4 +212,6 @@ CACHES = { 'default': { 'BACKEND': 'django.core.cache.backends.dummy.DummyCache', } -} \ No newline at end of file +} + + diff --git a/cope2n-api/fwd_api/api/ctel_user_view.py b/cope2n-api/fwd_api/api/ctel_user_view.py index 4b2c8c4..033368e 100755 --- a/cope2n-api/fwd_api/api/ctel_user_view.py +++ b/cope2n-api/fwd_api/api/ctel_user_view.py @@ -29,19 +29,6 @@ from fwd import settings class CtelUserViewSet(viewsets.ViewSet): lookup_field = "username" - - # @extend_schema(request=LoginRequest, responses=None, tags=['users'], examples=[ - # OpenApiExample( - # 'ex1', - # summary='Sample Login', - # description='Sample Login', - # value={ - # 'username': 'admin', - # 'password': 'admin' - # } - # ), - # ]) - @extend_schema(request={ 'multipart/form-data': { 'type': 'object', diff --git a/cope2n-api/fwd_api/api/ctel_view.py b/cope2n-api/fwd_api/api/ctel_view.py index bd454d9..d531889 100755 --- a/cope2n-api/fwd_api/api/ctel_view.py +++ b/cope2n-api/fwd_api/api/ctel_view.py @@ -1,10 +1,8 @@ import time import uuid from wsgiref.util import FileWrapper -import base64 from django.core.files.uploadedfile import TemporaryUploadedFile -from django.db import transaction from django.http import HttpResponse, JsonResponse from django.utils.crypto import get_random_string from drf_spectacular.utils import extend_schema @@ -12,12 +10,14 @@ from 
rest_framework import status, viewsets from rest_framework.decorators import action from rest_framework.response import Response from typing import List +from rest_framework.renderers import JSONRenderer + from rest_framework_xml.renderers import XMLRenderer from fwd import settings from ..celery_worker.client_connector import c_connector from ..annotation.api import throw_on_failure from ..constant.common import allowed_p_type, ProcessType, REQUEST_ID, FOLDER_TYPE, \ - FolderFileType, TEMPLATE_ID, EntityStatus, standard_ocr_list, pdf_extensions, image_extensions, allowed_file_extensions + FolderFileType, TEMPLATE_ID, EntityStatus, pdf_extensions, allowed_file_extensions from ..exception.exceptions import RequiredFieldException, InvalidException, NotFoundException, \ PermissionDeniedException, LimitReachedException, LockedEntityException, FileContentInvalidException, ServiceTimeoutException from ..models import SubscriptionRequest, UserProfile, SubscriptionRequestFile, OcrTemplate, Subscription @@ -27,79 +27,6 @@ from ..utils import FileUtils, ProcessUtil class CtelViewSet(viewsets.ViewSet): lookup_field = "username" size_to_compress = settings.SIZE_TO_COMPRESS - - @extend_schema(request={ - 'multipart/form-data': { - 'type': 'object', - 'properties': { - 'file': { - 'type': 'string', - 'format': 'binary' - }, - 'processType': { - 'type': 'string' - }, - }, - 'required': {'file', 'processType'} - } - }, responses=None, tags=['ocr']) - @action(detail=False, url_path="image/process", methods=["POST"]) - # @transaction.atomic - def process(self, request): - s_time = time.time() - # print(30*"=") - # print(f"[DEBUG]: request: {request}") - # print(30*"=") - user_info = ProcessUtil.get_user(request) - user = user_info.user - sub = user_info.current_sub - - validated_data = ProcessUtil.validate_ocr_request_and_get(request, sub) - - provider_code = 'SAP' - rq_id = provider_code + uuid.uuid4().hex - - file_obj: TemporaryUploadedFile = validated_data['file'] - 
file_extension = file_obj.name.split(".")[-1].lower() - p_type = validated_data['type'] - file_name = f"temp_{rq_id}.{file_extension}" - - total_page = 1 - - new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page, - pages_left=total_page, - doc_type="all", - process_type=p_type, status=1, request_id=rq_id, - provider_code=provider_code, - subscription=sub) - new_request.save() - from ..celery_worker.client_connector import c_connector - file_obj.seek(0) - file_path = FileUtils.resize_and_save_file(file_name, new_request, file_obj, 100) - S3_path = FileUtils.save_to_S3(file_name, new_request, file_path) - - files: [{ - "file_name": file_name, - "file_path": file_path, # local path to file - "file_type": "" - },] - - if file_extension in pdf_extensions: - c_connector.do_pdf((rq_id, sub.id, p_type, user.id, files)) - # b_url = ProcessUtil.process_pdf_file(file_name, file_obj, new_request, user) - elif file_extension in image_extensions: - b_url = ProcessUtil.process_image_file(file_name, file_obj, new_request, user) - j_time = time.time() - print(f"[INFO]: Duration of Pre-processing: {j_time - s_time}s") - print(f"[INFO]: b_url: {b_url}") - if p_type in standard_ocr_list: - ProcessUtil.send_to_queue2(rq_id + "_sub_0", sub.id, b_url, user.id, p_type) - if p_type == ProcessType.TEMPLATE_MATCHING.value: - ProcessUtil.send_template_queue(rq_id, b_url, validated_data['template'], user.id) - else: - return JsonResponse(status=status.HTTP_406_NOT_ACCEPTABLE, data={"request_id": rq_id, "message": f"File {file_extension} is now allowed"}) - - return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id}) @extend_schema(request={ 'multipart/form-data': { @@ -124,12 +51,7 @@ class CtelViewSet(viewsets.ViewSet): } }, responses=None, tags=['ocr']) @action(detail=False, url_path="images/process", methods=["POST"]) - # @transaction.atomic def processes(self, request): - s_time = time.time() - # print(30*"=") - # print(f"[DEBUG]: request: {request}") - # 
print(30*"=") user_info = ProcessUtil.get_user(request) user = user_info.user sub = user_info.current_sub @@ -147,8 +69,7 @@ class CtelViewSet(viewsets.ViewSet): "invoice": invoice_file_objs } total_page = len(files.keys()) - # file_paths = [] - list_urls = [] + p_type = validated_data['type'] new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page, pages_left=total_page, @@ -176,7 +97,6 @@ class CtelViewSet(viewsets.ViewSet): compact_files.append(this_file) c_connector.do_pdf((rq_id, sub.id, p_type, user.id, compact_files)) - j_time = time.time() return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id}) @extend_schema(request={ @@ -202,12 +122,7 @@ class CtelViewSet(viewsets.ViewSet): } }, responses=None, tags=['ocr']) @action(detail=False, url_path="images/process_sync", methods=["POST"]) - # @transaction.atomic def processes_sync(self, request): - s_time = time.time() - # print(30*"=") - # print(f"[DEBUG]: request: {request}") - # print(30*"=") user_info = ProcessUtil.get_user(request) user = user_info.user sub = user_info.current_sub @@ -225,8 +140,6 @@ class CtelViewSet(viewsets.ViewSet): "invoice": invoice_file_objs } total_page = len(files.keys()) - # file_paths = [] - list_urls = [] p_type = validated_data['type'] new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page, pages_left=total_page, @@ -244,18 +157,16 @@ class CtelViewSet(viewsets.ViewSet): _name = f"temp_{doc_type}_{rq_id}_{i}.{_ext}" doc_file.seek(0) file_path = FileUtils.resize_and_save_file(_name, new_request, doc_file, 100) - S3_path = FileUtils.save_to_S3(_name, new_request, file_path) + _ = FileUtils.save_to_S3(_name, new_request, file_path) count += 1 this_file = { - "file_name": _name, - "file_path": file_path, - "file_type": doc_type + "file_name": _name, + "file_path": file_path, + "file_type": doc_type } compact_files.append(this_file) c_connector.do_pdf((rq_id, sub.id, p_type, user.id, compact_files)) - j_time = time.time() - time_out = 
120 start = time.time() while time.time() - start < time_out: @@ -322,13 +233,7 @@ class CtelViewSet(viewsets.ViewSet): } }, responses=None, tags=['ocr']) @action(detail=False, url_path="images/feedback", methods=["POST"]) - # @transaction.atomic def feedback(self, request): - # s_time = time.time() - # user_info = ProcessUtil.get_user(request) - # user = user_info.user - # sub = user_info.current_sub - validated_data = ProcessUtil.sbt_validate_feedback(request) rq_id = validated_data['request_id'] @@ -425,9 +330,6 @@ class CtelViewSet(viewsets.ViewSet): return HttpResponse(FileWrapper(FileUtils.get_file(media_data.file_path)), status=status.HTTP_200_OK, headers={'Content-Disposition': 'filename={fn}'.format(fn=file_name)}, content_type=content_type) - - from rest_framework.renderers import JSONRenderer - from rest_framework_xml.renderers import XMLRenderer @extend_schema(request=None, responses=None, tags=['data']) @throw_on_failure(InvalidException(excArgs='data')) @@ -499,76 +401,3 @@ class CtelViewSet(viewsets.ViewSet): serializer.is_valid() return Response(status=status.HTTP_200_OK, data=serializer.data[0]) - - @action(detail=False, url_path="image/process/app", methods=["POST"]) - # @transaction.atomic - def process_app(self, request): - app_id = "THIS_IS_OUR_APP_TEST_ACCOUNT_9123" - users = UserProfile.objects.filter(sync_id=app_id) - if len(users) > 1: - raise InvalidException(excArgs='user') - if len(users) == 0: - user = UserProfile(sync_id=app_id, limit_total_pages=1000, status=EntityStatus.ACTIVE.value) - user.save() - else: - user = users[0] - - subs = Subscription.objects.filter(user=user) - if len(subs) > 1: - raise InvalidException(excArgs='sub') - if len(subs) == 0: - sub = Subscription(user=user, limit_token=10000, current_token=0, status=EntityStatus.ACTIVE.value) - sub.save() - else: - sub = subs[0] - cur = sub.current_token - lim = sub.limit_token - - list_file = request.data.getlist('file') - s_time = time.time() - - if "processType" not in 
request.data or int(request.data['processType']) not in allowed_p_type: - raise InvalidException(excArgs='processType') - p_type: int = int(request.data['processType']) - - if cur + ProcessUtil.token_value(p_type) >= lim: - raise LimitReachedException(excArgs=('Number of request', str(sub.limit_token), 'times')) - - FileUtils.validate_list_file(list_file) - - if ("templateId" not in request.data) and p_type == ProcessType.TEMPLATE_MATCHING.value: - raise InvalidException(excArgs=TEMPLATE_ID) - - provider_code = 'Ctel' - - rq_id = provider_code + str(p_type) + get_random_string(5) + str(round(time.time() * 1000)) - - file_obj: TemporaryUploadedFile = list_file[0] - - file_name = "temp_file_" + rq_id + get_random_string(2) + ".jpg" - - total_page = 1 - - new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page, - process_type=p_type, status=1, request_id=rq_id, - provider_code=provider_code, subscription=sub) - new_request.save() - - if p_type == ProcessType.ID_CARD.value or p_type == ProcessType.INVOICE.value or p_type == ProcessType.OCR_WITH_BOX.value or p_type == ProcessType.DRIVER_LICENSE.value: - if file_obj.size > self.size_to_compress: - quality = 90 - else: - quality = 100 - file_path = FileUtils.resize_and_save_file(file_name, new_request, file_obj, quality) - new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path, - request=new_request, - file_name=file_name) - new_request_file.save() - - b_url = FileUtils.build_url(FolderFileType.REQUESTS.value, new_request.request_id, user.id, file_name) - j_time = time.time() - print("Json {}".format(j_time - s_time)) - ProcessUtil.send_to_queue2(rq_id, sub.id, b_url, user.id, p_type) - return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id}) - - return JsonResponse(status=status.HTTP_502_BAD_GATEWAY, data={"message": "unknown_error"}) diff --git a/cope2n-api/fwd_api/celery_worker/internal_task.py b/cope2n-api/fwd_api/celery_worker/internal_task.py index 
9f311c3..701f691 100755 --- a/cope2n-api/fwd_api/celery_worker/internal_task.py +++ b/cope2n-api/fwd_api/celery_worker/internal_task.py @@ -1,14 +1,12 @@ -from celery import shared_task import time import fitz import uuid import os import base64 +from fwd_api.models import SubscriptionRequest, UserProfile from fwd_api.celery_worker.worker import app -from ..constant.common import ProcessType, \ - FolderFileType, standard_ocr_list, image_extensions -from django.core.files.uploadedfile import TemporaryUploadedFile +from ..constant.common import FolderFileType, image_extensions from ..exception.exceptions import FileContentInvalidException from ..utils import FileUtils, ProcessUtil, S3_process from celery.utils.log import get_task_logger @@ -64,7 +62,7 @@ def process_pdf_byte(file_name: str, file_path: str, request, user, file_obj) -> def process_image_file(file_name: str, file_path, request, user) -> list: - from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile + from fwd_api.models import SubscriptionRequestFile new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path, request=request, @@ -87,9 +85,7 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files): "file_type": "" },] """ - from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile,UserProfile start = time.time() - from django.conf import settings new_request = SubscriptionRequest.objects.filter(request_id=rq_id)[0] user = UserProfile.objects.filter(id=user_id).first() b_urls = [] @@ -128,10 +124,6 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files): new_request.doc_type = doc_type_string new_request.save() - # if p_type in standard_ocr_list: - # ProcessUtil.send_to_queue2(rq_id, sub_id, b_urls, user_id, p_type) - # if p_type == ProcessType.TEMPLATE_MATCHING.value: - # ProcessUtil.send_template_queue(rq_id, b_urls, '', user_id) @app.task(name='upload_file_to_s3') def upload_file_to_s3(local_file_path, s3_key): diff --git 
a/cope2n-api/fwd_api/celery_worker/process_result_tasks.py b/cope2n-api/fwd_api/celery_worker/process_result_tasks.py index 6e8c417..6e23964 100755 --- a/cope2n-api/fwd_api/celery_worker/process_result_tasks.py +++ b/cope2n-api/fwd_api/celery_worker/process_result_tasks.py @@ -1,9 +1,5 @@ -import traceback -import time -import uuid from fwd_api.celery_worker.worker import app from fwd_api.models import SubscriptionRequest -from django.utils.crypto import get_random_string from fwd_api.exception.exceptions import InvalidException diff --git a/cope2n-api/fwd_api/utils/FileUtils.py b/cope2n-api/fwd_api/utils/FileUtils.py index 6e3b2ba..eb1cbaf 100755 --- a/cope2n-api/fwd_api/utils/FileUtils.py +++ b/cope2n-api/fwd_api/utils/FileUtils.py @@ -1,7 +1,7 @@ import io import os import traceback -import base64 +import pathlib import json from PIL import Image, ExifTags @@ -14,6 +14,7 @@ from fwd_api.exception.exceptions import GeneralException, RequiredFieldExceptio from fwd_api.models import SubscriptionRequest, OcrTemplate from fwd_api.utils import ProcessUtil from fwd_api.utils.CryptoUtils import image_authenticator +from fwd_api.utils.image import resize from ..celery_worker.client_connector import c_connector import imagesize @@ -126,15 +127,26 @@ def save_template_file(file_name: str, rq: OcrTemplate, file: TemporaryUploadedF print(e) raise ServiceUnavailableException() +def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path): + try: + file_path = os.path.join(folder_path, file_name) + extension = file_name.split(".")[-1] + if extension.lower() == "pdf": + save_pdf(file_path, file) + else: + save_img(file_path, file, quality) + except InvalidDecompressedSizeException as e: + raise e + except Exception as e: + print(e) + raise ServiceUnavailableException() + return file_path + def resize_and_save_file(file_name: str, rq: SubscriptionRequest, file: TemporaryUploadedFile, quality): try: folder_path = get_folder_path(rq) - # 
print(f"[DEBUG]: folder_path: {folder_path}") - is_exist = os.path.exists(folder_path) - if not is_exist: - # Create a new directory because it does not exist - os.makedirs(folder_path) + pathlib.Path(folder_path).mkdir(exist_ok=True, parents=True) return save_file_with_path(file_name, file, quality, folder_path) except InvalidDecompressedSizeException as e: raise e @@ -155,22 +167,6 @@ def save_to_S3(file_name, rq, local_file_path): print(f"[ERROR]: {e}") raise ServiceUnavailableException() -def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path): - try: - file_path = os.path.join(folder_path, file_name) - extension = file_name.split(".")[-1] - - if extension in ['pdf', 'PDF']: - save_pdf(file_path, file) - else: - save_img(file_path, file, quality) - except InvalidDecompressedSizeException as e: - raise e - except Exception as e: - print(e) - raise ServiceUnavailableException() - return file_path - def save_pdf(file_path: str, file: TemporaryUploadedFile): f = open(file_path, 'wb+') for chunk in file.chunks(): @@ -209,7 +205,10 @@ def save_img(file_path: str, file: TemporaryUploadedFile, quality): print(ex) print("Rotation Error") traceback.print_exc() - image.convert('RGB').save(file_path, optimize=True, quality=quality) + + image = resize(image, max_w=settings.TARGET_MAX_IMAGE_SIZE[0], max_h=settings.TARGET_MAX_IMAGE_SIZE[1]) + image = image.convert('RGB') + image.save(file_path, optimize=True, quality=quality) def build_media_url(folder: str, uid: str, file_name: str = None) -> str: token = image_authenticator.generate_img_token() diff --git a/cope2n-api/fwd_api/utils/ProcessUtil.py b/cope2n-api/fwd_api/utils/ProcessUtil.py index 4c292e8..131ee07 100755 --- a/cope2n-api/fwd_api/utils/ProcessUtil.py +++ b/cope2n-api/fwd_api/utils/ProcessUtil.py @@ -320,13 +320,6 @@ def send_to_queue2(rq_id, sub_id, file_url, user_id, typez): c_connector.process_invoice_manulife((rq_id, file_url)) elif typez == ProcessType.SBT_INVOICE.value: 
c_connector.process_invoice_sbt((rq_id, file_url)) - # elif typez == ProcessType.DRIVER_LICENSE.value: - # c_connector.process_driver_license( - # (rq_id, sub_id, map_process_type_to_folder_name(typez), file_url, user_id)) - # elif typez == ProcessType.OCR_WITH_BOX.value: - # c_connector.process_ocr_with_box((rq_id, file_url)) - # elif typez == ProcessType.TEMPLATE_MATCHING.value: - # c_connector.process_template_matching((rq_id, file_url)) except Exception as e: print(e) raise BadGatewayException() @@ -418,36 +411,16 @@ def process_image_local_file(file_name: str, file_path: str, request: Subscripti }] def pdf_to_images_urls(doc: fitz.Document, request: SubscriptionRequest, user, dpi: int = 300) -> list: - def resize(image, max_w=2048, max_h=2048): - logger.info(f"[DEBUG]: image.size: {image.size}, type(image): {type(image)}") - cur_w, cur_h = image.width, image.height - image_bytes = image.samples - image = Image.frombytes("RGB", [cur_w, cur_h], image_bytes) - if cur_h > max_w or cur_h > max_h: - ratio_w = max_w/cur_w - ratio_h = max_h/cur_h - ratio = min([ratio_h, ratio_w]) - new_w = int(ratio*cur_w) - new_h = int(ratio*cur_h) - image = image.resize((new_w, new_h)) - - return image - - zoom = dpi // 72 - magnify = fitz.Matrix(zoom, zoom) pdf_extracted = [] for idx, page in enumerate(doc): saving_path = FileUtils.get_folder_path(request) - # saving_path = r'C:\Users\mrdra\PycharmProjects\Ctel\test_data' break_file_name = f'break_{idx}.jpg' saving_path = os.path.join(saving_path, break_file_name) page = doc.load_page(idx) pix = page.get_pixmap(dpi=250) # render page to an image - # pix = resize(pix) - # print(f"[DEBUG]: pix.size: {pix.size}") if pix.size > 8*3*settings.MAX_PIXEL_IN_A_FILE*settings.MAX_PIXEL_IN_A_FILE: - raise InvalidDecompressedSizeException(excArgs=(str(width), str(height), str(settings.MAX_PIXEL_IN_A_FILE))) + raise InvalidDecompressedSizeException(excArgs=(str(pix.width), str(pix.height), str(settings.MAX_PIXEL_IN_A_FILE))) 
pix.save(saving_path) print(f"Saving {saving_path}") new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=saving_path, diff --git a/cope2n-api/fwd_api/utils/image.py b/cope2n-api/fwd_api/utils/image.py new file mode 100644 index 0000000..0964adb --- /dev/null +++ b/cope2n-api/fwd_api/utils/image.py @@ -0,0 +1,15 @@ +from PIL import Image + +def resize(image, max_w=2048, max_h=2048): + cur_w, cur_h = image.width, image.height + if hasattr(image, "samples"): + image = Image.frombytes("RGB", [cur_w, cur_h], image.samples) + if cur_w > max_w or cur_h > max_h: + ratio_w = max_w/cur_w + ratio_h = max_h/cur_h + ratio = min([ratio_h, ratio_w]) + new_w = int(ratio*cur_w) + new_h = int(ratio*cur_h) + image = image.resize((new_w, new_h)) + + return image \ No newline at end of file diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml deleted file mode 100755 index 5e09ccf..0000000 --- a/docker-compose-dev.yml +++ /dev/null @@ -1,195 +0,0 @@ -version: '3.0' -# TODO: use docker-compose extend: for compact purpose -networks: - ctel: - driver: bridge - -services: - cope2n-fi-manulife-sbt: - build: - context: cope2n-ai-fi - shm_size: 10gb - dockerfile: Dockerfile - shm_size: 10gb - image: tannedcung/cope2n-ai-fi-sbt - container_name: "TannedCung-cope2n-ai-fi-manulife-sbt-dev" - networks: - - ctel - privileged: true - environment: - - CELERY_BROKER=amqp://${RABBITMQ_DEFAULT_USER}:${RABBITMQ_DEFAULT_PASS}@rabbitmq-manulife-sbt:5672 - - CUDA_VISIBLE_DEVICES=1 - volumes: - - ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only - - ./cope2n-api:/workspace/cope2n-api - - ./cope2n-fe:/workspace/cope2n-fe - - ./cope2n-ai-fi/models:/models - working_dir: /workspace/cope2n-ai-fi - # deploy: - # resources: - # reservations: - # devices: - # - driver: nvidia - # count: 1 - # capabilities: [gpu] - # command: bash -c "tail -f > /dev/null" - command: bash run.sh - # Back-end services - be-ctel-manulife-sbt: - build: - context: cope2n-api - dockerfile: 
Dockerfile-dev - # ports: - # - 9800:9000 - image: tannedcung/cope2n-be - container_name: "TannedCung-cope2n-be-ctel-manulife-sbt-dev" - environment: - - MEDIA_ROOT=${MEDIA_ROOT} - - DB_ENGINE=${DB_ENGINE} - - DB_SCHEMA=${DB_SCHEMA} - - DB_USER=${DB_USER} - - DB_PASSWORD=${DB_PASSWORD} - - DB_HOST=${DB_HOST} - - DB_PORT=${DB_PUBLIC_PORT} - - DEBUG=${DEBUG} - - CORS_ALLOWED_ORIGINS=${CORS_ALLOWED_ORIGINS} - - BASE_PORT=${BASE_PORT} - - CTEL_KEY=${CTEL_KEY} - - SECRET_KEY=${SECRET_KEY} - - ALLOWED_HOSTS=${ALLOWED_HOSTS} - - BROKER_URL=amqp://${RABBITMQ_DEFAULT_USER}:${RABBITMQ_DEFAULT_PASS}@rabbitmq-manulife-sbt:5672 - - BASE_URL=http://be-ctel-manulife-sbt:${BASE_PORT} - - BASE_UI_URL=http://fe:${VITE_PORT} - - AUTH_TOKEN_LIFE_TIME=${AUTH_TOKEN_LIFE_TIME} - - IMAGE_TOKEN_LIFE_TIME=${IMAGE_TOKEN_LIFE_TIME} - - INTERNAL_SDS_KEY=${INTERNAL_SDS_KEY} - - FI_USER_NAME=${FI_USER_NAME} - - FI_PASSWORD=${FI_PASSWORD} - # restart: always - networks: - - ctel - volumes: - - ${HOST_MEDIA_FOLDER}:${MEDIA_ROOT} - - ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only - - ./cope2n-api:/workspace/cope2n-api - - ./cope2n-fe:/workspace/cope2n-fe - working_dir: /workspace/cope2n-api - # depends_on: - # db: - # condition: service_started - # rabbitmq: - # condition: service_started - # command: sh -c "python manage.py collectstatic --no-input && - # python manage.py makemigrations && - # python manage.py compilemessages && - command: "gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker -b 0.0.0.0:9000" # pre-makemigrations on prod - # command: sh -c "tail -f > /dev/null" - - be-celery-manulife-sbt: - # build: - # context: cope2n-api - # dockerfile: Dockerfile-dev - # args: - # - "UID=${UID:-1000}" - # - "GID=${GID:-1000}" - image: tannedcung/cope2n-be - container_name: "TannedCung-cope2n-be-celery-manulife-sbt-dev" - environment: - - MEDIA_ROOT=${MEDIA_ROOT} - - PYTHONPATH=${PYTHONPATH}:/app # For import module - - PYTHONUNBUFFERED=1 # For show print log - - 
DB_SCHEMA=${DB_SCHEMA} - - DB_USER=${DB_USER} - - DB_PASSWORD=${DB_PASSWORD} - - DB_HOST=${DB_HOST} - - DB_PORT=${DB_INTERNAL_PORT} - - BROKER_URL=${BROKER_URL} - - DB_ENGINE=${DB_ENGINE} - - DEBUG=${DEBUG} - networks: - - ctel - # restart: always - depends_on: - db-manulife-sbt: - condition: service_started - rabbitmq-manulife-sbt: - condition: service_started - volumes: - - ${HOST_MEDIA_FOLDER}:${MEDIA_ROOT} - - ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only - - ./cope2n-api:/workspace/cope2n-api - - ./cope2n-fe:/workspace/cope2n-fe - working_dir: /workspace/cope2n-api - command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO" - # Back-end persistent - db-manulife-sbt: - mem_reservation: 500m - mem_limit: 1g - container_name: TannedCung-cope2n-be-manulife-sbt-db - image: postgres:14.7-alpine - volumes: - - db_data:/var/lib/postgresql/data - - ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only - - ./cope2n-api:/workspace/cope2n-api - - ./cope2n-fe:/workspace/cope2n-fe - working_dir: /workspace/cope2n-api - networks: - - ctel - environment: - - POSTGRES_USER=${DB_USER} - - POSTGRES_PASSWORD=${DB_PASSWORD} - - POSTGRES_DB=${DB_SCHEMA} - - rabbitmq-manulife-sbt: - mem_reservation: 600m - mem_limit: 4g - container_name: TannedCung-cope2n-be-rabbitmq-manulife-sbt - restart: always - image: rabbitmq:3.10-alpine - ports: - - 5672:5672 - volumes: - - rabbitmq_data:/var/lib/rabbitmq - - ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only - - ./cope2n-api:/workspace/cope2n-api - - ./cope2n-fe:/workspace/cope2n-fe - working_dir: /workspace/cope2n-api - networks: - - ctel - environment: - - RABBITMQ_DEFAULT_USER=${RABBITMQ_DEFAULT_USER} - - RABBITMQ_DEFAULT_PASS=${RABBITMQ_DEFAULT_PASS} - # Front-end services - fe: - # build: - # args: - # - PORT=${PORT} - # context: cope2n-fe - # shm_size: 10gb - # dockerfile: Dockerfile-dev - shm_size: 10gb - image: tannedcung/cope2n-be - container_name: 
"TannedCung-cope2n-fe-ctel-manulife-sbt-dev" - privileged: true - ports: - - 9801:9001 - networks: - - ctel - volumes: - - ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only - - ./cope2n-api:/workspace/cope2n-api - - ./cope2n-fe:/workspace/cope2n-fe - working_dir: /workspace/cope2n-fe - # deploy: - # resources: - # reservations: - # devices: - # - driver: nvidia - # count: 1 - # capabilities: [gpu] - command: bash -c "source /root/.bashrc && ldconfig && npm start" - # command: sh -c "tail -f > /dev/null" - -volumes: - db_data: - rabbitmq_data: \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index a1c6b1f..fef9b6f 100755 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -41,8 +41,8 @@ services: build: context: cope2n-api dockerfile: Dockerfile - ports: - - 9880:9000 + # ports: + # - 9880:9000 image: sidp/cope2n-be-fi-sbt # container_name: "sidp-cope2n-be-ctel-sbt" environment: @@ -67,7 +67,7 @@ services: - INTERNAL_SDS_KEY=${INTERNAL_SDS_KEY} - FI_USER_NAME=${FI_USER_NAME} - FI_PASSWORD=${FI_PASSWORD} - # - S3_ENDPOINT=http://minio:9884 + - S3_ENDPOINT=http://minio:9884 - S3_ACCESS_KEY=${S3_ACCESS_KEY} - S3_SECRET_KEY=${S3_SECRET_KEY} - S3_BUCKET_NAME=${S3_BUCKET_NAME} @@ -79,30 +79,30 @@ services: - BE_static:/app/static # - ./cope2n-api:/app working_dir: /app - # depends_on: - # db: - # condition: service_started - # rabbitmq: - # condition: service_started + depends_on: + db-sbt: + condition: service_started + # rabbitmq: + # condition: service_started command: sh -c "python manage.py collectstatic --no-input && python manage.py migrate && python manage.py compilemessages && gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker -b 0.0.0.0:9000" # pre-makemigrations on prod # command: sh -c "tail -f > /dev/null" - # minio: - # image: minio/minio - #ports: + minio: + image: minio/minio + # ports: # - 9884:9884 # - 9885:9885 - #environment: - # - MINIO_ACCESS_KEY=${S3_ACCESS_KEY} - # - 
MINIO_SECRET_KEY=${S3_SECRET_KEY} - #volumes: - # - ./minio_data:/data - #networks: - # - ctel-sbt - #command: server --address :9884 --console-address :9885 /data + environment: + - MINIO_ACCESS_KEY=${S3_ACCESS_KEY} + - MINIO_SECRET_KEY=${S3_SECRET_KEY} + volumes: + - ./data/minio_data:/data + networks: + - ctel-sbt + command: server --address :9884 --console-address :9885 /data be-celery-sbt: # build: @@ -130,7 +130,7 @@ services: - CTEL_KEY=${CTEL_KEY} - SECRET_KEY=${SECRET_KEY} - ALLOWED_HOSTS=${ALLOWED_HOSTS} - # - S3_ENDPOINT=http://minio:9884 + - S3_ENDPOINT=http://minio:9884 - S3_ACCESS_KEY=${S3_ACCESS_KEY} - S3_SECRET_KEY=${S3_SECRET_KEY} - S3_BUCKET_NAME=${S3_BUCKET_NAME} @@ -146,11 +146,10 @@ services: condition: service_started volumes: - ${HOST_MEDIA_FOLDER}:${MEDIA_ROOT} - # - sqlite_data:/app/ - # - ./cope2n-api:/app working_dir: /app command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO" + # Back-end persistent db-sbt: mem_reservation: 500m @@ -158,7 +157,7 @@ services: # container_name: sidp-cope2n-be-sbt-db image: postgres:14.7-alpine volumes: - - ./postgres_data:/var/lib/postgresql/data + - ./data/postgres_data:/var/lib/postgresql/data working_dir: /workspace/cope2n-api networks: - ctel-sbt @@ -191,10 +190,10 @@ services: dockerfile: Dockerfile shm_size: 10gb image: sidp/cope2n-fe-fi-sbt - container_name: "sidp-cope2n-fe-ctel-sbt" + # container_name: "sidp-cope2n-fe-ctel-sbt" privileged: true ports: - - 9881:80 + - 9889:80 depends_on: be-ctel-sbt: condition: service_started @@ -202,10 +201,9 @@ services: condition: service_started environment: - VITE_PROXY=http://be-ctel-sbt:${BASE_PORT} - # - VITE_PROXY=http://42.96.42.13:9880 - VITE_API_BASE_URL=http://fe-sbt:80 volumes: - - BE_static:/backend-static l-sbt + - BE_static:/backend-static networks: - ctel-sbt diff --git a/speedtest.py b/speedtest.py new file mode 100644 index 0000000..8029038 --- /dev/null +++ b/speedtest.py @@ -0,0 +1,203 @@ +import requests +import time +import 
argparse +import multiprocessing +import tqdm +import random +import traceback + + +parser = argparse.ArgumentParser() +parser.add_argument("--host", dest="host", default="https://sbt.idp.sdsrv.ai", required=False) +parser.add_argument("-u", "--username", help="Username to connect to server", required=True) +parser.add_argument("-p", "--password", help="Password to connect to server", required=True) +parser.add_argument("--num_requests", type=int, help="Number of requests", required=False, default=100) +parser.add_argument("--num_workers", type=int, help="Number of workers", required=False, default=3) +parser.add_argument("--checking_interval", type=float, help="Interval result checking time", required=False, default=0.5) +args = parser.parse_args() + +PROCESSING_TIMEOUT = 60 + + +# ================================================================= +# GET THE TOKEN +response = requests.post(f'{args.host}/api/ctel/login/', json={ + 'username': args.username, + 'password': args.password +}) +try: + token = response.json()['token'] +except: + print("Failed to login") + print(response.content) +# After the login, store the token in the memory (RAM) or DB +# Re-login to issue a new token after 6 days. 
# =================================================================

def _failure(status):
    """Build a zeroed result record for a failed request."""
    return {
        "success": False,
        "status": status,
        "upload_time": 0,
        "process_time": 0,
        "num_files": 0,
    }


def process_file(data):
    """Upload one batch of files to the OCR endpoint, then poll for its result.

    data is a (files, token) pair: `files` is a list of multipart tuples for
    `requests`, `token` the Authorization header value.  Returns a record dict
    with keys success/status/upload_time/process_time/num_files.
    """
    files, token = data
    num_files = len(files)
    # Extra form field; 12 is presumably the SBT process type — confirm
    # against the server-side ProcessType constants.
    files.append(
        ('processType', (None, 12)),
    )
    # =================================================================
    # UPLOAD THE FILES
    start_time = time.time()
    try:
        response = requests.post(
            f'{args.host}/api/ctel/images/process/',
            headers={'Authorization': token},
            files=files,
            timeout=100,
        )
    except requests.exceptions.Timeout:
        print("Timeout occurred while uploading")
        return _failure("timeout")
    except Exception as e:
        print(e)
        traceback.print_exc()
        print("Unknown exception occurred while uploading")
        return _failure("unknown error")
    body = response.json()
    if "request_id" not in body:
        print("Missing request_id")
        print(body)
        return _failure("unknown error")
    # Reuse the parsed body instead of calling response.json() a second time.
    request_id = body["request_id"]
    upload_time = time.time() - start_time
    # =================================================================

    # =================================================================
    # POLL FOR THE RESULT
    start_time = time.time()
    while True:
        try:
            response = requests.get(
                f'{args.host}/api/ctel/result/{request_id}/',
                headers={'Authorization': token},
                timeout=100,
            )
        except requests.exceptions.Timeout:
            print("Timeout occurred while requerying result")
            return _failure("timeout")
        except Exception as e:
            print(e)
            traceback.print_exc()
            # Was "while uploading" in the original — copy-paste slip.
            print("Unknown exception occurred while requerying result")
            return _failure("unknown error")
        body = response.json()
        result = body.get("data", None)
        if result:
            print(result)  # Got the response
            if result.get("status", 200) != 200:
                # Status for the record comes from the OUTER body, as before.
                return _failure(body.get("status", -1))
            break
        if time.time() - start_time > PROCESSING_TIMEOUT:
            print("Timeout!")
            return _failure("timeout")
        time.sleep(args.checking_interval)
    process_time = time.time() - start_time
    # =================================================================
    return {
        "success": True,
        "status": 200,
        "upload_time": upload_time,
        "process_time": process_time,
        "num_files": num_files,
    }


def _read_sample(path):
    """Read a sample file fully, closing the handle (the original leaked it)."""
    with open(path, "rb") as f:
        return f.read()


invoice_files = [
    ('invoice_file', ('invoice.jpg', _read_sample("test_samples/sbt/big_image.jpg"))),
]
# NOTE(review): imei1 deliberately(?) reuses big_image.jpg — presumably to
# stress-test oversized uploads; confirm it is not a copy-paste slip.
imei_files = [
    ('imei_files', ("test_samples/sbt/imei1.jpg", _read_sample("test_samples/sbt/big_image.jpg"))),
    ('imei_files', ("test_samples/sbt/imei2.jpg", _read_sample("test_samples/sbt/imei2.jpg"))),
    ('imei_files', ("test_samples/sbt/imei3.jpg", _read_sample("test_samples/sbt/imei3.jpg"))),
    ('imei_files', ("test_samples/sbt/imei4.jpeg", _read_sample("test_samples/sbt/imei4.jpeg"))),
    ('imei_files', ("test_samples/sbt/imei5.jpg", _read_sample("test_samples/sbt/imei5.jpg"))),
]


def get_imei_files():
    """Return a random 1..len(imei_files) prefix of the IMEI samples.

    The original used randint(1, len+1); randint is inclusive, so it could ask
    for one more file than exists — the slice silently capped it, which
    double-weighted the maximum count.
    """
    num_files = random.randint(1, len(imei_files))
    return imei_files[:num_files]


def get_files():
    """One invoice plus a random selection of IMEI files."""
    return invoice_files + get_imei_files()


def gen_input(num_input):
    """Yield (files, token) work items for the worker pool."""
    for _ in range(num_input):
        yield (get_files(), token)


pool = multiprocessing.Pool(processes=args.num_workers)
results = []
for result in tqdm.tqdm(
        pool.imap_unordered(process_file, gen_input(num_input=args.num_requests)),
        total=args.num_requests):
    results.append(result)

print("## TEST REPORT #################################")
print("Number of requests: {}".format(args.num_requests))
print("Number of concurrent requests: {}".format(args.num_workers))
print("Number of files: 1 invoice, 1-5 imei files (random)")
print("Query time interval for result: {:.3f}s ".format(args.checking_interval))
print("--------------------------------------")
print("SUCCESS RATE")
counter = {}
for result in results:
    counter[result["status"]] = counter.get(result["status"], 0) + 1
total_requests = sum(counter.values())
print("Success rate: {}".format(counter.get(200, 0) / total_requests if total_requests > 0 else -1))
print("Statuses:", counter)
print("--------------------------------------")
print("TIME BY REQUEST")
uploading_time = [x["upload_time"] for x in results if x["success"]]
processing_time = [x["process_time"] for x in results if x["success"]]
if uploading_time:
    print("Uploading time (Avg / Min / Max): {:.3f}s {:.3f}s {:.3f}s".format(
        sum(uploading_time) / len(uploading_time), min(uploading_time), max(uploading_time)))
    print("Processing time (Avg / Min / Max): {:.3f}s {:.3f}s {:.3f}s".format(
        sum(processing_time) / len(processing_time), min(processing_time), max(processing_time)))
else:
    # The original printed this warning and then divided by zero anyway.
    print("No valid uploading time")
    print("Check the results!")
print("--------------------------------------")
print("TIME BY IMAGE")
num_images = sum(x["num_files"] for x in results if x["success"])
print("Total images:", num_images)
if num_images > 0:
    print("Uploading time: {:.3f}s".format(sum(uploading_time) / num_images))
    print("Processing time: {:.3f}s".format(sum(processing_time) / num_images))
else:
    # Guard against ZeroDivisionError when every request failed.
    print("No successful uploads; per-image timing unavailable")
print("--------------------------------------")
diff --git a/test_samples/sbt/imei1.jpg b/test_samples/sbt/imei1.jpg new file mode 100644 index 0000000..dab6a81 Binary files /dev/null and b/test_samples/sbt/imei1.jpg differ diff --git a/test_samples/sbt/imei2.jpg b/test_samples/sbt/imei2.jpg new file mode 100644 index 0000000..ac63a83 Binary files /dev/null and b/test_samples/sbt/imei2.jpg differ diff --git a/test_samples/sbt/imei3.jpg b/test_samples/sbt/imei3.jpg new file mode 100644 index 0000000..ec56b13 Binary files /dev/null and b/test_samples/sbt/imei3.jpg differ diff --git a/test_samples/sbt/imei4.jpeg b/test_samples/sbt/imei4.jpeg new file mode 100644 index 0000000..0d76dbc Binary files /dev/null and b/test_samples/sbt/imei4.jpeg differ diff --git a/test_samples/sbt/imei5.jpg b/test_samples/sbt/imei5.jpg new file mode 100644 index 0000000..611a339 Binary files /dev/null and b/test_samples/sbt/imei5.jpg differ diff --git a/test_samples/sbt/invoice.jpg b/test_samples/sbt/invoice.jpg new file mode 100644 index 0000000..1f5965b Binary files /dev/null and b/test_samples/sbt/invoice.jpg differ