From a84e3dce055b7b8ede0186ea47c3dc3da095a929 Mon Sep 17 00:00:00 2001 From: dx-tan Date: Tue, 5 Dec 2023 12:59:06 +0700 Subject: [PATCH] Add: support for pdf file --- cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py | 9 +- cope2n-ai-fi/common/utils_kvu/split_docs.py | 2 + cope2n-api/fwd_api/api/ctel_view.py | 61 ++++++------- .../fwd_api/celery_worker/internal_task.py | 91 ++++++++++++++----- cope2n-api/fwd_api/constant/common.py | 62 +------------ cope2n-api/fwd_api/exception/exceptions.py | 6 ++ cope2n-api/fwd_api/models/UserProfile.py | 6 +- cope2n-api/fwd_api/utils/FileUtils.py | 9 +- cope2n-api/fwd_api/utils/ProcessUtil.py | 12 +++ 9 files changed, 134 insertions(+), 124 deletions(-) diff --git a/cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py b/cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py index 13f2b85..d20fc11 100755 --- a/cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py +++ b/cope2n-ai-fi/api/sdsap_sbt/prediction_sbt.py @@ -3,6 +3,7 @@ import urllib import random import numpy as np from pathlib import Path +import uuid import sys, os cur_dir = str(Path(__file__).parents[2]) sys.path.append(cur_dir) @@ -35,14 +36,15 @@ def sbt_predict(image_url, engine) -> None: save_dir = "./tmp_results" # image_path = os.path.join(save_dir, f"{image_url}.jpg") - image_path = os.path.join(save_dir, "abc.jpg") - cv2.imwrite(image_path, img) + tmp_image_path = os.path.join(save_dir, f"{uuid.uuid4()}.jpg") + cv2.imwrite(tmp_image_path, img) - outputs = process_img(img_path=image_path, + outputs = process_img(img_path=tmp_image_path, save_dir=save_dir, engine=engine, export_all=False, option=option) + os.remove(tmp_image_path) return outputs def predict(page_numb, image_url): @@ -70,6 +72,7 @@ def predict(page_numb, image_url): """ sbt_result = sbt_predict(image_url, engine=sbt_engine) + print(sbt_result) output_dict = { "document_type": "invoice", "document_class": " ", diff --git a/cope2n-ai-fi/common/utils_kvu/split_docs.py b/cope2n-ai-fi/common/utils_kvu/split_docs.py index cf0d16f..52d1078 100755 --- a/cope2n-ai-fi/common/utils_kvu/split_docs.py +++ b/cope2n-ai-fi/common/utils_kvu/split_docs.py @@ -102,6 +102,8 @@ def merge_sbt_output(loutputs): }) return output + print("concat outputs: \n", loutputs) + merged_output = [] combined_output = {"retailername": None, "sold_to_party": None, diff --git a/cope2n-api/fwd_api/api/ctel_view.py b/cope2n-api/fwd_api/api/ctel_view.py index 68cd940..3da94a1 100755 --- a/cope2n-api/fwd_api/api/ctel_view.py +++ b/cope2n-api/fwd_api/api/ctel_view.py @@ -1,6 +1,7 @@ import time import uuid from wsgiref.util import FileWrapper +import base64 from django.core.files.uploadedfile import TemporaryUploadedFile from django.db import transaction @@ -10,15 +11,15 @@ from drf_spectacular.utils import extend_schema from rest_framework import status, viewsets from rest_framework.decorators import action from rest_framework.response import Response -import io from typing import List from fwd import settings +from ..celery_worker.client_connector import c_connector from ..annotation.api import throw_on_failure from ..constant.common import allowed_p_type, ProcessType, REQUEST_ID, FOLDER_TYPE, \ - FolderFileType, TEMPLATE_ID, EntityStatus, standard_ocr_list, pdf_extensions, image_extensions + FolderFileType, TEMPLATE_ID, EntityStatus, standard_ocr_list, pdf_extensions, image_extensions, allowed_file_extensions from ..exception.exceptions import RequiredFieldException, InvalidException, NotFoundException, \ - PermissionDeniedException, LimitReachedException, LockedEntityException + PermissionDeniedException, LimitReachedException, LockedEntityException, FileContentInvalidException from ..models import SubscriptionRequest, UserProfile, SubscriptionRequestFile, OcrTemplate, Subscription from ..response.ReportSerializer import ReportSerializer from ..utils import FileUtils, ProcessUtil @@ -43,7 +44,7 @@ class CtelViewSet(viewsets.ViewSet): } }, responses=None, tags=['ocr']) @action(detail=False, url_path="image/process", methods=["POST"]) - @transaction.atomic + # @transaction.atomic def process(self, request): s_time = time.time() # print(30*"=") @@ -59,7 +60,7 @@ class CtelViewSet(viewsets.ViewSet): rq_id = provider_code + uuid.uuid4().hex file_obj: TemporaryUploadedFile = validated_data['file'] - file_extension = file_obj.name.split(".")[-1] + file_extension = file_obj.name.split(".")[-1].lower() p_type = validated_data['type'] file_name = f"temp_{rq_id}.{file_extension}" @@ -73,12 +74,16 @@ class CtelViewSet(viewsets.ViewSet): from ..celery_worker.client_connector import c_connector file_obj.seek(0) file_path = FileUtils.resize_and_save_file(file_name, new_request, file_obj, 100) - if settings.S3_ENDPOINT!="": - FileUtils.save_to_S3(file_name, new_request, file_obj.read()) - # print(f"[DEBUG]: file_path: {file_path}") + S3_path = FileUtils.save_to_S3(file_name, new_request, file_path) + files: [{ + "file_name": file_name, + "file_path": file_path, # local path to file + "file_type": "" + },] + if file_extension in pdf_extensions: - c_connector.do_pdf((rq_id, sub.id, p_type, user.id, file_name, file_path)) + c_connector.do_pdf((rq_id, sub.id, p_type, user.id, files)) # b_url = ProcessUtil.process_pdf_file(file_name, file_obj, new_request, user) elif file_extension in image_extensions: b_url = ProcessUtil.process_image_file(file_name, file_obj, new_request, user) @@ -117,7 +122,7 @@ class CtelViewSet(viewsets.ViewSet): } }, responses=None, tags=['ocr']) @action(detail=False, url_path="images/process", methods=["POST"]) - @transaction.atomic + # @transaction.atomic def processes(self, request): s_time = time.time() # print(30*"=") @@ -148,34 +153,27 @@ class CtelViewSet(viewsets.ViewSet): provider_code=provider_code, subscription=sub) new_request.save() - - count = 0 + count = 0 + compact_files = [] for doc_type, doc_files in files.items(): for i, doc_file in enumerate(doc_files): _ext = doc_file.name.split(".")[-1] - if _ext not in image_extensions: + if _ext not in allowed_file_extensions: return JsonResponse(status=status.HTTP_406_NOT_ACCEPTABLE, data={"request_id": rq_id, "message": f"File {_ext} is now allowed"}) _name = f"temp_{doc_type}_{rq_id}_{i}.{_ext}" doc_file.seek(0) - # file_path = FileUtils.resize_and_save_file(_name, new_request, doc_file, 100) - # input_file = io.BytesIO(open(doc_file, 'rb').read()) - input_file = doc_file.read() - if settings.S3_ENDPOINT!="": - FileUtils.save_to_S3(_name, new_request, input_file) - else: - file_path = FileUtils.resize_and_save_file(_name, new_request, doc_file, 100) - list_urls.append(ProcessUtil.process_image_file(_name, doc_file, new_request, user)[0]) - list_urls[count]["page_number"] = count - list_urls[count]["doc_type"] = doc_type + file_path = FileUtils.resize_and_save_file(_name, new_request, doc_file, 100) + S3_path = FileUtils.save_to_S3(_name, new_request, file_path) count += 1 + this_file = { + "file_name": _name, + "file_path": file_path, + "file_type": doc_type + } + compact_files.append(this_file) + c_connector.do_pdf((rq_id, sub.id, p_type, user.id, compact_files)) - if p_type in standard_ocr_list: - ProcessUtil.send_to_queue2(rq_id, sub.id, list_urls, user.id, p_type) - elif p_type == ProcessType.TEMPLATE_MATCHING.value: - ProcessUtil.send_template_queue(rq_id, list_urls, validated_data['template'], user.id) j_time = time.time() - print(f"[INFO]: Duration of Pre-processing: {j_time - s_time}s") - print(f"[INFO]: list_urls: {list_urls}") return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id}) @extend_schema(request=None, responses=None, tags=['data']) @@ -289,6 +287,8 @@ class CtelViewSet(viewsets.ViewSet): serializer: ReportSerializer = ReportSerializer(data=report_filter, many=True) serializer.is_valid() # print(f"[DEBUG]: result: {serializer.data[0]}") + if report_filter[0].status == 400: + raise FileContentInvalidException() return Response(status=status.HTTP_200_OK, data=serializer.data[0]) @@ -317,14 +317,13 @@ class CtelViewSet(viewsets.ViewSet): # return Response(status=status.HTTP_200_OK, data=xml_as_string, content_type="application/xml; charset=utf-8") return HttpResponse(xml_as_string,content_type="text/xml") - serializer: ReportSerializer = ReportSerializer(data=report_filter, many=True) serializer.is_valid() return Response(status=status.HTTP_200_OK, data=serializer.data[0]) @action(detail=False, url_path="image/process/app", methods=["POST"]) - @transaction.atomic + # @transaction.atomic def process_app(self, request): app_id = "THIS_IS_OUR_APP_TEST_ACCOUNT_9123" users = UserProfile.objects.filter(sync_id=app_id) diff --git a/cope2n-api/fwd_api/celery_worker/internal_task.py b/cope2n-api/fwd_api/celery_worker/internal_task.py index 0e741bd..20a77ac 100755 --- a/cope2n-api/fwd_api/celery_worker/internal_task.py +++ b/cope2n-api/fwd_api/celery_worker/internal_task.py @@ -4,11 +4,12 @@ import fitz import uuid import os import base64 -import boto3 from fwd_api.celery_worker.worker import app -from ..constant.common import allowed_p_type, ProcessType, REQUEST_ID, FOLDER_TYPE, \ - FolderFileType, TEMPLATE_ID, EntityStatus, standard_ocr_list, pdf_extensions +from ..constant.common import ProcessType, \ + FolderFileType, standard_ocr_list, image_extensions +from django.core.files.uploadedfile import TemporaryUploadedFile +from ..exception.exceptions import FileContentInvalidException from ..utils import FileUtils, ProcessUtil, S3_process from celery.utils.log import get_task_logger from fwd import settings @@ -24,9 +25,27 @@ s3_client = S3_process.MinioS3Client( ) def process_pdf_file(file_name: str, file_path: str, request, user) -> list: - from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile - from fwd_api.constant.common import ProcessType - doc: fitz.Document = fitz.open(stream=FileUtils.get_file(file_path).read(), filetype="pdf") + from fwd_api.models import SubscriptionRequestFile + try: + doc: fitz.Document = fitz.open(stream=FileUtils.get_file(file_path).read(), filetype="pdf") + + # Origin file + new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path, + request=request, + file_name=file_name, + code=f'FIL{uuid.uuid4().hex}') + new_request_file.save() + # Sub-file + return ProcessUtil.pdf_to_images_urls(doc, request, user) + except Exception as e: + request.status = 400 + request.predict_result = {"status": 400, "content": "", "message": f"Unable to extract pdf files {e}"} + request.save() + return None + +def process_pdf_byte(file_name: str, file_path: str, request, user, file_obj) -> list: + from fwd_api.models import SubscriptionRequestFile + doc: fitz.Document = fitz.open(stream=file_obj, filetype="pdf") # Origin file new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path, @@ -34,8 +53,14 @@ def process_pdf_file(file_name: str, file_path: str, request, user) -> list: file_name=file_name, code=f'FIL{uuid.uuid4().hex}') new_request_file.save() - # Sub-file - return ProcessUtil.pdf_to_images_urls(doc, request, user) + try: + # Sub-file + return ProcessUtil.pdf_to_images_urls(doc, request, user) + except Exception as e: + request.status = 400 + request.predict_result = {"status": 400, "content": "", "message": f"Unable to extract pdf files {e}"} + request.save() + return None def process_image_file(file_name: str, file_path, request, user) -> list: @@ -54,25 +79,45 @@ def process_image_file(file_name: str, file_path, request, user) -> list: @app.task(name='do_pdf') -def process_pdf(rq_id, sub_id, p_type, user_id, file_name, file_path): +def process_pdf(rq_id, sub_id, p_type, user_id, files): + """ + pdf_files: [{ + "file_name": "", + "file_path": "", # local path to file + "file_type": "" + },] + """ from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile,UserProfile - + start = time.time() + from django.conf import settings new_request = SubscriptionRequest.objects.filter(request_id=rq_id)[0] user = UserProfile.objects.filter(id=user_id).first() - file_extension = file_name.split(".")[-1] - # logger.info(f"[DEBUG]: file_path: {file_path}") - if file_extension in pdf_extensions: - b_url = process_pdf_file(file_name, file_path, new_request, user) - else: - b_url = process_image_file(file_name, file_path, new_request, user) + b_urls = [] + for i, file in enumerate(files): + extension = file["file_name"].split(".")[-1].lower() + if extension == "pdf": + _b_urls = process_pdf_file(file["file_name"], file["file_path"], new_request, user) + if _b_urls is None: + raise FileContentInvalidException + for i in range(len(_b_urls)): + _b_urls[i]["doc_type"] = file["file_type"] + # b_urls += _b_urls # TODO: Client may request all images in a file, for now, extract the first page only + for j in range(len(b_urls)): + _b_urls[j]["page_number"] = j + len(b_urls) + b_urls.append(_b_urls[0]) + elif extension in image_extensions: + this_url = ProcessUtil.process_image_local_file(file["file_name"], file["file_path"], new_request, user)[0] + this_url["page_number"] = len(b_urls) + if file["file_type"]: + this_url["doc_type"] = file["file_type"] + b_urls.append(this_url) - j_time = time.time() - # logger.info(f"[INFO]: Duration of Pre-processing: {j_time - 0}s") - # logger.info(f"[INFO]: b_url: {b_url}") + start_process = time.time() + logger.info(f"BE proccessing time: {start_process - start}") if p_type in standard_ocr_list: - ProcessUtil.send_to_queue2(rq_id, sub_id, b_url, user_id, p_type) + ProcessUtil.send_to_queue2(rq_id, sub_id, b_urls, user_id, p_type) if p_type == ProcessType.TEMPLATE_MATCHING.value: - ProcessUtil.send_template_queue(rq_id, b_url, '', user_id) + ProcessUtil.send_template_queue(rq_id, b_urls, '', user_id) @app.task(name='upload_file_to_s3') def upload_file_to_s3(local_file_path, s3_key): @@ -81,7 +126,7 @@ def upload_file_to_s3(local_file_path, s3_key): if res != None and res["ResponseMetadata"]["HTTPStatusCode"] == 200: os.remove(local_file_path) else: - print(f"[INFO] S3 is not available, skipping,...") + logger.info(f"S3 is not available, skipping,...") @app.task(name='upload_obj_to_s3') def upload_obj_to_s3(byte_obj, s3_key): @@ -89,4 +134,4 @@ def upload_obj_to_s3(byte_obj, s3_key): obj = base64.b64decode(byte_obj) res = s3_client.update_object(s3_key, obj) else: - print(f"[INFO] S3 is not available, skipping,...") \ No newline at end of file + logger.info(f"S3 is not available, skipping,...") \ No newline at end of file diff --git a/cope2n-api/fwd_api/constant/common.py b/cope2n-api/fwd_api/constant/common.py index f49f7a6..2d7626b 100755 --- a/cope2n-api/fwd_api/constant/common.py +++ b/cope2n-api/fwd_api/constant/common.py @@ -4,67 +4,7 @@ import re image_extensions = ('jpg', 'jpeg', 'png', 'JPG', 'JPEG', 'PNG') pdf_extensions = ('pdf', 'PDF') allowed_file_extensions = image_extensions + pdf_extensions -allowed_p_type = [2, 3, 4, 5, 6] -LIST_BOX_MESSAGE = 'list_box' -NAME_MESSAGE = 'name' -VN_AND_SPACE_REGEX = r"[AĂÂÁẮẤÀẰẦẢẲẨÃẴẪẠẶẬĐEÊÉẾÈỀẺỂẼỄẸỆIÍÌỈĨỊOÔƠÓỐỚÒỒỜỎỔỞÕỖỠỌỘỢUƯÚỨÙỪỦỬŨỮỤỰYÝỲỶỸỴAĂÂÁẮẤÀẰẦẢẲẨÃẴẪẠẶẬĐEÊÉẾÈỀẺỂẼỄẸỆIÍÌỈĨỊOÔƠÓỐỚÒỒỜỎỔỞÕỖỠỌỘỢUƯÚỨÙỪỦỬŨỮỤỰYÝỲỶỸỴAĂÂÁẮẤÀẰẦẢẲẨÃẴẪẠẶẬĐEÊÉẾÈỀẺỂẼỄẸỆIÍÌỈĨỊOÔƠÓỐỚÒỒỜỎỔỞÕỖỠỌỘỢUƯÚỨÙỪỦỬŨỮỤỰYÝỲỶỸỴAĂÂÁẮẤÀẰẦẢẲẨÃẴẪẠẶẬĐEÊÉẾÈỀẺỂẼỄẸỆIÍÌỈĨỊOÔƠÓỐỚÒỒỜỎỔỞÕỖỠỌỘỢUƯÚỨÙỪỦỬŨỮỤỰYÝỲỶỸỴAĂÂÁẮẤÀẰẦẢẲẨÃẴẪẠẶẬĐEÊÉẾÈỀẺỂẼỄẸỆIÍÌỈĨỊOÔƠÓỐỚÒỒỜỎỔỞÕỖỠỌỘỢUƯÚỨÙỪỦỬŨỮỤỰYÝỲỶỸỴAĂÂÁẮẤÀẰẦẢẲẨÃẴẪẠẶẬĐEÊÉẾÈỀẺỂẼỄẸỆIÍÌỈĨỊOÔƠÓỐỚÒỒỜỎỔỞÕỖỠỌỘỢUƯÚỨÙỪỦỬŨỮỤỰYÝỲỶỸỴA-Z0-9 ]+" -IMAGE_NAME = "image_croped.jpg" -TEMPLATE_ID = 'template_id' -pattern = re.compile(VN_AND_SPACE_REGEX) -REQUEST_ID = 'requestId' -FOLDER_TYPE = 'folderType' -MAX_NUMBER_OF_TEMPLATE_DATA_BOX = 20 -MAX_NUMBER_OF_TEMPLATE_ANCHOR_BOX = 3 -NUMBER_OF_ITEM_IN_A_BOX = 4 # 4 coordinates -ESCAPE_VALUE = 'W5@X8#' -USER_MESSAGE = 'user' -PLAN_MESSAGE = 'plan' - - -class FolderFileType(Enum): - TEMPLATES = 'templates' - REQUESTS = 'requests' - - -class FileCategory(Enum): - CROP = 'Crop' - Origin = 'Origin' - BREAK = 'Break' - - -class EntityStatus(Enum): - ACTIVE = 1 - INACTIVE = 0 - - -class TEMPLATE_BOX_TYPE(Enum): - ANCHOR = 1 - DATA = 2 - - -class ProcessType(Enum): - TEMPLATE_MATCHING = 2 - ID_CARD = 3 - DRIVER_LICENSE = 4 - INVOICE = 5 - OCR_WITH_BOX = 6 - AP_INVOICE = 7 - FI_INVOICE = 10 - -class PlanCode(Enum): - TRIAL = 'TRIAL' - BASIC = 'BASIC' - ADVANCED = 'ADVANCED' - - -standard_ocr_list = (ProcessType.INVOICE.value, ProcessType.ID_CARD.value, ProcessType.DRIVER_LICENSE.value, ProcessType.OCR_WITH_BOX.value) -from enum import Enum -import re - -image_extensions = ('jpg', 'jpeg', 'png', 'JPG', 'JPEG', 'PNG') -pdf_extensions = ('pdf', 'PDF') -# allowed_file_extensions = image_extensions + pdf_extensions -allowed_file_extensions = image_extensions +# allowed_file_extensions = image_extensions allowed_p_type = [12] LIST_BOX_MESSAGE = 'list_box' NAME_MESSAGE = 'name' diff --git a/cope2n-api/fwd_api/exception/exceptions.py b/cope2n-api/fwd_api/exception/exceptions.py index 2fa5bc2..1a7b1ad 100755 --- a/cope2n-api/fwd_api/exception/exceptions.py +++ b/cope2n-api/fwd_api/exception/exceptions.py @@ -97,6 +97,12 @@ class FileFormatInvalidException(InvalidException): default_detail = 'File invalid type' detail_with_arg = 'File must have type {}' +class FileContentInvalidException(InvalidException): + status_code = status.HTTP_400_BAD_REQUEST + default_code = 4007 + default_detail = 'Invalid content file' + detail_with_arg = 'One of the files is broken, please select other file and try again' + class TokenExpiredException(GeneralException): status_code = status.HTTP_401_UNAUTHORIZED diff --git a/cope2n-api/fwd_api/models/UserProfile.py b/cope2n-api/fwd_api/models/UserProfile.py index 9973d8c..6b91ad7 100755 --- a/cope2n-api/fwd_api/models/UserProfile.py +++ b/cope2n-api/fwd_api/models/UserProfile.py @@ -7,8 +7,10 @@ from fwd_api.constant.common import EntityStatus class UserProfile(models.Model): id = models.AutoField(primary_key=True) - full_name: str = models.CharField(max_length=200) - sync_id: str = models.CharField(max_length=100) + user_name: str = models.CharField(max_length=200, null=True) + password: str = models.CharField(max_length=200, null=True) + full_name: str = models.CharField(max_length=200, null=True) + sync_id: str = models.CharField(max_length=100, null=True) provider_id: str = models.CharField(max_length=100, default='Ctel') # CTel/GCP/Azure :v current_total_pages: int = models.IntegerField(default=0) limit_total_pages: int = models.IntegerField(default=0) diff --git a/cope2n-api/fwd_api/utils/FileUtils.py b/cope2n-api/fwd_api/utils/FileUtils.py index ebb80eb..59133a1 100755 --- a/cope2n-api/fwd_api/utils/FileUtils.py +++ b/cope2n-api/fwd_api/utils/FileUtils.py @@ -27,7 +27,7 @@ def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUES if not isinstance(f, TemporaryUploadedFile): # print(f'[DEBUG]: {f.name}') raise InvalidException(excArgs="files") - extension = f.name.split(".")[-1] in allowed_file_extensions + extension = f.name.split(".")[-1].lower() in allowed_file_extensions if not extension or "." not in f.name: raise FileFormatInvalidException(excArgs=allowed_file_extensions) if f.size > settings.MAX_UPLOAD_SIZE_OF_A_FILE: @@ -129,14 +129,15 @@ def resize_and_save_file(file_name: str, rq: SubscriptionRequest, file: Temporar print(f"[ERROR]: {e}") raise ServiceUnavailableException() -def save_to_S3(file_name, rq, obj): +def save_to_S3(file_name, rq, local_file_path): try: - base64_obj = base64.b64encode(obj).decode('utf-8') + # base64_obj = base64.b64encode(obj).decode('utf-8') file_path = get_folder_path(rq) assert len(file_path.split("/")) >= 2, "file_path must have at least process type and request id" s3_key = os.path.join(file_path.split("/")[-2], file_path.split("/")[-1], file_name) # c_connector.upload_file_to_s3((file_path, s3_key)) - c_connector.upload_obj_to_s3((base64_obj, s3_key)) + c_connector.upload_file_to_s3((local_file_path, s3_key)) + return s3_key except Exception as e: print(f"[ERROR]: {e}") raise ServiceUnavailableException() diff --git a/cope2n-api/fwd_api/utils/ProcessUtil.py b/cope2n-api/fwd_api/utils/ProcessUtil.py index 957a21b..b6a6f0f 100755 --- a/cope2n-api/fwd_api/utils/ProcessUtil.py +++ b/cope2n-api/fwd_api/utils/ProcessUtil.py @@ -376,6 +376,18 @@ def process_image_file(file_name: str, file_obj: TemporaryUploadedFile, request: 'request_file_id': new_request_file.code }] +def process_image_local_file(file_name: str, file_path: str, request: SubscriptionRequest, user) -> list: + new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path, + request=request, + file_name=file_name, + code=f'FIL{uuid.uuid4().hex}') + new_request_file.save() + return [{ + 'file_url': FileUtils.build_url(FolderFileType.REQUESTS.value, request.request_id, user.id, file_name), + 'page_number': 0, + 'request_file_id': new_request_file.code + }] + def pdf_to_images_urls(doc: fitz.Document, request: SubscriptionRequest, user, dpi: int = 300) -> list: def resize(image, max_w=1920, max_h=1080): logger.info(f"[DEBUG]: image.size: {image.size}, type(image): {type(image)}")