Add: support for PDF files
This commit is contained in:
parent
7e9a8e2d4b
commit
a84e3dce05
@ -3,6 +3,7 @@ import urllib
|
||||
import random
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
import uuid
|
||||
import sys, os
|
||||
cur_dir = str(Path(__file__).parents[2])
|
||||
sys.path.append(cur_dir)
|
||||
@ -35,14 +36,15 @@ def sbt_predict(image_url, engine) -> None:
|
||||
|
||||
save_dir = "./tmp_results"
|
||||
# image_path = os.path.join(save_dir, f"{image_url}.jpg")
|
||||
image_path = os.path.join(save_dir, "abc.jpg")
|
||||
cv2.imwrite(image_path, img)
|
||||
tmp_image_path = os.path.join(save_dir, f"{uuid.uuid4()}.jpg")
|
||||
cv2.imwrite(tmp_image_path, img)
|
||||
|
||||
outputs = process_img(img_path=image_path,
|
||||
outputs = process_img(img_path=tmp_image_path,
|
||||
save_dir=save_dir,
|
||||
engine=engine,
|
||||
export_all=False,
|
||||
option=option)
|
||||
os.remove(tmp_image_path)
|
||||
return outputs
|
||||
|
||||
def predict(page_numb, image_url):
|
||||
@ -70,6 +72,7 @@ def predict(page_numb, image_url):
|
||||
"""
|
||||
|
||||
sbt_result = sbt_predict(image_url, engine=sbt_engine)
|
||||
print(sbt_result)
|
||||
output_dict = {
|
||||
"document_type": "invoice",
|
||||
"document_class": " ",
|
||||
|
@ -102,6 +102,8 @@ def merge_sbt_output(loutputs):
|
||||
})
|
||||
return output
|
||||
|
||||
print("concat outputs: \n", loutputs)
|
||||
|
||||
merged_output = []
|
||||
combined_output = {"retailername": None,
|
||||
"sold_to_party": None,
|
||||
|
@ -1,6 +1,7 @@
|
||||
import time
|
||||
import uuid
|
||||
from wsgiref.util import FileWrapper
|
||||
import base64
|
||||
|
||||
from django.core.files.uploadedfile import TemporaryUploadedFile
|
||||
from django.db import transaction
|
||||
@ -10,15 +11,15 @@ from drf_spectacular.utils import extend_schema
|
||||
from rest_framework import status, viewsets
|
||||
from rest_framework.decorators import action
|
||||
from rest_framework.response import Response
|
||||
import io
|
||||
from typing import List
|
||||
|
||||
from fwd import settings
|
||||
from ..celery_worker.client_connector import c_connector
|
||||
from ..annotation.api import throw_on_failure
|
||||
from ..constant.common import allowed_p_type, ProcessType, REQUEST_ID, FOLDER_TYPE, \
|
||||
FolderFileType, TEMPLATE_ID, EntityStatus, standard_ocr_list, pdf_extensions, image_extensions
|
||||
FolderFileType, TEMPLATE_ID, EntityStatus, standard_ocr_list, pdf_extensions, image_extensions, allowed_file_extensions
|
||||
from ..exception.exceptions import RequiredFieldException, InvalidException, NotFoundException, \
|
||||
PermissionDeniedException, LimitReachedException, LockedEntityException
|
||||
PermissionDeniedException, LimitReachedException, LockedEntityException, FileContentInvalidException
|
||||
from ..models import SubscriptionRequest, UserProfile, SubscriptionRequestFile, OcrTemplate, Subscription
|
||||
from ..response.ReportSerializer import ReportSerializer
|
||||
from ..utils import FileUtils, ProcessUtil
|
||||
@ -43,7 +44,7 @@ class CtelViewSet(viewsets.ViewSet):
|
||||
}
|
||||
}, responses=None, tags=['ocr'])
|
||||
@action(detail=False, url_path="image/process", methods=["POST"])
|
||||
@transaction.atomic
|
||||
# @transaction.atomic
|
||||
def process(self, request):
|
||||
s_time = time.time()
|
||||
# print(30*"=")
|
||||
@ -59,7 +60,7 @@ class CtelViewSet(viewsets.ViewSet):
|
||||
rq_id = provider_code + uuid.uuid4().hex
|
||||
|
||||
file_obj: TemporaryUploadedFile = validated_data['file']
|
||||
file_extension = file_obj.name.split(".")[-1]
|
||||
file_extension = file_obj.name.split(".")[-1].lower()
|
||||
p_type = validated_data['type']
|
||||
file_name = f"temp_{rq_id}.{file_extension}"
|
||||
|
||||
@ -73,12 +74,16 @@ class CtelViewSet(viewsets.ViewSet):
|
||||
from ..celery_worker.client_connector import c_connector
|
||||
file_obj.seek(0)
|
||||
file_path = FileUtils.resize_and_save_file(file_name, new_request, file_obj, 100)
|
||||
if settings.S3_ENDPOINT!="":
|
||||
FileUtils.save_to_S3(file_name, new_request, file_obj.read())
|
||||
# print(f"[DEBUG]: file_path: {file_path}")
|
||||
S3_path = FileUtils.save_to_S3(file_name, new_request, file_path)
|
||||
|
||||
files: [{
|
||||
"file_name": file_name,
|
||||
"file_path": file_path, # local path to file
|
||||
"file_type": ""
|
||||
},]
|
||||
|
||||
if file_extension in pdf_extensions:
|
||||
c_connector.do_pdf((rq_id, sub.id, p_type, user.id, file_name, file_path))
|
||||
c_connector.do_pdf((rq_id, sub.id, p_type, user.id, files))
|
||||
# b_url = ProcessUtil.process_pdf_file(file_name, file_obj, new_request, user)
|
||||
elif file_extension in image_extensions:
|
||||
b_url = ProcessUtil.process_image_file(file_name, file_obj, new_request, user)
|
||||
@ -117,7 +122,7 @@ class CtelViewSet(viewsets.ViewSet):
|
||||
}
|
||||
}, responses=None, tags=['ocr'])
|
||||
@action(detail=False, url_path="images/process", methods=["POST"])
|
||||
@transaction.atomic
|
||||
# @transaction.atomic
|
||||
def processes(self, request):
|
||||
s_time = time.time()
|
||||
# print(30*"=")
|
||||
@ -148,34 +153,27 @@ class CtelViewSet(viewsets.ViewSet):
|
||||
provider_code=provider_code,
|
||||
subscription=sub)
|
||||
new_request.save()
|
||||
|
||||
count = 0
|
||||
count = 0
|
||||
compact_files = []
|
||||
for doc_type, doc_files in files.items():
|
||||
for i, doc_file in enumerate(doc_files):
|
||||
_ext = doc_file.name.split(".")[-1]
|
||||
if _ext not in image_extensions:
|
||||
if _ext not in allowed_file_extensions:
|
||||
return JsonResponse(status=status.HTTP_406_NOT_ACCEPTABLE, data={"request_id": rq_id, "message": f"File {_ext} is now allowed"})
|
||||
_name = f"temp_{doc_type}_{rq_id}_{i}.{_ext}"
|
||||
doc_file.seek(0)
|
||||
# file_path = FileUtils.resize_and_save_file(_name, new_request, doc_file, 100)
|
||||
# input_file = io.BytesIO(open(doc_file, 'rb').read())
|
||||
input_file = doc_file.read()
|
||||
if settings.S3_ENDPOINT!="":
|
||||
FileUtils.save_to_S3(_name, new_request, input_file)
|
||||
else:
|
||||
file_path = FileUtils.resize_and_save_file(_name, new_request, doc_file, 100)
|
||||
list_urls.append(ProcessUtil.process_image_file(_name, doc_file, new_request, user)[0])
|
||||
list_urls[count]["page_number"] = count
|
||||
list_urls[count]["doc_type"] = doc_type
|
||||
file_path = FileUtils.resize_and_save_file(_name, new_request, doc_file, 100)
|
||||
S3_path = FileUtils.save_to_S3(_name, new_request, file_path)
|
||||
count += 1
|
||||
this_file = {
|
||||
"file_name": _name,
|
||||
"file_path": file_path,
|
||||
"file_type": doc_type
|
||||
}
|
||||
compact_files.append(this_file)
|
||||
c_connector.do_pdf((rq_id, sub.id, p_type, user.id, compact_files))
|
||||
|
||||
if p_type in standard_ocr_list:
|
||||
ProcessUtil.send_to_queue2(rq_id, sub.id, list_urls, user.id, p_type)
|
||||
elif p_type == ProcessType.TEMPLATE_MATCHING.value:
|
||||
ProcessUtil.send_template_queue(rq_id, list_urls, validated_data['template'], user.id)
|
||||
j_time = time.time()
|
||||
print(f"[INFO]: Duration of Pre-processing: {j_time - s_time}s")
|
||||
print(f"[INFO]: list_urls: {list_urls}")
|
||||
return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id})
|
||||
|
||||
@extend_schema(request=None, responses=None, tags=['data'])
|
||||
@ -289,6 +287,8 @@ class CtelViewSet(viewsets.ViewSet):
|
||||
serializer: ReportSerializer = ReportSerializer(data=report_filter, many=True)
|
||||
serializer.is_valid()
|
||||
# print(f"[DEBUG]: result: {serializer.data[0]}")
|
||||
if report_filter[0].status == 400:
|
||||
raise FileContentInvalidException()
|
||||
|
||||
return Response(status=status.HTTP_200_OK, data=serializer.data[0])
|
||||
|
||||
@ -317,14 +317,13 @@ class CtelViewSet(viewsets.ViewSet):
|
||||
# return Response(status=status.HTTP_200_OK, data=xml_as_string, content_type="application/xml; charset=utf-8")
|
||||
return HttpResponse(xml_as_string,content_type="text/xml")
|
||||
|
||||
|
||||
serializer: ReportSerializer = ReportSerializer(data=report_filter, many=True)
|
||||
serializer.is_valid()
|
||||
|
||||
return Response(status=status.HTTP_200_OK, data=serializer.data[0])
|
||||
|
||||
@action(detail=False, url_path="image/process/app", methods=["POST"])
|
||||
@transaction.atomic
|
||||
# @transaction.atomic
|
||||
def process_app(self, request):
|
||||
app_id = "THIS_IS_OUR_APP_TEST_ACCOUNT_9123"
|
||||
users = UserProfile.objects.filter(sync_id=app_id)
|
||||
|
@ -4,11 +4,12 @@ import fitz
|
||||
import uuid
|
||||
import os
|
||||
import base64
|
||||
import boto3
|
||||
|
||||
from fwd_api.celery_worker.worker import app
|
||||
from ..constant.common import allowed_p_type, ProcessType, REQUEST_ID, FOLDER_TYPE, \
|
||||
FolderFileType, TEMPLATE_ID, EntityStatus, standard_ocr_list, pdf_extensions
|
||||
from ..constant.common import ProcessType, \
|
||||
FolderFileType, standard_ocr_list, image_extensions
|
||||
from django.core.files.uploadedfile import TemporaryUploadedFile
|
||||
from ..exception.exceptions import FileContentInvalidException
|
||||
from ..utils import FileUtils, ProcessUtil, S3_process
|
||||
from celery.utils.log import get_task_logger
|
||||
from fwd import settings
|
||||
@ -24,9 +25,27 @@ s3_client = S3_process.MinioS3Client(
|
||||
)
|
||||
|
||||
def process_pdf_file(file_name: str, file_path: str, request, user) -> list:
    """Register a stored PDF against ``request`` and convert it to page image URLs.

    The PDF is loaded through ``FileUtils.get_file(file_path)``, a
    ``SubscriptionRequestFile`` row is saved for the original file, and the
    opened document is handed to ``ProcessUtil.pdf_to_images_urls``.

    Args:
        file_name: Name stored on the ``SubscriptionRequestFile`` row.
        file_path: Path passed to ``FileUtils.get_file`` and stored on the row.
        request: Request object the file belongs to; on failure its ``status``
            and ``predict_result`` are overwritten and it is saved.
        user: Forwarded unchanged to ``ProcessUtil.pdf_to_images_urls``.

    Returns:
        Whatever ``ProcessUtil.pdf_to_images_urls`` returns, or ``None`` when
        any step raised. Callers must check for ``None``.
    """
    from fwd_api.models import SubscriptionRequestFile

    try:
        doc: fitz.Document = fitz.open(stream=FileUtils.get_file(file_path).read(), filetype="pdf")

        # Origin file
        new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path,
                                                                            request=request,
                                                                            file_name=file_name,
                                                                            code=f'FIL{uuid.uuid4().hex}')
        new_request_file.save()
        # Sub-file
        return ProcessUtil.pdf_to_images_urls(doc, request, user)
    except Exception as e:
        # Best-effort boundary: the failure is reported through the request
        # record instead of propagating, but keep the traceback in the worker
        # log so the root cause is not lost.
        logger.exception("Unable to extract pdf file %s", file_name)
        request.status = 400
        request.predict_result = {"status": 400, "content": "", "message": f"Unable to extract pdf files {e}"}
        request.save()
        return None
|
||||
|
||||
def process_pdf_byte(file_name: str, file_path: str, request, user, file_obj) -> list:
|
||||
from fwd_api.models import SubscriptionRequestFile
|
||||
doc: fitz.Document = fitz.open(stream=file_obj, filetype="pdf")
|
||||
|
||||
# Origin file
|
||||
new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path,
|
||||
@ -34,8 +53,14 @@ def process_pdf_file(file_name: str, file_path: str, request, user) -> list:
|
||||
file_name=file_name,
|
||||
code=f'FIL{uuid.uuid4().hex}')
|
||||
new_request_file.save()
|
||||
# Sub-file
|
||||
return ProcessUtil.pdf_to_images_urls(doc, request, user)
|
||||
try:
|
||||
# Sub-file
|
||||
return ProcessUtil.pdf_to_images_urls(doc, request, user)
|
||||
except Exception as e:
|
||||
request.status = 400
|
||||
request.predict_result = {"status": 400, "content": "", "message": f"Unable to extract pdf files {e}"}
|
||||
request.save()
|
||||
return None
|
||||
|
||||
|
||||
def process_image_file(file_name: str, file_path, request, user) -> list:
|
||||
@ -54,25 +79,45 @@ def process_image_file(file_name: str, file_path, request, user) -> list:
|
||||
|
||||
|
||||
@app.task(name='do_pdf')
def process_pdf(rq_id, sub_id, p_type, user_id, files):
    """Register the uploaded files of a request and queue them for processing.

    Args:
        rq_id: ``request_id`` used to look up the ``SubscriptionRequest``.
        sub_id: Subscription id, forwarded to ``ProcessUtil.send_to_queue2``.
        p_type: Process type; selects which queue(s) receive the pages.
        user_id: Id used to look up the ``UserProfile``.
        files: List of dicts shaped like::

            [{
                "file_name": "",
                "file_path": "",  # local path to file
                "file_type": ""
            },]

    Raises:
        FileContentInvalidException: A PDF could not be converted, or it
            produced no pages.
    """
    from fwd_api.models import SubscriptionRequest, UserProfile

    start = time.time()
    new_request = SubscriptionRequest.objects.filter(request_id=rq_id)[0]
    user = UserProfile.objects.filter(id=user_id).first()

    b_urls = []
    for file in files:
        extension = file["file_name"].split(".")[-1].lower()
        if extension == "pdf":
            pdf_pages = process_pdf_file(file["file_name"], file["file_path"], new_request, user)
            # None signals a conversion failure; an empty list would make the
            # first-page lookup below raise IndexError, so reject both.
            if not pdf_pages:
                raise FileContentInvalidException()
            # TODO: Client may request all images in a file, for now, extract the first page only
            first_page = pdf_pages[0]
            first_page["doc_type"] = file["file_type"]
            # Number pages by their position in the combined output. Indexing
            # the per-file list with the combined length (as before) raised
            # IndexError once more files had been collected than this PDF has
            # pages.
            first_page["page_number"] = len(b_urls)
            b_urls.append(first_page)
        elif extension in image_extensions:
            this_url = ProcessUtil.process_image_local_file(file["file_name"], file["file_path"], new_request, user)[0]
            this_url["page_number"] = len(b_urls)
            if file["file_type"]:
                this_url["doc_type"] = file["file_type"]
            b_urls.append(this_url)
        else:
            # Unsupported files are still skipped, but no longer silently.
            logger.warning("Skipping file with unsupported extension: %s", file["file_name"])

    start_process = time.time()
    logger.info(f"BE proccessing time: {start_process - start}")
    if p_type in standard_ocr_list:
        ProcessUtil.send_to_queue2(rq_id, sub_id, b_urls, user_id, p_type)
    if p_type == ProcessType.TEMPLATE_MATCHING.value:
        ProcessUtil.send_template_queue(rq_id, b_urls, '', user_id)
|
||||
|
||||
@app.task(name='upload_file_to_s3')
|
||||
def upload_file_to_s3(local_file_path, s3_key):
|
||||
@ -81,7 +126,7 @@ def upload_file_to_s3(local_file_path, s3_key):
|
||||
if res != None and res["ResponseMetadata"]["HTTPStatusCode"] == 200:
|
||||
os.remove(local_file_path)
|
||||
else:
|
||||
print(f"[INFO] S3 is not available, skipping,...")
|
||||
logger.info(f"S3 is not available, skipping,...")
|
||||
|
||||
@app.task(name='upload_obj_to_s3')
|
||||
def upload_obj_to_s3(byte_obj, s3_key):
|
||||
@ -89,4 +134,4 @@ def upload_obj_to_s3(byte_obj, s3_key):
|
||||
obj = base64.b64decode(byte_obj)
|
||||
res = s3_client.update_object(s3_key, obj)
|
||||
else:
|
||||
print(f"[INFO] S3 is not available, skipping,...")
|
||||
logger.info(f"S3 is not available, skipping,...")
|
@ -4,67 +4,7 @@ import re
|
||||
image_extensions = ('jpg', 'jpeg', 'png', 'JPG', 'JPEG', 'PNG')
|
||||
pdf_extensions = ('pdf', 'PDF')
|
||||
allowed_file_extensions = image_extensions + pdf_extensions
|
||||
allowed_p_type = [2, 3, 4, 5, 6]
|
||||
LIST_BOX_MESSAGE = 'list_box'
|
||||
NAME_MESSAGE = 'name'
|
||||
VN_AND_SPACE_REGEX = r"[AĂÂÁẮẤÀẰẦẢẲẨÃẴẪẠẶẬĐEÊÉẾÈỀẺỂẼỄẸỆIÍÌỈĨỊOÔƠÓỐỚÒỒỜỎỔỞÕỖỠỌỘỢUƯÚỨÙỪỦỬŨỮỤỰYÝỲỶỸỴAĂÂÁẮẤÀẰẦẢẲẨÃẴẪẠẶẬĐEÊÉẾÈỀẺỂẼỄẸỆIÍÌỈĨỊOÔƠÓỐỚÒỒỜỎỔỞÕỖỠỌỘỢUƯÚỨÙỪỦỬŨỮỤỰYÝỲỶỸỴAĂÂÁẮẤÀẰẦẢẲẨÃẴẪẠẶẬĐEÊÉẾÈỀẺỂẼỄẸỆIÍÌỈĨỊOÔƠÓỐỚÒỒỜỎỔỞÕỖỠỌỘỢUƯÚỨÙỪỦỬŨỮỤỰYÝỲỶỸỴAĂÂÁẮẤÀẰẦẢẲẨÃẴẪẠẶẬĐEÊÉẾÈỀẺỂẼỄẸỆIÍÌỈĨỊOÔƠÓỐỚÒỒỜỎỔỞÕỖỠỌỘỢUƯÚỨÙỪỦỬŨỮỤỰYÝỲỶỸỴAĂÂÁẮẤÀẰẦẢẲẨÃẴẪẠẶẬĐEÊÉẾÈỀẺỂẼỄẸỆIÍÌỈĨỊOÔƠÓỐỚÒỒỜỎỔỞÕỖỠỌỘỢUƯÚỨÙỪỦỬŨỮỤỰYÝỲỶỸỴAĂÂÁẮẤÀẰẦẢẲẨÃẴẪẠẶẬĐEÊÉẾÈỀẺỂẼỄẸỆIÍÌỈĨỊOÔƠÓỐỚÒỒỜỎỔỞÕỖỠỌỘỢUƯÚỨÙỪỦỬŨỮỤỰYÝỲỶỸỴA-Z0-9 ]+"
|
||||
IMAGE_NAME = "image_croped.jpg"
|
||||
TEMPLATE_ID = 'template_id'
|
||||
pattern = re.compile(VN_AND_SPACE_REGEX)
|
||||
REQUEST_ID = 'requestId'
|
||||
FOLDER_TYPE = 'folderType'
|
||||
MAX_NUMBER_OF_TEMPLATE_DATA_BOX = 20
|
||||
MAX_NUMBER_OF_TEMPLATE_ANCHOR_BOX = 3
|
||||
NUMBER_OF_ITEM_IN_A_BOX = 4 # 4 coordinates
|
||||
ESCAPE_VALUE = 'W5@X8#'
|
||||
USER_MESSAGE = 'user'
|
||||
PLAN_MESSAGE = 'plan'
|
||||
|
||||
|
||||
class FolderFileType(Enum):
|
||||
TEMPLATES = 'templates'
|
||||
REQUESTS = 'requests'
|
||||
|
||||
|
||||
class FileCategory(Enum):
|
||||
CROP = 'Crop'
|
||||
Origin = 'Origin'
|
||||
BREAK = 'Break'
|
||||
|
||||
|
||||
class EntityStatus(Enum):
|
||||
ACTIVE = 1
|
||||
INACTIVE = 0
|
||||
|
||||
|
||||
class TEMPLATE_BOX_TYPE(Enum):
|
||||
ANCHOR = 1
|
||||
DATA = 2
|
||||
|
||||
|
||||
class ProcessType(Enum):
|
||||
TEMPLATE_MATCHING = 2
|
||||
ID_CARD = 3
|
||||
DRIVER_LICENSE = 4
|
||||
INVOICE = 5
|
||||
OCR_WITH_BOX = 6
|
||||
AP_INVOICE = 7
|
||||
FI_INVOICE = 10
|
||||
|
||||
class PlanCode(Enum):
|
||||
TRIAL = 'TRIAL'
|
||||
BASIC = 'BASIC'
|
||||
ADVANCED = 'ADVANCED'
|
||||
|
||||
|
||||
standard_ocr_list = (ProcessType.INVOICE.value, ProcessType.ID_CARD.value, ProcessType.DRIVER_LICENSE.value, ProcessType.OCR_WITH_BOX.value)
|
||||
from enum import Enum
|
||||
import re
|
||||
|
||||
image_extensions = ('jpg', 'jpeg', 'png', 'JPG', 'JPEG', 'PNG')
|
||||
pdf_extensions = ('pdf', 'PDF')
|
||||
# allowed_file_extensions = image_extensions + pdf_extensions
|
||||
allowed_file_extensions = image_extensions
|
||||
# allowed_file_extensions = image_extensions
|
||||
allowed_p_type = [12]
|
||||
LIST_BOX_MESSAGE = 'list_box'
|
||||
NAME_MESSAGE = 'name'
|
||||
|
@ -97,6 +97,12 @@ class FileFormatInvalidException(InvalidException):
|
||||
default_detail = 'File invalid type'
|
||||
detail_with_arg = 'File must have type {}'
|
||||
|
||||
class FileContentInvalidException(InvalidException):
    """Error raised when the content of an uploaded file cannot be processed."""

    # HTTP status and application-level error code reported for this error.
    status_code = status.HTTP_400_BAD_REQUEST
    default_code = 4007

    # Messages; ``detail_with_arg`` carries no ``{}`` placeholder.
    default_detail = 'Invalid content file'
    detail_with_arg = 'One of the files is broken, please select other file and try again'
|
||||
|
||||
|
||||
class TokenExpiredException(GeneralException):
|
||||
status_code = status.HTTP_401_UNAUTHORIZED
|
||||
|
@ -7,8 +7,10 @@ from fwd_api.constant.common import EntityStatus
|
||||
|
||||
class UserProfile(models.Model):
|
||||
id = models.AutoField(primary_key=True)
|
||||
full_name: str = models.CharField(max_length=200)
|
||||
sync_id: str = models.CharField(max_length=100)
|
||||
user_name: str = models.CharField(max_length=200, null=True)
|
||||
password: str = models.CharField(max_length=200, null=True)
|
||||
full_name: str = models.CharField(max_length=200, null=True)
|
||||
sync_id: str = models.CharField(max_length=100, null=True)
|
||||
provider_id: str = models.CharField(max_length=100, default='Ctel') # CTel/GCP/Azure :v
|
||||
current_total_pages: int = models.IntegerField(default=0)
|
||||
limit_total_pages: int = models.IntegerField(default=0)
|
||||
|
@ -27,7 +27,7 @@ def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUES
|
||||
if not isinstance(f, TemporaryUploadedFile):
|
||||
# print(f'[DEBUG]: {f.name}')
|
||||
raise InvalidException(excArgs="files")
|
||||
extension = f.name.split(".")[-1] in allowed_file_extensions
|
||||
extension = f.name.split(".")[-1].lower() in allowed_file_extensions
|
||||
if not extension or "." not in f.name:
|
||||
raise FileFormatInvalidException(excArgs=allowed_file_extensions)
|
||||
if f.size > settings.MAX_UPLOAD_SIZE_OF_A_FILE:
|
||||
@ -129,14 +129,15 @@ def resize_and_save_file(file_name: str, rq: SubscriptionRequest, file: Temporar
|
||||
print(f"[ERROR]: {e}")
|
||||
raise ServiceUnavailableException()
|
||||
|
||||
def save_to_S3(file_name, rq, local_file_path):
    """Schedule the upload of a locally saved file to S3 and return its key.

    The key is built from the last two components of ``get_folder_path(rq)``
    followed by ``file_name``; the upload itself is delegated to
    ``c_connector.upload_file_to_s3``.

    Args:
        file_name: Name of the file, used as the last component of the key.
        rq: Request object passed to ``get_folder_path``.
        local_file_path: Path of the file on local disk to upload.

    Returns:
        The S3 key that the file is uploaded under.

    Raises:
        ServiceUnavailableException: Any failure while building the key or
            dispatching the upload.
    """
    try:
        folder_parts = get_folder_path(rq).split("/")
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if len(folder_parts) < 2:
            raise ValueError("file_path must have at least process type and request id")
        s3_key = os.path.join(folder_parts[-2], folder_parts[-1], file_name)
        c_connector.upload_file_to_s3((local_file_path, s3_key))
        return s3_key
    except Exception as e:
        print(f"[ERROR]: {e}")
        # Chain the cause so the original error stays visible in tracebacks.
        raise ServiceUnavailableException() from e
|
||||
|
@ -376,6 +376,18 @@ def process_image_file(file_name: str, file_obj: TemporaryUploadedFile, request:
|
||||
'request_file_id': new_request_file.code
|
||||
}]
|
||||
|
||||
def process_image_local_file(file_name: str, file_path: str, request: SubscriptionRequest, user) -> list:
    """Record an already-saved image for ``request`` and describe it as one page.

    Saves a ``SubscriptionRequestFile`` row for the file, then returns a
    single-element list holding the file URL, page number 0 and the code of
    the saved row.
    """
    request_file: SubscriptionRequestFile = SubscriptionRequestFile(
        file_path=file_path,
        request=request,
        file_name=file_name,
        code=f'FIL{uuid.uuid4().hex}',
    )
    request_file.save()

    file_url = FileUtils.build_url(FolderFileType.REQUESTS.value, request.request_id, user.id, file_name)
    page_entry = {
        'file_url': file_url,
        'page_number': 0,
        'request_file_id': request_file.code,
    }
    return [page_entry]
|
||||
|
||||
def pdf_to_images_urls(doc: fitz.Document, request: SubscriptionRequest, user, dpi: int = 300) -> list:
|
||||
def resize(image, max_w=1920, max_h=1080):
|
||||
logger.info(f"[DEBUG]: image.size: {image.size}, type(image): {type(image)}")
|
||||
|
Loading…
Reference in New Issue
Block a user