Add: Limit image decompressed size

This commit is contained in:
dx-tan 2023-12-11 13:15:48 +00:00
parent 59de640e96
commit 2c50f86ed2
6 changed files with 28 additions and 7 deletions

View File

@ -3,9 +3,9 @@ __pycache__
DataBase/image_temp/
DataBase/json_temp/
DataBase/template.db
sdsvtd/
sdsvtr/
sdsvkie/
# sdsvtd/
# sdsvtr/
# sdsvkie/
detectron2/
output/
data/

View File

@ -202,6 +202,7 @@ CELERY_TASK_TIME_LIMIT = 30 * 60
# Upload limits.
MAX_UPLOAD_SIZE_OF_A_FILE = 100 * 1024 * 1024 # 100 MB
MAX_UPLOAD_FILE_SIZE_OF_A_REQUEST = 100 * 1024 * 1024 # 100 MB
# Used as the default max_file_num of validate_list_file.
MAX_UPLOAD_FILES_IN_A_REQUEST = 5
# Despite the name, this is a per-side limit in pixels, not a total pixel
# count: save_img rejects an image whose width OR height exceeds this value.
MAX_PIXEL_IN_A_FILE = 5000
SIZE_TO_COMPRESS = 2 * 1024 * 1024
MAX_NUMBER_OF_TEMPLATE = 3
MAX_PAGES_OF_PDF_FILE = 50

View File

@ -108,6 +108,12 @@ class FileContentInvalidException(InvalidException):
default_detail = 'Invalid content file'
detail_with_arg = 'One of the files is broken, please select other file and try again'
class InvalidDecompressedSizeException(InvalidException):
    """Signal that an uploaded image is too large once decoded.

    Raised with ``excArgs=(width, height, limit)`` when either side of the
    image exceeds ``settings.MAX_PIXEL_IN_A_FILE``. The three values match
    the three placeholders of ``detail_with_arg``.
    NOTE(review): the substitution itself presumably happens in the base
    exception class, which is not visible here -- confirm.
    """
    status_code = status.HTTP_400_BAD_REQUEST
    default_code = 4008
    # Spelling fixed: this message previously read "decompessed".
    default_detail = 'Invalid decompressed file'
    # Placeholders, in order: width, height, maximum allowed size of one side.
    detail_with_arg = '{}x{} is not valid, maximum size for one side is {}'
class TokenExpiredException(GeneralException):
status_code = status.HTTP_401_UNAUTHORIZED

View File

@ -10,11 +10,12 @@ from django.core.files.uploadedfile import TemporaryUploadedFile
from fwd import settings
from fwd_api.constant.common import allowed_file_extensions
from fwd_api.exception.exceptions import GeneralException, RequiredFieldException, InvalidException, \
ServiceUnavailableException, FileFormatInvalidException, LimitReachedException
ServiceUnavailableException, FileFormatInvalidException, LimitReachedException, InvalidDecompressedSizeException
from fwd_api.models import SubscriptionRequest, OcrTemplate
from fwd_api.utils import ProcessUtil
from fwd_api.utils.CryptoUtils import image_authenticator
from ..celery_worker.client_connector import c_connector
import imagesize
def validate_list_file(files, max_file_num=settings.MAX_UPLOAD_FILES_IN_A_REQUEST, min_file_num=1, file_field="files"):
total_file_size = 0
@ -135,6 +136,8 @@ def resize_and_save_file(file_name: str, rq: SubscriptionRequest, file: Temporar
# Create a new directory because it does not exist
os.makedirs(folder_path)
return save_file_with_path(file_name, file, quality, folder_path)
except InvalidDecompressedSizeException as e:
raise e
except Exception as e:
print(f"[ERROR]: {e}")
raise ServiceUnavailableException()
@ -161,6 +164,8 @@ def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, fo
save_pdf(file_path, file)
else:
save_img(file_path, file, quality)
except InvalidDecompressedSizeException as e:
raise e
except Exception as e:
print(e)
raise ServiceUnavailableException()
@ -174,6 +179,12 @@ def save_pdf(file_path: str, file: TemporaryUploadedFile):
def save_img(file_path: str, file: TemporaryUploadedFile, quality):
with open(file.temporary_file_path(), "rb") as fs:
input_file = io.BytesIO(fs.read())
width, height = imagesize.get(input_file)
if width > settings.MAX_PIXEL_IN_A_FILE or height > settings.MAX_PIXEL_IN_A_FILE:
raise InvalidDecompressedSizeException(excArgs=(str(width), str(height), str(settings.MAX_PIXEL_IN_A_FILE)))
with open(file.temporary_file_path(), "rb") as fs:
input_file = io.BytesIO(fs.read())
image = Image.open(input_file)

View File

@ -16,7 +16,7 @@ from fwd_api.exception.exceptions import NumberOfBoxLimitReachedException, \
from fwd_api.utils import DateUtil, FileUtils
from ..constant.common import ProcessType, TEMPLATE_BOX_TYPE, EntityStatus
from ..exception.exceptions import InvalidException, NotFoundException, \
PermissionDeniedException, RequiredFieldException, InvalidException
PermissionDeniedException, RequiredFieldException, InvalidException, InvalidDecompressedSizeException
from ..models import UserProfile, OcrTemplate, OcrTemplateBox, \
Subscription, SubscriptionRequestFile, SubscriptionRequest
from ..celery_worker.client_connector import c_connector
@ -418,7 +418,7 @@ def process_image_local_file(file_name: str, file_path: str, request: Subscripti
}]
def pdf_to_images_urls(doc: fitz.Document, request: SubscriptionRequest, user, dpi: int = 300) -> list:
def resize(image, max_w=1920, max_h=1080):
def resize(image, max_w=2048, max_h=2048):
logger.info(f"[DEBUG]: image.size: {image.size}, type(image): {type(image)}")
cur_w, cur_h = image.width, image.height
image_bytes = image.samples
@ -446,6 +446,8 @@ def pdf_to_images_urls(doc: fitz.Document, request: SubscriptionRequest, user, d
pix = page.get_pixmap(dpi=250) # render page to an image
# pix = resize(pix)
# print(f"[DEBUG]: pix.size: {pix.size}")
if pix.size > 8*3*settings.MAX_PIXEL_IN_A_FILE*settings.MAX_PIXEL_IN_A_FILE:
raise InvalidDecompressedSizeException(excArgs=(str(width), str(height), str(settings.MAX_PIXEL_IN_A_FILE)))
pix.save(saving_path)
print(f"Saving {saving_path}")
new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=saving_path,

View File

@ -48,4 +48,5 @@ PyJWT~=2.6.0
whitenoise==6.4.0
PyMuPDF==1.21.1
djangorestframework-xml==2.0.0
boto3==1.29.7
boto3==1.29.7
imagesize==1.4.1