53 lines
1.6 KiB
Python
53 lines
1.6 KiB
Python
import io
|
|
from PIL import Image
|
|
from PyPDF2 import PdfReader, PdfWriter
|
|
from pdf2image import convert_from_bytes
|
|
|
|
def resize(image, max_w=2048, max_h=2048):
    """Shrink *image* to fit inside a ``max_w`` x ``max_h`` box.

    The aspect ratio is preserved (up to integer truncation). Images that
    already fit inside the box are returned unchanged; images are never
    enlarged.

    Args:
        image: Object exposing ``width``, ``height`` and
            ``resize((w, h))`` (e.g. a Pillow image).
        max_w: Maximum allowed width in pixels.
        max_h: Maximum allowed height in pixels.

    Returns:
        The original object when no resize is needed, otherwise whatever
        ``image.resize`` returns for the new size.
    """
    cur_w = image.width
    cur_h = image.height

    # Guard clause: nothing to do when both dimensions already fit.
    # (The width must be compared against max_w, the height against max_h.)
    if cur_w <= max_w and cur_h <= max_h:
        return image

    # The tighter of the two constraints decides the scale factor.
    ratio = min(max_w / cur_w, max_h / cur_h)

    # Clamp to 1 px so extreme aspect ratios never request a 0-sized image.
    new_w = max(1, int(ratio * cur_w))
    new_h = max(1, int(ratio * cur_h))
    return image.resize((new_w, new_h))
|
|
|
|
|
|
def fitz_pixmap_to_pillow_with_resize(image, max_w=2048, max_h=2048):
    """Build a Pillow image from a pixmap-like object and shrink it to fit.

    Args:
        image: Object exposing ``width``, ``height`` and ``samples``
            (presumably a PyMuPDF/fitz ``Pixmap``, judging by the function
            name -- confirm against callers). ``samples`` is decoded as
            raw "RGB" data.
        max_w: Maximum width forwarded to ``resize``.
        max_h: Maximum height forwarded to ``resize``.

    Returns:
        The result of ``resize`` applied to the decoded Pillow image.
    """
    dimensions = [image.width, image.height]
    # NOTE(review): the mode is hard-coded to "RGB"; a source with a
    # different channel layout would presumably not decode correctly.
    pillow_image = Image.frombytes("RGB", dimensions, image.samples)
    return resize(pillow_image, max_w, max_h)
|
|
|
|
|
|
def get_first_page_pdf(filename, max_size=300):
    """Render the first page of a PDF to an image after rescaling the page.

    The first page is scaled so that the larger of its media-box upper-right
    coordinates corresponds to ``max_size`` millimetres, written to an
    in-memory single-page PDF, and rasterised with ``convert_from_bytes``.

    Args:
        filename: Value handed to ``PdfReader`` to open the document.
        max_size: Target length, in millimetres, of the page's longer side.

    Returns:
        The first element of the ``convert_from_bytes`` result when that
        result is a list, otherwise the result itself.
    """
    # PDF coordinates are expressed in user-space units rather than fixed
    # "points"; the default unit is 1/72 inch (PDF specification, section
    # 4.2.1 "Coordinate spaces"). With 1 inch = 25.4 mm this is the length
    # of one default unit in millimetres.
    mm_per_unit = (1 / 72 * 25.4)

    reader = PdfReader(filename)
    first_page = reader.pages[0]

    # The last two media-box entries are the upper-right corner; they are
    # used as width/height, which assumes the lower-left corner is (0, 0).
    upper_x, upper_y = first_page.mediabox[2:]
    scale = max_size / (float(max((upper_x, upper_y))) * mm_per_unit)
    first_page.scale_by(scale)

    # Serialise the scaled page into an in-memory, single-page PDF.
    writer = PdfWriter()
    writer.add_page(first_page)
    buffer = io.BytesIO()
    writer.write(buffer)

    rendered = convert_from_bytes(buffer.getvalue())
    if not isinstance(rendered, list):
        return rendered
    return rendered[0]
|