35 lines
1.2 KiB
Python
35 lines
1.2 KiB
Python
from PyPDF2 import PdfReader, PdfWriter
|
|
from PIL import Image
|
|
from pdf2image import convert_from_bytes
|
|
|
|
|
|
def get_first_page_pdf(filename, max_size=2048):
    """Render the first page of a PDF as a PIL image.

    The first page is scaled so that its longer side measures ``max_size``
    millimetres, written to an in-memory single-page PDF, and rasterised
    with ``pdf2image.convert_from_bytes``.

    Args:
        filename: PDF source, passed unchanged to ``PdfReader``.
        max_size: Target length of the page's longer side, in millimetres
            (assuming the default PDF user unit of 1/72 inch).

    Returns:
        The image of the rendered first page.
    """
    # Imported here because the module's import block does not provide
    # ``io``; without it ``io.BytesIO()`` below raises NameError.
    import io

    def pdf_scale_page(page, size=297):
        """Scale ``page`` so its longer side is ``size`` mm and return it."""
        # Use the media box's real extent: its upper-right corner equals
        # (width, height) only when the lower-left corner is at the origin.
        box = page.mediabox
        longest_side = max(float(box.width), float(box.height))
        # User-space units are not necessarily "points"; they default to
        # 1/72 inch (PDF specification, section 4.2.1 "Coordinate spaces"),
        # and 1 inch = 25.4 mm.
        mm_per_unit = 25.4 / 72
        page.scale_by(size / (longest_side * mm_per_unit))
        return page

    reader = PdfReader(filename)
    scaled_page = pdf_scale_page(reader.pages[0], max_size)

    # Serialise the single scaled page into memory for the rasteriser.
    buffer = io.BytesIO()
    writer = PdfWriter()
    writer.add_page(scaled_page)
    writer.write(buffer)

    # getvalue() returns the whole buffer regardless of the stream position,
    # so no seek(0) is needed before handing the bytes over.
    rendered = convert_from_bytes(buffer.getvalue())
    # Only one page was written, so the first element is the page we want.
    if isinstance(rendered, list):
        return rendered[0]
    return rendered
|
|
|
|
# Demo run: rasterise the first page of the sample order confirmation and
# store the result as a JPEG in the working directory.
sample_pdf = "test_samples/20220303025923NHNE_20220222_Starhub_Order_Confirmation_by_Email.pdf"
img = get_first_page_pdf(sample_pdf, max_size=300)
img.save("invoice.jpg", format="JPEG")
|