import io

from PyPDF2 import PdfReader, PdfWriter
from PIL import Image
from pdf2image import convert_from_bytes

# PDF user-space units default to 1/72 inch (see "Coordinate spaces" in the
# PDF specification), and 1 inch = 25.4 mm.
_MM_PER_PDF_UNIT = 25.4 / 72


def _scale_page_to_mm(page, size=297):
    """Scale *page* in place so that its longest side measures *size* mm.

    Args:
        page: A PyPDF2 page object exposing ``mediabox`` and ``scale_by``.
        size: Target length of the longest page side, in millimetres.

    Returns:
        The same page object, after scaling.
    """
    # The media box is [llx, lly, urx, ury]; its origin is not guaranteed to
    # be (0, 0), so the dimensions are differences, not the upper-right corner.
    llx, lly, urx, ury = (float(coord) for coord in page.mediabox)
    longest_side_mm = max(urx - llx, ury - lly) * _MM_PER_PDF_UNIT
    page.scale_by(size / longest_side_mm)
    return page


def get_first_page_pdf(filename, max_size=2048):
    """Render the first page of a PDF as an image.

    The page is first rescaled so that its longest side is *max_size*
    millimetres, then written to an in-memory single-page PDF and rasterised
    with ``pdf2image.convert_from_bytes`` at that function's default
    resolution.

    Args:
        filename: Path of the PDF to read (passed straight to ``PdfReader``).
        max_size: Length of the longest side of the scaled page, in
            millimetres.

    Returns:
        The rendered first page, as returned by ``convert_from_bytes``.

    Raises:
        IndexError: If the PDF contains no pages.
    """
    reader = PdfReader(filename)
    first_page = _scale_page_to_mm(reader.pages[0], max_size)

    # Build a one-page PDF in memory so that only this page is rasterised.
    writer = PdfWriter()
    writer.add_page(first_page)
    buffer = io.BytesIO()
    writer.write(buffer)

    # convert_from_bytes returns one image per page; there is exactly one here.
    rendered_pages = convert_from_bytes(buffer.getvalue())
    return rendered_pages[0]


if __name__ == "__main__":
    img = get_first_page_pdf(
        "test_samples/20220303025923NHNE_20220222_Starhub_Order_Confirmation_by_Email.pdf",
        max_size=300,
    )
    img.save("invoice.jpg", "JPEG")