import io

from PIL import Image
from PyPDF2 import PdfReader, PdfWriter
from pdf2image import convert_from_bytes


def resize(image, max_w=2048, max_h=2048):
    """Shrink *image* to fit inside ``max_w`` x ``max_h``, keeping aspect ratio.

    Args:
        image: Object exposing ``width``, ``height`` and ``resize((w, h))``
            (e.g. a Pillow image).
        max_w: Maximum allowed width in pixels.
        max_h: Maximum allowed height in pixels.

    Returns:
        The original object unchanged when it already fits within both
        limits, otherwise the result of ``image.resize`` with both sides
        scaled by the same factor.
    """
    cur_w, cur_h = image.width, image.height

    # Width is checked against max_w and height against max_h; images that
    # already fit are never touched (and therefore never upscaled).
    if cur_w <= max_w and cur_h <= max_h:
        return image

    # The smaller ratio is the one that brings the most-oversized side
    # exactly onto its limit while the other side stays within bounds.
    ratio = min(max_w / cur_w, max_h / cur_h)

    # Truncation can yield 0 for extreme aspect ratios; keep at least 1 px.
    new_w = max(1, int(ratio * cur_w))
    new_h = max(1, int(ratio * cur_h))
    return image.resize((new_w, new_h))


def fitz_pixmap_to_pillow_with_resize(image, max_w=2048, max_h=2048):
    """Convert a pixmap-like object to a Pillow image and shrink it to fit.

    Args:
        image: Object exposing ``width``, ``height`` and ``samples`` (raw
            pixel bytes). NOTE(review): the name suggests a PyMuPDF
            ``Pixmap``; the samples are decoded as 3-channel RGB, so a pixmap
            with an alpha channel or another colorspace is presumably not
            supported - confirm against callers.
        max_w: Maximum allowed width in pixels.
        max_h: Maximum allowed height in pixels.

    Returns:
        A Pillow image in ``"RGB"`` mode, resized via :func:`resize`.
    """
    size = (image.width, image.height)
    pil_image = Image.frombytes("RGB", size, image.samples)
    return resize(pil_image, max_w, max_h)


def get_first_page_pdf(filename, max_size=300):
    """Render the first page of a PDF as an image after rescaling the page.

    The first page is scaled so that its longest side measures ``max_size``
    millimetres, written to an in-memory single-page PDF, and that PDF is
    handed to ``pdf2image.convert_from_bytes``.

    Args:
        filename: Passed straight to ``PdfReader``.
        max_size: Target length of the page's longest side, in millimetres.

    Returns:
        The first element of the ``convert_from_bytes`` result when that
        result is a list, otherwise the result itself.
    """

    def pdf_scale_page(page, size=297):
        """Scale *page* so that its longest side is *size* millimetres."""
        # Use the box extents rather than the upper-right corner coordinates:
        # the two only coincide when the lower-left corner is at (0, 0).
        box = page.mediabox
        longest_side = max(float(box.width), float(box.height))

        # PDF user-space units default to 1/72 inch (PDF specification,
        # section 4.2.1 "Coordinate spaces"), and 1 inch = 25.4 mm.
        mm_per_unit = 25.4 / 72
        page.scale_by(size / (longest_side * mm_per_unit))
        return page

    reader = PdfReader(filename)
    scaled_page = pdf_scale_page(reader.pages[0], max_size)

    # Serialize the single scaled page into memory for pdf2image.
    writer = PdfWriter()
    writer.add_page(scaled_page)
    buffer = io.BytesIO()
    writer.write(buffer)

    image = convert_from_bytes(buffer.getvalue())
    if isinstance(image, list):
        return image[0]
    return image