46 lines
1.4 KiB
Python
Executable File
46 lines
1.4 KiB
Python
Executable File
import fitz # PyMuPDF, imported as fitz for backward compatibility reasons
|
|
import os
|
|
import glob
|
|
from tqdm import tqdm
|
|
import argparse
|
|
import cv2
|
|
from PIL import Image
|
|
|
|
|
|
def convert_pdf2image(file_path, outdir, img_max_size=None):
    """Render every page of a PDF to a grayscale PNG inside ``outdir``.

    Pages are rasterized with a 2x zoom matrix (2x PyMuPDF's 72 dpi base
    resolution) and written as ``<pdf stem>_<page index>.png``, where the
    page index starts at 0.

    Args:
        file_path: Path of the PDF document to convert.
        outdir: Directory receiving the PNG files; created when missing.
        img_max_size: Accepted for interface compatibility; currently unused.
    """
    # exist_ok avoids the check-then-create race when several converters
    # target the same output directory.
    os.makedirs(outdir, exist_ok=True)

    stem = os.path.splitext(os.path.basename(file_path))[0]
    zoom = 2  # zoom factor relative to the 72 dpi default
    magnify = fitz.Matrix(zoom, zoom)

    # The context manager closes the document even if rendering fails.
    with fitz.open(file_path) as doc:
        for idx, page in enumerate(doc):
            # Default get_pixmap output is RGB without alpha, so the raw
            # samples can be handed to PIL directly.
            pix = page.get_pixmap(matrix=magnify)
            outpath = os.path.join(outdir, f"{stem}_{idx}.png")

            # Convert to 8-bit grayscale in memory instead of writing the
            # colour PNG, re-reading it and overwriting it.
            rgb = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            rgb.convert("L").save(outpath)
|
|
|
|
|
|
def main():
    """Convert every ``*.pdf`` directly inside ``--pdf_dir`` to page images.

    Output PNGs are written to ``--out_dir`` by ``convert_pdf2image``. Both
    options are required, so a missing one yields an argparse usage error
    instead of a ``TypeError`` further down.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--pdf_dir", type=str, required=True)
    parser.add_argument("--out_dir", type=str, required=True)
    args = parser.parse_args()

    # Sorted so the processing order (and the preview below) is deterministic.
    pdf_paths = sorted(glob.glob(os.path.join(args.pdf_dir, "*.pdf")))
    print(pdf_paths[:5])

    for pdf_path in tqdm(pdf_paths):
        convert_pdf2image(pdf_path, args.out_dir)


if __name__ == "__main__":
    main()
|