"""Copy PDFs whose base name occurs in both invoice folders to an output folder.

The base names (file name without extension) of the regular files in
``folder1`` and ``folder2`` are compared.  For every name present in both
folders, ``<name>.pdf`` is copied from ``folder2`` into ``out_dir``.
"""

import os
import shutil
from pathlib import Path

folder1 = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/PV2/Invoice_v2_multi_page"
folder2 = "/mnt/hdd2T/AICR/Projects/2023/FI_Invoices/Invoice_v2_multi_page"
out_dir = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/PV2/Invoice_v2_multi_page_2"
out_dir = Path(out_dir)


def list_file_stems(folder):
    """Return the base names (extension stripped) of the regular files in *folder*.

    Sub-directories are ignored.  Only the last extension is removed, so
    ``"a.tar.gz"`` yields ``"a.tar"``.
    """
    return [
        os.path.splitext(entry)[0]
        for entry in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, entry))
    ]


def find_duplicate_names(names1, names2):
    """Return the set of names that occur in both *names1* and *names2*."""
    # Intersection, not symmetric difference: a duplicate is a name that is
    # present on BOTH sides.  The previous ``^`` operator selected the names
    # that were present on exactly one side.
    return set(names1) & set(names2)


def copy_duplicates(src1, src2, dest):
    """Copy ``<name>.pdf`` from *src2* into *dest* for every shared base name.

    Args:
        src1: First folder to compare.
        src2: Second folder to compare; the PDFs are copied from here.
        dest: Output folder; created (with parents) when it does not exist.

    Returns:
        The sorted list of base names whose PDF was actually copied.
    """
    dest = Path(dest)
    # ``exist_ok=True`` already covers the "folder is there" case, so no
    # separate ``exists()`` check is needed.
    dest.mkdir(parents=True, exist_ok=True)

    duplicates = find_duplicate_names(list_file_stems(src1), list_file_stems(src2))
    print(len(duplicates))

    copied = []
    # Sorted so that the printed report is reproducible between runs.
    for name in sorted(duplicates):
        print(f"Duplicate file name found: {name}")
        pdf_path = Path(src2) / (name + ".pdf")
        # Names are collected from files of any extension, so a shared name
        # does not guarantee that a PDF exists in ``src2``.
        if not pdf_path.is_file():
            print(f"Skipping {name}: {pdf_path} does not exist")
            continue
        shutil.copy(str(pdf_path), str(dest))
        copied.append(name)
    return copied


def main():
    """Run the comparison on the folders configured at the top of the file."""
    copy_duplicates(folder1, folder2, out_dir)


if __name__ == "__main__":
    main()