32 lines
1.1 KiB
Python
Executable File
32 lines
1.1 KiB
Python
Executable File
import os
|
|
import shutil
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
# Compare two folders by file stem (file name without its extension) and copy,
# from folder2 into out_dir, the PDF of every stem that is NOT shared by both
# folders.
folder1 = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/PV2/Invoice_v2_multi_page"
folder2 = "/mnt/hdd2T/AICR/Projects/2023/FI_Invoices/Invoice_v2_multi_page"

out_dir = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/PV2/Invoice_v2_multi_page_2"
out_dir = Path(out_dir)
# exist_ok=True already makes this a no-op when the directory exists, so no
# separate exists() check is needed.
out_dir.mkdir(parents=True, exist_ok=True)

# Regular files (sub-directories excluded) directly inside each folder.
files1 = [f for f in os.listdir(folder1) if os.path.isfile(os.path.join(folder1, f))]
files2 = [f for f in os.listdir(folder2) if os.path.isfile(os.path.join(folder2, f))]

# File stems: names with the last extension removed.
names1 = [os.path.splitext(f)[0] for f in files1]
names2 = [os.path.splitext(f)[0] for f in files2]

# `^` is the symmetric difference: stems that occur in exactly one of the two
# folders.
# NOTE(review): the variable name and the message below call these
# "duplicates"; if stems present in BOTH folders were intended, this should be
# `&` instead - confirm with the author.
duplicates = set(names1) ^ set(names2)
print(len(duplicates))

# Sorted so the output order is stable between runs (set order is not).
for d in sorted(duplicates):
    print(f"Duplicate file name found: {d}")
    pdf_path = Path(folder2) / (d + ".pdf")
    # The set also contains stems that exist only in folder1, and stems whose
    # folder2 file is not a PDF. Neither has a source file here, and copying
    # would raise FileNotFoundError and abort the remaining copies.
    if not pdf_path.is_file():
        print(f"Skipping {d}: {pdf_path} does not exist")
        continue
    shutil.copy(str(pdf_path), str(out_dir))
|
|
|