sbt-idp/cope2n-ai-fi/modules/sdsvkie/scripts/common/get_more_data.py

32 lines
1.1 KiB
Python
Raw Normal View History

2023-12-12 08:14:54 +00:00
import os
import shutil
from pathlib import Path
# Copy the PDFs that exist in ``folder2`` but have no same-named file in
# ``folder1`` into ``out_dir``. Files are matched by base name (extension
# stripped), so "a.pdf" in one folder matches "a.json" in the other.
folder1 = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/PV2/Invoice_v2_multi_page"
folder2 = "/mnt/hdd2T/AICR/Projects/2023/FI_Invoices/Invoice_v2_multi_page"
out_dir = "/mnt/ssd1T/hoanglv/Projects/KIE/DATA/dev_model/Invoice/processed/test/PV2/Invoice_v2_multi_page_2"
out_dir = Path(out_dir)
# ``exist_ok=True`` already tolerates an existing directory, so a separate
# ``exists()`` check is unnecessary.
out_dir.mkdir(parents=True, exist_ok=True)

# Regular files (sub-directories are skipped) directly inside each folder.
files1 = [f for f in os.listdir(folder1) if os.path.isfile(os.path.join(folder1, f))]
files2 = [f for f in os.listdir(folder2) if os.path.isfile(os.path.join(folder2, f))]

# Base names with the final extension removed.
names1 = [os.path.splitext(f)[0] for f in files1]
names2 = [os.path.splitext(f)[0] for f in files2]

# Names present in folder2 but missing from folder1.
# NOTE: despite the variable name and the log message below, these are NOT
# duplicates -- they are the names the two folders do not share. The previous
# symmetric difference (``^``) also included names found only in folder1; those
# have no file in folder2, so the copy raised FileNotFoundError.
duplicates = set(names2) - set(names1)
print(len(duplicates))

# Sorted so the log output and copy order are reproducible between runs.
for d in sorted(duplicates):
    print(f"Duplicate file name found: {d}")
    pdf_path = Path(folder2) / (d + ".pdf")
    # The name may come from a non-PDF file in folder2; skip it explicitly
    # instead of aborting the whole run.
    if not pdf_path.is_file():
        print(f"Skipping {d}: {pdf_path} does not exist")
        continue
    shutil.copy(str(pdf_path), str(out_dir))