sbt-idp/cope2n-ai-fi/modules/sdsvkie/scripts/common/split_batches.py
2023-12-12 15:14:54 +07:00

35 lines
1.6 KiB
Python
Executable File

import os
import shutil
def split_folder_into_batches(input_folder, output_folder, n):
    """Copy images and their label files from one folder into ``n`` batches.

    Files in ``input_folder`` whose names end with ``.jpg``, ``.jpeg`` or
    ``.png`` (case-insensitive) are sorted by name and divided into ``n``
    contiguous groups whose sizes differ by at most one. Each image, followed
    by the ``.txt`` file sharing its base name, is copied into
    ``output_folder/batch_<k>`` with ``k`` running from 1 to ``n``.

    Args:
        input_folder: Directory holding the images and their ``.txt`` labels.
        output_folder: Directory under which ``batch_1`` ... ``batch_n`` are
            created (already existing directories are reused).
        n: Number of batches to produce; must be at least 1.

    Raises:
        ValueError: If ``n`` is smaller than 1.
        FileNotFoundError: If ``input_folder`` does not exist, or an image
            has no matching ``.txt`` file next to it.
    """
    if n < 1:
        raise ValueError(f"n must be at least 1, got {n}")

    # Sorted so that the image-to-batch assignment is reproducible.
    image_files = sorted(
        f for f in os.listdir(input_folder)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    total = len(image_files)
    print("len: ", total)

    # Always create all n batch directories, even if some stay empty.
    batch_dirs = [os.path.join(output_folder, f"batch_{i+1}") for i in range(n)]
    for batch_dir in batch_dirs:
        os.makedirs(batch_dir, exist_ok=True)

    for i, image_file in enumerate(image_files):
        # i * n // total is always in [0, n - 1]: the remainder is spread over
        # the batches instead of spilling into an extra "batch_{n+1}", and
        # there is no division by zero when total < n.
        batch_dir = batch_dirs[i * n // total]

        # The label shares the image's base name and lives in the same folder.
        image_name, _ = os.path.splitext(image_file)
        label_path = os.path.join(input_folder, f"{image_name}.txt")
        image_path = os.path.join(input_folder, image_file)

        print(label_path, image_path, batch_dir)
        shutil.copy(image_path, batch_dir)
        shutil.copy(label_path, batch_dir)
# Example usage. Guarded so that importing this module for
# split_folder_into_batches does not start copying files from a
# machine-specific path; running the file as a script behaves as before.
if __name__ == "__main__":
    split_folder_into_batches(
        "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/pseudo_ocr/invoice_receipt_sbt",
        "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/pseudo_ocr/invoice_receipt_sbt_split",
        3,
    )