35 lines
1.6 KiB
Python
Executable File
35 lines
1.6 KiB
Python
Executable File
import os
|
|
import shutil
|
|
|
|
def split_folder_into_batches(input_folder, output_folder, n):
    """Copy the images of ``input_folder`` and their label files into ``n`` batches.

    Image files (``.jpg``, ``.jpeg``, ``.png``, matched case-insensitively) are
    sorted by name and split into ``n`` contiguous groups. Group ``k`` is copied
    to ``<output_folder>/batch_<k+1>``. For every image, the ``.txt`` file with
    the same base name in ``input_folder`` is copied alongside it.

    Args:
        input_folder: Directory holding the images and their ``.txt`` labels.
        output_folder: Directory in which ``batch_1`` .. ``batch_n`` are created.
        n: Number of batches to produce; must be a positive integer.

    Raises:
        ValueError: If ``n`` is not positive.
    """
    if n <= 0:
        raise ValueError(f"n must be a positive integer, got {n!r}")

    # Collect the image files in a deterministic (sorted) order.
    image_files = sorted(
        f for f in os.listdir(input_folder)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    total = len(image_files)
    print("len: ", total)

    # Create every batch directory up front so that exactly n folders exist,
    # even when there are fewer images than batches.
    batch_dirs = [os.path.join(output_folder, f"batch_{i+1}") for i in range(n)]
    for batch_dir in batch_dirs:
        os.makedirs(batch_dir, exist_ok=True)

    for i, image_file in enumerate(image_files):
        # i * n // total always lies in [0, n - 1], so leftover images are
        # spread over the existing batches instead of spilling into an extra
        # "batch_{n+1}" folder, and no division by a zero batch size can occur.
        # When total is divisible by n this equals i // (total // n).
        batch_dir = batch_dirs[i * n // total]

        # The label file shares the image's base name with a .txt extension.
        image_name, _ = os.path.splitext(image_file)
        image_path = os.path.join(input_folder, image_file)
        label_path = os.path.join(input_folder, f"{image_name}.txt")

        print(label_path, image_path, batch_dir)
        shutil.copy(image_path, batch_dir)
        try:
            shutil.copy(label_path, batch_dir)
        except FileNotFoundError:
            # An unlabeled image should not abort the run half-way through.
            print(f"warning: label file not found, skipped: {label_path}")
|
|
|
|
# Example usage: split the pseudo-OCR invoice/receipt folder into 3 batches.
split_folder_into_batches(
    input_folder="/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/pseudo_ocr/invoice_receipt_sbt",
    output_folder="/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/pseudo_ocr/invoice_receipt_sbt_split",
    n=3,
)
|