import os
import shutil


def split_folder_into_batches(input_folder: str, output_folder: str, n: int) -> None:
    """Copy images and their label files from a folder into ``n`` batch folders.

    Image files (``.jpg``, ``.jpeg``, ``.png``, matched case-insensitively) are
    listed from ``input_folder``, sorted by name, and split into ``n``
    contiguous groups. Group ``k`` is copied into
    ``<output_folder>/batch_<k>`` (numbered from 1) together with the label
    file that shares the image's base name and has a ``.txt`` extension.

    When the number of images is not a multiple of ``n``, the leftover images
    are spread one each over the first batches, so exactly ``n`` batch
    directories are produced and their sizes differ by at most one. A batch
    directory is created even if it ends up empty.

    Args:
        input_folder: Directory containing the images and ``.txt`` labels.
        output_folder: Directory under which ``batch_1`` .. ``batch_n`` are
            created (existing directories are reused).
        n: Number of batches; must be a positive integer.

    Raises:
        ValueError: If ``n`` is not positive.
        FileNotFoundError: If ``input_folder`` does not exist, or if an image
            has no matching ``.txt`` label (raised by ``shutil.copy`` after
            that image itself has already been copied).
    """
    if n <= 0:
        raise ValueError(f"n must be a positive integer, got {n!r}")

    image_extensions = ('.jpg', '.jpeg', '.png')
    image_files = sorted(
        f for f in os.listdir(input_folder)
        if f.lower().endswith(image_extensions)
    )
    print("len: ", len(image_files))

    # The first `extra` batches receive one additional image so that every
    # image lands in one of exactly `n` batches. A plain floor division would
    # overflow into an (n+1)-th batch, or divide by zero when there are fewer
    # images than batches.
    base_size, extra = divmod(len(image_files), n)

    start = 0
    for batch_number in range(1, n + 1):
        batch_dir = os.path.join(output_folder, f"batch_{batch_number}")
        os.makedirs(batch_dir, exist_ok=True)

        batch_size = base_size + (1 if batch_number <= extra else 0)
        for image_file in image_files[start:start + batch_size]:
            image_path = os.path.join(input_folder, image_file)

            # The label file shares the image's base name with a .txt suffix.
            image_name, _ = os.path.splitext(image_file)
            label_path = os.path.join(input_folder, f"{image_name}.txt")

            print(label_path, image_path, batch_dir)
            shutil.copy(image_path, batch_dir)
            shutil.copy(label_path, batch_dir)
        start += batch_size


if __name__ == "__main__":
    # Example usage:
    split_folder_into_batches(
        "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/pseudo_ocr/invoice_receipt_sbt",
        "/mnt/ssd1T/hoanglv/Projects/KIE/sdsvkie/workdirs/pseudo_ocr/invoice_receipt_sbt_split",
        3,
    )