import os
import random
import shutil

# Contents of the dataset description written next to the ``training`` folder.
# Paths inside it are relative to ``path``.
_DATASET_YAML = """path: training # dataset root dir
train: train/images # train images
val: val/images # validation images
nc: 1 # number of classes
names: ['memory_module'] # class names
"""


def _list_pngs(directory):
    """Return the sorted ``.png`` filenames found directly in *directory*.

    Sorting matters: ``os.listdir`` returns entries in arbitrary,
    filesystem-dependent order, so a seeded shuffle is only reproducible
    when it starts from a deterministic ordering.
    """
    return sorted(f for f in os.listdir(directory) if f.endswith(".png"))


def _split_files(files, train_fraction):
    """Return ``(train, val)`` slices of *files*, cut at *train_fraction*."""
    cut = int(len(files) * train_fraction)
    return files[:cut], files[cut:]


def _copy_split(files, src_imgs, dst_imgs, dst_labels, prefix, src_labels=None):
    """Copy *files* into *dst_imgs* under *prefix* and produce their labels.

    When *src_labels* is given, the matching ``.txt`` label is copied if it
    exists (images without a label are copied without one). When it is
    ``None``, an empty label file is created for every image instead.
    """
    for name in files:
        shutil.copy(
            os.path.join(src_imgs, name),
            os.path.join(dst_imgs, prefix + name),
        )

        # splitext only touches the real extension; str.replace would also
        # rewrite a ".png" that happens to appear inside the stem.
        stem = os.path.splitext(name)[0]
        label_dst = os.path.join(dst_labels, prefix + stem + ".txt")

        if src_labels is None:
            # Opening for writing and closing immediately leaves an empty file.
            with open(label_dst, "w", encoding="utf-8"):
                pass
            continue

        label_src = os.path.join(src_labels, stem + ".txt")
        if os.path.exists(label_src):
            shutil.copy(label_src, label_dst)


def create_dataset_structure(*, train_fraction=0.8, seed=42):
    """Build the train/val folder layout and ``dataset.yaml`` from raw images.

    Reads ``.png`` images from ``training/memory`` (labels are ``.txt``
    files with the same stem in the same folder) and ``training/no_memory``
    (no labels). Each class is shuffled and split independently, then
    copied into ``training/{train,val}/{images,labels}`` with a ``memory_``
    or ``no_memory_`` filename prefix so the two sources cannot collide.
    Every ``no_memory`` image gets an empty label file. Finally
    ``dataset.yaml`` is written to the current working directory and a
    short summary is printed.

    All paths are relative to the current working directory. Files already
    present in the destination folders are left in place (same-named files
    are overwritten).

    Args:
        train_fraction: Share of each class that goes to the training set,
            between 0 and 1 inclusive. The cut index is
            ``int(len(files) * train_fraction)``; the default 0.8 yields the
            80/20 split (16 of 20 images).
        seed: Seed for the private random generator used for shuffling.

    Raises:
        ValueError: If *train_fraction* is outside ``[0, 1]``.
        FileNotFoundError: If a source directory does not exist (raised by
            ``os.listdir``).
    """
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError(
            f"train_fraction must be between 0 and 1, got {train_fraction!r}"
        )

    # Source locations; memory labels live alongside the memory images.
    source_memory_imgs = "training/memory"
    source_memory_labels = "training/memory"
    source_no_memory_imgs = "training/no_memory"

    # Destination layout.
    train_imgs = "training/train/images"
    train_labels = "training/train/labels"
    val_imgs = "training/val/images"
    val_labels = "training/val/labels"

    for directory in (train_imgs, train_labels, val_imgs, val_labels):
        os.makedirs(directory, exist_ok=True)

    memory_img_files = _list_pngs(source_memory_imgs)
    no_memory_img_files = _list_pngs(source_no_memory_imgs)

    # A private generator keeps the split reproducible without reseeding the
    # process-wide ``random`` state as a side effect.
    rng = random.Random(seed)
    rng.shuffle(memory_img_files)
    rng.shuffle(no_memory_img_files)

    train_memory_files, val_memory_files = _split_files(
        memory_img_files, train_fraction
    )
    train_no_memory_files, val_no_memory_files = _split_files(
        no_memory_img_files, train_fraction
    )

    # Positive samples: copy image plus its label when one exists.
    _copy_split(train_memory_files, source_memory_imgs, train_imgs,
                train_labels, "memory_", src_labels=source_memory_labels)
    _copy_split(val_memory_files, source_memory_imgs, val_imgs,
                val_labels, "memory_", src_labels=source_memory_labels)

    # Negative samples: copy image and create an empty label file.
    _copy_split(train_no_memory_files, source_no_memory_imgs, train_imgs,
                train_labels, "no_memory_")
    _copy_split(val_no_memory_files, source_no_memory_imgs, val_imgs,
                val_labels, "no_memory_")

    with open("dataset.yaml", "w", encoding="utf-8") as f:
        f.write(_DATASET_YAML)

    print("Dataset structure created successfully!")
    print(f"- {len(train_memory_files) + len(train_no_memory_files)} images for training")
    print(f"- {len(val_memory_files) + len(val_no_memory_files)} images for validation")


if __name__ == "__main__":
    create_dataset_structure()