Files

101 lines
4.1 KiB
Python
Raw Permalink Normal View History

2025-07-11 20:07:36 +01:00
import os
import random
import shutil
def _label_name(image_name):
    """Return the ``.txt`` label filename that pairs with *image_name*."""
    # splitext only touches the final extension; str.replace would also
    # rewrite any earlier ".png" occurring inside the stem.
    return os.path.splitext(image_name)[0] + ".txt"


def _list_png_files(directory):
    """Return the ``.png`` filenames found in *directory*, sorted by name."""
    # os.listdir() yields entries in arbitrary order, so sort before the
    # seeded shuffle; otherwise the split differs between machines.
    return sorted(f for f in os.listdir(directory) if f.endswith(".png"))


def _split_train_val(files, train_ratio):
    """Split *files* into ``(train, val)`` with *train_ratio* going to train."""
    cut = round(len(files) * train_ratio)
    return files[:cut], files[cut:]


def _copy_split(files, prefix, source_dir, images_dir, labels_dir,
                label_source_dir=None):
    """Copy *files* into *images_dir* under *prefix* and produce their labels.

    When *label_source_dir* is given, the matching ``.txt`` label is copied
    from there if it exists. When it is ``None``, an empty label file is
    created for every image instead.
    """
    for name in files:
        prefixed = prefix + name
        shutil.copy(os.path.join(source_dir, name),
                    os.path.join(images_dir, prefixed))

        label_dest = os.path.join(labels_dir, _label_name(prefixed))
        if label_source_dir is None:
            # Image without annotations: write an empty label file.
            with open(label_dest, "w"):
                pass
            continue

        label_src = os.path.join(label_source_dir, _label_name(name))
        if os.path.exists(label_src):
            shutil.copy(label_src, label_dest)


def create_dataset_structure(base_dir="training", train_ratio=0.8, seed=42):
    """Build a train/val image + label layout and write ``dataset.yaml``.

    Reads ``.png`` images from ``<base_dir>/memory`` (labels are ``.txt``
    files with the same stem in the same directory) and from
    ``<base_dir>/no_memory`` (no labels; an empty label file is generated
    for each image). Every class is shuffled and split independently, then
    copied into ``<base_dir>/{train,val}/{images,labels}`` with a
    ``memory_`` / ``no_memory_`` filename prefix so names from the two
    source directories cannot collide. Finally ``dataset.yaml`` is written
    to the current working directory and a short summary is printed.

    Existing files in the destination directories are not removed.

    Args:
        base_dir: Dataset root containing ``memory`` and ``no_memory``.
        train_ratio: Fraction (0..1) of each class placed in the train split.
        seed: Seed for the shuffle, making the split reproducible.

    Raises:
        ValueError: If *train_ratio* is outside ``[0, 1]``.
        FileNotFoundError: If a source directory does not exist.
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio!r}")

    # Source locations (memory labels live alongside the memory images).
    source_memory = os.path.join(base_dir, "memory")
    source_no_memory = os.path.join(base_dir, "no_memory")

    # List the inputs first so a missing source directory fails before any
    # output directory is created.
    memory_files = _list_png_files(source_memory)
    no_memory_files = _list_png_files(source_no_memory)

    # Destination layout.
    train_imgs = os.path.join(base_dir, "train", "images")
    train_labels = os.path.join(base_dir, "train", "labels")
    val_imgs = os.path.join(base_dir, "val", "images")
    val_labels = os.path.join(base_dir, "val", "labels")
    for directory in (train_imgs, train_labels, val_imgs, val_labels):
        os.makedirs(directory, exist_ok=True)

    # A private generator keeps the split reproducible without reseeding
    # the process-wide `random` state.
    rng = random.Random(seed)
    rng.shuffle(memory_files)
    rng.shuffle(no_memory_files)

    train_memory, val_memory = _split_train_val(memory_files, train_ratio)
    train_no_memory, val_no_memory = _split_train_val(no_memory_files, train_ratio)

    # Images from "memory": copy image plus its label when one exists.
    _copy_split(train_memory, "memory_", source_memory,
                train_imgs, train_labels, label_source_dir=source_memory)
    _copy_split(val_memory, "memory_", source_memory,
                val_imgs, val_labels, label_source_dir=source_memory)

    # Images from "no_memory": copy image and create an empty label file.
    _copy_split(train_no_memory, "no_memory_", source_no_memory,
                train_imgs, train_labels)
    _copy_split(val_no_memory, "no_memory_", source_no_memory,
                val_imgs, val_labels)

    # Dataset description file.
    yaml_content = (
        f"path: {base_dir} # dataset root dir\n"
        "train: train/images # train images\n"
        "val: val/images # validation images\n"
        "nc: 1 # number of classes\n"
        "names: ['memory_module'] # class names\n"
    )
    with open("dataset.yaml", "w", encoding="utf-8") as f:
        f.write(yaml_content)

    print("Dataset structure created successfully!")
    print(f"- {len(train_memory) + len(train_no_memory)} images for training")
    print(f"- {len(val_memory) + len(val_no_memory)} images for validation")
# Build the dataset only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    create_dataset_structure()