import logging
import os
import shutil
from pathlib import Path

import torch
import yaml
from sklearn.model_selection import train_test_split
from ultralytics import YOLO

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Image file extensions (compared case-insensitively) accepted as training data.
IMAGE_SUFFIXES = frozenset({".jpg", ".jpeg", ".png"})


class DatasetPreparer:
    """Arrange the project's training images into a YOLO directory layout.

    Images are read from ``<project_root>/training/memory`` and
    ``<project_root>/training/no_memory`` and placed under
    ``<project_root>/yolo_dataset/{images,labels}/{train,val}``.
    """

    def __init__(self):
        # Get the project root
        self.project_root = Path(__file__).parent.parent
        self.training_dir = self.project_root / "training"
        self.output_dir = self.project_root / "yolo_dataset"
        logger.info(f"Looking for training data in: {self.training_dir}")
        logger.info(f"Output will be saved to: {self.output_dir}")

    @staticmethod
    def _list_images(directory):
        """Return the image files directly inside *directory*, sorted by path.

        A missing directory yields an empty list. Sorting makes the result
        independent of filesystem listing order, so the seeded train/val
        split is the same on every run and every machine.
        """
        if not directory.is_dir():
            return []
        return sorted(
            path
            for path in directory.iterdir()
            if path.is_file() and path.suffix.lower() in IMAGE_SUFFIXES
        )

    @staticmethod
    def _link_or_copy(src, dest):
        """Place *src* at *dest* unless *dest* already exists.

        A hard link is tried first; if the OS refuses (different filesystem,
        unsupported filesystem, missing privilege) the file is copied instead.
        """
        if dest.exists():
            return
        try:
            os.link(src, dest)
        except OSError:
            shutil.copy2(src, dest)

    def _populate_split(self, files, split):
        """Put each image in *files*, and its label file if any, into *split*.

        The label is looked up next to the image, with the same stem and a
        ``.txt`` suffix. Images without a label file are still added.
        """
        image_dir = self.output_dir / "images" / split
        label_dir = self.output_dir / "labels" / split
        for image in files:
            # NOTE: files are keyed by name only, so an image in "no_memory"
            # that shares a name with one in "memory" is skipped here.
            self._link_or_copy(image, image_dir / image.name)

            # Handle annotations if they exist
            label = image.with_suffix(".txt")
            if label.exists():
                self._link_or_copy(label, label_dir / label.name)

    def verify_dataset(self):
        """Check that images exist in the expected structure.

        Returns:
            The images from ``memory`` followed by those from ``no_memory``,
            each group sorted by path.

        Raises:
            FileNotFoundError: if ``memory`` contains no images. An empty
                ``no_memory`` directory only produces a warning.
        """
        memory_dir = self.training_dir / "memory"
        no_memory_dir = self.training_dir / "no_memory"
        memory_images = self._list_images(memory_dir)
        no_memory_images = self._list_images(no_memory_dir)

        if not memory_images:
            raise FileNotFoundError(f"No images found in {memory_dir}")
        if not no_memory_images:
            logger.warning(f"No images found in {no_memory_dir}")

        logger.info(
            f"Found {len(memory_images)} memory images and "
            f"{len(no_memory_images)} no_memory images"
        )
        return memory_images + no_memory_images

    def organize_yolo_dataset(self, test_size=0.2):
        """Organize the images into the YOLO directory structure.

        Creates the output directories, splits the images into train and
        validation sets with a fixed seed, links (or copies) images and label
        files into place, and writes ``dataset.yaml``.

        Args:
            test_size: Passed to ``train_test_split`` as the validation share.

        Returns:
            True on success, False if any step raised (the error is logged
            with its traceback).
        """
        try:
            all_images = self.verify_dataset()

            # Create directories
            for kind in ("images", "labels"):
                for split in ("train", "val"):
                    (self.output_dir / kind / split).mkdir(parents=True, exist_ok=True)

            # Split into train/val
            train_files, val_files = train_test_split(
                all_images, test_size=test_size, random_state=42
            )

            # NOTE: files left over from an earlier run are not removed. If
            # the source images change between runs, delete the output
            # directory first so no image ends up in both splits.
            self._populate_split(train_files, "train")
            self._populate_split(val_files, "val")

            # Create dataset YAML
            data = {
                'train': str(self.output_dir / "images/train"),
                'val': str(self.output_dir / "images/val"),
                'nc': 1,
                'names': ['memory_module'],
            }
            with open(self.output_dir / "dataset.yaml", 'w', encoding="utf-8") as f:
                yaml.dump(data, f)

            logger.info("YOLO dataset prepared successfully")
            return True
        except Exception as e:
            logger.exception("Error organizing dataset: %s", e)
            return False


def train_model(data_yaml=None):
    """Train a YOLO model using ultralytics.

    Args:
        data_yaml: Path to the dataset YAML file. Defaults to
            ``<project_root>/yolo_dataset/dataset.yaml``.

    Returns:
        True if training finished, False if it raised (the error is logged
        with its traceback).
    """
    if data_yaml is None:
        data_yaml = Path(__file__).parent.parent / "yolo_dataset/dataset.yaml"
    try:
        model = YOLO('yolov8n.pt')
        model.train(
            data=str(data_yaml),
            epochs=100,  # Reduced from 300 for local testing
            batch=2,  # Small batch size for limited VRAM
            imgsz=416,  # Reduced from 640 to save memory
            device='0' if torch.cuda.is_available() else 'cpu',
            augment=True,  # for small datasets
            patience=20,  # Early stopping if no improvement
            lr0=0.001,  # Learning rate
            cos_lr=True,  # Cosine learning rate scheduler
            workers=1,  # Reduce memory errors
            cache=False,  # Disable cache if low on disk space
            single_cls=True,
            optimizer='AdamW',  # For small datasets
            seed=42,
            pretrained=True,  # Using pretrained weights
        )
        logger.info("Training completed successfully")
        return True
    except Exception as e:
        logger.exception("Training failed: %s", e)
        return False


if __name__ == "__main__":
    try:
        preparer = DatasetPreparer()
        # Only train when the dataset was prepared successfully.
        if preparer.organize_yolo_dataset():
            train_model()
    except Exception as e:
        logger.exception("Fatal error: %s", e)