Files
ds_task_recycling_project/backend/train.py
T

133 lines
5.3 KiB
Python
Raw Normal View History

2025-07-17 00:03:03 +01:00
import os
import yaml
from pathlib import Path
from ultralytics import YOLO
import logging
from sklearn.model_selection import train_test_split
import torch
# Module-level logger; the root logger is configured to emit INFO and above.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
class DatasetPreparer:
    """Arrange the raw training images into a YOLO-style directory layout.

    Images are read from ``<project_root>/training/memory`` and
    ``<project_root>/training/no_memory``. The prepared dataset is written to
    ``<project_root>/yolo_dataset`` as ``images/{train,val}`` and
    ``labels/{train,val}`` plus a ``dataset.yaml`` description file.
    """

    def __init__(self):
        # The project root is the directory above the one containing this file.
        self.project_root = Path(__file__).parent.parent
        self.training_dir = self.project_root / "training"
        self.output_dir = self.project_root / "yolo_dataset"
        logger.info(f"Looking for training data in: {self.training_dir}")
        logger.info(f"Output will be saved to: {self.output_dir}")

    @staticmethod
    def _find_images(folder):
        """Return the .jpg files followed by the .png files (any letter case) in *folder*."""
        return list(folder.glob("*.[jJ][pP][gG]")) + list(folder.glob("*.[pP][nN][gG]"))

    @staticmethod
    def _link_or_copy(src, dest):
        """Make *src* available at *dest* without overwriting an existing file.

        A hard link is attempted first because it costs no extra disk space.
        If the link cannot be created (for example the two paths are on
        different volumes or the filesystem has no hard-link support) the file
        is copied instead.

        Returns:
            True if *dest* was created by this call, False if it already existed.
        """
        import shutil

        if dest.exists():
            return False
        try:
            os.link(str(src), str(dest))
        except FileExistsError:
            # Something created dest after the check above; leave it untouched.
            return False
        except (OSError, NotImplementedError):
            shutil.copy2(str(src), str(dest))
        return True

    def _populate_split(self, files, split):
        """Place every image in *files*, and its label file if present, under *split*.

        *split* is the sub-directory name (``"train"`` or ``"val"``). A label is
        the ``.txt`` file with the same stem next to the source image; images
        without one are placed without a label.
        """
        images_dir = self.output_dir / "images" / split
        labels_dir = self.output_dir / "labels" / split
        for image in files:
            # NOTE(review): files are placed by bare name, so two images with the
            # same name in memory/ and no_memory/ collide and the later one is
            # skipped - confirm the source folders never share file names.
            self._link_or_copy(image, images_dir / image.name)
            label = image.with_suffix('.txt')
            if label.exists():
                self._link_or_copy(label, labels_dir / label.name)

    def verify_dataset(self):
        """Collect the source images and check that the dataset is usable.

        Returns:
            The ``memory`` images followed by the ``no_memory`` images.

        Raises:
            FileNotFoundError: if the ``memory`` folder contains no images.
                An empty ``no_memory`` folder only produces a warning.
        """
        memory_images = self._find_images(self.training_dir / "memory")
        no_memory_images = self._find_images(self.training_dir / "no_memory")
        if not memory_images:
            raise FileNotFoundError(f"No images found in {self.training_dir/'memory/'}")
        if not no_memory_images:
            logger.warning(f"No images found in {self.training_dir/'no_memory/'}")
        logger.info(f"Found {len(memory_images)} memory images and {len(no_memory_images)} no_memory images")
        return memory_images + no_memory_images

    def organize_yolo_dataset(self, test_size=0.2):
        """Build the YOLO directory structure and write ``dataset.yaml``.

        Args:
            test_size: share of the images assigned to the validation split,
                passed straight to ``train_test_split``.

        Returns:
            True when the dataset was prepared, False when any step failed
            (the error is logged with its traceback instead of being raised).
        """
        try:
            all_images = self.verify_dataset()

            # Create the images/ and labels/ folders for both splits.
            for split in ("train", "val"):
                (self.output_dir / "images" / split).mkdir(parents=True, exist_ok=True)
                (self.output_dir / "labels" / split).mkdir(parents=True, exist_ok=True)

            # Fixed random_state keeps the split reproducible between runs.
            train_files, val_files = train_test_split(all_images, test_size=test_size, random_state=42)

            # Hard-link (or copy) the images and any annotation files.
            self._populate_split(train_files, "train")
            self._populate_split(val_files, "val")

            # Create dataset YAML
            data = {
                'train': str(self.output_dir / "images/train"),
                'val': str(self.output_dir / "images/val"),
                'nc': 1,
                'names': ['memory_module']
            }
            with open(self.output_dir / "dataset.yaml", 'w', encoding="utf-8") as f:
                yaml.dump(data, f)
            logger.info("YOLO dataset prepared successfully")
            return True
        except Exception as e:
            # logger.exception keeps the traceback that logger.error discarded.
            logger.exception(f"Error organizing dataset: {e}")
            return False
def train_model(data_yaml=None, epochs=100, batch=2, imgsz=416):
    """Train a YOLO model using ultralytics.

    Starts from the ``yolov8n.pt`` checkpoint and trains on CUDA device 0 when
    available, otherwise on the CPU.

    Args:
        data_yaml: path to the dataset description file. Defaults to
            ``<project_root>/yolo_dataset/dataset.yaml``, where the project
            root is the directory above the one containing this file.
        epochs: number of training epochs (100: reduced from 300 for local
            testing).
        batch: batch size (2: small batch size for limited VRAM).
        imgsz: training image size (416: reduced from 640 to save memory).

    Returns:
        True when training finished, False when it raised (the error is
        logged with its traceback instead of being propagated).
    """
    if data_yaml is None:
        data_yaml = Path(__file__).parent.parent / "yolo_dataset/dataset.yaml"
    try:
        model = YOLO('yolov8n.pt')
        model.train(
            data=str(data_yaml),
            epochs=epochs,
            batch=batch,
            imgsz=imgsz,
            device='0' if torch.cuda.is_available() else 'cpu',
            augment=True,  # for small datasets
            patience=20,  # Early stopping if no improvement
            lr0=0.001,  # Learning rate
            cos_lr=True,  # Cosine learning rate scheduler
            workers=1,  # Reduce if memory errors
            cache=False,  # Disable cache if low on disk space
            single_cls=True,
            optimizer='AdamW',  # For small datasets
            seed=42,
            pretrained=True  # Using pretrained weights
        )
        logger.info("Training completed successfully")
        return True
    except Exception as e:
        # logger.exception keeps the traceback that logger.error discarded.
        logger.exception(f"Training failed: {e}")
        return False
if __name__ == "__main__":
    # Prepare the dataset, then train. Any failure ends the process with a
    # non-zero exit status so that shells and CI jobs can detect it.
    try:
        preparer = DatasetPreparer()
        succeeded = preparer.organize_yolo_dataset() and train_model()
    except Exception as e:
        logger.error(f"Fatal error: {str(e)}")
        succeeded = False
    if not succeeded:
        raise SystemExit(1)