133 lines
5.3 KiB
Python
133 lines
5.3 KiB
Python
|
|
import logging
import os
import shutil
from pathlib import Path

import torch
import yaml
from sklearn.model_selection import train_test_split
from ultralytics import YOLO
|
||
|
|
|
||
|
|
# Setup logging: configure the root logger at INFO level and create the
# module-level logger used by everything below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
||
|
|
|
||
|
|
|
||
|
|
class DatasetPreparer:
    """Arrange the project's raw training images into a YOLO dataset layout.

    Images are read from ``<project_root>/training/memory`` and
    ``<project_root>/training/no_memory``. The prepared dataset
    (``images/{train,val}``, ``labels/{train,val}`` and ``dataset.yaml``)
    is written to ``<project_root>/yolo_dataset``.
    """

    def __init__(self):
        # Get the project root. resolve() keeps this correct when __file__ is
        # a relative path, where ".parent.parent" would collapse to ".".
        self.project_root = Path(__file__).resolve().parent.parent
        self.training_dir = self.project_root / "training"
        self.output_dir = self.project_root / "yolo_dataset"

        logger.info(f"Looking for training data in: {self.training_dir}")
        logger.info(f"Output will be saved to: {self.output_dir}")

    @staticmethod
    def _list_images(folder):
        """Return the .jpg and .png files (any letter case) directly in *folder*."""
        # glob() yields entries in no guaranteed order; sorting keeps the
        # seeded train/val split reproducible between runs.
        jpgs = sorted(folder.glob("*.[jJ][pP][gG]"))
        pngs = sorted(folder.glob("*.[pP][nN][gG]"))
        return jpgs + pngs

    @staticmethod
    def _link_or_copy(src, dest):
        """Hard-link *src* to *dest*, copying when a link cannot be created.

        An existing *dest* is left untouched, so re-running is harmless.
        NOTE: two source files with the same name map to the same
        destination; only the first one processed is kept.
        """
        if dest.exists():
            return
        try:
            os.link(str(src), str(dest))
        except OSError:
            # Hard links fail across filesystems and on filesystems that do
            # not support them; fall back to a plain copy.
            shutil.copy2(str(src), str(dest))

    def _populate_split(self, split, images):
        """Place *images* and their sibling ``.txt`` labels into *split*."""
        image_dir = self.output_dir / "images" / split
        label_dir = self.output_dir / "labels" / split
        for image in images:
            self._link_or_copy(image, image_dir / image.name)

            # Handle annotations if they exist
            label = image.with_suffix('.txt')
            if label.exists():
                self._link_or_copy(label, label_dir / label.name)

    def verify_dataset(self):
        """Check that images exist in the expected folder structure.

        Returns:
            list of Path: images from ``memory/`` followed by those from
            ``no_memory/``.

        Raises:
            FileNotFoundError: if ``memory/`` holds no .jpg or .png image.
            An empty ``no_memory/`` only produces a warning.
        """
        memory_images = self._list_images(self.training_dir / "memory")
        no_memory_images = self._list_images(self.training_dir / "no_memory")

        if not memory_images:
            raise FileNotFoundError(f"No images found in {self.training_dir/'memory/'}")
        if not no_memory_images:
            logger.warning(f"No images found in {self.training_dir/'no_memory/'}")

        logger.info(f"Found {len(memory_images)} memory images and {len(no_memory_images)} no_memory images")
        return memory_images + no_memory_images

    def organize_yolo_dataset(self, test_size=0.2):
        """Organize the training images into the YOLO directory structure.

        Args:
            test_size: share of the images assigned to the validation split,
                passed straight to ``train_test_split``.

        Returns:
            bool: True when the dataset and ``dataset.yaml`` were written,
            False if anything failed. The exception is logged with its
            traceback and not propagated.
        """
        try:
            all_images = self.verify_dataset()

            # Create directories
            for kind in ("images", "labels"):
                for split in ("train", "val"):
                    (self.output_dir / kind / split).mkdir(parents=True, exist_ok=True)

            # Split into train/val
            train_files, val_files = train_test_split(
                all_images, test_size=test_size, random_state=42
            )

            # Hard-link (or copy) images and labels into their split
            self._populate_split("train", train_files)
            self._populate_split("val", val_files)

            # Create dataset YAML
            data = {
                'train': str(self.output_dir / "images/train"),
                'val': str(self.output_dir / "images/val"),
                'nc': 1,
                'names': ['memory_module']
            }

            with open(self.output_dir / "dataset.yaml", 'w', encoding="utf-8") as f:
                yaml.dump(data, f)

            logger.info("YOLO dataset prepared successfully")
            return True

        except Exception as e:
            # Boundary handler: callers rely on a bool result, so log the
            # full traceback here instead of re-raising.
            logger.exception("Error organizing dataset: %s", e)
            return False
|
||
|
|
|
||
|
|
|
||
|
|
def train_model():
    """Train a YOLO model with ultralytics on the prepared dataset.

    Starts from the ``yolov8n.pt`` checkpoint and trains on
    ``<project_root>/yolo_dataset/dataset.yaml``, where the project root is
    the parent of this file's directory.

    Returns:
        bool: True when training finishes, False if any exception was
        raised. The exception is logged with its traceback and not
        propagated.
    """
    try:
        # resolve() keeps the path correct when __file__ is relative.
        dataset_yaml = Path(__file__).resolve().parent.parent / "yolo_dataset/dataset.yaml"

        model = YOLO('yolov8n.pt')
        model.train(
            data=str(dataset_yaml),
            epochs=100,  # Reduced from 300 for local testing
            batch=2,  # Small batch size for limited VRAM
            imgsz=416,  # Reduced from 640 to save memory
            device='0' if torch.cuda.is_available() else 'cpu',
            augment=True,  # for small datasets
            patience=20,  # Early stopping if no improvement
            lr0=0.001,  # Learning rate
            cos_lr=True,  # Cosine learning rate scheduler
            workers=1,  # Reduce if memory errors
            cache=False,  # Disable cache if low on disk space
            single_cls=True,
            optimizer='AdamW',  # For small datasets
            seed=42,
            pretrained=True  # Using pretrained weights
        )

        logger.info("Training completed successfully")
        return True
    except Exception as e:
        # Boundary handler: report failure through the return value but
        # keep the traceback in the log.
        logger.exception("Training failed: %s", e)
        return False
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Script entry point: prepare the dataset, then train only if the
    # preparation step succeeded.
    try:
        preparer = DatasetPreparer()
        succeeded = preparer.organize_yolo_dataset() and train_model()
    except Exception as e:
        logger.exception("Fatal error: %s", e)
        succeeded = False

    if not succeeded:
        # Exit non-zero so shells and CI can detect the failure.
        raise SystemExit(1)
|