update project structure and improve scripts
This commit is contained in:
@@ -0,0 +1,132 @@
|
||||
import os
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
from ultralytics import YOLO
|
||||
import logging
|
||||
from sklearn.model_selection import train_test_split
|
||||
import torch
|
||||
|
||||
# Module-level logging: every message in this script goes through `logger`,
# and the root logger is configured to pass INFO and above.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
|
||||
class DatasetPreparer:
    """Arrange the raw training images into a YOLO directory layout.

    Images are read from ``<project_root>/training/memory`` and
    ``<project_root>/training/no_memory``. The train/val split, any ``.txt``
    label files found next to the images, and a ``dataset.yaml`` are written
    under ``<project_root>/yolo_dataset``.
    """

    # Lower-cased file suffixes that count as images.
    IMAGE_SUFFIXES = frozenset({".jpg", ".jpeg", ".png"})

    def __init__(self):
        """Resolve the input/output directories relative to this script."""
        # The project root is the parent of the directory holding this file.
        self.project_root = Path(__file__).parent.parent
        self.training_dir = self.project_root / "training"
        self.output_dir = self.project_root / "yolo_dataset"

        logger.info(f"Looking for training data in: {self.training_dir}")
        logger.info(f"Output will be saved to: {self.output_dir}")

    def _list_images(self, directory):
        """Return the image files directly inside *directory*, sorted by path.

        Sorting matters: directory listing order depends on the filesystem,
        and the seeded train/val split is only reproducible when its input
        order is stable. A missing directory yields an empty list.
        """
        if not directory.is_dir():
            return []
        return sorted(
            entry
            for entry in directory.iterdir()
            if entry.is_file() and entry.suffix.lower() in self.IMAGE_SUFFIXES
        )

    def verify_dataset(self):
        """Collect the source images and check that the dataset is usable.

        Returns:
            The ``memory`` images followed by the ``no_memory`` images, each
            group sorted by path.

        Raises:
            FileNotFoundError: If ``training/memory`` contains no images.
                An empty ``training/no_memory`` only logs a warning.
        """
        memory_images = self._list_images(self.training_dir / "memory")
        no_memory_images = self._list_images(self.training_dir / "no_memory")

        if not memory_images:
            raise FileNotFoundError(f"No images found in {self.training_dir/'memory/'}")
        if not no_memory_images:
            logger.warning(f"No images found in {self.training_dir/'no_memory/'}")

        logger.info(f"Found {len(memory_images)} memory images and {len(no_memory_images)} no_memory images")
        return memory_images + no_memory_images

    @staticmethod
    def _link_or_copy(src, dest):
        """Make *src* available at *dest*, leaving an existing *dest* untouched."""
        import shutil

        if dest.exists():
            return
        try:
            os.link(src, dest)
        except OSError:
            # Hard links cannot span filesystems and are not supported
            # everywhere; a plain copy gives the same dataset contents.
            shutil.copy2(src, dest)

    def _populate_split(self, split, files):
        """Place every image in *files*, plus its label if present, under *split*."""
        image_dir = self.output_dir / "images" / split
        label_dir = self.output_dir / "labels" / split
        for image in files:
            # NOTE(review): images from memory/ and no_memory/ that share a
            # file name map to the same destination; only the first is kept.
            self._link_or_copy(image, image_dir / image.name)

            # Annotations, when present, sit next to the image as <stem>.txt.
            label = image.with_suffix(".txt")
            if label.exists():
                self._link_or_copy(label, label_dir / label.name)

    def organize_yolo_dataset(self, test_size: float = 0.2) -> bool:
        """Build the YOLO dataset directory and its ``dataset.yaml``.

        Args:
            test_size: Fraction of the images assigned to the validation
                split (passed to ``train_test_split``).

        Returns:
            True on success. Any exception is logged with its traceback and
            reported as False instead of propagating.
        """
        try:
            all_images = self.verify_dataset()

            # Create directories
            for kind in ("images", "labels"):
                for split in ("train", "val"):
                    (self.output_dir / kind / split).mkdir(parents=True, exist_ok=True)

            # Split into train/val (fixed seed so reruns give the same split)
            train_files, val_files = train_test_split(
                all_images, test_size=test_size, random_state=42
            )

            self._populate_split("train", train_files)
            self._populate_split("val", val_files)

            # Create dataset YAML
            data = {
                'train': str(self.output_dir / "images/train"),
                'val': str(self.output_dir / "images/val"),
                'nc': 1,
                'names': ['memory_module'],
            }
            with open(self.output_dir / "dataset.yaml", 'w', encoding="utf-8") as f:
                yaml.dump(data, f)

            logger.info("YOLO dataset prepared successfully")
            return True

        except Exception as e:
            logger.exception("Error organizing dataset: %s", e)
            return False
|
||||
|
||||
|
||||
def train_model(data=None, epochs=100, batch=2, imgsz=416):
    """Train YOLO model using ultralytics.

    Starts from the ``yolov8n.pt`` checkpoint and trains on the prepared
    dataset. The defaults are deliberately small (fewer epochs, small batch,
    reduced image size) for local runs with limited memory.

    Args:
        data: Path to the dataset YAML. Defaults to
            ``<project_root>/yolo_dataset/dataset.yaml``, where the project
            root is the parent of the directory holding this file.
        epochs: Maximum number of training epochs.
        batch: Batch size.
        imgsz: Training image size.

    Returns:
        True if training completed. Any exception is logged with its
        traceback and reported as False instead of propagating.
    """
    try:
        if data is None:
            data = Path(__file__).parent.parent / "yolo_dataset/dataset.yaml"

        model = YOLO('yolov8n.pt')
        model.train(
            data=str(data),
            epochs=epochs,
            batch=batch,
            imgsz=imgsz,
            device='0' if torch.cuda.is_available() else 'cpu',
            augment=True,  # for small datasets
            patience=20,  # Early stopping if no improvement
            lr0=0.001,  # Learning rate
            cos_lr=True,  # Cosine learning rate scheduler
            workers=1,  # Reduce memory errors
            cache=False,  # Disable cache if low on disk space
            single_cls=True,
            optimizer='AdamW',  # For small datasets
            seed=42,
            pretrained=True,  # Using pretrained weights
        )

        logger.info("Training completed successfully")
        return True
    except Exception as e:
        logger.exception("Training failed: %s", e)
        return False
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Prepare the dataset first; train only when preparation succeeded.
    try:
        succeeded = bool(DatasetPreparer().organize_yolo_dataset() and train_model())
    except Exception as e:
        logger.exception("Fatal error: %s", e)
        succeeded = False

    # Report failure through the exit status so shells and CI can detect it.
    if not succeeded:
        raise SystemExit(1)
|
||||
Reference in New Issue
Block a user