#!/usr/bin/env python3
"""
Styling Training Script

Provides a command-line interface to run the styling training pipeline.
"""

import argparse
import os
import subprocess
import sys
from pathlib import Path
from typing import Dict, List, Optional

# Pipeline entry point; a relative path, so it is resolved against the
# current working directory of whoever launches this script.
_TRAIN_SCRIPT = "pipelines/styling/train.py"

# Keyword override name -> command-line flag forwarded to the pipeline.
_OVERRIDE_FLAGS: Dict[str, str] = {
    "output_dir": "--output-dir",
    "epochs": "--epochs",
    "batch_size": "--batch-size",
    "learning_rate": "--learning-rate",
    "max_steps": "--max-steps",
}


def _build_training_command(
    config_path: str,
    dataset_path: Optional[str],
    cli_overrides: Dict[str, object],
) -> List[str]:
    """Assemble the argv list used to launch the training pipeline."""
    # sys.executable keeps the child in the same interpreter/virtualenv as
    # this script instead of whatever "python" resolves to on PATH (if any).
    cmd = [sys.executable, _TRAIN_SCRIPT, "--config", config_path]

    if dataset_path:
        cmd.extend(["--dataset", dataset_path])

    # Overrides are forwarded in the order given; None values and names
    # without a known flag are skipped.
    for key, value in cli_overrides.items():
        flag = _OVERRIDE_FLAGS.get(key)
        if flag is not None and value is not None:
            cmd.extend([flag, str(value)])

    return cmd


def run_training_with_config(
    config_path: str,
    dataset_path: Optional[str] = None,
    **cli_overrides,
) -> bool:
    """Run the styling training pipeline with YAML configuration.

    Args:
        config_path: Path to the YAML configuration passed as ``--config``.
        dataset_path: Optional dataset path passed as ``--dataset``. When
            empty or ``None`` the flag is omitted.
        **cli_overrides: Optional overrides (``output_dir``, ``epochs``,
            ``batch_size``, ``learning_rate``, ``max_steps``). ``None``
            values and unrecognised names are ignored.

    Returns:
        ``True`` if the training subprocess exited with status 0, ``False``
        if it exited non-zero or could not be started.
    """
    print(f"Starting styling training with config: {config_path}")
    if dataset_path:
        print(f"Training dataset: {dataset_path}")
    else:
        print("Training dataset: Will use output_dir from YAML config")
    print()

    cmd = _build_training_command(config_path, dataset_path, cli_overrides)
    print(f"Running: {' '.join(cmd)}")
    print()

    try:
        # NOTE: capture_output buffers the child's output until it exits, so
        # nothing is shown while training is still in progress.
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Training failed: {e}")
        print(f"Error output: {e.stderr}")
        return False
    except OSError as e:
        # The process could not be launched at all (e.g. missing interpreter).
        print(f"Training failed: {e}")
        return False

    print("Training completed successfully!")
    print(result.stdout)
    return True


def show_training_features() -> None:
    """Show the features of the styling training pipeline."""
    lines = (
        "=== Styling Training Pipeline Features ===",
        "",
        "1. **Model Support**:",
        " - Unsloth optimized models (4x faster)",
        " - LoRA fine-tuning for efficiency",
        " - Support for Llama-3.1, Mistral, Phi-3, Gemma",
        "",
        "2. **Training Features**:",
        " - SFTTrainer with instruction tuning",
        " - Automatic mixed precision (FP16/BF16)",
        " - Gradient checkpointing for memory efficiency",
        " - Configurable LoRA parameters",
        "",
        "3. **Configuration**:",
        " - YAML configuration files",
        " - CLI argument overrides",
        " - Automatic device detection",
        "",
        "4. **Output**:",
        " - Saved LoRA models",
        " - Training logs and checkpoints",
        " - Ready for inference",
    )
    for line in lines:
        print(line)


def create_training_example() -> bool:
    """Create a training example using the formal style configuration.

    Returns:
        ``False`` if the configuration file is missing or training fails,
        ``True`` if the example training run succeeds.
    """
    print("=== Training Example: Formal Style Transfer ===")
    print()

    # Check if we have the required files
    config_path = "configs/styling/formal.yaml"
    if not Path(config_path).exists():
        print(f"Configuration file not found: {config_path}")
        print(" Please run the data processor first to create the configuration")
        return False

    print("Found required files!")
    print(f" Config: {config_path}")
    print(" Dataset: Will use output_dir from YAML config")
    print(" The training pipeline will automatically:")
    print(" - Load data from the output_dir specified in YAML")
    print(" - Convert JSONL files to HuggingFace dataset format")
    print(" - Apply formatting with EOS tokens")
    print(" - Train the model using SFTTrainer")
    print()

    # Run training without explicit dataset path - will use YAML config
    success = run_training_with_config(
        config_path=config_path,
        dataset_path=None,  # Use output_dir from YAML config
        epochs=1,
        batch_size=2,
        learning_rate=2e-4,
    )

    if success:
        print("Training example completed!")
        print(" Model saved to: ./models/styling")
        print(" Ready for inference!")

    return success


def main() -> None:
    """Parse command-line arguments and dispatch the selected command.

    Exits with status 1 when ``train`` is requested without ``--config`` or
    when the ``train`` / ``example`` command reports failure.
    """
    parser = argparse.ArgumentParser(description="Styling Training Script")

    # Subcommands
    parser.add_argument(
        "command",
        choices=["train", "example", "features"],
        help="Command to run",
    )

    # Training arguments
    parser.add_argument("--config", type=str, help="Path to YAML configuration file")
    parser.add_argument("--dataset", type=str, help="Path to training dataset")
    parser.add_argument("--output-dir", type=str, help="Output directory for model")
    parser.add_argument("--epochs", type=int, help="Number of training epochs")
    parser.add_argument("--batch-size", type=int, help="Training batch size")
    parser.add_argument("--learning-rate", type=float, help="Learning rate")
    parser.add_argument("--max-steps", type=int, help="Maximum training steps")

    args = parser.parse_args()

    if args.command == "features":
        show_training_features()

    elif args.command == "example":
        # Propagate failure through the exit status so shells/CI can see it.
        if not create_training_example():
            sys.exit(1)

    elif args.command == "train":
        if not args.config:
            print("❌ --config is required for training")
            print("Usage: python scripts/styling/train.py train --config config.yaml")
            sys.exit(1)

        # If dataset is not provided, the pipeline falls back to the YAML config
        dataset_path = args.dataset or None

        success = run_training_with_config(
            config_path=args.config,
            dataset_path=dataset_path,
            output_dir=args.output_dir,
            epochs=args.epochs,
            batch_size=args.batch_size,
            learning_rate=args.learning_rate,
            max_steps=args.max_steps,
        )

        if not success:
            sys.exit(1)


if __name__ == "__main__":
    main()