#!/usr/bin/env python3
"""
Instruct Inference Script
Provides a command-line interface to run the instruct inference pipeline
"""

import sys
import os
import subprocess
import argparse
from pathlib import Path
from typing import List, Optional, Union

# Config used by the built-in example and as the fallback for the batch test.
DEFAULT_CONFIG_PATH = "configs/instruct/code_reasoning.yaml"

# Pipeline entry point; resolved relative to the current working directory.
INFERENCE_SCRIPT = "pipelines/instruct/inference.py"


def _config_exists(config_path: str) -> bool:
    """Return True if *config_path* exists; otherwise print a hint and return False."""
    if Path(config_path).exists():
        return True
    print(f"❌ Configuration file not found: {config_path}")
    print(" Please run the data processor and training first")
    return False


def run_inference_with_config(config_path: str, message: str = "", max_tokens: int = 128,
                              stream: bool = False,
                              interactive: bool = False) -> Union[str, bool, None]:
    """Run inference using a YAML configuration file.

    The inference pipeline script is launched as a child process using the
    same interpreter that runs this script.

    Args:
        config_path: Path to the YAML configuration file.
        message: Message forwarded via ``--message`` (ignored when
            ``interactive`` is set or the message is empty).
        max_tokens: Value forwarded via ``--max-tokens``.
        stream: Forward ``--stream`` to the pipeline.
        interactive: Forward ``--interactive`` and leave the child attached
            to this terminal instead of capturing its output.

    Returns:
        ``True`` after a successful interactive session, the captured stdout
        (possibly an empty string) after a successful non-interactive run, or
        ``None`` if the child could not be started or exited non-zero.
        Callers should test for failure with ``is None``.
    """
    print(f"Running instruct inference with config: {config_path}")
    if interactive:
        print("Mode: Interactive chat")
    elif message:
        print(f"Message: {message}")
    print(f"Max tokens: {max_tokens}")
    print(f"Streaming: {stream}")

    # sys.executable keeps the child in the same interpreter / virtualenv;
    # a bare "python" may be missing or point at a different installation.
    cmd = [
        sys.executable, INFERENCE_SCRIPT,
        "--config", config_path,
        "--max-tokens", str(max_tokens),
    ]
    if interactive:
        cmd.append("--interactive")
    elif message:
        cmd.extend(["--message", message])
    if stream:
        cmd.append("--stream")

    print(f"Running: {' '.join(cmd)}")

    try:
        if interactive:
            # For interactive mode, don't capture output
            subprocess.run(cmd, check=True)
            return True
        # NOTE: output is captured here, so with --stream the text is still
        # only displayed once the child process has finished.
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"❌ Inference failed: {e}")
        # stderr is None when the child's output was not captured
        # (interactive mode); avoid printing a literal "None".
        if e.stderr is not None:
            print("Error output:")
            print(e.stderr)
        return None
    except OSError as e:
        # The interpreter itself could not be launched.
        print(f"❌ Inference failed: {e}")
        return None

    print("✅ Inference completed successfully!")
    print("Output:")
    print(result.stdout)
    return result.stdout


def run_conversation_example(config_path: str) -> bool:
    """Run a fixed set of example messages through the pipeline.

    Each message is sent as an independent single-message inference.

    Returns:
        ``True`` if every example ran successfully, ``False`` otherwise.
    """
    print("=== Conversation Example ===")
    print(f"Config: {config_path}")

    example_messages = [
        "Can you explain what recursion is in programming?",
        "How do I debug a Python program?",
        "What's the difference between a list and a tuple in Python?",
        "Can you show me how to use a for loop?",
        "What are the benefits of using functions in programming?",
    ]

    print("Running example conversations...")
    print()

    all_ok = True
    for number, message in enumerate(example_messages, start=1):
        print(f"--- Example {number} ---")
        result = run_inference_with_config(config_path, message, max_tokens=256)
        # Only None signals failure; empty output is still a successful run.
        if result is None:
            all_ok = False
            print(f"❌ Failed to process message {number}")
        print()

    print("✅ Conversation examples completed!")
    return all_ok


def show_inference_features() -> None:
    """Show the features of the instruct inference pipeline."""
    lines = [
        "=== Instruct Inference Pipeline Features ===",
        "",
        "1. **Model Support**:",
        " - Trained LoRA models from instruct training pipeline",
        " - Automatic model loading from config",
        " - Native Unsloth inference optimization",
        " - Chat template integration",
        "",
        "2. **Inference Modes**:",
        " - Single message inference",
        " - Interactive chat session",
        " - Streaming generation",
        " - Batch conversation processing",
        "",
        "3. **Conversation Features**:",
        " - Multi-turn conversation support",
        " - Context preservation across turns",
        " - Proper role handling (user/assistant/system)",
        " - Chat history management",
        "",
        "4. **Generation Control**:",
        " - Configurable max tokens",
        " - Temperature and sampling parameters",
        " - Streaming output support",
        " - Chat template formatting",
        "",
        "5. **Interactive Features**:",
        " - Real-time chat interface",
        " - Command support (clear, stream toggle)",
        " - Conversation history tracking",
        " - Graceful exit handling",
        "",
        "6. **Usage Examples**:",
        " - Single message: --message 'your question here'",
        " - Interactive chat: --interactive",
        " - Streaming: add --stream flag",
        " - Custom tokens: --max-tokens 256",
    ]
    print("\n".join(lines))


def create_inference_example() -> bool:
    """Run one example inference using the code reasoning configuration.

    Returns:
        ``True`` if the example ran successfully; ``False`` if the default
        configuration file is missing or the inference failed.
    """
    print("=== Inference Example: Code Reasoning Chat ===")
    print()

    # Check if we have the required files
    config_path = DEFAULT_CONFIG_PATH
    if not _config_exists(config_path):
        return False

    print("✅ Found configuration file!")
    print(f" Config: {config_path}")
    print()

    # Example conversation
    example_message = "Can you explain what a Python decorator is and show me a simple example?"
    print(f"Example message: {example_message}")
    print()

    # Run inference
    result = run_inference_with_config(
        config_path=config_path,
        message=example_message,
        max_tokens=256,
    )

    if result is None:
        print("❌ Example inference failed!")
        return False
    print("✅ Example inference completed successfully!")
    return True


def start_interactive_chat(config_path: str, stream: bool = False) -> bool:
    """Start an interactive chat session.

    Returns:
        ``True`` if the session ended normally; ``False`` if the
        configuration file is missing or the child process failed.
    """
    print("=== Interactive Chat Session ===")
    print()

    if not _config_exists(config_path):
        return False

    print(f"Starting interactive chat with config: {config_path}")
    print("Streaming:", "enabled" if stream else "disabled")
    print()

    # Run interactive inference
    result = run_inference_with_config(
        config_path=config_path,
        interactive=True,
        stream=stream,
    )
    return result is not None


def create_batch_test(config_path: str = DEFAULT_CONFIG_PATH) -> bool:
    """Run a batch test with multiple questions.

    Args:
        config_path: Path to the YAML configuration file. Defaults to the
            code reasoning configuration so existing zero-argument callers
            keep working.

    Returns:
        ``True`` only if every question was processed successfully.
    """
    print("=== Batch Test: Multiple Questions ===")
    print()

    if not _config_exists(config_path):
        return False

    # Create a batch of test questions
    test_questions = [
        "What is object-oriented programming?",
        "How do you handle errors in Python?",
        "Explain the concept of variables in programming.",
        "What's the difference between a compiler and an interpreter?",
    ]

    print("Running batch test with multiple questions...")
    print()

    success_count = 0
    for number, question in enumerate(test_questions, start=1):
        print(f"Question {number}: {question}")
        result = run_inference_with_config(config_path, question, max_tokens=200)
        if result is not None:
            success_count += 1
        print("-" * 50)

    print(f"✅ Batch test completed: {success_count}/{len(test_questions)} questions processed successfully")
    return success_count == len(test_questions)


def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser with one sub-command per mode."""
    parser = argparse.ArgumentParser(description="Instruct Inference Pipeline")
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # Inference command
    infer_parser = subparsers.add_parser("infer", help="Run single inference")
    infer_parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")
    infer_parser.add_argument("--message", type=str, required=True, help="Message to send to the model")
    infer_parser.add_argument("--max-tokens", type=int, default=128, help="Maximum new tokens to generate")
    infer_parser.add_argument("--stream", action="store_true", help="Enable streaming generation")

    # Interactive command
    interactive_parser = subparsers.add_parser("chat", help="Start interactive chat")
    interactive_parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")
    interactive_parser.add_argument("--stream", action="store_true", help="Enable streaming generation")

    # Batch test command
    batch_parser = subparsers.add_parser("batch", help="Run batch test")
    batch_parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")

    # Conversation example command
    conv_parser = subparsers.add_parser("conversation", help="Run conversation examples")
    conv_parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")

    # Features command
    subparsers.add_parser("features", help="Show available features")

    # Example command
    subparsers.add_parser("example", help="Run example inference")

    return parser


def _print_usage() -> None:
    """Print the overview shown when no sub-command is given."""
    print("Instruct Inference Pipeline")
    print("==========================")
    print()
    print("Available commands:")
    print(" infer - Run single message inference")
    print(" chat - Start interactive chat session")
    print(" batch - Run batch test with multiple questions")
    print(" conversation - Run conversation examples")
    print(" features - Show available features")
    print(" example - Run example inference")
    print()
    print("Examples:")
    print(" python scripts/instruct/inference.py infer --config configs/instruct/code_reasoning.yaml --message 'Explain Python loops'")
    print(" python scripts/instruct/inference.py chat --config configs/instruct/code_reasoning.yaml")
    print(" python scripts/instruct/inference.py chat --config configs/instruct/code_reasoning.yaml --stream")
    print()
    print("Key Features:")
    print(" ✅ Interactive chat with conversation history")
    print(" ✅ Streaming generation support")
    print(" ✅ Multi-turn conversation handling")
    print(" ✅ Chat template integration")
    print(" ✅ Configurable generation parameters")


def main(argv: Optional[List[str]] = None) -> int:
    """Main inference function.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Returns:
        Process exit status: ``0`` on success, ``1`` if the selected
        command reported a failure.
    """
    args = _build_parser().parse_args(argv)

    if args.command == "infer":
        result = run_inference_with_config(
            args.config, args.message, args.max_tokens, args.stream
        )
        ok = result is not None
    elif args.command == "chat":
        ok = start_interactive_chat(args.config, args.stream)
    elif args.command == "batch":
        # Honour the (required) --config option instead of silently
        # falling back to the built-in default path.
        ok = create_batch_test(args.config)
    elif args.command == "conversation":
        ok = run_conversation_example(args.config)
    elif args.command == "features":
        show_inference_features()
        ok = True
    elif args.command == "example":
        ok = create_inference_example()
    else:
        _print_usage()
        ok = True

    return 0 if ok else 1


if __name__ == "__main__":
    sys.exit(main())