# DS-LLM-TEMPLATE-FINETUNING/scripts/instruct/inference.py


#!/usr/bin/env python3
"""
Instruct Inference Script
Provides a command-line interface to run the instruct inference pipeline
"""

import sys
import os
import subprocess
import argparse
from pathlib import Path

def run_inference_with_config(config_path: str, message: str = "", max_tokens: int = 128, stream: bool = False, interactive: bool = False):
    """Run the instruct inference pipeline script as a subprocess.

    Builds a command line for ``pipelines/instruct/inference.py`` (a path
    relative to the current working directory) and executes it with the
    interpreter that is running this script.

    Args:
        config_path: Path to the YAML configuration file, forwarded as
            ``--config``.
        message: Message forwarded as ``--message``. Ignored when
            ``interactive`` is true or when the string is empty.
        max_tokens: Value forwarded as ``--max-tokens``.
        stream: When true, ``--stream`` is appended to the command.
        interactive: When true, ``--interactive`` is passed and the child
            process inherits this process's standard streams.

    Returns:
        ``True`` after a successful interactive run, the captured stdout
        (``str``) after a successful non-interactive run, or ``None`` when
        the child could not be started or exited with a non-zero status.
    """
    print(f"Running instruct inference with config: {config_path}")
    if interactive:
        print("Mode: Interactive chat")
    elif message:
        print(f"Message: {message}")
    print(f"Max tokens: {max_tokens}")
    print(f"Streaming: {stream}")

    # Use the current interpreter so the child runs in the same environment
    # (virtualenv, Python version); a bare "python" may resolve to a different
    # interpreter or not exist on PATH at all.
    cmd = [
        sys.executable or "python", "pipelines/instruct/inference.py",
        "--config", config_path,
        "--max-tokens", str(max_tokens)
    ]

    if interactive:
        cmd.append("--interactive")
    elif message:
        cmd.extend(["--message", message])

    if stream:
        cmd.append("--stream")

    print(f"Running: {' '.join(cmd)}")

    try:
        if interactive:
            # Interactive mode: do not capture output, the user talks to the
            # child process directly.
            subprocess.run(cmd, check=True)
            return True

        # NOTE: output is captured and printed only after the child exits, so
        # anything the child streams is not shown incrementally in this mode.
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"❌ Inference failed: {e}")
        # stderr is only populated when output was captured (non-interactive).
        if e.stderr is not None:
            print("Error output:")
            print(e.stderr)
        return None
    except OSError as e:
        # The interpreter itself could not be launched.
        print(f"❌ Inference failed: {e}")
        return None

    print("✅ Inference completed successfully!")
    print("Output:")
    print(result.stdout)
    return result.stdout

def run_conversation_example(config_path: str):
    """Send a fixed list of example questions through the inference pipeline.

    Each question is run as an independent single-message inference with
    ``max_tokens=256``; a failure of one question is reported and the loop
    continues with the next.

    Args:
        config_path: Path to the YAML configuration file passed to every
            inference call.
    """
    print("=== Conversation Example ===")
    print(f"Config: {config_path}")

    example_messages = [
        "Can you explain what recursion is in programming?",
        "How do I debug a Python program?",
        "What's the difference between a list and a tuple in Python?",
        "Can you show me how to use a for loop?",
        "What are the benefits of using functions in programming?"
    ]

    print("Running example conversations...")
    print()

    for number, message in enumerate(example_messages, start=1):
        print(f"--- Example {number} ---")
        result = run_inference_with_config(config_path, message, max_tokens=256)
        # None is the failure signal; a successful run that produced empty
        # output must not be reported as a failure.
        if result is None:
            print(f"❌ Failed to process message {number}")
        print()

    print("✅ Conversation examples completed!")

def show_inference_features():
    """Print a static overview of the instruct inference pipeline features."""
    # (heading, bullet lines) pairs; every section is preceded by a blank line.
    sections = (
        ("1. **Model Support**:", (
            "   - Trained LoRA models from instruct training pipeline",
            "   - Automatic model loading from config",
            "   - Native Unsloth inference optimization",
            "   - Chat template integration",
        )),
        ("2. **Inference Modes**:", (
            "   - Single message inference",
            "   - Interactive chat session",
            "   - Streaming generation",
            "   - Batch conversation processing",
        )),
        ("3. **Conversation Features**:", (
            "   - Multi-turn conversation support",
            "   - Context preservation across turns",
            "   - Proper role handling (user/assistant/system)",
            "   - Chat history management",
        )),
        ("4. **Generation Control**:", (
            "   - Configurable max tokens",
            "   - Temperature and sampling parameters",
            "   - Streaming output support",
            "   - Chat template formatting",
        )),
        ("5. **Interactive Features**:", (
            "   - Real-time chat interface",
            "   - Command support (clear, stream toggle)",
            "   - Conversation history tracking",
            "   - Graceful exit handling",
        )),
        ("6. **Usage Examples**:", (
            "   - Single message: --message 'your question here'",
            "   - Interactive chat: --interactive",
            "   - Streaming: add --stream flag",
            "   - Custom tokens: --max-tokens 256",
        )),
    )

    print("=== Instruct Inference Pipeline Features ===")
    for heading, bullets in sections:
        print()
        print(heading)
        for bullet in bullets:
            print(bullet)

def create_inference_example(config_path: str = "configs/instruct/code_reasoning.yaml"):
    """Run a single example prompt through the inference pipeline.

    Args:
        config_path: Path to the YAML configuration file. Defaults to the
            code reasoning configuration.

    Returns:
        ``True`` if the inference call succeeded, ``False`` if the
        configuration file does not exist or the inference call failed.
    """
    print("=== Inference Example: Code Reasoning Chat ===")
    print()

    # Check that the required configuration file is present
    if not Path(config_path).exists():
        print(f"❌ Configuration file not found: {config_path}")
        print("   Please run the data processor and training first")
        return False

    print("✅ Found configuration file!")
    print(f"   Config: {config_path}")
    print()

    # Example conversation
    example_message = "Can you explain what a Python decorator is and show me a simple example?"

    print(f"Example message: {example_message}")
    print()

    # Run inference
    result = run_inference_with_config(
        config_path=config_path,
        message=example_message,
        max_tokens=256
    )

    # The helper returns None on failure and the captured stdout on success;
    # compare against None so an empty (but successful) output is not
    # misreported as a failure.
    if result is None:
        print("❌ Example inference failed!")
        return False

    print("✅ Example inference completed successfully!")
    return True

def start_interactive_chat(config_path: str, stream: bool = False):
    """Launch an interactive chat session for the given configuration.

    Returns ``False`` when the configuration file does not exist; otherwise
    returns whatever ``run_inference_with_config`` returns for the
    interactive run.
    """
    print("=== Interactive Chat Session ===")
    print()

    config_file = Path(config_path)
    if not config_file.exists():
        print(f"❌ Configuration file not found: {config_path}")
        print("   Please run the data processor and training first")
        return False

    streaming_state = "enabled" if stream else "disabled"
    print(f"Starting interactive chat with config: {config_path}")
    print(f"Streaming: {streaming_state}")
    print()

    # Hand control to the inference pipeline in interactive mode
    return run_inference_with_config(
        config_path=config_path,
        interactive=True,
        stream=stream
    )

def create_batch_test(config_path: str = "configs/instruct/code_reasoning.yaml"):
    """Run a fixed batch of test questions through the inference pipeline.

    Args:
        config_path: Path to the YAML configuration file. Defaults to the
            code reasoning configuration.

    Returns:
        ``True`` only if every question was processed successfully;
        ``False`` if the configuration file does not exist or any
        inference call failed.
    """
    print("=== Batch Test: Multiple Questions ===")
    print()

    if not Path(config_path).exists():
        print(f"❌ Configuration file not found: {config_path}")
        print("   Please run the data processor and training first")
        return False

    # Create a batch of test questions
    test_questions = [
        "What is object-oriented programming?",
        "How do you handle errors in Python?",
        "Explain the concept of variables in programming.",
        "What's the difference between a compiler and an interpreter?"
    ]

    print("Running batch test with multiple questions...")
    print()

    success_count = 0
    for number, question in enumerate(test_questions, start=1):
        print(f"Question {number}: {question}")
        result = run_inference_with_config(config_path, question, max_tokens=200)
        # None is the failure signal; a successful run with empty output
        # still counts as processed.
        if result is not None:
            success_count += 1
        print("-" * 50)

    print(f"✅ Batch test completed: {success_count}/{len(test_questions)} questions processed successfully")
    return success_count == len(test_questions)

def main(argv=None):
    """Parse command-line arguments and dispatch to the selected command.

    Args:
        argv: Argument list to parse. ``None`` (the default) lets argparse
            read the arguments from ``sys.argv``.

    Returns:
        An exit status: ``0`` when the selected command succeeded (or when
        no command was given and the usage overview was printed), ``1``
        when the selected command reported a failure.
    """
    parser = argparse.ArgumentParser(description="Instruct Inference Pipeline")
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # Inference command
    infer_parser = subparsers.add_parser("infer", help="Run single inference")
    infer_parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")
    infer_parser.add_argument("--message", type=str, required=True, help="Message to send to the model")
    infer_parser.add_argument("--max-tokens", type=int, default=128, help="Maximum new tokens to generate")
    infer_parser.add_argument("--stream", action="store_true", help="Enable streaming generation")

    # Interactive command
    interactive_parser = subparsers.add_parser("chat", help="Start interactive chat")
    interactive_parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")
    interactive_parser.add_argument("--stream", action="store_true", help="Enable streaming generation")

    # Batch test command
    batch_parser = subparsers.add_parser("batch", help="Run batch test")
    batch_parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")

    # Conversation example command
    conv_parser = subparsers.add_parser("conversation", help="Run conversation examples")
    conv_parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")

    # Features command
    subparsers.add_parser("features", help="Show available features")

    # Example command
    subparsers.add_parser("example", help="Run example inference")

    args = parser.parse_args(argv)

    if args.command == "infer":
        result = run_inference_with_config(
            args.config,
            args.message,
            args.max_tokens,
            args.stream
        )
        # The helper returns None on failure, captured stdout otherwise.
        return 0 if result is not None else 1
    if args.command == "chat":
        return 0 if start_interactive_chat(args.config, args.stream) else 1
    if args.command == "batch":
        # Honor the required --config option instead of silently falling
        # back to the built-in default path.
        return 0 if create_batch_test(args.config) else 1
    if args.command == "conversation":
        run_conversation_example(args.config)
        return 0
    if args.command == "features":
        show_inference_features()
        return 0
    if args.command == "example":
        return 0 if create_inference_example() else 1

    # No command given: print a usage overview.
    print("Instruct Inference Pipeline")
    print("==========================")
    print()
    print("Available commands:")
    print("  infer        - Run single message inference")
    print("  chat         - Start interactive chat session")
    print("  batch        - Run batch test with multiple questions")
    print("  conversation - Run conversation examples")
    print("  features     - Show available features")
    print("  example      - Run example inference")
    print()
    print("Examples:")
    print("  python scripts/instruct/inference.py infer --config configs/instruct/code_reasoning.yaml --message 'Explain Python loops'")
    print("  python scripts/instruct/inference.py chat --config configs/instruct/code_reasoning.yaml")
    print("  python scripts/instruct/inference.py chat --config configs/instruct/code_reasoning.yaml --stream")
    print()
    print("Key Features:")
    print("  ✅ Interactive chat with conversation history")
    print("  ✅ Streaming generation support")
    print("  ✅ Multi-turn conversation handling")
    print("  ✅ Chat template integration")
    print("  ✅ Configurable generation parameters")
    return 0

if __name__ == "__main__":
    # Propagate the command's outcome as the process exit status.
    sys.exit(main())