Files
DS-LLM-TEMPLATE-FINETUNING/scripts/instruct/.ipynb_checkpoints/inference-checkpoint.py
T
2025-08-28 14:12:30 +00:00

292 lines
11 KiB
Python

#!/usr/bin/env python3
"""
Instruct Inference Script
Provides a command-line interface to run the instruct inference pipeline
"""
import sys
import os
import subprocess
import argparse
from pathlib import Path
def run_inference_with_config(config_path: str, message: str = "", max_tokens: int = 128, stream: bool = False, interactive: bool = False):
    """Run the instruct inference pipeline script as a child process.

    Builds a command line for ``pipelines/instruct/inference.py`` (a path
    relative to the current working directory) and runs it with the same
    Python interpreter that is executing this script.

    Args:
        config_path: Path to the YAML configuration file, forwarded as
            ``--config``.
        message: Single message forwarded as ``--message``. Ignored when
            ``interactive`` is true or when the string is empty.
        max_tokens: Forwarded as ``--max-tokens``.
        stream: When true, ``--stream`` is appended to the command.
        interactive: When true, ``--interactive`` is passed and the child's
            output is not captured.

    Returns:
        ``True`` after a successful interactive run, the captured stdout
        (a ``str``, possibly empty) after a successful non-interactive run,
        or ``None`` when the child could not be started or exited with a
        non-zero status. Callers should test ``is None`` for failure, since
        an empty stdout string is falsy.
    """
    print(f"Running instruct inference with config: {config_path}")
    if interactive:
        print("Mode: Interactive chat")
    elif message:
        print(f"Message: {message}")
    print(f"Max tokens: {max_tokens}")
    print(f"Streaming: {stream}")

    # Use the interpreter running this script so the child sees the same
    # environment (virtualenv, installed packages); a bare "python" may be
    # missing from PATH or point at a different installation.
    interpreter = sys.executable or "python"
    cmd = [
        interpreter, "pipelines/instruct/inference.py",
        "--config", config_path,
        "--max-tokens", str(max_tokens),
    ]
    if interactive:
        cmd.append("--interactive")
    elif message:
        cmd.extend(["--message", message])
    if stream:
        cmd.append("--stream")
    print(f"Running: {' '.join(cmd)}")

    try:
        if interactive:
            # For interactive mode, don't capture output
            subprocess.run(cmd, check=True)
            return True
        # NOTE: stdout is captured here, so even with --stream the text is
        # only shown once the child has exited.
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"❌ Inference failed: {e}")
        # stderr is None when output was not captured (interactive mode);
        # avoid printing a literal "None".
        if e.stderr:
            print("Error output:")
            print(e.stderr)
        return None
    except OSError as e:
        # The interpreter itself could not be launched.
        print(f"❌ Inference failed: {e}")
        return None

    print("✅ Inference completed successfully!")
    print("Output:")
    print(result.stdout)
    return result.stdout
def run_conversation_example(config_path: str):
    """Run a fixed list of example prompts through the inference pipeline.

    Each prompt is sent as an independent single-message inference via
    ``run_inference_with_config`` with ``max_tokens=256``; a failure of one
    prompt is reported and the remaining prompts are still processed.

    Args:
        config_path: Path to the YAML configuration file passed to every run.
    """
    print("=== Conversation Example ===")
    print(f"Config: {config_path}")
    example_messages = [
        "Can you explain what recursion is in programming?",
        "How do I debug a Python program?",
        "What's the difference between a list and a tuple in Python?",
        "Can you show me how to use a for loop?",
        "What are the benefits of using functions in programming?",
    ]
    print("Running example conversations...")
    print()
    for number, message in enumerate(example_messages, start=1):
        print(f"--- Example {number} ---")
        result = run_inference_with_config(config_path, message, max_tokens=256)
        # Failure is signalled by None; a successful run may legitimately
        # return an empty (falsy) stdout string.
        if result is None:
            print(f"❌ Failed to process message {number}")
        print()
    print("✅ Conversation examples completed!")
def show_inference_features():
    """Print a static overview of the instruct inference pipeline's features.

    The overview is a title line followed by six numbered sections, each
    preceded by a blank line and consisting of a heading plus bullet lines.
    """
    # (heading, bullet lines) pairs, printed in this order.
    feature_sections = (
        ("1. **Model Support**:", (
            " - Trained LoRA models from instruct training pipeline",
            " - Automatic model loading from config",
            " - Native Unsloth inference optimization",
            " - Chat template integration",
        )),
        ("2. **Inference Modes**:", (
            " - Single message inference",
            " - Interactive chat session",
            " - Streaming generation",
            " - Batch conversation processing",
        )),
        ("3. **Conversation Features**:", (
            " - Multi-turn conversation support",
            " - Context preservation across turns",
            " - Proper role handling (user/assistant/system)",
            " - Chat history management",
        )),
        ("4. **Generation Control**:", (
            " - Configurable max tokens",
            " - Temperature and sampling parameters",
            " - Streaming output support",
            " - Chat template formatting",
        )),
        ("5. **Interactive Features**:", (
            " - Real-time chat interface",
            " - Command support (clear, stream toggle)",
            " - Conversation history tracking",
            " - Graceful exit handling",
        )),
        ("6. **Usage Examples**:", (
            " - Single message: --message 'your question here'",
            " - Interactive chat: --interactive",
            " - Streaming: add --stream flag",
            " - Custom tokens: --max-tokens 256",
        )),
    )

    print("=== Instruct Inference Pipeline Features ===")
    for heading, bullets in feature_sections:
        print()
        print(heading)
        for bullet in bullets:
            print(bullet)
def create_inference_example(config_path: str = "configs/instruct/code_reasoning.yaml"):
    """Run one example inference using the code reasoning configuration.

    Checks that the configuration file exists, then sends a single example
    message through ``run_inference_with_config`` with ``max_tokens=256``.

    Args:
        config_path: Path to the YAML configuration file. Defaults to the
            code reasoning config, resolved relative to the current working
            directory.

    Returns:
        ``True`` when the inference run succeeded, ``False`` when the
        configuration file is missing or the run failed.
    """
    print("=== Inference Example: Code Reasoning Chat ===")
    print()

    # Check if we have the required files
    if not Path(config_path).exists():
        print(f"❌ Configuration file not found: {config_path}")
        print(" Please run the data processor and training first")
        return False
    print("✅ Found configuration file!")
    print(f" Config: {config_path}")
    print()

    # Example conversation
    example_message = "Can you explain what a Python decorator is and show me a simple example?"
    print(f"Example message: {example_message}")
    print()

    # Run inference
    output = run_inference_with_config(
        config_path=config_path,
        message=example_message,
        max_tokens=256,
    )
    # Failure is signalled by None; an empty stdout string is still a
    # successful run and must not be reported as a failure.
    if output is None:
        print("❌ Example inference failed!")
        return False
    print("✅ Example inference completed successfully!")
    return True
def start_interactive_chat(config_path: str, stream: bool = False):
    """Launch an interactive chat session for the given configuration.

    Args:
        config_path: Path to the YAML configuration file; must exist.
        stream: Whether streaming is requested for the session.

    Returns:
        ``False`` when the configuration file does not exist; otherwise
        whatever ``run_inference_with_config`` returns for the interactive run.
    """
    print("=== Interactive Chat Session ===")
    print()

    if Path(config_path).exists():
        stream_state = "enabled" if stream else "disabled"
        print(f"Starting interactive chat with config: {config_path}")
        print("Streaming:", stream_state)
        print()
        # Hand over to the interactive inference run and report its outcome.
        return run_inference_with_config(
            config_path=config_path,
            stream=stream,
            interactive=True,
        )

    print(f"❌ Configuration file not found: {config_path}")
    print(" Please run the data processor and training first")
    return False
def create_batch_test(config_path: str = "configs/instruct/code_reasoning.yaml"):
    """Run a fixed batch of test questions through the inference pipeline.

    Each question is sent as an independent single-message inference via
    ``run_inference_with_config`` with ``max_tokens=200``.

    Args:
        config_path: Path to the YAML configuration file. Defaults to the
            code reasoning config, resolved relative to the current working
            directory.

    Returns:
        ``True`` when every question was processed successfully, ``False``
        when the configuration file is missing or any run failed.
    """
    print("=== Batch Test: Multiple Questions ===")
    print()
    if not Path(config_path).exists():
        print(f"❌ Configuration file not found: {config_path}")
        print(" Please run the data processor and training first")
        return False

    # Create a batch of test questions
    test_questions = [
        "What is object-oriented programming?",
        "How do you handle errors in Python?",
        "Explain the concept of variables in programming.",
        "What's the difference between a compiler and an interpreter?",
    ]
    print("Running batch test with multiple questions...")
    print()
    success_count = 0
    for number, question in enumerate(test_questions, start=1):
        print(f"Question {number}: {question}")
        result = run_inference_with_config(config_path, question, max_tokens=200)
        # Failure is signalled by None; an empty stdout string is falsy but
        # still a successful run.
        if result is not None:
            success_count += 1
        print("-" * 50)
    print(f"✅ Batch test completed: {success_count}/{len(test_questions)} questions processed successfully")
    return success_count == len(test_questions)


def _print_usage_overview():
    """Print the command overview shown when no subcommand is given."""
    print("Instruct Inference Pipeline")
    print("==========================")
    print()
    print("Available commands:")
    print(" infer - Run single message inference")
    print(" chat - Start interactive chat session")
    print(" batch - Run batch test with multiple questions")
    print(" conversation - Run conversation examples")
    print(" features - Show available features")
    print(" example - Run example inference")
    print()
    print("Examples:")
    print(" python scripts/instruct/inference.py infer --config configs/instruct/code_reasoning.yaml --message 'Explain Python loops'")
    print(" python scripts/instruct/inference.py chat --config configs/instruct/code_reasoning.yaml")
    print(" python scripts/instruct/inference.py chat --config configs/instruct/code_reasoning.yaml --stream")
    print()
    print("Key Features:")
    print(" ✅ Interactive chat with conversation history")
    print(" ✅ Streaming generation support")
    print(" ✅ Multi-turn conversation handling")
    print(" ✅ Chat template integration")
    print(" ✅ Configurable generation parameters")


def main(argv=None):
    """Parse the command line and dispatch to the selected subcommand.

    Args:
        argv: Optional list of argument strings to parse instead of
            ``sys.argv[1:]``; ``None`` (the default) keeps the normal
            command-line behavior.
    """
    parser = argparse.ArgumentParser(description="Instruct Inference Pipeline")
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # Inference command
    infer_parser = subparsers.add_parser("infer", help="Run single inference")
    infer_parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")
    infer_parser.add_argument("--message", type=str, required=True, help="Message to send to the model")
    infer_parser.add_argument("--max-tokens", type=int, default=128, help="Maximum new tokens to generate")
    infer_parser.add_argument("--stream", action="store_true", help="Enable streaming generation")

    # Interactive command
    interactive_parser = subparsers.add_parser("chat", help="Start interactive chat")
    interactive_parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")
    interactive_parser.add_argument("--stream", action="store_true", help="Enable streaming generation")

    # Batch test command
    batch_parser = subparsers.add_parser("batch", help="Run batch test")
    batch_parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")

    # Conversation example command
    conv_parser = subparsers.add_parser("conversation", help="Run conversation examples")
    conv_parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")

    # Features command
    subparsers.add_parser("features", help="Show available features")

    # Example command
    subparsers.add_parser("example", help="Run example inference")

    args = parser.parse_args(argv)

    if args.command == "infer":
        run_inference_with_config(
            args.config,
            args.message,
            args.max_tokens,
            args.stream,
        )
    elif args.command == "chat":
        start_interactive_chat(args.config, args.stream)
    elif args.command == "batch":
        # Honor the required --config option instead of silently falling
        # back to the built-in default path.
        create_batch_test(args.config)
    elif args.command == "conversation":
        run_conversation_example(args.config)
    elif args.command == "features":
        show_inference_features()
    elif args.command == "example":
        create_inference_example()
    else:
        _print_usage_overview()
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()