292 lines
11 KiB
Python
292 lines
11 KiB
Python
|
|
#!/usr/bin/env python3
|
|
"""
|
|
Instruct Inference Script
|
|
Provides a command-line interface to run the instruct inference pipeline
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
import subprocess
|
|
import argparse
|
|
from pathlib import Path
|
|
|
|
def run_inference_with_config(config_path: str, message: str = "", max_tokens: int = 128, stream: bool = False, interactive: bool = False):
    """Run the instruct inference pipeline script in a subprocess.

    Builds a command line for ``pipelines/instruct/inference.py`` (a path
    relative to the current working directory) and runs it with the same
    Python interpreter that is executing this script.

    Args:
        config_path: Path to the YAML configuration file (``--config``).
        message: Message forwarded as ``--message``. Not forwarded when
            ``interactive`` is true or when the message is empty.
        max_tokens: Value forwarded as ``--max-tokens``.
        stream: When true, ``--stream`` is appended to the command.
        interactive: When true, ``--interactive`` is passed and the child
            process inherits this process's stdin/stdout/stderr.

    Returns:
        ``True`` after a successful interactive run, the captured stdout (a
        ``str``, possibly empty) after a successful non-interactive run, or
        ``None`` if the child could not be started or exited with a non-zero
        status.
    """
    import shlex  # Local import: only needed to display the command line.

    print(f"Running instruct inference with config: {config_path}")
    if interactive:
        print("Mode: Interactive chat")
    elif message:
        print(f"Message: {message}")
    print(f"Max tokens: {max_tokens}")
    print(f"Streaming: {stream}")

    cmd = [
        # Use the running interpreter so the child sees the same environment;
        # fall back to "python" only if the interpreter path is unavailable.
        sys.executable or "python",
        "pipelines/instruct/inference.py",
        "--config", config_path,
        "--max-tokens", str(max_tokens),
    ]

    if interactive:
        cmd.append("--interactive")
    elif message:
        cmd.extend(["--message", message])

    if stream:
        cmd.append("--stream")

    # Quote each argument so multi-word messages are shown unambiguously.
    printable_cmd = " ".join(shlex.quote(str(part)) for part in cmd)
    print(f"Running: {printable_cmd}")

    try:
        if interactive:
            # For interactive mode, don't capture output: the user talks to
            # the child process directly.
            subprocess.run(cmd, check=True)
            return True
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"❌ Inference failed: {e}")
        # stderr is only captured for non-interactive runs; in interactive
        # mode the child already wrote its errors to the terminal.
        if e.stderr is not None:
            print("Error output:")
            print(e.stderr)
        return None
    except OSError as e:
        # The interpreter could not be launched at all.
        print(f"❌ Inference failed: {e}")
        return None

    print("✅ Inference completed successfully!")
    print("Output:")
    print(result.stdout)
    return result.stdout
|
|
|
|
def run_conversation_example(config_path: str):
    """Run a fixed set of example questions through the inference pipeline.

    Each question is sent as an independent single-message inference with a
    256-token limit. A failure of one question is reported and the remaining
    questions are still run.

    Args:
        config_path: Path to the YAML configuration file used for every run.
    """
    print("=== Conversation Example ===")
    print(f"Config: {config_path}")

    example_messages = [
        "Can you explain what recursion is in programming?",
        "How do I debug a Python program?",
        "What's the difference between a list and a tuple in Python?",
        "Can you show me how to use a for loop?",
        "What are the benefits of using functions in programming?",
    ]

    print("Running example conversations...")
    print()

    for i, message in enumerate(example_messages, start=1):
        print(f"--- Example {i} ---")
        result = run_inference_with_config(config_path, message, max_tokens=256)
        # Failure is signalled by None; an empty string is a successful run
        # that simply produced no output, so do not test truthiness here.
        if result is None:
            print(f"❌ Failed to process message {i}")
        print()

    print("✅ Conversation examples completed!")
|
|
|
|
def show_inference_features():
    """Print a static, numbered overview of the inference pipeline features."""
    sections = (
        ("Model Support", (
            "Trained LoRA models from instruct training pipeline",
            "Automatic model loading from config",
            "Native Unsloth inference optimization",
            "Chat template integration",
        )),
        ("Inference Modes", (
            "Single message inference",
            "Interactive chat session",
            "Streaming generation",
            "Batch conversation processing",
        )),
        ("Conversation Features", (
            "Multi-turn conversation support",
            "Context preservation across turns",
            "Proper role handling (user/assistant/system)",
            "Chat history management",
        )),
        ("Generation Control", (
            "Configurable max tokens",
            "Temperature and sampling parameters",
            "Streaming output support",
            "Chat template formatting",
        )),
        ("Interactive Features", (
            "Real-time chat interface",
            "Command support (clear, stream toggle)",
            "Conversation history tracking",
            "Graceful exit handling",
        )),
        ("Usage Examples", (
            "Single message: --message 'your question here'",
            "Interactive chat: --interactive",
            "Streaming: add --stream flag",
            "Custom tokens: --max-tokens 256",
        )),
    )

    print("=== Instruct Inference Pipeline Features ===")
    for number, (title, bullets) in enumerate(sections, start=1):
        # A blank line precedes every section, so none trails the last one.
        print()
        print(f"{number}. **{title}**:")
        for bullet in bullets:
            print(f" - {bullet}")
|
|
|
|
def create_inference_example(config_path: str = "configs/instruct/code_reasoning.yaml"):
    """Run one example inference, by default with the code reasoning config.

    Args:
        config_path: Path to the YAML configuration file. Defaults to the
            code reasoning configuration.

    Returns:
        ``True`` if the inference run succeeded, ``False`` if the
        configuration file does not exist or the run failed.
    """
    print("=== Inference Example: Code Reasoning Chat ===")
    print()

    # Check if we have the required files
    if not Path(config_path).exists():
        print(f"❌ Configuration file not found: {config_path}")
        print(" Please run the data processor and training first")
        return False

    print("✅ Found configuration file!")
    print(f" Config: {config_path}")
    print()

    # Example conversation
    example_message = "Can you explain what a Python decorator is and show me a simple example?"

    print(f"Example message: {example_message}")
    print()

    # Run inference
    result = run_inference_with_config(
        config_path=config_path,
        message=example_message,
        max_tokens=256,
    )

    # Failure is signalled by None; an empty string is still a successful
    # run, so compare against None rather than testing truthiness.
    if result is None:
        print("❌ Example inference failed!")
        return False

    print("✅ Example inference completed successfully!")
    return True
|
|
|
|
def start_interactive_chat(config_path: str, stream: bool = False):
    """Launch an interactive chat session for the given configuration.

    Returns ``False`` without launching anything when ``config_path`` does
    not exist; otherwise returns whatever ``run_inference_with_config``
    returns for the interactive run.
    """
    print("=== Interactive Chat Session ===")
    print()

    config_file = Path(config_path)
    if not config_file.exists():
        print(f"❌ Configuration file not found: {config_path}")
        print(" Please run the data processor and training first")
        return False

    streaming_state = "enabled" if stream else "disabled"
    print(f"Starting interactive chat with config: {config_path}")
    print("Streaming:", streaming_state)
    print()

    # Hand control to the inference script in interactive mode.
    return run_inference_with_config(
        config_path=config_path,
        stream=stream,
        interactive=True,
    )
|
|
|
|
def create_batch_test(config_path: str = "configs/instruct/code_reasoning.yaml"):
    """Run a fixed batch of test questions through the inference pipeline.

    Args:
        config_path: Path to the YAML configuration file. Defaults to the
            code reasoning configuration.

    Returns:
        ``True`` if every question was processed successfully, ``False`` if
        the configuration file does not exist or any question failed.
    """
    print("=== Batch Test: Multiple Questions ===")
    print()

    if not Path(config_path).exists():
        print(f"❌ Configuration file not found: {config_path}")
        print(" Please run the data processor and training first")
        return False

    # Create a batch of test questions
    test_questions = [
        "What is object-oriented programming?",
        "How do you handle errors in Python?",
        "Explain the concept of variables in programming.",
        "What's the difference between a compiler and an interpreter?",
    ]

    print("Running batch test with multiple questions...")
    print()

    success_count = 0
    for i, question in enumerate(test_questions, start=1):
        print(f"Question {i}: {question}")
        result = run_inference_with_config(config_path, question, max_tokens=200)
        # Failure is signalled by None; an empty string is still a
        # successful run, so do not test truthiness here.
        if result is not None:
            success_count += 1
        print("-" * 50)

    print(f"✅ Batch test completed: {success_count}/{len(test_questions)} questions processed successfully")
    return success_count == len(test_questions)


def _print_usage():
    """Print the command overview shown when no subcommand is given."""
    usage_lines = (
        "Instruct Inference Pipeline",
        "==========================",
        "",
        "Available commands:",
        " infer - Run single message inference",
        " chat - Start interactive chat session",
        " batch - Run batch test with multiple questions",
        " conversation - Run conversation examples",
        " features - Show available features",
        " example - Run example inference",
        "",
        "Examples:",
        " python scripts/instruct/inference.py infer --config configs/instruct/code_reasoning.yaml --message 'Explain Python loops'",
        " python scripts/instruct/inference.py chat --config configs/instruct/code_reasoning.yaml",
        " python scripts/instruct/inference.py chat --config configs/instruct/code_reasoning.yaml --stream",
        "",
        "Key Features:",
        " ✅ Interactive chat with conversation history",
        " ✅ Streaming generation support",
        " ✅ Multi-turn conversation handling",
        " ✅ Chat template integration",
        " ✅ Configurable generation parameters",
    )
    for line in usage_lines:
        print(line)


def main():
    """Parse the command line and dispatch to the selected subcommand.

    Subcommands: ``infer``, ``chat``, ``batch``, ``conversation``,
    ``features`` and ``example``. With no subcommand, a usage overview is
    printed instead.
    """
    parser = argparse.ArgumentParser(description="Instruct Inference Pipeline")
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # Inference command
    infer_parser = subparsers.add_parser("infer", help="Run single inference")
    infer_parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")
    infer_parser.add_argument("--message", type=str, required=True, help="Message to send to the model")
    infer_parser.add_argument("--max-tokens", type=int, default=128, help="Maximum new tokens to generate")
    infer_parser.add_argument("--stream", action="store_true", help="Enable streaming generation")

    # Interactive command
    interactive_parser = subparsers.add_parser("chat", help="Start interactive chat")
    interactive_parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")
    interactive_parser.add_argument("--stream", action="store_true", help="Enable streaming generation")

    # Batch test command
    batch_parser = subparsers.add_parser("batch", help="Run batch test")
    batch_parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")

    # Conversation example command
    conv_parser = subparsers.add_parser("conversation", help="Run conversation examples")
    conv_parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")

    # Features command
    subparsers.add_parser("features", help="Show available features")

    # Example command
    subparsers.add_parser("example", help="Run example inference")

    args = parser.parse_args()

    if args.command == "infer":
        run_inference_with_config(
            args.config,
            args.message,
            args.max_tokens,
            args.stream,
        )
    elif args.command == "chat":
        start_interactive_chat(args.config, args.stream)
    elif args.command == "batch":
        # Forward the required --config option so the batch test runs
        # against the configuration the user asked for.
        create_batch_test(args.config)
    elif args.command == "conversation":
        run_conversation_example(args.config)
    elif args.command == "features":
        show_inference_features()
    elif args.command == "example":
        create_inference_example()
    else:
        _print_usage()
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|