updated instruct
This commit is contained in:
@@ -0,0 +1,291 @@
|
||||
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Instruct Inference Script
|
||||
Provides a command-line interface to run the instruct inference pipeline
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import subprocess
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
def run_inference_with_config(config_path: str, message: str = "", max_tokens: int = 128, stream: bool = False, interactive: bool = False):
    """Run the instruct inference pipeline script as a child process.

    Builds a command line for ``pipelines/instruct/inference.py`` (a path
    relative to the current working directory) and executes it with the same
    Python interpreter that is running this script.

    Args:
        config_path: Path to the YAML configuration file, forwarded as
            ``--config``.
        message: Message forwarded as ``--message``. Ignored when
            ``interactive`` is true or when the string is empty.
        max_tokens: Forwarded as ``--max-tokens``.
        stream: When true, ``--stream`` is appended to the command. In
            non-interactive mode the child's output is still captured and
            only printed after the child exits.
        interactive: When true, ``--interactive`` is passed and the child's
            output is not captured (it inherits this process's streams).

    Returns:
        ``True`` after a successful interactive run, the captured stdout text
        after a successful non-interactive run, or ``None`` when the child
        could not be started or exited with a non-zero status. Note that an
        empty captured stdout is returned as ``""``, which is falsy.
    """
    print(f"Running instruct inference with config: {config_path}")
    if interactive:
        print("Mode: Interactive chat")
    elif message:
        print(f"Message: {message}")
    print(f"Max tokens: {max_tokens}")
    print(f"Streaming: {stream}")

    cmd = [
        # Use the running interpreter so the child sees the same environment
        # (virtualenv, installed packages); a bare "python" may resolve to a
        # different interpreter or not exist on PATH at all.
        sys.executable or "python", "pipelines/instruct/inference.py",
        "--config", config_path,
        "--max-tokens", str(max_tokens),
    ]

    if interactive:
        cmd.append("--interactive")
    elif message:
        cmd.extend(["--message", message])

    if stream:
        cmd.append("--stream")

    print(f"Running: {' '.join(cmd)}")

    try:
        if interactive:
            # For interactive mode, don't capture output
            subprocess.run(cmd, check=True)
            return True
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"❌ Inference failed: {e}")
        # stderr is None when output was not captured (interactive mode);
        # the child's errors already went straight to the terminal then.
        if e.stderr:
            print("Error output:")
            print(e.stderr)
        return None
    except OSError as e:
        # The interpreter itself could not be launched.
        print(f"❌ Inference failed: {e}")
        return None

    print("✅ Inference completed successfully!")
    print("Output:")
    print(result.stdout)
    return result.stdout
|
||||
|
||||
def run_conversation_example(config_path: str):
    """Send a fixed set of sample prompts through the inference runner.

    Each prompt is passed to ``run_inference_with_config`` as its own call
    with ``max_tokens=256``; a falsy result is reported as a failure for that
    prompt and the loop continues with the next one.

    Args:
        config_path: Path to the YAML configuration file used for every call.
    """
    print("=== Conversation Example ===")
    print(f"Config: {config_path}")

    prompts = (
        "Can you explain what recursion is in programming?",
        "How do I debug a Python program?",
        "What's the difference between a list and a tuple in Python?",
        "Can you show me how to use a for loop?",
        "What are the benefits of using functions in programming?",
    )

    print("Running example conversations...")
    print()

    for number, prompt in enumerate(prompts, start=1):
        print(f"--- Example {number} ---")
        outcome = run_inference_with_config(config_path, prompt, max_tokens=256)
        if not outcome:
            print(f"❌ Failed to process message {number}")
        print()

    print("✅ Conversation examples completed!")
|
||||
|
||||
def show_inference_features():
    """Print a static, sectioned overview of the inference pipeline features.

    The output is a title line followed by six numbered sections; every
    section is preceded by one blank line and consists of a heading plus its
    bullet lines.
    """
    sections = (
        ("1. **Model Support**:", (
            " - Trained LoRA models from instruct training pipeline",
            " - Automatic model loading from config",
            " - Native Unsloth inference optimization",
            " - Chat template integration",
        )),
        ("2. **Inference Modes**:", (
            " - Single message inference",
            " - Interactive chat session",
            " - Streaming generation",
            " - Batch conversation processing",
        )),
        ("3. **Conversation Features**:", (
            " - Multi-turn conversation support",
            " - Context preservation across turns",
            " - Proper role handling (user/assistant/system)",
            " - Chat history management",
        )),
        ("4. **Generation Control**:", (
            " - Configurable max tokens",
            " - Temperature and sampling parameters",
            " - Streaming output support",
            " - Chat template formatting",
        )),
        ("5. **Interactive Features**:", (
            " - Real-time chat interface",
            " - Command support (clear, stream toggle)",
            " - Conversation history tracking",
            " - Graceful exit handling",
        )),
        ("6. **Usage Examples**:", (
            " - Single message: --message 'your question here'",
            " - Interactive chat: --interactive",
            " - Streaming: add --stream flag",
            " - Custom tokens: --max-tokens 256",
        )),
    )

    print("=== Instruct Inference Pipeline Features ===")
    for heading, bullets in sections:
        # A blank line separates the title and each section from the next.
        print()
        print(heading)
        for bullet in bullets:
            print(bullet)
|
||||
|
||||
def create_inference_example():
    """Run one sample prompt against the code reasoning configuration.

    Uses the fixed config path ``configs/instruct/code_reasoning.yaml``
    (relative to the current working directory).

    Returns:
        ``False`` when the config file is missing or the inference call
        returns a falsy result; ``True`` otherwise.
    """
    print("=== Inference Example: Code Reasoning Chat ===")
    print()

    # Bail out early when the required configuration is not on disk.
    config_path = "configs/instruct/code_reasoning.yaml"
    if not Path(config_path).exists():
        print(f"❌ Configuration file not found: {config_path}")
        print(" Please run the data processor and training first")
        return False

    print("✅ Found configuration file!")
    print(f" Config: {config_path}")
    print()

    prompt = "Can you explain what a Python decorator is and show me a simple example?"
    print(f"Example message: {prompt}")
    print()

    outcome = run_inference_with_config(
        config_path=config_path,
        message=prompt,
        max_tokens=256,
    )

    if not outcome:
        print("❌ Example inference failed!")
        return False

    print("✅ Example inference completed successfully!")
    return True
|
||||
|
||||
def start_interactive_chat(config_path: str, stream: bool = False):
    """Launch the inference runner in interactive mode.

    Args:
        config_path: Path to the YAML configuration file; must exist.
        stream: Forwarded to the runner to request streaming generation.

    Returns:
        ``False`` when the config file does not exist; otherwise whatever
        ``run_inference_with_config`` returns for the interactive run.
    """
    print("=== Interactive Chat Session ===")
    print()

    config_missing = not Path(config_path).exists()
    if config_missing:
        print(f"❌ Configuration file not found: {config_path}")
        print(" Please run the data processor and training first")
        return False

    streaming_state = "enabled" if stream else "disabled"
    print(f"Starting interactive chat with config: {config_path}")
    print("Streaming:", streaming_state)
    print()

    # Hand control to the interactive runner and report its outcome as-is.
    return run_inference_with_config(
        config_path=config_path,
        interactive=True,
        stream=stream,
    )
|
||||
|
||||
def create_batch_test(config_path: str = "configs/instruct/code_reasoning.yaml"):
    """Run a fixed batch of test questions through the inference runner.

    Args:
        config_path: YAML configuration file to use. Defaults to the code
            reasoning config so existing zero-argument callers keep working.

    Returns:
        ``False`` when the configuration file does not exist; otherwise
        ``True`` only if every question produced a truthy result.
    """
    print("=== Batch Test: Multiple Questions ===")
    print()

    if not Path(config_path).exists():
        print(f"❌ Configuration file not found: {config_path}")
        print(" Please run the data processor and training first")
        return False

    # Create a batch of test questions
    test_questions = [
        "What is object-oriented programming?",
        "How do you handle errors in Python?",
        "Explain the concept of variables in programming.",
        "What's the difference between a compiler and an interpreter?",
    ]

    print("Running batch test with multiple questions...")
    print()

    success_count = 0
    for i, question in enumerate(test_questions, start=1):
        print(f"Question {i}: {question}")
        result = run_inference_with_config(config_path, question, max_tokens=200)
        if result:
            success_count += 1
        print("-" * 50)

    print(f"✅ Batch test completed: {success_count}/{len(test_questions)} questions processed successfully")
    return success_count == len(test_questions)


def _print_usage():
    """Print the overview shown when no sub-command is given."""
    print("Instruct Inference Pipeline")
    print("==========================")
    print()
    print("Available commands:")
    print(" infer - Run single message inference")
    print(" chat - Start interactive chat session")
    print(" batch - Run batch test with multiple questions")
    print(" conversation - Run conversation examples")
    print(" features - Show available features")
    print(" example - Run example inference")
    print()
    print("Examples:")
    print(" python scripts/instruct/inference.py infer --config configs/instruct/code_reasoning.yaml --message 'Explain Python loops'")
    print(" python scripts/instruct/inference.py chat --config configs/instruct/code_reasoning.yaml")
    print(" python scripts/instruct/inference.py chat --config configs/instruct/code_reasoning.yaml --stream")
    print()
    print("Key Features:")
    print(" ✅ Interactive chat with conversation history")
    print(" ✅ Streaming generation support")
    print(" ✅ Multi-turn conversation handling")
    print(" ✅ Chat template integration")
    print(" ✅ Configurable generation parameters")


def main(argv=None):
    """Parse the command line and dispatch to the selected sub-command.

    Args:
        argv: Argument list to parse. ``None`` (the default) lets argparse
            read the process command line.

    Returns:
        ``0`` when the command succeeded or only the usage overview was
        shown, ``1`` when the command reported a failure.
    """
    parser = argparse.ArgumentParser(description="Instruct Inference Pipeline")
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # Inference command
    infer_parser = subparsers.add_parser("infer", help="Run single inference")
    infer_parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")
    infer_parser.add_argument("--message", type=str, required=True, help="Message to send to the model")
    infer_parser.add_argument("--max-tokens", type=int, default=128, help="Maximum new tokens to generate")
    infer_parser.add_argument("--stream", action="store_true", help="Enable streaming generation")

    # Interactive command
    interactive_parser = subparsers.add_parser("chat", help="Start interactive chat")
    interactive_parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")
    interactive_parser.add_argument("--stream", action="store_true", help="Enable streaming generation")

    # Batch test command
    batch_parser = subparsers.add_parser("batch", help="Run batch test")
    batch_parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")

    # Conversation example command
    conv_parser = subparsers.add_parser("conversation", help="Run conversation examples")
    conv_parser.add_argument("--config", type=str, required=True, help="Path to YAML configuration file")

    # Features command
    subparsers.add_parser("features", help="Show available features")

    # Example command
    subparsers.add_parser("example", help="Run example inference")

    args = parser.parse_args(argv)

    if args.command == "infer":
        result = run_inference_with_config(
            args.config,
            args.message,
            args.max_tokens,
            args.stream,
        )
        # The runner returns None only on failure; an empty but successful
        # output must not be reported as an error.
        return 0 if result is not None else 1
    if args.command == "chat":
        return 0 if start_interactive_chat(args.config, args.stream) else 1
    if args.command == "batch":
        # Honour the required --config option instead of silently falling
        # back to the built-in default path.
        return 0 if create_batch_test(args.config) else 1
    if args.command == "conversation":
        run_conversation_example(args.config)
        return 0
    if args.command == "features":
        show_inference_features()
        return 0
    if args.command == "example":
        return 0 if create_inference_example() else 1

    _print_usage()
    return 0
|
||||
# Script entry point: run the CLI and hand main()'s return value to the
# interpreter as the exit status (a return value of None exits with 0).
if __name__ == "__main__":
    sys.exit(main())
|
||||
Reference in New Issue
Block a user