"""
Service for model management and interaction.
"""
|
|
|
|
import requests
|
|
import json
|
|
from typing import List, Dict, Any, Optional
|
|
|
|
from ai_service.config import config
|
|
from ai_service.models.model_parameters import ModelParameters
|
|
|
|
class ModelService:
    """Service for model management and interaction.

    Holds a static catalogue of chat models and sends prompts either to the
    Ollama chat API or, when RAG is requested, to OpenWebUI. A failed
    OpenWebUI call falls back to a direct Ollama call.
    """

    # Catalogue of selectable models, keyed by the model ID sent to the backend.
    AVAILABLE_MODELS = {
        'gemma3': {
            'name': 'Gemma 3',
            'description': 'Google Gemma 3 model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192,
        },
        'llama3.3': {
            'name': 'Llama 3 (70B)',
            'description': 'Meta Llama 3 70B model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192,
        },
        'llama3.1': {
            'name': 'Llama 3 (8B)',
            'description': 'Meta Llama 3 8B model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192,
        },
        'mistral': {
            'name': 'Mistral',
            'description': 'Mistral AI model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192,
        },
        'deepseek': {
            'name': 'DeepSeek',
            'description': 'DeepSeek model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192,
        },
    }

    # Seconds to wait for either backend before giving up.
    REQUEST_TIMEOUT = 60

    # System message used when the caller supplies no custom system prompt.
    DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant."

    # ModelParameters.to_dict() key -> request field name, per backend.
    # OpenWebUI takes these as top-level fields of the request body.
    _OPENWEBUI_PARAM_MAP = {
        'temperature': 'temperature',
        'max_tokens': 'max_tokens',
        'top_p': 'top_p',
    }
    # Ollama reads sampling parameters from the nested "options" object and
    # calls the generation length limit "num_predict".
    _OLLAMA_OPTION_MAP = {
        'temperature': 'temperature',
        'top_p': 'top_p',
        'top_k': 'top_k',
        'max_tokens': 'num_predict',
    }

    def __init__(self):
        """Initialize the model service from the application config."""
        self.default_model = config.DEFAULT_MODEL
        self.ollama_api_url = config.OLLAMA_API_URL
        self.openwebui_url = config.OPENWEBUI_URL
        self.openwebui_api_key = config.OPENWEBUI_API_KEY

    def get_available_models(self) -> List[Dict[str, Any]]:
        """
        Get a list of available models.

        Returns:
            List of model information dictionaries. Each one carries the
            catalogue fields plus 'id' and 'is_default'. If the catalogue is
            empty, a single hard-coded 'llama3.1' entry is returned instead.
        """
        models = [
            self._describe_model(model_id, model_info)
            for model_id, model_info in self.AVAILABLE_MODELS.items()
        ]

        # Debug log
        print(f"Model service models: {models}")

        # Ensure we're returning a non-empty list
        if not models:
            return [{
                'id': 'llama3.1',
                'name': 'Llama 3 (8B)',
                'description': 'Meta Llama 3 8B model via Ollama',
                'provider': 'ollama',
                'max_tokens': 8192,
                'is_default': True,
            }]

        return models

    def get_model_info(self, model_id: str) -> Optional[Dict[str, Any]]:
        """
        Get information about a specific model.

        Args:
            model_id: ID of the model.

        Returns:
            Model information dictionary if found, None otherwise.
        """
        model_info = self.AVAILABLE_MODELS.get(model_id)
        if model_info is None:
            return None
        return self._describe_model(model_id, model_info)

    def generate_response(self, model_id: str, prompt: str,
                          context: Optional[List[Dict[str, str]]] = None,
                          use_rag: bool = False,
                          model_params: Optional["ModelParameters"] = None) -> str:
        """
        Generate a response from the model.

        Args:
            model_id: ID of the model to use. IDs missing from
                AVAILABLE_MODELS are replaced by the configured default.
            prompt: User prompt.
            context: Optional conversation context (earlier chat messages).
            use_rag: Whether to use RAG (Retrieval Augmented Generation).
                When True the request goes to OpenWebUI first; if that call
                raises, the request is retried against Ollama without RAG.
            model_params: Optional model parameters.

        Returns:
            Generated response text, or a string starting with "Error" when
            the backend call fails or returns an unrecognized payload.
        """
        # Debug configuration information
        print("ModelService configuration:")
        print(f" - Ollama API URL: {self.ollama_api_url}")
        print(f" - OpenWebUI URL: {self.openwebui_url}")
        print(f" - Default model: {self.default_model}")
        print(f" - Requested model: {model_id}")
        print(f" - Using RAG: {use_rag}")

        # Unknown IDs fall back to the configured default. The default itself
        # is not checked against AVAILABLE_MODELS.
        if model_id not in self.AVAILABLE_MODELS:
            model_id = self.default_model
            print(f" - Model not found, using default: {model_id}")

        # Use custom system prompt if provided, otherwise use default
        system_content = self.DEFAULT_SYSTEM_PROMPT
        if model_params and model_params.system_prompt:
            system_content = model_params.system_prompt

        # Both backends receive the same conversation:
        # system message, prior context, then the new user prompt.
        messages = [{"role": "system", "content": system_content}]
        if context:
            messages.extend(context)
        messages.append({"role": "user", "content": prompt})

        params = model_params.to_dict() if model_params else {}

        if use_rag:
            rag_reply = self._generate_with_rag(model_id, messages, params)
            if rag_reply is not None:
                return rag_reply
            # None means the OpenWebUI call failed; continue without RAG.

        return self._generate_with_ollama(model_id, messages, params)

    def _describe_model(self, model_id: str,
                        model_info: Dict[str, Any]) -> Dict[str, Any]:
        """Merge a catalogue entry with its ID and default-model flag."""
        return {
            'id': model_id,
            'is_default': model_id == self.default_model,
            **model_info,
        }

    @staticmethod
    def _map_params(params: Dict[str, Any],
                    mapping: Dict[str, str]) -> Dict[str, Any]:
        """Rename the supported entries of params; drop unknown or None ones."""
        return {
            target: params[source]
            for source, target in mapping.items()
            if params.get(source) is not None
        }

    @staticmethod
    def _redact_headers(headers: Dict[str, str]) -> Dict[str, str]:
        """Return a copy of headers that is safe to print (no bearer token)."""
        return {
            name: '<redacted>' if name.lower() == 'authorization' else value
            for name, value in headers.items()
        }

    @staticmethod
    def _extract_content(result: Any) -> Optional[str]:
        """Pull the assistant text out of a chat response payload.

        Accepts the OpenAI-style shape ({"choices": [{"message": {...}}]})
        and the Ollama shape ({"message": {...}}). Returns None when neither
        shape yields a string 'content'.
        """
        if not isinstance(result, dict):
            return None

        message = None
        choices = result.get('choices')
        if isinstance(choices, list) and choices and isinstance(choices[0], dict):
            message = choices[0].get('message')
        if message is None:
            message = result.get('message')

        if isinstance(message, dict) and isinstance(message.get('content'), str):
            return message['content']
        return None

    def _generate_with_rag(self, model_id: str,
                           messages: List[Dict[str, str]],
                           params: Dict[str, Any]) -> Optional[str]:
        """Ask OpenWebUI for a RAG answer; return None if the call fails."""
        try:
            # NOTE(review): "use_knowledge" is kept from the original request;
            # confirm OpenWebUI honors it (its API docs describe attaching
            # knowledge through "files" entries instead).
            openwebui_request = {
                "model": model_id,
                "messages": messages,
                "use_knowledge": True,  # Enable RAG
                "stream": False,
            }
            openwebui_request.update(
                self._map_params(params, self._OPENWEBUI_PARAM_MAP)
            )

            headers = {"Content-Type": "application/json"}
            if self.openwebui_api_key:
                headers["Authorization"] = f"Bearer {self.openwebui_api_key}"

            url = f"{self.openwebui_url}/api/chat/completions"

            # Debug logs (the API key is never printed)
            print(f"Sending RAG request to OpenWebUI at: {url}")
            print(f"OpenWebUI request: {json.dumps(openwebui_request, indent=2)}")
            print(f"Headers: {self._redact_headers(headers)}")

            response = requests.post(
                url,
                headers=headers,
                json=openwebui_request,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()

            content = self._extract_content(response.json())
            if content is None:
                return "Error: Unexpected response format from OpenWebUI"
            return content
        except Exception as e:
            # Deliberately broad: RAG is best-effort, so any failure here
            # degrades to a plain Ollama call instead of surfacing an error.
            print(f"Error calling OpenWebUI API: {str(e)}")
            print("Falling back to direct Ollama call without RAG")
            return None

    def _generate_with_ollama(self, model_id: str,
                              messages: List[Dict[str, str]],
                              params: Dict[str, Any]) -> str:
        """Send the conversation to Ollama's chat API and return the reply."""
        request_json = {
            "model": model_id,
            "messages": messages,
            "stream": False,
        }
        options = self._map_params(params, self._OLLAMA_OPTION_MAP)
        if options:
            request_json["options"] = options

        try:
            url = f"{self.ollama_api_url}/api/chat"
            print(f"Sending request to Ollama API at: {url}")
            print(f"Request JSON: {json.dumps(request_json, indent=2)}")

            response = requests.post(
                url,
                headers={"Content-Type": "application/json"},
                json=request_json,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()

            content = self._extract_content(response.json())
            if content is None:
                return "Error: Unexpected response format from Ollama"
            return content
        except Exception as e:
            # Callers get an error string, never an exception, from this path.
            return f"Error generating response: {str(e)}"
|
|
|
|
|
|
# Create a singleton instance
|
|
# Shared module-level instance; it is constructed when this module is imported,
# which is when ModelService.__init__ reads its settings from `config`.
model_service = ModelService()
|