Files
Iyeoluwa Akinrinola 1896298a18 Added Rag Featured
2025-05-16 15:24:01 +01:00

307 lines
12 KiB
Python

"""
Service for model management and interaction.
"""
import requests
import json
from typing import List, Dict, Any, Optional
from ai_service.config import config
from ai_service.models.model_parameters import ModelParameters
class ModelService:
    """Service for model management and interaction.

    Exposes the static catalogue of supported models and generates chat
    completions, either directly through the Ollama ``/api/chat`` endpoint or,
    when RAG is requested, through OpenWebUI's ``/api/chat/completions``
    endpoint with a fallback to Ollama on any failure.

    Diagnostics are written with ``print``; failures of the Ollama call are
    reported to the caller as the returned string rather than raised.
    """

    # Available models
    AVAILABLE_MODELS = {
        'gemma3': {
            'name': 'Gemma 3',
            'description': 'Google Gemma 3 model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        },
        'llama3.3': {
            'name': 'Llama 3 (70B)',
            'description': 'Meta Llama 3 70B model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        },
        'llama3.1': {
            'name': 'Llama 3 (8B)',
            'description': 'Meta Llama 3 8B model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        },
        'mistral': {
            'name': 'Mistral',
            'description': 'Mistral AI model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        },
        'deepseek': {
            'name': 'DeepSeek',
            'description': 'DeepSeek model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        }
    }

    # Keys of ``model_params.to_dict()`` -> field names of the OpenWebUI
    # (OpenAI-style) request body.
    _OPENWEBUI_PARAM_KEYS = {
        'temperature': 'temperature',
        'max_tokens': 'max_tokens',
        'top_p': 'top_p',
        'top_k': 'top_k',
        'repeat_penalty': 'repeat_penalty',
    }

    # Keys of ``model_params.to_dict()`` -> names inside Ollama's ``options``
    # object. Ollama calls the generation limit ``num_predict``.
    _OLLAMA_OPTION_KEYS = {
        'temperature': 'temperature',
        'top_p': 'top_p',
        'top_k': 'top_k',
        'max_tokens': 'num_predict',
        'repeat_penalty': 'repeat_penalty',
    }

    def __init__(self):
        """Initialize the model service from the values in ``config``."""
        self.default_model = config.DEFAULT_MODEL
        self.ollama_api_url = config.OLLAMA_API_URL
        self.openwebui_url = config.OPENWEBUI_URL
        self.openwebui_api_key = config.OPENWEBUI_API_KEY
        self.api_timeout = config.API_TIMEOUT

    def get_available_models(self) -> List[Dict[str, Any]]:
        """
        Get a list of available models.

        Returns:
            List of model information dictionaries, one per entry of
            ``AVAILABLE_MODELS``. If the catalogue is empty, a single
            hard-coded 'llama3.1' record marked as default is returned so
            the result is never an empty list.
        """
        models = [self.get_model_info(model_id) for model_id in self.AVAILABLE_MODELS]

        # Debug log
        print(f"Model service models: {models}")

        # Ensure we're returning a non-empty list
        if not models:
            return [{
                'id': 'llama3.1',
                'name': 'Llama 3 (8B)',
                'description': 'Meta Llama 3 8B model via Ollama',
                'provider': 'ollama',
                'max_tokens': 8192,
                'is_default': True
            }]
        return models

    def get_model_info(self, model_id: str) -> Optional[Dict[str, Any]]:
        """
        Get information about a specific model.

        Args:
            model_id: ID of the model.

        Returns:
            The catalogue entry extended with 'id' and 'is_default' if the
            model is known, None otherwise.
        """
        model_info = self.AVAILABLE_MODELS.get(model_id)
        if model_info is None:
            return None
        return {
            'id': model_id,
            'is_default': model_id == self.default_model,
            **model_info
        }

    def generate_response(self, model_id: str, prompt: str,
                          context: Optional[List[Dict[str, str]]] = None,
                          use_rag: bool = False,
                          model_params: Optional["ModelParameters"] = None) -> str:
        """
        Generate a response from the model.

        Args:
            model_id: ID of the model to use; unknown IDs are replaced by the
                configured default model.
            prompt: User prompt.
            context: Optional conversation context (earlier chat messages),
                inserted between the system message and the user prompt.
            use_rag: Whether to use RAG (Retrieval Augmented Generation) by
                routing the request through OpenWebUI first.
            model_params: Optional model parameters (system prompt and
                sampling settings).

        Returns:
            The generated text. If the Ollama call fails, the returned string
            is a human-readable error message instead.
        """
        # Debug configuration information
        print("ModelService configuration:")
        print(f" - Ollama API URL: {self.ollama_api_url}")
        print(f" - OpenWebUI URL: {self.openwebui_url}")
        print(f" - Default model: {self.default_model}")
        print(f" - Requested model: {model_id}")
        print(f" - Using RAG: {use_rag}")

        if model_id not in self.AVAILABLE_MODELS:
            model_id = self.default_model
            print(f" - Model not found, using default: {model_id}")

        # Use custom system prompt if provided, otherwise use default
        system_content = "You are a helpful assistant."
        if model_params and model_params.system_prompt:
            system_content = model_params.system_prompt

        # Both backends receive the same conversation: system message,
        # optional prior context, then the user prompt.
        messages = [{"role": "system", "content": system_content}]
        if context:
            messages.extend(context)
        messages.append({"role": "user", "content": prompt})

        params = model_params.to_dict() if model_params else {}

        if use_rag:
            rag_answer = self._generate_with_openwebui(model_id, messages, params)
            if rag_answer is not None:
                return rag_answer
            # Any OpenWebUI failure (including an unrecognised response
            # shape) degrades to a plain Ollama call.
            print("Falling back to direct Ollama call without RAG")

        return self._generate_with_ollama(model_id, messages, params)

    def _generate_with_openwebui(self, model_id: str,
                                 messages: List[Dict[str, str]],
                                 params: Dict[str, Any]) -> Optional[str]:
        """Request a RAG completion from OpenWebUI; return None on any failure."""
        try:
            openwebui_request = {
                "model": model_id,
                "messages": messages,
                # NOTE(review): 'use_knowledge' is kept from the original
                # request; confirm the target OpenWebUI version honours it.
                "use_knowledge": True,  # Enable RAG
                "stream": False
            }
            openwebui_request.update(
                self._map_params(params, self._OPENWEBUI_PARAM_KEYS)
            )

            headers = {"Content-Type": "application/json"}
            if self.openwebui_api_key:
                headers["Authorization"] = f"Bearer {self.openwebui_api_key}"

            url = f"{self.openwebui_url}/api/chat/completions"

            # Debug logs (the bearer token is masked so it never reaches stdout)
            print(f"Sending RAG request to OpenWebUI at: {url}")
            print(f"OpenWebUI request: {json.dumps(openwebui_request, indent=2)}")
            print(f"Headers: {self._redact_headers(headers)}")

            response = requests.post(
                url,
                headers=headers,
                json=openwebui_request,
                timeout=self.api_timeout
            )
            response.raise_for_status()
            result = response.json()

            content = self._openwebui_content(result)
            if content is None:
                print(f"WARNING: Unexpected response format from OpenWebUI: {json.dumps(result, indent=2)}")
            return content
        except requests.exceptions.Timeout as e:
            error_msg = f"Timeout error connecting to OpenWebUI API: {str(e)}. The request exceeded the {self.api_timeout} second timeout."
            print(f"ERROR: {error_msg}")
        except requests.exceptions.ConnectionError as e:
            error_msg = f"Connection error to OpenWebUI API: {str(e)}. Please check if OpenWebUI is running at {self.openwebui_url}."
            print(f"ERROR: {error_msg}")
        except requests.exceptions.HTTPError as e:
            error_msg = f"HTTP error from OpenWebUI API: {str(e)}."
            print(f"ERROR: {error_msg}")
        except Exception as e:
            # Deliberate best-effort: RAG must never prevent an answer.
            error_msg = f"Error calling OpenWebUI API: {str(e)}"
            print(f"ERROR: {error_msg}")
        return None

    def _generate_with_ollama(self, model_id: str,
                              messages: List[Dict[str, str]],
                              params: Dict[str, Any]) -> str:
        """Request a completion from Ollama; return the text or an error message."""
        request_json = {
            "model": model_id,
            "messages": messages,
            "stream": False
        }
        # Ollama reads sampling parameters from the nested 'options' object;
        # top-level 'temperature', 'max_tokens', ... are not part of its
        # chat request schema.
        options = self._map_params(params, self._OLLAMA_OPTION_KEYS)
        if options:
            request_json["options"] = options

        try:
            url = f"{self.ollama_api_url}/api/chat"
            print(f"Sending request to Ollama API at: {url}")
            print(f"Request JSON: {json.dumps(request_json, indent=2)}")

            response = requests.post(
                url,
                headers={"Content-Type": "application/json"},
                json=request_json,
                timeout=self.api_timeout
            )
            response.raise_for_status()
            result = response.json()

            content = self._message_content(result)
            if content is None:
                print(f"WARNING: Unexpected response format from Ollama: {json.dumps(result, indent=2)}")
                return "Error: Unexpected response format from Ollama"
            return content
        except requests.exceptions.Timeout as e:
            error_msg = f"Timeout error connecting to Ollama API: {str(e)}. The request exceeded the {self.api_timeout} second timeout."
            print(f"ERROR: {error_msg}")
            return error_msg
        except requests.exceptions.ConnectionError as e:
            error_msg = f"Connection error to Ollama API: {str(e)}. Please check if Ollama is running at {self.ollama_api_url}."
            print(f"ERROR: {error_msg}")
            return error_msg
        except Exception as e:
            error_msg = f"Error generating response: {str(e)}"
            print(f"ERROR: {error_msg}")
            return error_msg

    @staticmethod
    def _map_params(params: Optional[Dict[str, Any]],
                    key_map: Dict[str, str]) -> Dict[str, Any]:
        """Copy the parameters named in key_map under their target names, skipping unset (None) values."""
        if not params:
            return {}
        return {
            target: params[source]
            for source, target in key_map.items()
            if params.get(source) is not None
        }

    @staticmethod
    def _redact_headers(headers: Dict[str, str]) -> Dict[str, str]:
        """Return a copy of headers that is safe to print (Authorization value masked)."""
        return {
            name: ("Bearer ***" if name.lower() == "authorization" else value)
            for name, value in headers.items()
        }

    @staticmethod
    def _message_content(container: Any) -> Optional[str]:
        """Return container['message']['content'] if that path exists, else None."""
        if not isinstance(container, dict):
            return None
        message = container.get('message')
        if not isinstance(message, dict):
            return None
        return message.get('content')

    @classmethod
    def _openwebui_content(cls, result: Any) -> Optional[str]:
        """Extract the reply from an OpenAI-style ('choices') or Ollama-style ('message') body."""
        if isinstance(result, dict):
            choices = result.get('choices')
            if isinstance(choices, list) and choices:
                content = cls._message_content(choices[0])
                if content is not None:
                    return content
        return cls._message_content(result)
# Module-level shared instance for importers of this module. It is constructed
# at import time, so ModelService.__init__ reads its settings from `config`
# as soon as this module is first imported.
model_service = ModelService()