""" Service for model management and interaction. """ import requests import json from typing import List, Dict, Any, Optional from ai_service.config import config from ai_service.models.model_parameters import ModelParameters class ModelService: """Service for model management and interaction.""" # Available models AVAILABLE_MODELS = { 'gemma3': { 'name': 'Gemma 3', 'description': 'Google Gemma 3 model via Ollama', 'provider': 'ollama', 'max_tokens': 8192 }, 'llama3.3': { 'name': 'Llama 3 (70B)', 'description': 'Meta Llama 3 70B model via Ollama', 'provider': 'ollama', 'max_tokens': 8192 }, 'llama3.1': { 'name': 'Llama 3 (8B)', 'description': 'Meta Llama 3 8B model via Ollama', 'provider': 'ollama', 'max_tokens': 8192 }, 'mistral': { 'name': 'Mistral', 'description': 'Mistral AI model via Ollama', 'provider': 'ollama', 'max_tokens': 8192 }, 'deepseek': { 'name': 'DeepSeek', 'description': 'DeepSeek model via Ollama', 'provider': 'ollama', 'max_tokens': 8192 } } def __init__(self): """Initialize the model service.""" self.default_model = config.DEFAULT_MODEL self.ollama_api_url = config.OLLAMA_API_URL self.openwebui_url = config.OPENWEBUI_URL self.openwebui_api_key = config.OPENWEBUI_API_KEY self.api_timeout = config.API_TIMEOUT def get_available_models(self) -> List[Dict[str, Any]]: """ Get a list of available models. Returns: List of model information dictionaries. """ models = [] for model_id, model_info in self.AVAILABLE_MODELS.items(): model_data = { 'id': model_id, 'is_default': model_id == self.default_model, **model_info } models.append(model_data) # Debug log print(f"Model service models: {models}") # Ensure we're returning a non-empty list if not models: # Return a default model if none are found return [{ 'id': 'llama3.1', 'name': 'Llama 3 (8B)', 'description': 'Meta Llama 3 8B model via Ollama', 'provider': 'ollama', 'max_tokens': 8192, 'is_default': True }] return models def get_model_info(self, model_id: str) -> Optional[Dict[str, Any]]: """ Get information about a specific model. Args: model_id: ID of the model. Returns: Model information dictionary if found, None otherwise. """ if model_id not in self.AVAILABLE_MODELS: return None return { 'id': model_id, 'is_default': model_id == self.default_model, **self.AVAILABLE_MODELS[model_id] } def generate_response(self, model_id: str, prompt: str, context: Optional[List[Dict[str, str]]] = None, use_rag: bool = False, model_params: Optional[ModelParameters] = None) -> str: """ Generate a response from the model. Args: model_id: ID of the model to use. prompt: User prompt. context: Optional conversation context. use_rag: Whether to use RAG (Retrieval Augmented Generation). model_params: Optional model parameters. Returns: Generated response. """ # Debug configuration information print(f"ModelService configuration:") print(f" - Ollama API URL: {self.ollama_api_url}") print(f" - OpenWebUI URL: {self.openwebui_url}") print(f" - Default model: {self.default_model}") print(f" - Requested model: {model_id}") print(f" - Using RAG: {use_rag}") if model_id not in self.AVAILABLE_MODELS: model_id = self.default_model print(f" - Model not found, using default: {model_id}") # Prepare the messages for the API call messages = [] # Use custom system prompt if provided, otherwise use default system_content = "You are a helpful assistant." if model_params and model_params.system_prompt: system_content = model_params.system_prompt messages.append({ "role": "system", "content": system_content }) # Add conversation context if provided if context: messages.extend(context) # If RAG is enabled, use OpenWebUI's knowledge database if use_rag: # We'll use OpenWebUI's built-in RAG capabilities # This is handled by sending the request to OpenWebUI instead of Ollama directly try: # Prepare the request for OpenWebUI openwebui_request = { "model": model_id, "messages": messages + [{"role": "user", "content": prompt}], "use_knowledge": True, # Enable RAG "stream": False } # Add model parameters if provided if model_params: params = model_params.to_dict() # Map parameters to OpenWebUI format if 'temperature' in params: openwebui_request['temperature'] = params['temperature'] if 'max_tokens' in params: openwebui_request['max_tokens'] = params['max_tokens'] if 'top_p' in params: openwebui_request['top_p'] = params['top_p'] if 'top_k' in params: openwebui_request['top_k'] = params['top_k'] if 'repeat_penalty' in params: openwebui_request['repeat_penalty'] = params['repeat_penalty'] # Make the API call to OpenWebUI headers = {"Content-Type": "application/json"} if self.openwebui_api_key: headers["Authorization"] = f"Bearer {self.openwebui_api_key}" # Debug logs print(f"Sending RAG request to OpenWebUI at: {self.openwebui_url}/api/chat/completions") print(f"OpenWebUI request: {json.dumps(openwebui_request, indent=2)}") print(f"Headers: {headers}") # OpenWebUI API endpoint is /api/chat/completions response = requests.post( f"{self.openwebui_url}/api/chat/completions", headers=headers, json=openwebui_request, timeout=self.api_timeout ) response.raise_for_status() result = response.json() # Extract the response content if 'choices' in result and len(result['choices']) > 0 and 'message' in result['choices'][0]: # OpenAI-compatible format return result['choices'][0]['message']['content'] elif 'message' in result and 'content' in result['message']: # OpenWebUI format return result['message']['content'] else: print(f"WARNING: Unexpected response format from OpenWebUI: {json.dumps(result, indent=2)}") return "Error: Unexpected response format from OpenWebUI. Falling back to direct model call." except requests.exceptions.Timeout as e: error_msg = f"Timeout error connecting to OpenWebUI API: {str(e)}. The request exceeded the {self.api_timeout} second timeout." print(f"ERROR: {error_msg}") print("Falling back to direct Ollama call without RAG") # Continue to the Ollama API call below except requests.exceptions.ConnectionError as e: error_msg = f"Connection error to OpenWebUI API: {str(e)}. Please check if OpenWebUI is running at {self.openwebui_url}." print(f"ERROR: {error_msg}") print("Falling back to direct Ollama call without RAG") # Continue to the Ollama API call below except requests.exceptions.HTTPError as e: error_msg = f"HTTP error from OpenWebUI API: {str(e)}." print(f"ERROR: {error_msg}") print("Falling back to direct Ollama call without RAG") # Continue to the Ollama API call below except Exception as e: error_msg = f"Error calling OpenWebUI API: {str(e)}" print(f"ERROR: {error_msg}") print("Falling back to direct Ollama call without RAG") # Continue to the Ollama API call below # Add user prompt messages.append({ "role": "user", "content": prompt }) # Prepare API request parameters for Ollama request_json = { "model": model_id, "messages": messages, "stream": False } # Add model parameters if provided if model_params: params = model_params.to_dict() # Map parameters to Ollama format if 'temperature' in params: request_json['temperature'] = params['temperature'] if 'top_p' in params: request_json['top_p'] = params['top_p'] if 'top_k' in params: request_json['top_k'] = params['top_k'] if 'max_tokens' in params: request_json['max_tokens'] = params['max_tokens'] if 'repeat_penalty' in params: request_json['repeat_penalty'] = params['repeat_penalty'] # Make the API call to Ollama try: # Prepare headers headers = {"Content-Type": "application/json"} # Direct Ollama API call print(f"Sending request to Ollama API at: {self.ollama_api_url}/api/chat") print(f"Request JSON: {json.dumps(request_json, indent=2)}") response = requests.post( f"{self.ollama_api_url}/api/chat", headers={"Content-Type": "application/json"}, json=request_json, timeout=self.api_timeout ) response.raise_for_status() result = response.json() # Extract the response content from Ollama # The response format for Ollama API if 'message' in result and 'content' in result['message']: return result['message']['content'] else: print(f"WARNING: Unexpected response format from Ollama: {json.dumps(result, indent=2)}") return "Error: Unexpected response format from Ollama" except requests.exceptions.Timeout as e: error_msg = f"Timeout error connecting to Ollama API: {str(e)}. The request exceeded the {self.api_timeout} second timeout." print(f"ERROR: {error_msg}") return error_msg except requests.exceptions.ConnectionError as e: error_msg = f"Connection error to Ollama API: {str(e)}. Please check if Ollama is running at {self.ollama_api_url}." print(f"ERROR: {error_msg}") return error_msg except Exception as e: error_msg = f"Error generating response: {str(e)}" print(f"ERROR: {error_msg}") return error_msg # Create a singleton instance model_service = ModelService()