# ds_zagres_ai/ai_service/models/model_service.py

"""
Service for model management and interaction.
"""

import requests
import json
from typing import List, Dict, Any, Optional

from ai_service.config import config
from ai_service.models.model_parameters import ModelParameters

class ModelService:
    """Service for model management and interaction.

    Exposes the static catalogue of supported models and generates chat
    responses. Responses come from the Ollama chat API directly, or, when
    RAG is requested, from OpenWebUI with a fallback to Ollama whenever
    OpenWebUI cannot provide a usable answer.
    """

    # Available models
    AVAILABLE_MODELS = {
        'gemma3': {
            'name': 'Gemma 3',
            'description': 'Google Gemma 3 model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        },
        'llama3.3': {
            'name': 'Llama 3 (70B)',
            'description': 'Meta Llama 3 70B model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        },
        'llama3.1': {
            'name': 'Llama 3 (8B)',
            'description': 'Meta Llama 3 8B model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        },
        'mistral': {
            'name': 'Mistral',
            'description': 'Mistral AI model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        },
        'deepseek': {
            'name': 'DeepSeek',
            'description': 'DeepSeek model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        }
    }

    # Parameters forwarded verbatim as top-level fields of the OpenWebUI
    # (OpenAI-compatible) request body.
    _OPENWEBUI_PARAM_KEYS = (
        'temperature', 'max_tokens', 'top_p', 'top_k', 'repeat_penalty',
    )

    # ModelParameters key -> Ollama option name. Ollama reads sampling
    # settings from the nested "options" object of /api/chat and calls the
    # generation limit "num_predict"; top-level keys are not applied.
    _OLLAMA_OPTION_KEYS = {
        'temperature': 'temperature',
        'top_p': 'top_p',
        'top_k': 'top_k',
        'max_tokens': 'num_predict',
        'repeat_penalty': 'repeat_penalty',
    }

    def __init__(self):
        """Initialize the model service from the application config."""
        self.default_model = config.DEFAULT_MODEL
        self.ollama_api_url = config.OLLAMA_API_URL
        self.openwebui_url = config.OPENWEBUI_URL
        self.openwebui_api_key = config.OPENWEBUI_API_KEY
        self.api_timeout = config.API_TIMEOUT

    def get_available_models(self) -> List[Dict[str, Any]]:
        """
        Get a list of available models.

        Returns:
            List of model information dictionaries, one per entry of
            ``AVAILABLE_MODELS``. If the catalogue is empty, a single
            hard-coded 'llama3.1' entry flagged as default is returned so
            the result is never empty.
        """
        models = [
            self.get_model_info(model_id)
            for model_id in self.AVAILABLE_MODELS
        ]

        # Debug log
        print(f"Model service models: {models}")

        # Ensure we're returning a non-empty list
        if not models:
            # Return a default model if none are found
            return [{
                'id': 'llama3.1',
                'name': 'Llama 3 (8B)',
                'description': 'Meta Llama 3 8B model via Ollama',
                'provider': 'ollama',
                'max_tokens': 8192,
                'is_default': True
            }]

        return models

    def get_model_info(self, model_id: str) -> Optional[Dict[str, Any]]:
        """
        Get information about a specific model.

        Args:
            model_id: ID of the model.

        Returns:
            Model information dictionary (catalogue fields plus 'id' and
            'is_default') if found, None otherwise.
        """
        if model_id not in self.AVAILABLE_MODELS:
            return None

        return {
            'id': model_id,
            'is_default': model_id == self.default_model,
            **self.AVAILABLE_MODELS[model_id]
        }

    def generate_response(self, model_id: str, prompt: str,
                         context: Optional[List[Dict[str, str]]] = None,
                         use_rag: bool = False,
                         model_params: Optional["ModelParameters"] = None) -> str:
        """
        Generate a response from the model.

        When ``use_rag`` is set the request is first sent to OpenWebUI. If
        that call fails, or its response carries no usable content, the
        request is retried against Ollama directly without RAG.

        Args:
            model_id: ID of the model to use. Unknown IDs are replaced by
                the configured default model.
            prompt: User prompt.
            context: Optional conversation context, inserted between the
                system message and the user prompt.
            use_rag: Whether to use RAG (Retrieval Augmented Generation).
            model_params: Optional model parameters (system prompt and
                sampling settings).

        Returns:
            Generated response. Failures of the Ollama call are not raised;
            a human-readable error message is returned instead.
        """
        # Debug configuration information
        print("ModelService configuration:")
        print(f"  - Ollama API URL: {self.ollama_api_url}")
        print(f"  - OpenWebUI URL: {self.openwebui_url}")
        print(f"  - Default model: {self.default_model}")
        print(f"  - Requested model: {model_id}")
        print(f"  - Using RAG: {use_rag}")

        if model_id not in self.AVAILABLE_MODELS:
            model_id = self.default_model
            print(f"  - Model not found, using default: {model_id}")

        messages = self._build_messages(prompt, context, model_params)
        params = model_params.to_dict() if model_params else {}

        if use_rag:
            rag_answer = self._generate_with_openwebui(model_id, messages, params)
            if rag_answer is not None:
                return rag_answer
            print("Falling back to direct Ollama call without RAG")

        return self._generate_with_ollama(model_id, messages, params)

    @staticmethod
    def _endpoint(base_url, path: str) -> str:
        """Join a configured base URL and an API path without doubling '/'."""
        return f"{str(base_url).rstrip('/')}{path}"

    @staticmethod
    def _build_messages(prompt, context, model_params) -> List[Dict[str, str]]:
        """Assemble the chat messages: system prompt, context, user prompt."""
        # Use custom system prompt if provided, otherwise use default
        system_content = "You are a helpful assistant."
        if model_params and model_params.system_prompt:
            system_content = model_params.system_prompt

        messages = [{"role": "system", "content": system_content}]
        if context:
            messages.extend(context)
        messages.append({"role": "user", "content": prompt})
        return messages

    def _generate_with_openwebui(self, model_id: str,
                                 messages: List[Dict[str, str]],
                                 params: Dict[str, Any]) -> Optional[str]:
        """Ask OpenWebUI (RAG enabled); return None if no answer was obtained."""
        openwebui_request = {
            "model": model_id,
            "messages": messages,
            "use_knowledge": True,  # Enable RAG
            "stream": False
        }
        for key in self._OPENWEBUI_PARAM_KEYS:
            if key in params:
                openwebui_request[key] = params[key]

        headers = {"Content-Type": "application/json"}
        if self.openwebui_api_key:
            headers["Authorization"] = f"Bearer {self.openwebui_api_key}"
        # Never write the bearer token to the logs; print a redacted copy.
        loggable_headers = {
            name: ("<redacted>" if name == "Authorization" else value)
            for name, value in headers.items()
        }

        url = self._endpoint(self.openwebui_url, "/api/chat/completions")

        try:
            # Debug logs
            print(f"Sending RAG request to OpenWebUI at: {url}")
            print(f"OpenWebUI request: {json.dumps(openwebui_request, indent=2)}")
            print(f"Headers: {loggable_headers}")

            response = requests.post(
                url,
                headers=headers,
                json=openwebui_request,
                timeout=self.api_timeout
            )
            response.raise_for_status()
            result = response.json()

            if 'choices' in result and len(result['choices']) > 0 and 'message' in result['choices'][0]:
                # OpenAI-compatible format
                return result['choices'][0]['message']['content']
            if 'message' in result and 'content' in result['message']:
                # OpenWebUI format
                return result['message']['content']

            print(f"WARNING: Unexpected response format from OpenWebUI: {json.dumps(result, indent=2)}")
            return None

        # Timeout is checked first: a connect timeout is both a Timeout and
        # a ConnectionError in requests' exception hierarchy.
        except requests.exceptions.Timeout as e:
            error_msg = f"Timeout error connecting to OpenWebUI API: {str(e)}. The request exceeded the {self.api_timeout} second timeout."
        except requests.exceptions.ConnectionError as e:
            error_msg = f"Connection error to OpenWebUI API: {str(e)}. Please check if OpenWebUI is running at {self.openwebui_url}."
        except requests.exceptions.HTTPError as e:
            error_msg = f"HTTP error from OpenWebUI API: {str(e)}."
        except Exception as e:
            # Best-effort path: any other failure also triggers the fallback.
            error_msg = f"Error calling OpenWebUI API: {str(e)}"

        print(f"ERROR: {error_msg}")
        return None

    def _generate_with_ollama(self, model_id: str,
                              messages: List[Dict[str, str]],
                              params: Dict[str, Any]) -> str:
        """Call the Ollama chat API; return the answer or an error message."""
        request_json = {
            "model": model_id,
            "messages": messages,
            "stream": False
        }

        # Map parameters to Ollama format (nested under "options")
        options = {
            ollama_key: params[key]
            for key, ollama_key in self._OLLAMA_OPTION_KEYS.items()
            if key in params
        }
        if options:
            request_json["options"] = options

        url = self._endpoint(self.ollama_api_url, "/api/chat")

        try:
            print(f"Sending request to Ollama API at: {url}")
            print(f"Request JSON: {json.dumps(request_json, indent=2)}")

            response = requests.post(
                url,
                headers={"Content-Type": "application/json"},
                json=request_json,
                timeout=self.api_timeout
            )
            response.raise_for_status()
            result = response.json()

            # Extract the response content from Ollama
            if 'message' in result and 'content' in result['message']:
                return result['message']['content']

            print(f"WARNING: Unexpected response format from Ollama: {json.dumps(result, indent=2)}")
            return "Error: Unexpected response format from Ollama"

        except requests.exceptions.Timeout as e:
            error_msg = f"Timeout error connecting to Ollama API: {str(e)}. The request exceeded the {self.api_timeout} second timeout."
        except requests.exceptions.ConnectionError as e:
            error_msg = f"Connection error to Ollama API: {str(e)}. Please check if Ollama is running at {self.ollama_api_url}."
        except Exception as e:
            error_msg = f"Error generating response: {str(e)}"

        print(f"ERROR: {error_msg}")
        return error_msg


# Module-level singleton, constructed once when this module is imported.
model_service: ModelService = ModelService()