Initial commit for deployment

2025-05-09 15:41:16 +01:00
commit ac98999507
54 changed files with 4343 additions and 0 deletions
@@ -0,0 +1,243 @@
+"""
+Service for model management and interaction.
+"""
+
+import json
+import logging
+import os
+from typing import List, Dict, Any, Optional
+
+import requests
+
+from ai_service.config import config
+from ai_service.embeddings.document_service import document_service
+from ai_service.models.model_parameters import ModelParameters
+
+class ModelService:
+    """Service for model management and interaction.
+
+    Holds a static catalogue of supported models and generates chat
+    responses over HTTP: directly through Ollama's chat endpoint, or through
+    OpenWebUI when retrieval-augmented generation (RAG) is requested.
+    """
+
+    # Available models
+    AVAILABLE_MODELS = {
+        'gemma3': {
+            'name': 'Gemma 3',
+            'description': 'Google Gemma 3 model via Ollama',
+            'provider': 'ollama',
+            'max_tokens': 8192
+        },
+        'llama3.3': {
+            'name': 'Llama 3 (70B)',
+            'description': 'Meta Llama 3 70B model via Ollama',
+            'provider': 'ollama',
+            'max_tokens': 8192
+        },
+        'llama3.1': {
+            'name': 'Llama 3 (8B)',
+            'description': 'Meta Llama 3 8B model via Ollama',
+            'provider': 'ollama',
+            'max_tokens': 8192
+        },
+        'mistral': {
+            'name': 'Mistral',
+            'description': 'Mistral AI model via Ollama',
+            'provider': 'ollama',
+            'max_tokens': 8192
+        },
+        'deepseek': {
+            'name': 'DeepSeek',
+            'description': 'DeepSeek model via Ollama',
+            'provider': 'ollama',
+            'max_tokens': 8192
+        }
+    }
+
+    # System prompt used when the caller does not supply one.
+    _DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant."
+
+    # Request timeouts in seconds; the RAG path is given more time.
+    _OPENWEBUI_TIMEOUT = 60
+    _OLLAMA_TIMEOUT = 30
+
+    # ModelParameters key -> OpenWebUI request field (sent at the top level
+    # of the OpenAI-style request body).
+    _OPENWEBUI_PARAM_NAMES = {
+        'temperature': 'temperature',
+        'max_tokens': 'max_tokens',
+        'top_p': 'top_p',
+    }
+
+    # ModelParameters key -> Ollama option name. Ollama reads sampling
+    # settings from the nested "options" object and names the token limit
+    # "num_predict"; top-level fields with these names are ignored.
+    _OLLAMA_OPTION_NAMES = {
+        'temperature': 'temperature',
+        'top_p': 'top_p',
+        'top_k': 'top_k',
+        'max_tokens': 'num_predict',
+    }
+
+    _logger = logging.getLogger(__name__)
+
+    def __init__(self):
+        """Initialize the model service from the application config."""
+        self.default_model = config.DEFAULT_MODEL
+        self.ollama_api_url = config.OLLAMA_API_URL
+        self.openwebui_url = config.OPENWEBUI_URL
+        self.openwebui_api_key = config.OPENWEBUI_API_KEY
+
+    def get_available_models(self) -> List[Dict[str, Any]]:
+        """
+        Get a list of available models.
+
+        Returns:
+            List of model information dictionaries, in catalogue order.
+        """
+        return [self.get_model_info(model_id) for model_id in self.AVAILABLE_MODELS]
+
+    def get_model_info(self, model_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Get information about a specific model.
+
+        Args:
+            model_id: ID of the model.
+
+        Returns:
+            Model information dictionary (catalogue entry plus 'id' and
+            'is_default') if found, None otherwise.
+        """
+        model_info = self.AVAILABLE_MODELS.get(model_id)
+        if model_info is None:
+            return None
+
+        return {
+            'id': model_id,
+            'is_default': model_id == self.default_model,
+            **model_info
+        }
+
+    def generate_response(self, model_id: str, prompt: str,
+                         context: Optional[List[Dict[str, str]]] = None,
+                         use_rag: bool = False,
+                         model_params: Optional[ModelParameters] = None) -> str:
+        """
+        Generate a response from the model.
+
+        With use_rag the request is first sent to OpenWebUI; if that request
+        fails, generation falls back to a direct Ollama call without RAG.
+        HTTP and parsing failures are reported in the returned text rather
+        than raised.
+
+        Args:
+            model_id: ID of the model to use. Unknown IDs are replaced by
+                the configured default model.
+            prompt: User prompt.
+            context: Optional conversation context (role/content messages).
+            use_rag: Whether to use RAG (Retrieval Augmented Generation).
+            model_params: Optional model parameters.
+
+        Returns:
+            Generated response, or an error message starting with "Error".
+        """
+        if model_id not in self.AVAILABLE_MODELS:
+            # No catalogue lookup follows, so a default model that is not in
+            # AVAILABLE_MODELS is simply forwarded to the backend.
+            model_id = self.default_model
+
+        messages = self._build_messages(prompt, context, model_params)
+
+        if use_rag:
+            rag_answer = self._generate_with_openwebui(model_id, messages, model_params)
+            if rag_answer is not None:
+                return rag_answer
+            # None means the OpenWebUI request failed: continue without RAG.
+
+        return self._generate_with_ollama(model_id, messages, model_params)
+
+    def _build_messages(self, prompt, context, model_params):
+        """Assemble the system prompt, prior context and user prompt, in that order."""
+        system_content = self._DEFAULT_SYSTEM_PROMPT
+        if model_params and model_params.system_prompt:
+            system_content = model_params.system_prompt
+
+        messages = [{"role": "system", "content": system_content}]
+        if context:
+            messages.extend(context)
+        messages.append({"role": "user", "content": prompt})
+        return messages
+
+    @staticmethod
+    def _collect_params(model_params, name_map):
+        """Rename the set (non-None) model parameters according to name_map."""
+        if not model_params:
+            return {}
+
+        params = model_params.to_dict()
+        return {
+            target: params[source]
+            for source, target in name_map.items()
+            if params.get(source) is not None
+        }
+
+    @staticmethod
+    def _endpoint(base_url, path):
+        """Join a configured base URL and an API path without doubling slashes."""
+        return f"{str(base_url).rstrip('/')}{path}"
+
+    @staticmethod
+    def _extract_content(result):
+        """Return the assistant text from a response payload, or None if the shape is unknown."""
+        if not isinstance(result, dict):
+            return None
+
+        # OpenAI-compatible shape: {"choices": [{"message": {"content": ...}}]}
+        choices = result.get('choices')
+        if isinstance(choices, list) and choices and isinstance(choices[0], dict):
+            message = choices[0].get('message')
+            if isinstance(message, dict) and 'content' in message:
+                return message['content']
+
+        # Ollama /api/chat shape: {"message": {"content": ...}}
+        message = result.get('message')
+        if isinstance(message, dict) and 'content' in message:
+            return message['content']
+
+        # Ollama /api/generate shape: {"response": ...}
+        return result.get('response')
+
+    def _generate_with_openwebui(self, model_id, messages, model_params):
+        """Ask OpenWebUI for a RAG answer; return None when the request itself fails."""
+        try:
+            request_json = {
+                "model": model_id,
+                "messages": messages,
+                # NOTE(review): confirm the deployed OpenWebUI honours this
+                # flag; knowledge may need to be attached explicitly instead.
+                "use_knowledge": True,  # Enable RAG
+                "stream": False
+            }
+            request_json.update(
+                self._collect_params(model_params, self._OPENWEBUI_PARAM_NAMES)
+            )
+
+            headers = {"Content-Type": "application/json"}
+            if self.openwebui_api_key:
+                headers["Authorization"] = f"Bearer {self.openwebui_api_key}"
+
+            response = requests.post(
+                self._endpoint(self.openwebui_url, "/api/chat/completions"),
+                headers=headers,
+                json=request_json,
+                timeout=self._OPENWEBUI_TIMEOUT
+            )
+            response.raise_for_status()
+            result = response.json()
+        except Exception:
+            # Best-effort boundary: any failure here degrades to plain Ollama.
+            self._logger.warning(
+                "Error calling OpenWebUI API; falling back to direct Ollama "
+                "call without RAG",
+                exc_info=True
+            )
+            return None
+
+        content = self._extract_content(result)
+        if content is None:
+            return "Error: Unexpected response format from OpenWebUI"
+        return content
+
+    def _generate_with_ollama(self, model_id, messages, model_params):
+        """Send the conversation to Ollama's chat endpoint and return the reply text."""
+        request_json = {
+            "model": model_id,
+            "messages": messages,
+            "stream": False
+        }
+        options = self._collect_params(model_params, self._OLLAMA_OPTION_NAMES)
+        if options:
+            request_json["options"] = options
+
+        try:
+            # /api/chat is the endpoint that accepts a "messages" list;
+            # /api/generate expects a single "prompt" string instead.
+            response = requests.post(
+                self._endpoint(self.ollama_api_url, "/api/chat"),
+                headers={"Content-Type": "application/json"},
+                json=request_json,
+                timeout=self._OLLAMA_TIMEOUT
+            )
+            response.raise_for_status()
+            content = self._extract_content(response.json())
+        except Exception as e:
+            # Callers expect a string back, so report the failure as text.
+            self._logger.exception("Error calling Ollama API")
+            return f"Error generating response: {str(e)}"
+
+        if content is None:
+            return "Error: Unexpected response format from Ollama"
+        return content
+
+
+# Module-level shared instance, created once when this module is imported.
+# Construction only copies settings from `config` (see ModelService.__init__);
+# no network request is made here.
+model_service = ModelService()