Initial commit for deployment

2025-05-09 15:41:16 +01:00
commit ac98999507
54 changed files with 4343 additions and 0 deletions
@@ -0,0 +1,309 @@
+"""
+Service for chat functionality.
+"""
+
+import os
+import json
+import uuid
+from datetime import datetime
+from typing import List, Dict, Any, Optional
+
+from ai_service.config import config
+from ai_service.models.model_service import model_service
+from ai_service.models.model_parameters import ModelParameters
+
+class ChatService:
+    """Service for chat functionality."""
+
+    def __init__(self):
+        """Initialize the chat service."""
+        # Ensure data directory exists
+        os.makedirs(os.path.dirname(config.SQLITE_DB_PATH), exist_ok=True)
+
+        # For now, we'll store chat data in a simple JSON file
+        self.chats_file = os.path.join(os.path.dirname(config.SQLITE_DB_PATH), 'chats.json')
+        self._load_chats()
+
+    def _load_chats(self):
+        """Load chats from file."""
+        if os.path.exists(self.chats_file):
+            try:
+                with open(self.chats_file, 'r') as f:
+                    self.chats = json.load(f)
+            except Exception as e:
+                print(f"Error loading chats: {str(e)}")
+                self.chats = {}
+        else:
+            self.chats = {}
+
+    def _save_chats(self):
+        """Save chats to file."""
+        try:
+            with open(self.chats_file, 'w') as f:
+                json.dump(self.chats, f, indent=2)
+        except Exception as e:
+            print(f"Error saving chats: {str(e)}")
+
+    def create_chat(self, user_id: str, title: Optional[str] = None,
+                   model_id: Optional[str] = None, is_team_chat: bool = False) -> str:
+        """
+        Create a new chat.
+
+        Args:
+            user_id: ID of the user creating the chat.
+            title: Optional title for the chat.
+            model_id: Optional model ID to use for this chat.
+            is_team_chat: Whether this is a team chat.
+
+        Returns:
+            ID of the created chat.
+        """
+        # Generate a unique ID for the chat
+        chat_id = str(uuid.uuid4())
+
+        # Create chat data
+        self.chats[chat_id] = {
+            'id': chat_id,
+            'title': title or f"Chat {len(self.chats) + 1}",
+            'user_id': user_id,
+            'model_id': model_id or config.DEFAULT_MODEL,
+            'is_team_chat': is_team_chat,
+            'created_at': datetime.utcnow().isoformat(),
+            'updated_at': datetime.utcnow().isoformat(),
+            'messages': [],
+            'team_members': [user_id] if is_team_chat else []
+        }
+
+        # Save chats to file
+        self._save_chats()
+
+        return chat_id
+
+    def add_message(self, chat_id: str, content: str, user_id: str,
+                   is_user_message: bool = True) -> Dict[str, Any]:
+        """
+        Add a message to a chat.
+
+        Args:
+            chat_id: ID of the chat.
+            content: Message content.
+            user_id: ID of the user sending the message.
+            is_user_message: Whether this is a user message (vs. bot message).
+
+        Returns:
+            Added message.
+        """
+        if chat_id not in self.chats:
+            raise ValueError(f"Chat with ID {chat_id} not found")
+
+        # Create message data
+        message = {
+            'id': str(uuid.uuid4()),
+            'content': content,
+            'user_id': user_id if is_user_message else None,
+            'is_user_message': is_user_message,
+            'timestamp': datetime.utcnow().isoformat()
+        }
+
+        # Add message to chat
+        self.chats[chat_id]['messages'].append(message)
+
+        # Update chat timestamp
+        self.chats[chat_id]['updated_at'] = datetime.utcnow().isoformat()
+
+        # Save chats to file
+        self._save_chats()
+
+        return message
+
+    def get_chat(self, chat_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Get a chat by ID.
+
+        Args:
+            chat_id: ID of the chat.
+
+        Returns:
+            Chat data if found, None otherwise.
+        """
+        return self.chats.get(chat_id)
+
+    def get_user_chats(self, user_id: str) -> List[Dict[str, Any]]:
+        """
+        Get all chats for a user.
+
+        Args:
+            user_id: ID of the user.
+
+        Returns:
+            List of chat data.
+        """
+        user_chats = []
+
+        for chat_id, chat in self.chats.items():
+            # Include private chats owned by the user
+            if chat['user_id'] == user_id and not chat['is_team_chat']:
+                user_chats.append(chat)
+
+            # Include team chats where the user is a member
+            elif chat['is_team_chat'] and user_id in chat['team_members']:
+                user_chats.append(chat)
+
+        # Sort by updated_at (newest first)
+        user_chats.sort(key=lambda x: x['updated_at'], reverse=True)
+
+        return user_chats
+
+    def add_team_member(self, chat_id: str, user_id: str) -> bool:
+        """
+        Add a user to a team chat.
+
+        Args:
+            chat_id: ID of the team chat.
+            user_id: ID of the user to add.
+
+        Returns:
+            True if addition was successful, False otherwise.
+        """
+        if chat_id not in self.chats:
+            return False
+
+        chat = self.chats[chat_id]
+
+        if not chat['is_team_chat']:
+            return False
+
+        if user_id not in chat['team_members']:
+            chat['team_members'].append(user_id)
+            self._save_chats()
+
+        return True
+
+    def remove_team_member(self, chat_id: str, user_id: str) -> bool:
+        """
+        Remove a user from a team chat.
+
+        Args:
+            chat_id: ID of the team chat.
+            user_id: ID of the user to remove.
+
+        Returns:
+            True if removal was successful, False otherwise.
+        """
+        if chat_id not in self.chats:
+            return False
+
+        chat = self.chats[chat_id]
+
+        if not chat['is_team_chat']:
+            return False
+
+        if user_id in chat['team_members']:
+            chat['team_members'].remove(user_id)
+            self._save_chats()
+
+        return True
+
+    def delete_chat(self, chat_id: str) -> bool:
+        """
+        Delete a chat.
+
+        Args:
+            chat_id: ID of the chat to delete.
+
+        Returns:
+            True if deletion was successful, False otherwise.
+        """
+        if chat_id not in self.chats:
+            return False
+
+        del self.chats[chat_id]
+        self._save_chats()
+
+        return True
+
+    def get_chat_response(self, chat_id: str, message: str, user_id: str,
+                         use_rag: bool = False, temperature: Optional[float] = None,
+                         max_tokens: Optional[int] = None, top_p: Optional[float] = None,
+                         frequency_penalty: Optional[float] = None, presence_penalty: Optional[float] = None,
+                         stop_sequences: Optional[List[str]] = None, system_prompt: Optional[str] = None,
+                         min_p: Optional[float] = None, top_k: Optional[int] = None,
+                         repeat_penalty: Optional[float] = None, function_calling: Optional[bool] = None) -> Dict[str, Any]:
+        """
+        Get a response from the chatbot.
+
+        Args:
+            chat_id: ID of the chat.
+            message: User message.
+            user_id: ID of the user sending the message.
+            use_rag: Whether to use RAG (Retrieval Augmented Generation).
+            temperature: Controls randomness in the response.
+            max_tokens: Maximum number of tokens to generate.
+            top_p: Nucleus sampling parameter.
+            frequency_penalty: Penalizes repeated tokens.
+            presence_penalty: Penalizes repeated topics.
+            stop_sequences: Sequences where the API will stop generating.
+            system_prompt: System prompt to guide the model's behavior.
+            min_p: Minimum probability threshold for token selection.
+            top_k: Only sample from the top k tokens.
+            repeat_penalty: Penalty for repeating tokens.
+            function_calling: Whether to enable function calling.
+
+        Returns:
+            Bot response message.
+        """
+        if chat_id not in self.chats:
+            raise ValueError(f"Chat with ID {chat_id} not found")
+
+        chat = self.chats[chat_id]
+
+        # Add user message to chat
+        self.add_message(chat_id, message, user_id, is_user_message=True)
+
+        # Prepare conversation context for the model
+        context = []
+        for msg in chat['messages'][-10:]:  # Use last 10 messages as context
+            role = "user" if msg['is_user_message'] else "assistant"
+            context.append({
+                "role": role,
+                "content": msg['content']
+            })
+
+        # Create model parameters
+        model_params = ModelParameters(
+            temperature=temperature,
+            max_tokens=max_tokens,
+            top_p=top_p,
+            frequency_penalty=frequency_penalty,
+            presence_penalty=presence_penalty,
+            stop_sequences=stop_sequences,
+            system_prompt=system_prompt,
+            min_p=min_p,
+            top_k=top_k,
+            repeat_penalty=repeat_penalty,
+            function_calling=function_calling
+        )
+
+        # Get response from model
+        model_id = chat['model_id']
+        response_text = model_service.generate_response(
+            model_id=model_id,
+            prompt=message,
+            context=context,
+            use_rag=use_rag,
+            model_params=model_params
+        )
+
+        # Add bot response to chat
+        response_message = self.add_message(
+            chat_id=chat_id,
+            content=response_text,
+            user_id=user_id,
+            is_user_message=False
+        )
+
+        return response_message
+
+
+# Create a singleton instance at import time. Note that constructing
+# ChatService creates the data directory and loads chats.json, so importing
+# this module touches the filesystem.
+chat_service = ChatService()
@@ -0,0 +1,170 @@
+"""
+Model parameters for AI models.
+"""
+
+from typing import Dict, Any, Optional, List
+from pydantic import BaseModel, Field, validator
+
+
+class ModelParameters(BaseModel):
+    """Parameters for AI model generation."""
+
+    # Basic parameters
+    temperature: Optional[float] = Field(
+        0.7,
+        description="Controls randomness: 0 is deterministic, higher values are more random",
+        ge=0.0,
+        le=2.0
+    )
+
+    max_tokens: Optional[int] = Field(
+        1000,
+        description="Maximum number of tokens to generate",
+        gt=0
+    )
+
+    # Sampling parameters
+    top_p: Optional[float] = Field(
+        1.0,
+        description="Nucleus sampling: consider tokens with top_p probability mass",
+        ge=0.0,
+        le=1.0
+    )
+
+    top_k: Optional[int] = Field(
+        None,
+        description="Only sample from the top k tokens",
+        gt=0
+    )
+
+    # Repetition control
+    frequency_penalty: Optional[float] = Field(
+        0.0,
+        description="Penalizes repeated tokens",
+        ge=-2.0,
+        le=2.0
+    )
+
+    presence_penalty: Optional[float] = Field(
+        0.0,
+        description="Penalizes repeated topics",
+        ge=-2.0,
+        le=2.0
+    )
+
+    # Advanced parameters
+    stop_sequences: Optional[List[str]] = Field(
+        None,
+        description="Sequences where the API will stop generating"
+    )
+
+    min_p: Optional[float] = Field(
+        None,
+        description="Minimum probability threshold for token selection",
+        ge=0.0,
+        le=1.0
+    )
+
+    repeat_penalty: Optional[float] = Field(
+        None,
+        description="Penalty for repeating tokens",
+        ge=0.0
+    )
+
+    presence_penalty_tokens: Optional[int] = Field(
+        None,
+        description="Number of tokens to consider for presence penalty",
+        gt=0
+    )
+
+    # System prompt
+    system_prompt: Optional[str] = Field(
+        None,
+        description="System prompt to guide the model's behavior"
+    )
+
+    # Function calling
+    function_calling: Optional[bool] = Field(
+        None,
+        description="Whether to enable function calling"
+    )
+
+    # Additional parameters that might be model-specific
+    extra_params: Optional[Dict[str, Any]] = Field(
+        None,
+        description="Additional model-specific parameters"
+    )
+
+    @validator('temperature', 'top_p', 'frequency_penalty', 'presence_penalty', pre=True)
+    def validate_float_params(cls, v):
+        """Validate float parameters."""
+        if v is not None and not isinstance(v, bool):  # Avoid converting bool to float
+            return float(v)
+        return v
+
+    @validator('max_tokens', 'top_k', pre=True)
+    def validate_int_params(cls, v):
+        """Validate integer parameters."""
+        if v is not None and not isinstance(v, bool):  # Avoid converting bool to int
+            return int(v)
+        return v
+
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert parameters to a dictionary, excluding None values.
+
+        Returns:
+            Dictionary of parameters.
+        """
+        result = {}
+        for key, value in self.dict().items():
+            if value is not None and key != 'extra_params':
+                result[key] = value
+
+        # Add any extra parameters
+        if self.extra_params:
+            result.update(self.extra_params)
+
+        return result
+
+    def for_provider(self, provider: str) -> Dict[str, Any]:
+        """
+        Get parameters formatted for a specific provider.
+
+        Args:
+            provider: Provider name (e.g., 'openai', 'ollama', 'anthropic').
+
+        Returns:
+            Dictionary of parameters formatted for the provider.
+        """
+        params = self.to_dict()
+
+        # Handle provider-specific parameter naming
+        if provider == 'openai':
+            # OpenAI uses 'stop' instead of 'stop_sequences'
+            if 'stop_sequences' in params:
+                params['stop'] = params.pop('stop_sequences')
+
+        elif provider == 'ollama':
+            # Ollama has specific parameter handling
+            # Remove parameters not supported by Ollama
+            params_to_keep = ['temperature', 'top_p', 'top_k', 'max_tokens', 'stop_sequences']
+            params = {k: v for k, v in params.items() if k in params_to_keep}
+
+            # Rename stop_sequences to stop if present
+            if 'stop_sequences' in params:
+                params['stop'] = params.pop('stop_sequences')
+
+        elif provider == 'anthropic':
+            # Anthropic uses 'stop_sequences' and different temperature scaling
+            if 'temperature' in params:
+                # Anthropic's temperature is typically 0-1
+                params['temperature'] = min(params['temperature'], 1.0)
+
+        elif provider == 'cohere':
+            # Cohere uses 'stop_sequences' and has some unique parameters
+            pass
+
+        # Add more provider-specific conversions as needed
+
+        return params
@@ -0,0 +1,243 @@
+"""
+Service for model management and interaction.
+"""
+
+import os
+import json
+import requests
+from typing import List, Dict, Any, Optional
+
+from ai_service.config import config
+from ai_service.embeddings.document_service import document_service
+from ai_service.models.model_parameters import ModelParameters
+
+class ModelService:
+    """Service for model management and interaction."""
+
+    # Available models
+    AVAILABLE_MODELS = {
+        'gemma3': {
+            'name': 'Gemma 3',
+            'description': 'Google Gemma 3 model via Ollama',
+            'provider': 'ollama',
+            'max_tokens': 8192
+        },
+        'llama3.3': {
+            'name': 'Llama 3 (70B)',
+            'description': 'Meta Llama 3 70B model via Ollama',
+            'provider': 'ollama',
+            'max_tokens': 8192
+        },
+        'llama3.1': {
+            'name': 'Llama 3 (8B)',
+            'description': 'Meta Llama 3 8B model via Ollama',
+            'provider': 'ollama',
+            'max_tokens': 8192
+        },
+        'mistral': {
+            'name': 'Mistral',
+            'description': 'Mistral AI model via Ollama',
+            'provider': 'ollama',
+            'max_tokens': 8192
+        },
+        'deepseek': {
+            'name': 'DeepSeek',
+            'description': 'DeepSeek model via Ollama',
+            'provider': 'ollama',
+            'max_tokens': 8192
+        }
+    }
+
+    def __init__(self):
+        """Initialize the model service."""
+        self.default_model = config.DEFAULT_MODEL
+        self.ollama_api_url = config.OLLAMA_API_URL
+        self.openwebui_url = config.OPENWEBUI_URL
+        self.openwebui_api_key = config.OPENWEBUI_API_KEY
+
+    def get_available_models(self) -> List[Dict[str, Any]]:
+        """
+        Get a list of available models.
+
+        Returns:
+            List of model information dictionaries.
+        """
+        models = []
+        for model_id, model_info in self.AVAILABLE_MODELS.items():
+            model_data = {
+                'id': model_id,
+                'is_default': model_id == self.default_model,
+                **model_info
+            }
+            models.append(model_data)
+
+        return models
+
+    def get_model_info(self, model_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Get information about a specific model.
+
+        Args:
+            model_id: ID of the model.
+
+        Returns:
+            Model information dictionary if found, None otherwise.
+        """
+        if model_id not in self.AVAILABLE_MODELS:
+            return None
+
+        return {
+            'id': model_id,
+            'is_default': model_id == self.default_model,
+            **self.AVAILABLE_MODELS[model_id]
+        }
+
+    def generate_response(self, model_id: str, prompt: str,
+                         context: Optional[List[Dict[str, str]]] = None,
+                         use_rag: bool = False,
+                         model_params: Optional[ModelParameters] = None) -> str:
+        """
+        Generate a response from the model.
+
+        Args:
+            model_id: ID of the model to use.
+            prompt: User prompt.
+            context: Optional conversation context.
+            use_rag: Whether to use RAG (Retrieval Augmented Generation).
+            model_params: Optional model parameters.
+
+        Returns:
+            Generated response.
+        """
+        if model_id not in self.AVAILABLE_MODELS:
+            model_id = self.default_model
+
+        # Get the provider for this model
+        provider = self.AVAILABLE_MODELS[model_id].get('provider', 'ollama')
+
+        # Prepare the messages for the API call
+        messages = []
+
+        # Use custom system prompt if provided, otherwise use default
+        system_content = "You are a helpful assistant."
+        if model_params and model_params.system_prompt:
+            system_content = model_params.system_prompt
+
+        messages.append({
+            "role": "system",
+            "content": system_content
+        })
+
+        # Add conversation context if provided
+        if context:
+            messages.extend(context)
+
+        # If RAG is enabled, use OpenWebUI's knowledge database
+        if use_rag:
+            # We'll use OpenWebUI's built-in RAG capabilities
+            # This is handled by sending the request to OpenWebUI instead of Ollama directly
+            try:
+                # Prepare the request for OpenWebUI
+                openwebui_request = {
+                    "model": model_id,
+                    "messages": messages + [{"role": "user", "content": prompt}],
+                    "use_knowledge": True,  # Enable RAG
+                    "stream": False
+                }
+
+                # Add model parameters if provided
+                if model_params:
+                    params = model_params.to_dict()
+                    # Map parameters to OpenWebUI format
+                    if 'temperature' in params:
+                        openwebui_request['temperature'] = params['temperature']
+                    if 'max_tokens' in params:
+                        openwebui_request['max_tokens'] = params['max_tokens']
+                    if 'top_p' in params:
+                        openwebui_request['top_p'] = params['top_p']
+
+                # Make the API call to OpenWebUI
+                headers = {"Content-Type": "application/json"}
+                if self.openwebui_api_key:
+                    headers["Authorization"] = f"Bearer {self.openwebui_api_key}"
+
+                # OpenWebUI API endpoint is /api/chat/completions
+                response = requests.post(
+                    f"{self.openwebui_url}/api/chat/completions",
+                    headers=headers,
+                    json=openwebui_request,
+                    timeout=60  # Longer timeout for RAG
+                )
+
+                response.raise_for_status()
+                result = response.json()
+
+                # Extract the response content
+                if 'message' in result:
+                    return result['message']['content']
+                else:
+                    return "Error: Unexpected response format from OpenWebUI"
+
+            except Exception as e:
+                print(f"Error calling OpenWebUI API: {str(e)}")
+                # Fall back to direct Ollama call without RAG
+                print("Falling back to direct Ollama call without RAG")
+                # Continue to the Ollama API call below
+
+        # Add user prompt
+        messages.append({
+            "role": "user",
+            "content": prompt
+        })
+
+        # Prepare API request parameters for Ollama
+        request_json = {
+            "model": model_id,
+            "messages": messages,
+            "stream": False
+        }
+
+        # Add model parameters if provided
+        if model_params:
+            params = model_params.to_dict()
+            # Map parameters to Ollama format
+            if 'temperature' in params:
+                request_json['temperature'] = params['temperature']
+            if 'top_p' in params:
+                request_json['top_p'] = params['top_p']
+            if 'top_k' in params:
+                request_json['top_k'] = params['top_k']
+            if 'max_tokens' in params:
+                request_json['max_tokens'] = params['max_tokens']
+
+        # Make the API call to Ollama
+        try:
+            # Ollama API endpoint is /api/chat or /api/generate
+            response = requests.post(
+                f"{self.ollama_api_url}/api/generate",
+                headers={"Content-Type": "application/json"},
+                json=request_json,
+                timeout=30
+            )
+
+            response.raise_for_status()
+            result = response.json()
+
+            # Extract the response content from Ollama
+            # The response format depends on whether we're using /api/chat or /api/generate
+            if 'message' in result and 'content' in result['message']:
+                # Format for /api/chat
+                return result['message']['content']
+            elif 'response' in result:
+                # Format for /api/generate
+                return result['response']
+            else:
+                return "Error: Unexpected response format from Ollama"
+
+        except Exception as e:
+            print(f"Error calling Ollama API: {str(e)}")
+            return f"Error generating response: {str(e)}"
+
+
+# Create a singleton instance at import time; the constructor only copies
+# settings from config and makes no network calls.
+model_service = ModelService()