Initial commit for deployment

2025-05-09 15:41:16 +01:00
commit ac98999507
54 changed files with 4343 additions and 0 deletions
@@ -0,0 +1,18 @@
+# API configuration
+API_HOST=0.0.0.0
+API_PORT=5251
+
+# OpenWebUI configuration
+OPENWEBUI_URL=http://104.225.217.215:8080
+OPENWEBUI_API_KEY=your-openwebui-api-key  # Replace with the actual key in your .env file
+
+# Ollama configuration
+OLLAMA_API_URL=http://localhost:11434
+DEFAULT_MODEL=llama3.1
+
+# Local storage
+SQLITE_DB_PATH=ai_service/data/chatbot.db
+
+# Document processing
+CHUNK_SIZE=1000
+CHUNK_OVERLAP=200
@@ -0,0 +1,33 @@
+# API configuration
+API_HOST=0.0.0.0
+API_PORT=5251
+
+# Pinecone configuration
+# Sign up at https://www.pinecone.io/ to get your API key
+PINECONE_API_KEY=your-pinecone-api-key-here
+PINECONE_ENVIRONMENT=your-pinecone-environment-here
+PINECONE_INDEX_NAME=chatbot-index
+
+# Model configuration
+# Sign up at https://platform.openai.com/ to get your API key
+DEFAULT_MODEL=gpt-3.5-turbo
+OPENAI_API_KEY=your-openai-api-key-here
+
+# Local storage
+# Path to SQLite database (will be replaced with MySQL in production)
+SQLITE_DB_PATH=ai_service/data/chatbot.db
+
+# Document processing
+# Adjust these values based on your needs
+CHUNK_SIZE=1000
+CHUNK_OVERLAP=200
+
+# Embedding model
+# Options: all-MiniLM-L6-v2 (default), paraphrase-MiniLM-L3-v2 (smaller/faster)
+EMBEDDING_MODEL=all-MiniLM-L6-v2
+
+# Production settings
+# Set to 'production' in production environment
+ENVIRONMENT=production
+LOG_LEVEL=INFO
+ENABLE_MOCK=false
@@ -0,0 +1,94 @@
+# AI Service for Chatbot Application
+
+This is the AI service component for the chatbot application. It provides APIs for document processing, embeddings, and chat functionality.
+
+## Features
+
+- Document processing and embedding
+- Retrieval-augmented generation (RAG)
+- Chat functionality with model switching
+- Team chat support
+
+## Project Structure
+
+```
+ai_service/
+├── embeddings/       # Embedding and document processing services
+├── models/           # Model and chat services
+├── utils/            # Utility functions
+├── data/             # Data storage
+├── config.py         # Configuration settings
+├── api.py            # FastAPI application
+└── run.py            # Script to run the service
+```
+
+## Setup Instructions
+
+1. Create a virtual environment:
+   ```
+   python -m venv venv
+   source venv/bin/activate  # On Windows: venv\Scripts\activate
+   ```
+
+2. Install dependencies:
+   ```
+   pip install -r requirements.txt
+   ```
+
+3. Copy `.env.example` to `.env` and update the values:
+   ```
+   cp .env.example .env
+   # Edit the .env file with appropriate values
+   ```
+
+4. Run the service:
+   ```
+   python run.py
+   ```
+
+## API Documentation
+
+Once the service is running, you can access the API documentation at:
+- Swagger UI: http://localhost:5251/docs
+- ReDoc: http://localhost:5251/redoc
+
+## Deployment
+
+To deploy the service:
+
+1. Make the deployment script executable:
+   ```
+   chmod +x deploy.sh
+   ```
+
+2. Run the deployment script:
+   ```
+   ./deploy.sh
+   ```
+
+This will start the service on port 5251 using uvicorn with nohup.
+
+## API Endpoints
+
+### Document Endpoints
+
+- `POST /documents` - Process a document for embedding
+- `GET /documents` - Get all documents
+- `GET /documents/{doc_id}` - Get a document by ID
+- `DELETE /documents/{doc_id}` - Delete a document
+- `POST /documents/search` - Search for documents
+
+### Model Endpoints
+
+- `GET /models` - Get available models
+- `GET /models/{model_id}` - Get information about a model
+
+### Chat Endpoints
+
+- `POST /chats` - Create a new chat
+- `GET /chats/user/{user_id}` - Get all chats for a user
+- `GET /chats/{chat_id}` - Get a chat by ID
+- `POST /chats/{chat_id}/messages` - Send a message to a chat
+- `POST /chats/{chat_id}/members/{user_id}` - Add a user to a team chat
+- `DELETE /chats/{chat_id}/members/{user_id}` - Remove a user from a team chat
+- `DELETE /chats/{chat_id}` - Delete a chat
@@ -0,0 +1,6 @@
+"""
+AI service package.
+"""
+
+# Import for easier access
+from ai_service.api import app
@@ -0,0 +1,28 @@
+{
+  "e2b1bdc2-a384-4775-9c14-42b221e5554f": {
+    "id": "e2b1bdc2-a384-4775-9c14-42b221e5554f",
+    "title": "Test Chat",
+    "user_id": "test_user",
+    "model_id": "gpt-3.5-turbo",
+    "is_team_chat": false,
+    "created_at": "2025-05-06T11:50:43.558931",
+    "updated_at": "2025-05-06T11:51:20.982846",
+    "messages": [
+      {
+        "id": "865a1e57-c71f-4bab-a4ba-56d630a38631",
+        "content": "Hello, AI!",
+        "user_id": "test_user",
+        "is_user_message": true,
+        "timestamp": "2025-05-06T11:51:20.051537"
+      },
+      {
+        "id": "eb755b4f-fe55-4bec-b77f-ed20941df360",
+        "content": "Error generating response: 401 Client Error: Unauthorized for url: https://api.openai.com/v1/chat/completions",
+        "user_id": null,
+        "is_user_message": false,
+        "timestamp": "2025-05-06T11:51:20.982829"
+      }
+    ],
+    "team_members": []
+  }
+}
@@ -0,0 +1,377 @@
+"""
+FastAPI application for the AI service.
+"""
+
+from fastapi import FastAPI, HTTPException, Depends, Body, Query, Path
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel, Field
+from typing import List, Dict, Any, Optional
+
+from ai_service.config import config
+from ai_service.embeddings.document_service import document_service
+from ai_service.models.model_service import model_service
+from ai_service.models.chat_service import chat_service
+from ai_service.models.model_parameters import ModelParameters
+
+# Create FastAPI app
+app = FastAPI(
+    title="AI Service API",
+    description="API for the AI service",
+    version="1.0.0"
+)
+
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Allow all origins
+    allow_credentials=True,
+    allow_methods=["*"],  # Allow all methods
+    allow_headers=["*"],  # Allow all headers
+)
+
+# Define API models
+class DocumentRequest(BaseModel):
+    """Request model for document processing."""
+    content: str = Field(..., description="Document content")
+    title: str = Field(..., description="Document title")
+    description: Optional[str] = Field(None, description="Document description")
+    metadata: Optional[Dict[str, Any]] = Field(None, description="Additional metadata")
+
+class DocumentResponse(BaseModel):
+    """Response model for document processing."""
+    id: str = Field(..., description="Document ID")
+    title: str = Field(..., description="Document title")
+    description: str = Field(..., description="Document description")
+    chunk_count: int = Field(..., description="Number of chunks")
+    metadata: Dict[str, Any] = Field(..., description="Additional metadata")
+
+class SearchRequest(BaseModel):
+    """Request model for document search."""
+    query: str = Field(..., description="Search query")
+    top_k: int = Field(5, description="Number of results to return")
+
+class SearchResult(BaseModel):
+    """Model for a search result."""
+    id: str = Field(..., description="Result ID")
+    score: float = Field(..., description="Similarity score")
+    metadata: Dict[str, Any] = Field(..., description="Result metadata")
+
+class ModelInfo(BaseModel):
+    """Model for model information."""
+    id: str = Field(..., description="Model ID")
+    name: str = Field(..., description="Model name")
+    description: str = Field(..., description="Model description")
+    provider: str = Field(..., description="Model provider")
+    max_tokens: int = Field(..., description="Maximum tokens")
+    is_default: bool = Field(..., description="Whether this is the default model")
+
+class ChatRequest(BaseModel):
+    """Request model for creating a chat."""
+    user_id: str = Field(..., description="User ID")
+    title: Optional[str] = Field(None, description="Chat title")
+    model_id: Optional[str] = Field(None, description="Model ID")
+    is_team_chat: bool = Field(False, description="Whether this is a team chat")
+
+class MessageRequest(BaseModel):
+    """Request model for sending a message."""
+    message: str = Field(..., description="Message content")
+    user_id: str = Field(..., description="User ID")
+    use_rag: bool = Field(False, description="Whether to use RAG")
+
+    # Model parameters
+    temperature: Optional[float] = Field(None, description="Controls randomness: higher values mean more random completions")
+    max_tokens: Optional[int] = Field(None, description="Maximum number of tokens to generate")
+    top_p: Optional[float] = Field(None, description="Nucleus sampling parameter")
+    frequency_penalty: Optional[float] = Field(None, description="Penalizes repeated tokens")
+    presence_penalty: Optional[float] = Field(None, description="Penalizes repeated topics")
+    stop_sequences: Optional[List[str]] = Field(None, description="Sequences where the API will stop generating")
+    system_prompt: Optional[str] = Field(None, description="System prompt to guide the model's behavior")
+
+    # Additional advanced parameters
+    min_p: Optional[float] = Field(None, description="Minimum probability threshold for token selection")
+    top_k: Optional[int] = Field(None, description="Only sample from the top k tokens")
+    repeat_penalty: Optional[float] = Field(None, description="Penalty for repeating tokens")
+    function_calling: Optional[bool] = Field(None, description="Whether to enable function calling")
+
+class Message(BaseModel):
+    """Model for a message."""
+    id: str = Field(..., description="Message ID")
+    content: str = Field(..., description="Message content")
+    user_id: Optional[str] = Field(None, description="User ID")
+    is_user_message: bool = Field(..., description="Whether this is a user message")
+    timestamp: str = Field(..., description="Message timestamp")
+
+class Chat(BaseModel):
+    """Model for a chat."""
+    id: str = Field(..., description="Chat ID")
+    title: str = Field(..., description="Chat title")
+    user_id: str = Field(..., description="User ID")
+    model_id: str = Field(..., description="Model ID")
+    is_team_chat: bool = Field(..., description="Whether this is a team chat")
+    created_at: str = Field(..., description="Creation timestamp")
+    updated_at: str = Field(..., description="Update timestamp")
+    messages: List[Message] = Field(..., description="Chat messages")
+    team_members: List[str] = Field(..., description="Team members")
+
+# Define API endpoints
+@app.get("/health")
+async def health_check():
+    """
+    Health check endpoint.
+
+    Returns:
+        Health status.
+    """
+    return {"status": "healthy"}
+
+# Document endpoints
+@app.post("/documents", response_model=DocumentResponse)
+async def process_document(request: DocumentRequest):
+    """
+    Process a document for embedding.
+
+    Args:
+        request: Document processing request.
+
+    Returns:
+        Processed document information.
+    """
+    doc_id = document_service.process_document(
+        content=request.content,
+        title=request.title,
+        description=request.description,
+        metadata=request.metadata
+    )
+
+    return document_service.get_document(doc_id)
+
+@app.get("/documents", response_model=List[DocumentResponse])
+async def get_all_documents():
+    """
+    Get all documents.
+
+    Returns:
+        List of document information.
+    """
+    return document_service.get_all_documents()
+
+@app.get("/documents/{doc_id}", response_model=DocumentResponse)
+async def get_document(doc_id: str):
+    """
+    Get a document by ID.
+
+    Args:
+        doc_id: Document ID.
+
+    Returns:
+        Document information.
+    """
+    doc = document_service.get_document(doc_id)
+    if not doc:
+        raise HTTPException(status_code=404, detail="Document not found")
+
+    return doc
+
+@app.delete("/documents/{doc_id}")
+async def delete_document(doc_id: str):
+    """
+    Delete a document.
+
+    Args:
+        doc_id: Document ID.
+
+    Returns:
+        Deletion status.
+    """
+    success = document_service.delete_document(doc_id)
+    if not success:
+        raise HTTPException(status_code=404, detail="Document not found")
+
+    return {"status": "success", "message": "Document deleted"}
+
+@app.post("/documents/search", response_model=List[SearchResult])
+async def search_documents(request: SearchRequest):
+    """
+    Search for documents.
+
+    Args:
+        request: Search request.
+
+    Returns:
+        Search results.
+    """
+    results = document_service.search_documents(
+        query=request.query,
+        top_k=request.top_k
+    )
+
+    return results
+
+# Model endpoints
+@app.get("/models", response_model=List[ModelInfo])
+async def get_available_models():
+    """
+    Get available models.
+
+    Returns:
+        List of model information.
+    """
+    return model_service.get_available_models()
+
+@app.get("/models/{model_id}", response_model=ModelInfo)
+async def get_model_info(model_id: str):
+    """
+    Get information about a model.
+
+    Args:
+        model_id: Model ID.
+
+    Returns:
+        Model information.
+    """
+    model_info = model_service.get_model_info(model_id)
+    if not model_info:
+        raise HTTPException(status_code=404, detail="Model not found")
+
+    return model_info
+
+# Chat endpoints
+@app.post("/chats", response_model=Chat)
+async def create_chat(request: ChatRequest):
+    """
+    Create a new chat.
+
+    Args:
+        request: Chat creation request.
+
+    Returns:
+        Created chat.
+    """
+    chat_id = chat_service.create_chat(
+        user_id=request.user_id,
+        title=request.title,
+        model_id=request.model_id,
+        is_team_chat=request.is_team_chat
+    )
+
+    return chat_service.get_chat(chat_id)
+
+@app.get("/chats/user/{user_id}", response_model=List[Chat])
+async def get_user_chats(user_id: str):
+    """
+    Get all chats for a user.
+
+    Args:
+        user_id: User ID.
+
+    Returns:
+        List of chats.
+    """
+    return chat_service.get_user_chats(user_id)
+
+@app.get("/chats/{chat_id}", response_model=Chat)
+async def get_chat(chat_id: str):
+    """
+    Get a chat by ID.
+
+    Args:
+        chat_id: Chat ID.
+
+    Returns:
+        Chat information.
+    """
+    chat = chat_service.get_chat(chat_id)
+    if not chat:
+        raise HTTPException(status_code=404, detail="Chat not found")
+
+    return chat
+
+@app.post("/chats/{chat_id}/messages", response_model=Message)
+async def send_message(chat_id: str, request: MessageRequest):
+    """
+    Send a message to a chat.
+
+    Args:
+        chat_id: Chat ID.
+        request: Message request with optional model parameters.
+
+    Returns:
+        Bot response message.
+    """
+    try:
+        # Extract model parameters from the request
+        response = chat_service.get_chat_response(
+            chat_id=chat_id,
+            message=request.message,
+            user_id=request.user_id,
+            use_rag=request.use_rag,
+            temperature=request.temperature,
+            max_tokens=request.max_tokens,
+            top_p=request.top_p,
+            frequency_penalty=request.frequency_penalty,
+            presence_penalty=request.presence_penalty,
+            stop_sequences=request.stop_sequences,
+            system_prompt=request.system_prompt,
+            min_p=request.min_p,
+            top_k=request.top_k,
+            repeat_penalty=request.repeat_penalty,
+            function_calling=request.function_calling
+        )
+
+        return response
+
+    except ValueError as e:
+        raise HTTPException(status_code=404, detail=str(e))
+
+@app.post("/chats/{chat_id}/members/{user_id}")
+async def add_team_member(chat_id: str, user_id: str):
+    """
+    Add a user to a team chat.
+
+    Args:
+        chat_id: Chat ID.
+        user_id: User ID.
+
+    Returns:
+        Addition status.
+    """
+    success = chat_service.add_team_member(chat_id, user_id)
+    if not success:
+        raise HTTPException(status_code=400, detail="Failed to add team member")
+
+    return {"status": "success", "message": "Team member added"}
+
+@app.delete("/chats/{chat_id}/members/{user_id}")
+async def remove_team_member(chat_id: str, user_id: str):
+    """
+    Remove a user from a team chat.
+
+    Args:
+        chat_id: Chat ID.
+        user_id: User ID.
+
+    Returns:
+        Removal status.
+    """
+    success = chat_service.remove_team_member(chat_id, user_id)
+    if not success:
+        raise HTTPException(status_code=400, detail="Failed to remove team member")
+
+    return {"status": "success", "message": "Team member removed"}
+
+@app.delete("/chats/{chat_id}")
+async def delete_chat(chat_id: str):
+    """
+    Delete a chat.
+
+    Args:
+        chat_id: Chat ID.
+
+    Returns:
+        Deletion status.
+    """
+    success = chat_service.delete_chat(chat_id)
+    if not success:
+        raise HTTPException(status_code=404, detail="Chat not found")
+
+    return {"status": "success", "message": "Chat deleted"}
@@ -0,0 +1,37 @@
+"""
+Configuration settings for the AI service.
+"""
+
+import os
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
+
+class Config:
+    """Base configuration."""
+
+    # API configuration
+    API_HOST = os.environ.get('API_HOST', '0.0.0.0')
+    API_PORT = int(os.environ.get('API_PORT', 5251))
+
+    # OpenWebUI configuration
+    OPENWEBUI_URL = os.environ.get('OPENWEBUI_URL', 'http://104.225.217.215:8080')
+    OPENWEBUI_API_KEY = os.environ.get('OPENWEBUI_API_KEY', '')
+
+    # Ollama configuration
+    OLLAMA_API_URL = os.environ.get('OLLAMA_API_URL', 'http://localhost:11434')
+    DEFAULT_MODEL = os.environ.get('DEFAULT_MODEL', 'llama3.1')
+
+    # Local storage
+    SQLITE_DB_PATH = os.environ.get('SQLITE_DB_PATH', 'ai_service/data/chatbot.db')
+
+    # Document processing
+    CHUNK_SIZE = int(os.environ.get('CHUNK_SIZE', 1000))
+    CHUNK_OVERLAP = int(os.environ.get('CHUNK_OVERLAP', 200))
+
+    # Embedding model
+    EMBEDDING_MODEL = os.environ.get('EMBEDDING_MODEL', 'all-MiniLM-L6-v2')
+
+
+config = Config()
@@ -0,0 +1,28 @@
+{
+  "48b04d66-3ef4-440b-8508-ced930aa42a9": {
+    "id": "48b04d66-3ef4-440b-8508-ced930aa42a9",
+    "title": "Test Chat",
+    "user_id": "test_user",
+    "model_id": "gpt-3.5-turbo",
+    "is_team_chat": false,
+    "created_at": "2025-05-05T20:29:54.936964",
+    "updated_at": "2025-05-05T20:29:55.394906",
+    "messages": [
+      {
+        "id": "9daafee5-f00c-4493-96ea-96492f97482e",
+        "content": "Tell me about artificial intelligence",
+        "user_id": "test_user",
+        "is_user_message": true,
+        "timestamp": "2025-05-05T20:29:54.971667"
+      },
+      {
+        "id": "0337e0b1-1e18-4d09-8b8b-c5ee295870a0",
+        "content": "Error generating response: 401 Client Error: Unauthorized for url: https://api.openai.com/v1/chat/completions",
+        "user_id": null,
+        "is_user_message": false,
+        "timestamp": "2025-05-05T20:29:55.394891"
+      }
+    ],
+    "team_members": []
+  }
+}
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Create a directory for the AI service logs
+mkdir -p logs
+
+# Activate virtual environment
+source venv/bin/activate
+
+# Export environment variables
+export API_HOST=0.0.0.0
+export API_PORT=5251
+
+# Make sure the Python path includes the current directory
+export PYTHONPATH=$PYTHONPATH:$(pwd)
+
+# Run the application with uvicorn and nohup
+nohup uvicorn ai_service.run:app --host $API_HOST --port $API_PORT > logs/ai_service.log 2>&1 &
+
+echo "AI Service started on port $API_PORT. Check ai_service.log for output."
+echo "To stop the application, find the process ID with 'ps aux | grep uvicorn' and kill it with 'kill <PID>'."
@@ -0,0 +1,261 @@
+"""
+Service for document processing and chunking.
+"""
+
+import os
+import json
+import uuid
+import requests
+import base64
+from typing import List, Dict, Any, Optional
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+
+from ai_service.config import config
+
+class DocumentService:
+    """Service for document processing and chunking."""
+
+    def __init__(self):
+        """Initialize the document service."""
+        self.chunk_size = config.CHUNK_SIZE
+        self.chunk_overlap = config.CHUNK_OVERLAP
+        self.text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=self.chunk_size,
+            chunk_overlap=self.chunk_overlap,
+            length_function=len
+        )
+
+        # OpenWebUI configuration
+        self.openwebui_url = config.OPENWEBUI_URL
+        self.openwebui_api_key = config.OPENWEBUI_API_KEY
+
+        # Ensure data directory exists
+        os.makedirs(os.path.dirname(config.SQLITE_DB_PATH), exist_ok=True)
+
+        # For now, we'll store document metadata in a simple JSON file
+        self.metadata_file = os.path.join(os.path.dirname(config.SQLITE_DB_PATH), 'document_metadata.json')
+        self._load_metadata()
+
+    def _load_metadata(self):
+        """Load document metadata from file."""
+        if os.path.exists(self.metadata_file):
+            try:
+                with open(self.metadata_file, 'r') as f:
+                    self.documents = json.load(f)
+            except Exception as e:
+                print(f"Error loading document metadata: {str(e)}")
+                self.documents = {}
+        else:
+            self.documents = {}
+
+    def _save_metadata(self):
+        """Save document metadata to file."""
+        try:
+            with open(self.metadata_file, 'w') as f:
+                json.dump(self.documents, f, indent=2)
+        except Exception as e:
+            print(f"Error saving document metadata: {str(e)}")
+
+    def process_document(self, content: str, title: str,
+                         description: Optional[str] = None,
+                         metadata: Optional[Dict[str, Any]] = None) -> str:
+        """
+        Process a document for embedding.
+
+        Args:
+            content: Document content.
+            title: Document title.
+            description: Optional document description.
+            metadata: Optional additional metadata.
+
+        Returns:
+            Document ID.
+        """
+        # Generate a unique ID for the document
+        doc_id = str(uuid.uuid4())
+
+        # Upload the document to OpenWebUI for RAG processing
+        try:
+            # Prepare headers
+            headers = {"Content-Type": "application/json"}
+            if self.openwebui_api_key:
+                headers["Authorization"] = f"Bearer {self.openwebui_api_key}"
+
+            # Prepare the document data
+            document_data = {
+                "filename": f"{title}.txt",
+                "content": base64.b64encode(content.encode('utf-8')).decode('utf-8'),
+                "description": description or title
+            }
+
+            # Upload to OpenWebUI
+            response = requests.post(
+                f"{self.openwebui_url}/api/knowledge/upload",
+                headers=headers,
+                json=document_data,
+                timeout=60
+            )
+
+            response.raise_for_status()
+            result = response.json()
+
+            # Get the OpenWebUI document ID
+            openwebui_doc_id = result.get('id', '')
+
+            # Store document metadata
+            self.documents[doc_id] = {
+                'id': doc_id,
+                'title': title,
+                'description': description or '',
+                'openwebui_id': openwebui_doc_id,
+                'metadata': metadata or {}
+            }
+
+            # Save metadata to file
+            self._save_metadata()
+
+            return doc_id
+
+        except Exception as e:
+            print(f"Error uploading document to OpenWebUI: {str(e)}")
+
+            # Fall back to local processing if OpenWebUI upload fails
+            print("Falling back to local document processing")
+
+            # Split the document into chunks for local reference
+            chunks = self.text_splitter.split_text(content)
+
+            # Store document metadata
+            self.documents[doc_id] = {
+                'id': doc_id,
+                'title': title,
+                'description': description or '',
+                'chunk_count': len(chunks),
+                'openwebui_upload_failed': True,
+                'metadata': metadata or {}
+            }
+
+            # Save metadata to file
+            self._save_metadata()
+
+            return doc_id
+
+    def get_document(self, doc_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Get document metadata.
+
+        Args:
+            doc_id: Document ID.
+
+        Returns:
+            Document metadata if found, None otherwise.
+        """
+        return self.documents.get(doc_id)
+
+    def get_all_documents(self) -> List[Dict[str, Any]]:
+        """
+        Get all document metadata.
+
+        Returns:
+            List of document metadata.
+        """
+        # Get documents from local storage
+        local_documents = list(self.documents.values())
+
+        # Try to get documents from OpenWebUI as well
+        try:
+            # Prepare headers
+            headers = {"Content-Type": "application/json"}
+            if self.openwebui_api_key:
+                headers["Authorization"] = f"Bearer {self.openwebui_api_key}"
+
+            # Get documents from OpenWebUI
+            response = requests.get(
+                f"{self.openwebui_url}/api/knowledge",
+                headers=headers,
+                timeout=30
+            )
+
+            if response.status_code == 200:
+                openwebui_docs = response.json()
+
+                # Update local documents with OpenWebUI information
+                for doc in local_documents:
+                    if 'openwebui_id' in doc:
+                        for openwebui_doc in openwebui_docs:
+                            if openwebui_doc.get('id') == doc['openwebui_id']:
+                                doc['openwebui_status'] = 'active'
+                                doc['openwebui_info'] = openwebui_doc
+                                break
+
+        except Exception as e:
+            print(f"Error getting documents from OpenWebUI: {str(e)}")
+
+        return local_documents
+
+    def delete_document(self, doc_id: str) -> bool:
+        """
+        Delete a document and its chunks.
+
+        Args:
+            doc_id: Document ID.
+
+        Returns:
+            True if deletion was successful, False otherwise.
+        """
+        if doc_id not in self.documents:
+            return False
+
+        # Check if document was uploaded to OpenWebUI
+        doc = self.documents[doc_id]
+        openwebui_id = doc.get('openwebui_id')
+
+        if openwebui_id:
+            try:
+                # Prepare headers
+                headers = {"Content-Type": "application/json"}
+                if self.openwebui_api_key:
+                    headers["Authorization"] = f"Bearer {self.openwebui_api_key}"
+
+                # Delete from OpenWebUI
+                response = requests.delete(
+                    f"{self.openwebui_url}/api/knowledge/{openwebui_id}",
+                    headers=headers,
+                    timeout=30
+                )
+
+                if response.status_code != 200:
+                    print(f"Warning: Failed to delete document from OpenWebUI: {response.text}")
+
+            except Exception as e:
+                print(f"Error deleting document from OpenWebUI: {str(e)}")
+
+        # Delete document metadata
+        del self.documents[doc_id]
+
+        # Save metadata to file
+        self._save_metadata()
+
+        return True
+
+    def search_documents(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
+        """
+        Search for documents similar to a query.
+
+        Args:
+            query: Search query.
+            top_k: Number of results to return.
+
+        Returns:
+            List of similar document chunks with their metadata.
+        """
+        # Note: We don't need to implement this method anymore since
+        # RAG is handled directly by OpenWebUI when use_rag=True in the model service
+
+        # Return empty results - this is just a placeholder
+        # The actual RAG functionality is in the model_service.generate_response method
+        return []
+
+
+# Create a singleton instance
+document_service = DocumentService()
@@ -0,0 +1,214 @@
+"""
+Service for generating and managing embeddings.
+"""
+
+import os
+import random
+import pinecone
+import numpy as np
+from typing import List, Dict, Any, Optional, Union
+from sentence_transformers import SentenceTransformer
+
+from ai_service.config import config
+
+class EmbeddingService:
+    """Service for generating and managing embeddings."""
+
+    def __init__(self, use_mock=True):  # Default to mock implementation
+        """Initialize the embedding service."""
+        self.use_mock = use_mock
+
+        if not self.use_mock:
+            # Use a smaller model for testing
+            self.model_name = "paraphrase-MiniLM-L3-v2"  # Smaller model than the default
+            try:
+                self.model = SentenceTransformer(self.model_name)
+                print(f"Loaded embedding model: {self.model_name}")
+            except Exception as e:
+                print(f"Error loading embedding model: {str(e)}")
+                self.use_mock = True
+                print("Falling back to mock implementation")
+        else:
+            print("Using mock embedding implementation")
+            self.model_name = "mock-model"
+            self.model = None
+
+        self._initialize_pinecone()
+
+    def _initialize_pinecone(self):
+        """Initialize Pinecone client."""
+        if not config.PINECONE_API_KEY or not config.PINECONE_ENVIRONMENT:
+            print("Warning: Pinecone API key or environment not set. Vector storage will not be available.")
+            self.index = None
+            return
+
+        try:
+            pinecone.init(
+                api_key=config.PINECONE_API_KEY,
+                environment=config.PINECONE_ENVIRONMENT
+            )
+
+            # Check if index exists, create if it doesn't
+            if config.PINECONE_INDEX_NAME not in pinecone.list_indexes():
+                pinecone.create_index(
+                    name=config.PINECONE_INDEX_NAME,
+                    dimension=self.model.get_sentence_embedding_dimension(),
+                    metric="cosine"
+                )
+
+            self.index = pinecone.Index(config.PINECONE_INDEX_NAME)
+            print(f"Connected to Pinecone index: {config.PINECONE_INDEX_NAME}")
+        except Exception as e:
+            print(f"Error connecting to Pinecone: {str(e)}")
+            self.index = None
+
+    def generate_embedding(self, text: str) -> List[float]:
+        """
+        Generate an embedding for a text.
+
+        Args:
+            text: Text to embed.
+
+        Returns:
+            Embedding vector.
+        """
+        if self.use_mock:
+            # Generate a mock embedding vector (384 dimensions for consistency)
+            return [random.random() for _ in range(384)]
+
+        embedding = self.model.encode(text)
+        return embedding.tolist()
+
+    def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
+        """
+        Generate embeddings for multiple texts.
+
+        Args:
+            texts: List of texts to embed.
+
+        Returns:
+            List of embedding vectors.
+        """
+        if self.use_mock:
+            # Generate mock embedding vectors
+            return [[random.random() for _ in range(384)] for _ in texts]
+
+        embeddings = self.model.encode(texts)
+        return embeddings.tolist()
+
+    def store_embeddings(self, ids: List[str], embeddings: List[List[float]],
+                         metadata: Optional[List[Dict[str, Any]]] = None) -> bool:
+        """
+        Store embeddings in Pinecone.
+
+        Args:
+            ids: List of IDs for the embeddings.
+            embeddings: List of embedding vectors.
+            metadata: Optional list of metadata dictionaries.
+
+        Returns:
+            True if storage was successful, False otherwise.
+        """
+        if self.use_mock:
+            print(f"Mock: Stored {len(ids)} embeddings")
+            return True
+
+        if self.index is None:
+            print("Warning: Pinecone index not available. Embeddings not stored.")
+            return False
+
+        if metadata is None:
+            metadata = [{} for _ in ids]
+
+        vectors = [
+            (id, embedding, meta)
+            for id, embedding, meta in zip(ids, embeddings, metadata)
+        ]
+
+        try:
+            self.index.upsert(vectors=vectors)
+            return True
+        except Exception as e:
+            print(f"Error storing embeddings in Pinecone: {str(e)}")
+            return False
+
+    def search_similar(self, query_embedding: List[float], top_k: int = 5) -> List[Dict[str, Any]]:
+        """
+        Search for similar embeddings in Pinecone.
+
+        Args:
+            query_embedding: Query embedding vector.
+            top_k: Number of results to return.
+
+        Returns:
+            List of similar items with their metadata.
+        """
+        if self.use_mock:
+            # Generate mock search results
+            print(f"Mock: Searching for similar embeddings (top_k={top_k})")
+            mock_results = []
+            for i in range(min(top_k, 3)):  # Return at most 3 mock results
+                mock_results.append({
+                    'id': f"mock_doc_{i}",
+                    'score': 0.9 - (i * 0.1),  # Decreasing similarity scores
+                    'metadata': {
+                        'document_id': f"mock_doc_{i}",
+                        'chunk_index': i,
+                        'title': f"Mock Document {i}",
+                        'description': f"This is a mock document {i}",
+                        'chunk_text': f"This is the content of mock document {i}..."
+                    }
+                })
+            return mock_results
+
+        if self.index is None:
+            print("Warning: Pinecone index not available. Search not performed.")
+            return []
+
+        try:
+            results = self.index.query(
+                vector=query_embedding,
+                top_k=top_k,
+                include_metadata=True
+            )
+
+            return [
+                {
+                    'id': match['id'],
+                    'score': match['score'],
+                    'metadata': match.get('metadata', {})
+                }
+                for match in results.get('matches', [])
+            ]
+        except Exception as e:
+            print(f"Error searching in Pinecone: {str(e)}")
+            return []
+
+    def delete_embeddings(self, ids: List[str]) -> bool:
+        """
+        Delete embeddings from Pinecone.
+
+        Args:
+            ids: List of IDs to delete.
+
+        Returns:
+            True if deletion was successful, False otherwise.
+        """
+        if self.use_mock:
+            print(f"Mock: Deleted {len(ids)} embeddings")
+            return True
+
+        if self.index is None:
+            print("Warning: Pinecone index not available. Deletion not performed.")
+            return False
+
+        try:
+            self.index.delete(ids=ids)
+            return True
+        except Exception as e:
+            print(f"Error deleting embeddings from Pinecone: {str(e)}")
+            return False
+
+
+# Create a singleton instance
+embedding_service = EmbeddingService()
@@ -0,0 +1,309 @@
+"""
+Service for chat functionality.
+"""
+
+import os
+import json
+import uuid
+from datetime import datetime
+from typing import List, Dict, Any, Optional
+
+from ai_service.config import config
+from ai_service.models.model_service import model_service
+from ai_service.models.model_parameters import ModelParameters
+
+class ChatService:
+    """Service for chat functionality."""
+
+    def __init__(self):
+        """Initialize the chat service."""
+        # Ensure data directory exists
+        os.makedirs(os.path.dirname(config.SQLITE_DB_PATH), exist_ok=True)
+
+        # For now, we'll store chat data in a simple JSON file
+        self.chats_file = os.path.join(os.path.dirname(config.SQLITE_DB_PATH), 'chats.json')
+        self._load_chats()
+
+    def _load_chats(self):
+        """Load chats from file."""
+        if os.path.exists(self.chats_file):
+            try:
+                with open(self.chats_file, 'r') as f:
+                    self.chats = json.load(f)
+            except Exception as e:
+                print(f"Error loading chats: {str(e)}")
+                self.chats = {}
+        else:
+            self.chats = {}
+
+    def _save_chats(self):
+        """Save chats to file."""
+        try:
+            with open(self.chats_file, 'w') as f:
+                json.dump(self.chats, f, indent=2)
+        except Exception as e:
+            print(f"Error saving chats: {str(e)}")
+
+    def create_chat(self, user_id: str, title: Optional[str] = None,
+                   model_id: Optional[str] = None, is_team_chat: bool = False) -> str:
+        """
+        Create a new chat.
+
+        Args:
+            user_id: ID of the user creating the chat.
+            title: Optional title for the chat.
+            model_id: Optional model ID to use for this chat.
+            is_team_chat: Whether this is a team chat.
+
+        Returns:
+            ID of the created chat.
+        """
+        # Generate a unique ID for the chat
+        chat_id = str(uuid.uuid4())
+
+        # Create chat data
+        self.chats[chat_id] = {
+            'id': chat_id,
+            'title': title or f"Chat {len(self.chats) + 1}",
+            'user_id': user_id,
+            'model_id': model_id or config.DEFAULT_MODEL,
+            'is_team_chat': is_team_chat,
+            'created_at': datetime.utcnow().isoformat(),
+            'updated_at': datetime.utcnow().isoformat(),
+            'messages': [],
+            'team_members': [user_id] if is_team_chat else []
+        }
+
+        # Save chats to file
+        self._save_chats()
+
+        return chat_id
+
+    def add_message(self, chat_id: str, content: str, user_id: str,
+                   is_user_message: bool = True) -> Dict[str, Any]:
+        """
+        Add a message to a chat.
+
+        Args:
+            chat_id: ID of the chat.
+            content: Message content.
+            user_id: ID of the user sending the message.
+            is_user_message: Whether this is a user message (vs. bot message).
+
+        Returns:
+            Added message.
+        """
+        if chat_id not in self.chats:
+            raise ValueError(f"Chat with ID {chat_id} not found")
+
+        # Create message data
+        message = {
+            'id': str(uuid.uuid4()),
+            'content': content,
+            'user_id': user_id if is_user_message else None,
+            'is_user_message': is_user_message,
+            'timestamp': datetime.utcnow().isoformat()
+        }
+
+        # Add message to chat
+        self.chats[chat_id]['messages'].append(message)
+
+        # Update chat timestamp
+        self.chats[chat_id]['updated_at'] = datetime.utcnow().isoformat()
+
+        # Save chats to file
+        self._save_chats()
+
+        return message
+
+    def get_chat(self, chat_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Get a chat by ID.
+
+        Args:
+            chat_id: ID of the chat.
+
+        Returns:
+            Chat data if found, None otherwise.
+        """
+        return self.chats.get(chat_id)
+
+    def get_user_chats(self, user_id: str) -> List[Dict[str, Any]]:
+        """
+        Get all chats for a user.
+
+        Args:
+            user_id: ID of the user.
+
+        Returns:
+            List of chat data.
+        """
+        user_chats = []
+
+        for chat_id, chat in self.chats.items():
+            # Include private chats owned by the user
+            if chat['user_id'] == user_id and not chat['is_team_chat']:
+                user_chats.append(chat)
+
+            # Include team chats where the user is a member
+            elif chat['is_team_chat'] and user_id in chat['team_members']:
+                user_chats.append(chat)
+
+        # Sort by updated_at (newest first)
+        user_chats.sort(key=lambda x: x['updated_at'], reverse=True)
+
+        return user_chats
+
+    def add_team_member(self, chat_id: str, user_id: str) -> bool:
+        """
+        Add a user to a team chat.
+
+        Args:
+            chat_id: ID of the team chat.
+            user_id: ID of the user to add.
+
+        Returns:
+            True if addition was successful, False otherwise.
+        """
+        if chat_id not in self.chats:
+            return False
+
+        chat = self.chats[chat_id]
+
+        if not chat['is_team_chat']:
+            return False
+
+        if user_id not in chat['team_members']:
+            chat['team_members'].append(user_id)
+            self._save_chats()
+
+        return True
+
+    def remove_team_member(self, chat_id: str, user_id: str) -> bool:
+        """
+        Remove a user from a team chat.
+
+        Args:
+            chat_id: ID of the team chat.
+            user_id: ID of the user to remove.
+
+        Returns:
+            True if removal was successful, False otherwise.
+        """
+        if chat_id not in self.chats:
+            return False
+
+        chat = self.chats[chat_id]
+
+        if not chat['is_team_chat']:
+            return False
+
+        if user_id in chat['team_members']:
+            chat['team_members'].remove(user_id)
+            self._save_chats()
+
+        return True
+
+    def delete_chat(self, chat_id: str) -> bool:
+        """
+        Delete a chat.
+
+        Args:
+            chat_id: ID of the chat to delete.
+
+        Returns:
+            True if deletion was successful, False otherwise.
+        """
+        if chat_id not in self.chats:
+            return False
+
+        del self.chats[chat_id]
+        self._save_chats()
+
+        return True
+
+    def get_chat_response(self, chat_id: str, message: str, user_id: str,
+                         use_rag: bool = False, temperature: Optional[float] = None,
+                         max_tokens: Optional[int] = None, top_p: Optional[float] = None,
+                         frequency_penalty: Optional[float] = None, presence_penalty: Optional[float] = None,
+                         stop_sequences: Optional[List[str]] = None, system_prompt: Optional[str] = None,
+                         min_p: Optional[float] = None, top_k: Optional[int] = None,
+                         repeat_penalty: Optional[float] = None, function_calling: Optional[bool] = None) -> Dict[str, Any]:
+        """
+        Get a response from the chatbot.
+
+        Args:
+            chat_id: ID of the chat.
+            message: User message.
+            user_id: ID of the user sending the message.
+            use_rag: Whether to use RAG (Retrieval Augmented Generation).
+            temperature: Controls randomness in the response.
+            max_tokens: Maximum number of tokens to generate.
+            top_p: Nucleus sampling parameter.
+            frequency_penalty: Penalizes repeated tokens.
+            presence_penalty: Penalizes repeated topics.
+            stop_sequences: Sequences where the API will stop generating.
+            system_prompt: System prompt to guide the model's behavior.
+            min_p: Minimum probability threshold for token selection.
+            top_k: Only sample from the top k tokens.
+            repeat_penalty: Penalty for repeating tokens.
+            function_calling: Whether to enable function calling.
+
+        Returns:
+            Bot response message.
+        """
+        if chat_id not in self.chats:
+            raise ValueError(f"Chat with ID {chat_id} not found")
+
+        chat = self.chats[chat_id]
+
+        # Add user message to chat
+        self.add_message(chat_id, message, user_id, is_user_message=True)
+
+        # Prepare conversation context for the model
+        context = []
+        for msg in chat['messages'][-10:]:  # Use last 10 messages as context
+            role = "user" if msg['is_user_message'] else "assistant"
+            context.append({
+                "role": role,
+                "content": msg['content']
+            })
+
+        # Create model parameters
+        model_params = ModelParameters(
+            temperature=temperature,
+            max_tokens=max_tokens,
+            top_p=top_p,
+            frequency_penalty=frequency_penalty,
+            presence_penalty=presence_penalty,
+            stop_sequences=stop_sequences,
+            system_prompt=system_prompt,
+            min_p=min_p,
+            top_k=top_k,
+            repeat_penalty=repeat_penalty,
+            function_calling=function_calling
+        )
+
+        # Get response from model
+        model_id = chat['model_id']
+        response_text = model_service.generate_response(
+            model_id=model_id,
+            prompt=message,
+            context=context,
+            use_rag=use_rag,
+            model_params=model_params
+        )
+
+        # Add bot response to chat
+        response_message = self.add_message(
+            chat_id=chat_id,
+            content=response_text,
+            user_id=user_id,
+            is_user_message=False
+        )
+
+        return response_message
+
+
+# Create a singleton instance
+chat_service = ChatService()
@@ -0,0 +1,170 @@
+"""
+Model parameters for AI models.
+"""
+
+from typing import Dict, Any, Optional, List
+from pydantic import BaseModel, Field, validator
+
+
+class ModelParameters(BaseModel):
+    """Parameters for AI model generation."""
+
+    # Basic parameters
+    temperature: Optional[float] = Field(
+        0.7,
+        description="Controls randomness: 0 is deterministic, higher values are more random",
+        ge=0.0,
+        le=2.0
+    )
+
+    max_tokens: Optional[int] = Field(
+        1000,
+        description="Maximum number of tokens to generate",
+        gt=0
+    )
+
+    # Sampling parameters
+    top_p: Optional[float] = Field(
+        1.0,
+        description="Nucleus sampling: consider tokens with top_p probability mass",
+        ge=0.0,
+        le=1.0
+    )
+
+    top_k: Optional[int] = Field(
+        None,
+        description="Only sample from the top k tokens",
+        gt=0
+    )
+
+    # Repetition control
+    frequency_penalty: Optional[float] = Field(
+        0.0,
+        description="Penalizes repeated tokens",
+        ge=-2.0,
+        le=2.0
+    )
+
+    presence_penalty: Optional[float] = Field(
+        0.0,
+        description="Penalizes repeated topics",
+        ge=-2.0,
+        le=2.0
+    )
+
+    # Advanced parameters
+    stop_sequences: Optional[List[str]] = Field(
+        None,
+        description="Sequences where the API will stop generating"
+    )
+
+    min_p: Optional[float] = Field(
+        None,
+        description="Minimum probability threshold for token selection",
+        ge=0.0,
+        le=1.0
+    )
+
+    repeat_penalty: Optional[float] = Field(
+        None,
+        description="Penalty for repeating tokens",
+        ge=0.0
+    )
+
+    presence_penalty_tokens: Optional[int] = Field(
+        None,
+        description="Number of tokens to consider for presence penalty",
+        gt=0
+    )
+
+    # System prompt
+    system_prompt: Optional[str] = Field(
+        None,
+        description="System prompt to guide the model's behavior"
+    )
+
+    # Function calling
+    function_calling: Optional[bool] = Field(
+        None,
+        description="Whether to enable function calling"
+    )
+
+    # Additional parameters that might be model-specific
+    extra_params: Optional[Dict[str, Any]] = Field(
+        None,
+        description="Additional model-specific parameters"
+    )
+
+    @validator('temperature', 'top_p', 'frequency_penalty', 'presence_penalty', pre=True)
+    def validate_float_params(cls, v):
+        """Validate float parameters."""
+        if v is not None and not isinstance(v, bool):  # Avoid converting bool to float
+            return float(v)
+        return v
+
+    @validator('max_tokens', 'top_k', pre=True)
+    def validate_int_params(cls, v):
+        """Validate integer parameters."""
+        if v is not None and not isinstance(v, bool):  # Avoid converting bool to int
+            return int(v)
+        return v
+
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert parameters to a dictionary, excluding None values.
+
+        Returns:
+            Dictionary of parameters.
+        """
+        result = {}
+        for key, value in self.dict().items():
+            if value is not None and key != 'extra_params':
+                result[key] = value
+
+        # Add any extra parameters
+        if self.extra_params:
+            result.update(self.extra_params)
+
+        return result
+
+    def for_provider(self, provider: str) -> Dict[str, Any]:
+        """
+        Get parameters formatted for a specific provider.
+
+        Args:
+            provider: Provider name (e.g., 'openai', 'ollama', 'anthropic').
+
+        Returns:
+            Dictionary of parameters formatted for the provider.
+        """
+        params = self.to_dict()
+
+        # Handle provider-specific parameter naming
+        if provider == 'openai':
+            # OpenAI uses 'stop' instead of 'stop_sequences'
+            if 'stop_sequences' in params:
+                params['stop'] = params.pop('stop_sequences')
+
+        elif provider == 'ollama':
+            # Ollama has specific parameter handling
+            # Remove parameters not supported by Ollama
+            params_to_keep = ['temperature', 'top_p', 'top_k', 'max_tokens', 'stop_sequences']
+            params = {k: v for k, v in params.items() if k in params_to_keep}
+
+            # Rename stop_sequences to stop if present
+            if 'stop_sequences' in params:
+                params['stop'] = params.pop('stop_sequences')
+
+        elif provider == 'anthropic':
+            # Anthropic uses 'stop_sequences' and different temperature scaling
+            if 'temperature' in params:
+                # Anthropic's temperature is typically 0-1
+                params['temperature'] = min(params['temperature'], 1.0)
+
+        elif provider == 'cohere':
+            # Cohere uses 'stop_sequences' and has some unique parameters
+            pass
+
+        # Add more provider-specific conversions as needed
+
+        return params
@@ -0,0 +1,243 @@
+"""
+Service for model management and interaction.
+"""
+
+import os
+import json
+import requests
+from typing import List, Dict, Any, Optional
+
+from ai_service.config import config
+from ai_service.embeddings.document_service import document_service
+from ai_service.models.model_parameters import ModelParameters
+
+class ModelService:
+    """Service for model management and interaction."""
+
+    # Available models
+    AVAILABLE_MODELS = {
+        'gemma3': {
+            'name': 'Gemma 3',
+            'description': 'Google Gemma 3 model via Ollama',
+            'provider': 'ollama',
+            'max_tokens': 8192
+        },
+        'llama3.3': {
+            'name': 'Llama 3 (70B)',
+            'description': 'Meta Llama 3 70B model via Ollama',
+            'provider': 'ollama',
+            'max_tokens': 8192
+        },
+        'llama3.1': {
+            'name': 'Llama 3 (8B)',
+            'description': 'Meta Llama 3 8B model via Ollama',
+            'provider': 'ollama',
+            'max_tokens': 8192
+        },
+        'mistral': {
+            'name': 'Mistral',
+            'description': 'Mistral AI model via Ollama',
+            'provider': 'ollama',
+            'max_tokens': 8192
+        },
+        'deepseek': {
+            'name': 'DeepSeek',
+            'description': 'DeepSeek model via Ollama',
+            'provider': 'ollama',
+            'max_tokens': 8192
+        }
+    }
+
+    def __init__(self):
+        """Initialize the model service."""
+        self.default_model = config.DEFAULT_MODEL
+        self.ollama_api_url = config.OLLAMA_API_URL
+        self.openwebui_url = config.OPENWEBUI_URL
+        self.openwebui_api_key = config.OPENWEBUI_API_KEY
+
+    def get_available_models(self) -> List[Dict[str, Any]]:
+        """
+        Get a list of available models.
+
+        Returns:
+            List of model information dictionaries.
+        """
+        models = []
+        for model_id, model_info in self.AVAILABLE_MODELS.items():
+            model_data = {
+                'id': model_id,
+                'is_default': model_id == self.default_model,
+                **model_info
+            }
+            models.append(model_data)
+
+        return models
+
+    def get_model_info(self, model_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Get information about a specific model.
+
+        Args:
+            model_id: ID of the model.
+
+        Returns:
+            Model information dictionary if found, None otherwise.
+        """
+        if model_id not in self.AVAILABLE_MODELS:
+            return None
+
+        return {
+            'id': model_id,
+            'is_default': model_id == self.default_model,
+            **self.AVAILABLE_MODELS[model_id]
+        }
+
+    def generate_response(self, model_id: str, prompt: str,
+                         context: Optional[List[Dict[str, str]]] = None,
+                         use_rag: bool = False,
+                         model_params: Optional[ModelParameters] = None) -> str:
+        """
+        Generate a response from the model.
+
+        Args:
+            model_id: ID of the model to use.
+            prompt: User prompt.
+            context: Optional conversation context.
+            use_rag: Whether to use RAG (Retrieval Augmented Generation).
+            model_params: Optional model parameters.
+
+        Returns:
+            Generated response.
+        """
+        if model_id not in self.AVAILABLE_MODELS:
+            model_id = self.default_model
+
+        # Get the provider for this model
+        provider = self.AVAILABLE_MODELS[model_id].get('provider', 'ollama')
+
+        # Prepare the messages for the API call
+        messages = []
+
+        # Use custom system prompt if provided, otherwise use default
+        system_content = "You are a helpful assistant."
+        if model_params and model_params.system_prompt:
+            system_content = model_params.system_prompt
+
+        messages.append({
+            "role": "system",
+            "content": system_content
+        })
+
+        # Add conversation context if provided
+        if context:
+            messages.extend(context)
+
+        # If RAG is enabled, use OpenWebUI's knowledge database
+        if use_rag:
+            # We'll use OpenWebUI's built-in RAG capabilities
+            # This is handled by sending the request to OpenWebUI instead of Ollama directly
+            try:
+                # Prepare the request for OpenWebUI
+                openwebui_request = {
+                    "model": model_id,
+                    "messages": messages + [{"role": "user", "content": prompt}],
+                    "use_knowledge": True,  # Enable RAG
+                    "stream": False
+                }
+
+                # Add model parameters if provided
+                if model_params:
+                    params = model_params.to_dict()
+                    # Map parameters to OpenWebUI format
+                    if 'temperature' in params:
+                        openwebui_request['temperature'] = params['temperature']
+                    if 'max_tokens' in params:
+                        openwebui_request['max_tokens'] = params['max_tokens']
+                    if 'top_p' in params:
+                        openwebui_request['top_p'] = params['top_p']
+
+                # Make the API call to OpenWebUI
+                headers = {"Content-Type": "application/json"}
+                if self.openwebui_api_key:
+                    headers["Authorization"] = f"Bearer {self.openwebui_api_key}"
+
+                # OpenWebUI API endpoint is /api/chat/completions
+                response = requests.post(
+                    f"{self.openwebui_url}/api/chat/completions",
+                    headers=headers,
+                    json=openwebui_request,
+                    timeout=60  # Longer timeout for RAG
+                )
+
+                response.raise_for_status()
+                result = response.json()
+
+                # Extract the response content
+                if 'message' in result:
+                    return result['message']['content']
+                else:
+                    return "Error: Unexpected response format from OpenWebUI"
+
+            except Exception as e:
+                print(f"Error calling OpenWebUI API: {str(e)}")
+                # Fall back to direct Ollama call without RAG
+                print("Falling back to direct Ollama call without RAG")
+                # Continue to the Ollama API call below
+
+        # Add user prompt
+        messages.append({
+            "role": "user",
+            "content": prompt
+        })
+
+        # Prepare API request parameters for Ollama
+        request_json = {
+            "model": model_id,
+            "messages": messages,
+            "stream": False
+        }
+
+        # Add model parameters if provided
+        if model_params:
+            params = model_params.to_dict()
+            # Map parameters to Ollama format
+            if 'temperature' in params:
+                request_json['temperature'] = params['temperature']
+            if 'top_p' in params:
+                request_json['top_p'] = params['top_p']
+            if 'top_k' in params:
+                request_json['top_k'] = params['top_k']
+            if 'max_tokens' in params:
+                request_json['max_tokens'] = params['max_tokens']
+
+        # Make the API call to Ollama
+        try:
+            # Ollama API endpoint is /api/chat or /api/generate
+            response = requests.post(
+                f"{self.ollama_api_url}/api/generate",
+                headers={"Content-Type": "application/json"},
+                json=request_json,
+                timeout=30
+            )
+
+            response.raise_for_status()
+            result = response.json()
+
+            # Extract the response content from Ollama
+            # The response format depends on whether we're using /api/chat or /api/generate
+            if 'message' in result and 'content' in result['message']:
+                # Format for /api/chat
+                return result['message']['content']
+            elif 'response' in result:
+                # Format for /api/generate
+                return result['response']
+            else:
+                return "Error: Unexpected response format from Ollama"
+
+        except Exception as e:
+            print(f"Error calling Ollama API: {str(e)}")
+            return f"Error generating response: {str(e)}"
+
+
+# Create a singleton instance
+model_service = ModelService()
@@ -0,0 +1,28 @@
+# Core dependencies with fixed versions for stability
+fastapi==0.103.1
+uvicorn[standard]==0.23.2
+pydantic==2.3.0
+python-dotenv==1.0.0
+
+# AI/ML dependencies
+pinecone-client==2.2.2
+langchain==0.0.267
+sentence-transformers==2.2.2
+numpy==1.26.4
+openai==1.3.0
+
+# For local storage (will be replaced with MySQL in production)
+sqlalchemy==2.0.20
+pymysql==1.1.0
+cryptography==41.0.3  # Required for PyMySQL
+
+# Utilities
+tqdm==4.67.1
+requests==2.32.3
+tenacity==8.5.0  # For retrying API calls
+
+# Production dependencies
+gunicorn==21.2.0  # Production WSGI server
+python-json-logger==2.0.7  # Structured logging
+prometheus-client==0.17.1  # Metrics
+sentry-sdk==1.39.1  # Error tracking
@@ -0,0 +1,19 @@
+# Core dependencies
+fastapi==0.103.1
+uvicorn==0.23.2
+pydantic==2.3.0
+python-dotenv==1.0.0
+
+# AI/ML dependencies
+pinecone-client==2.2.2
+langchain==0.0.267
+sentence-transformers==2.2.2
+numpy==1.26.4
+
+# For local storage
+sqlalchemy==2.0.20
+sqlite3==0.0.0  # This is a placeholder, sqlite3 is part of Python's standard library
+
+# Utilities
+tqdm==4.67.1
+requests==2.32.3
@@ -0,0 +1,21 @@
+"""
+Script to run the AI service.
+"""
+
+import uvicorn
+import os
+import sys
+
+# Add the parent directory to the path so we can import ai_service
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from ai_service.config import config
+
+if __name__ == "__main__":
+    print(f"Starting AI service on {config.API_HOST}:{config.API_PORT}")
+    uvicorn.run(
+        "ai_service.api:app",
+        host=config.API_HOST,
+        port=config.API_PORT,
+        reload=True
+    )