Made changes to suit OpenWebUI

2025-05-09 16:47:30 +01:00
parent ac98999507
commit 9535052645
14 changed files with 93 additions and 330 deletions
@@ -7,12 +7,9 @@ OPENWEBUI_URL=http://104.225.217.215:8080
 OPENWEBUI_API_KEY=your-openwebui-api-key  # Replace with the actual key in your .env file
 # Ollama configuration
-OLLAMA_API_URL=http://localhost:11434
+OLLAMA_API_URL=http://104.225.217.215:8080/ollama
 DEFAULT_MODEL=llama3.1
 # Local storage
 SQLITE_DB_PATH=ai_service/data/chatbot.db
 # Document processing
 CHUNK_SIZE=1000
 CHUNK_OVERLAP=200
@@ -1,33 +0,0 @@
 # API configuration
 API_HOST=0.0.0.0
 API_PORT=5251
 # Pinecone configuration
 # Sign up at https://www.pinecone.io/ to get your API key
 PINECONE_API_KEY=your-pinecone-api-key-here
 PINECONE_ENVIRONMENT=your-pinecone-environment-here
 PINECONE_INDEX_NAME=chatbot-index
 # Model configuration
 # Sign up at https://platform.openai.com/ to get your API key
 DEFAULT_MODEL=gpt-3.5-turbo
 OPENAI_API_KEY=your-openai-api-key-here
 # Local storage
 # Path to SQLite database (will be replaced with MySQL in production)
 SQLITE_DB_PATH=ai_service/data/chatbot.db
 # Document processing
 # Adjust these values based on your needs
 CHUNK_SIZE=1000
 CHUNK_OVERLAP=200
 # Embedding model
 # Options: all-MiniLM-L6-v2 (default), paraphrase-MiniLM-L3-v2 (smaller/faster)
 EMBEDDING_MODEL=all-MiniLM-L6-v2
 # Production settings
 # Set to 'production' in production environment
 ENVIRONMENT=production
 LOG_LEVEL=INFO
 ENABLE_MOCK=false
@@ -23,15 +23,9 @@ class Config:
    OLLAMA_API_URL = os.environ.get('OLLAMA_API_URL', 'http://localhost:11434')
    DEFAULT_MODEL = os.environ.get('DEFAULT_MODEL', 'llama3.1')
    # Local storage
    SQLITE_DB_PATH = os.environ.get('SQLITE_DB_PATH', 'ai_service/data/chatbot.db')
    # Document processing
    CHUNK_SIZE = int(os.environ.get('CHUNK_SIZE', 1000))
    CHUNK_OVERLAP = int(os.environ.get('CHUNK_OVERLAP', 200))
    # Embedding model
    EMBEDDING_MODEL = os.environ.get('EMBEDDING_MODEL', 'all-MiniLM-L6-v2')
 config = Config()
@@ -24,5 +24,57 @@
      }
    ],
    "team_members": []
  },
  "79cb7d8f-f0ce-4cef-bdb1-02a1620bc947": {
    "id": "79cb7d8f-f0ce-4cef-bdb1-02a1620bc947",
    "title": "Test Chat",
    "user_id": "test_user",
    "model_id": "llama3.1",
    "is_team_chat": false,
    "created_at": "2025-05-09T15:36:12.454269",
    "updated_at": "2025-05-09T15:36:13.909638",
    "messages": [
      {
        "id": "e0826a7c-e0b8-48ea-a435-4568da33dd7e",
        "content": "Hello, AI!",
        "user_id": "test_user",
        "is_user_message": true,
        "timestamp": "2025-05-09T15:36:12.488522"
      },
      {
        "id": "5d804015-e8fa-4d0e-a519-8d5ea27cedee",
        "content": "Error generating response: 403 Client Error: Forbidden for url: http://104.225.217.215:8080/ollama/api/generate",
        "user_id": null,
        "is_user_message": false,
        "timestamp": "2025-05-09T15:36:13.909623"
      }
    ],
    "team_members": []
  },
  "62347aa7-db22-4558-9aa2-6600ba48fb8f": {
    "id": "62347aa7-db22-4558-9aa2-6600ba48fb8f",
    "title": "Test Chat",
    "user_id": "test_user",
    "model_id": "llama3.1",
    "is_team_chat": false,
    "created_at": "2025-05-09T15:37:55.028615",
    "updated_at": "2025-05-09T15:38:26.192942",
    "messages": [
      {
        "id": "3cde3be3-9367-4b3d-a4d9-04e484b8b75a",
        "content": "Hello, AI!",
        "user_id": "test_user",
        "is_user_message": true,
        "timestamp": "2025-05-09T15:37:55.033282"
      },
      {
        "id": "39ce98e3-4a8f-4ce7-a2e8-9e5f42e79fcb",
        "content": "Error generating response: HTTPConnectionPool(host='104.225.217.215', port=8080): Read timed out. (read timeout=30)",
        "user_id": null,
        "is_user_message": false,
        "timestamp": "2025-05-09T15:38:26.192928"
      }
    ],
    "team_members": []
  }
 }
@@ -14,7 +14,7 @@ export API_PORT=5251
 export PYTHONPATH=$PYTHONPATH:$(pwd)
 # Run the application with uvicorn and nohup
-nohup uvicorn ai_service.run:app --host $API_HOST --port $API_PORT > logs/ai_service.log 2>&1 &
+nohup uvicorn ai_service.api:app --host $API_HOST --port $API_PORT > logs/ai_service.log 2>&1 &
 # Tell the operator where output goes; the path must match the uvicorn redirect target (logs/ai_service.log).
 echo "AI Service started on port $API_PORT. Check logs/ai_service.log for output."
 echo "To stop the application, find the process ID with 'ps aux | grep uvicorn' and kill it with 'kill <PID>'."
@@ -30,10 +30,10 @@ class DocumentService:
        self.openwebui_api_key = config.OPENWEBUI_API_KEY
        # Ensure data directory exists
-        os.makedirs(os.path.dirname(config.SQLITE_DB_PATH), exist_ok=True)
+        os.makedirs('ai_service/data', exist_ok=True)
        # For now, we'll store document metadata in a simple JSON file
-        self.metadata_file = os.path.join(os.path.dirname(config.SQLITE_DB_PATH), 'document_metadata.json')
+        self.metadata_file = 'ai_service/data/document_metadata.json'
        self._load_metadata()
    def _load_metadata(self):
@@ -1,214 +0,0 @@
 """
 Service for generating and managing embeddings.
 """
 import os
 import random
 import pinecone
 import numpy as np
 from typing import List, Dict, Any, Optional, Union
 from sentence_transformers import SentenceTransformer
 from ai_service.config import config
 class EmbeddingService:
    """Generate text embeddings and store/search them in a Pinecone index.
    With use_mock=True (the default) no model is loaded: embeddings are random
    vectors and store/search/delete return canned results without touching
    Pinecone. With use_mock=False a SentenceTransformer model is loaded; if
    loading fails the service falls back to mock mode.
    """
    # Length of mock embedding vectors; also used as the index dimension
    # when no real model is loaded.
    MOCK_EMBEDDING_DIM = 384
    def __init__(self, use_mock=True):  # Default to mock implementation
        """Initialize the embedding service.
        Args:
            use_mock: When True, skip loading the model and use mock embeddings.
        """
        self.use_mock = use_mock
        # Always define the attribute so later code can test it safely,
        # even when loading the real model fails.
        self.model = None
        if not self.use_mock:
            # Use a smaller model for testing
            self.model_name = "paraphrase-MiniLM-L3-v2"  # Smaller model than the default
            try:
                self.model = SentenceTransformer(self.model_name)
                print(f"Loaded embedding model: {self.model_name}")
            except Exception as e:
                print(f"Error loading embedding model: {str(e)}")
                self.use_mock = True
                print("Falling back to mock implementation")
        else:
            print("Using mock embedding implementation")
            self.model_name = "mock-model"
        self._initialize_pinecone()
    def _initialize_pinecone(self):
        """Connect to the configured Pinecone index, creating it if missing.
        Sets self.index to the index handle, or to None when the API key or
        environment is not configured or when any Pinecone call fails.
        """
        if not config.PINECONE_API_KEY or not config.PINECONE_ENVIRONMENT:
            print("Warning: Pinecone API key or environment not set. Vector storage will not be available.")
            self.index = None
            return
        try:
            pinecone.init(
                api_key=config.PINECONE_API_KEY,
                environment=config.PINECONE_ENVIRONMENT
            )
            # Check if index exists, create if it doesn't
            if config.PINECONE_INDEX_NAME not in pinecone.list_indexes():
                # In mock mode there is no model to query, so fall back to the
                # mock vector length instead of dereferencing None.
                if self.model is not None:
                    dimension = self.model.get_sentence_embedding_dimension()
                else:
                    dimension = self.MOCK_EMBEDDING_DIM
                pinecone.create_index(
                    name=config.PINECONE_INDEX_NAME,
                    dimension=dimension,
                    metric="cosine"
                )
            self.index = pinecone.Index(config.PINECONE_INDEX_NAME)
            print(f"Connected to Pinecone index: {config.PINECONE_INDEX_NAME}")
        except Exception as e:
            print(f"Error connecting to Pinecone: {str(e)}")
            self.index = None
    def generate_embedding(self, text: str) -> List[float]:
        """
        Generate an embedding for a text.
        Args:
            text: Text to embed.
        Returns:
            Embedding vector (random values of length MOCK_EMBEDDING_DIM in mock mode).
        """
        if self.use_mock:
            # Generate a mock embedding vector (fixed length for consistency)
            return [random.random() for _ in range(self.MOCK_EMBEDDING_DIM)]
        embedding = self.model.encode(text)
        return embedding.tolist()
    def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
        """
        Generate embeddings for multiple texts.
        Args:
            texts: List of texts to embed.
        Returns:
            List of embedding vectors, one per input text.
        """
        if self.use_mock:
            # Generate mock embedding vectors
            return [
                [random.random() for _ in range(self.MOCK_EMBEDDING_DIM)]
                for _ in texts
            ]
        embeddings = self.model.encode(texts)
        return embeddings.tolist()
    def store_embeddings(self, ids: List[str], embeddings: List[List[float]],
                         metadata: Optional[List[Dict[str, Any]]] = None) -> bool:
        """
        Store embeddings in Pinecone.
        Args:
            ids: List of IDs for the embeddings.
            embeddings: List of embedding vectors.
            metadata: Optional list of metadata dictionaries.
        Returns:
            True if storage was successful, False otherwise (no index, mismatched
            input lengths, or a failed upsert).
        """
        if self.use_mock:
            print(f"Mock: Stored {len(ids)} embeddings")
            return True
        if self.index is None:
            print("Warning: Pinecone index not available. Embeddings not stored.")
            return False
        if metadata is None:
            metadata = [{} for _ in ids]
        # zip() would silently drop the unmatched tail and store a partial batch.
        if not (len(ids) == len(embeddings) == len(metadata)):
            print("Warning: ids, embeddings and metadata differ in length. Embeddings not stored.")
            return False
        vectors = [
            (vector_id, embedding, meta)
            for vector_id, embedding, meta in zip(ids, embeddings, metadata)
        ]
        try:
            self.index.upsert(vectors=vectors)
            return True
        except Exception as e:
            print(f"Error storing embeddings in Pinecone: {str(e)}")
            return False
    def search_similar(self, query_embedding: List[float], top_k: int = 5) -> List[Dict[str, Any]]:
        """
        Search for similar embeddings in Pinecone.
        Args:
            query_embedding: Query embedding vector.
            top_k: Number of results to return.
        Returns:
            List of dicts with 'id', 'score' and 'metadata' keys; empty when the
            index is unavailable or the query fails.
        """
        if self.use_mock:
            # Generate mock search results
            print(f"Mock: Searching for similar embeddings (top_k={top_k})")
            mock_results = []
            for i in range(min(top_k, 3)):  # Return at most 3 mock results
                mock_results.append({
                    'id': f"mock_doc_{i}",
                    'score': 0.9 - (i * 0.1),  # Decreasing similarity scores
                    'metadata': {
                        'document_id': f"mock_doc_{i}",
                        'chunk_index': i,
                        'title': f"Mock Document {i}",
                        'description': f"This is a mock document {i}",
                        'chunk_text': f"This is the content of mock document {i}..."
                    }
                })
            return mock_results
        if self.index is None:
            print("Warning: Pinecone index not available. Search not performed.")
            return []
        try:
            results = self.index.query(
                vector=query_embedding,
                top_k=top_k,
                include_metadata=True
            )
            return [
                {
                    'id': match['id'],
                    'score': match['score'],
                    'metadata': match.get('metadata', {})
                }
                for match in results.get('matches', [])
            ]
        except Exception as e:
            print(f"Error searching in Pinecone: {str(e)}")
            return []
    def delete_embeddings(self, ids: List[str]) -> bool:
        """
        Delete embeddings from Pinecone.
        Args:
            ids: List of IDs to delete.
        Returns:
            True if deletion was successful, False otherwise.
        """
        if self.use_mock:
            print(f"Mock: Deleted {len(ids)} embeddings")
            return True
        if self.index is None:
            print("Warning: Pinecone index not available. Deletion not performed.")
            return False
        try:
            self.index.delete(ids=ids)
            return True
        except Exception as e:
            print(f"Error deleting embeddings from Pinecone: {str(e)}")
            return False
 # Module-level singleton shared by importers. It is constructed at import time
 # with the constructor default (use_mock=True), so no embedding model is loaded
 # here; the constructor still attempts the Pinecone connection.
 embedding_service = EmbeddingService()
@@ -18,10 +18,10 @@ class ChatService:
    def __init__(self):
        """Initialize the chat service."""
        # Ensure data directory exists
-        os.makedirs(os.path.dirname(config.SQLITE_DB_PATH), exist_ok=True)
+        os.makedirs('ai_service/data', exist_ok=True)
        # For now, we'll store chat data in a simple JSON file
-        self.chats_file = os.path.join(os.path.dirname(config.SQLITE_DB_PATH), 'chats.json')
+        self.chats_file = 'ai_service/data/chats.json'
        self._load_chats()
    def _load_chats(self):
@@ -212,12 +212,17 @@ class ModelService:
        # Make the API call to Ollama
        try:
            # Prepare headers with API key
            headers = {"Content-Type": "application/json"}
            if self.openwebui_api_key:
                headers["Authorization"] = f"Bearer {self.openwebui_api_key}"
            # Ollama API endpoint is /api/chat or /api/generate
            response = requests.post(
                f"{self.ollama_api_url}/api/generate",
-                headers={"Content-Type": "application/json"},
+                headers=headers,
                json=request_json,
-                timeout=30
+                timeout=60
            )
            response.raise_for_status()
@@ -1,28 +0,0 @@
 # Core dependencies with fixed versions for stability
 fastapi==0.103.1
 uvicorn[standard]==0.23.2
 pydantic==2.3.0
 python-dotenv==1.0.0
 # AI/ML dependencies
 pinecone-client==2.2.2
 langchain==0.0.267
 sentence-transformers==2.2.2
 numpy==1.26.4
 openai==1.3.0
 # For local storage (will be replaced with MySQL in production)
 sqlalchemy==2.0.20
 pymysql==1.1.0
 cryptography==41.0.3  # Required for PyMySQL
 # Utilities
 tqdm==4.67.1
 requests==2.32.3
 tenacity==8.5.0  # For retrying API calls
 # Production dependencies
 gunicorn==21.2.0  # Production WSGI server
 python-json-logger==2.0.7  # Structured logging
 prometheus-client==0.17.1  # Metrics
 sentry-sdk==1.39.1  # Error tracking
@@ -4,16 +4,8 @@ uvicorn==0.23.2
 pydantic==2.3.0
 python-dotenv==1.0.0
-# AI/ML dependencies
+# Document processing
-pinecone-client==2.2.2
+langchain-text-splitters==0.3.8
 langchain==0.0.267
 sentence-transformers==2.2.2
 numpy==1.26.4
 # For local storage
 sqlalchemy==2.0.20
 sqlite3==0.0.0  # This is a placeholder, sqlite3 is part of Python's standard library
 # Utilities
 tqdm==4.67.1
 requests==2.32.3
@@ -2,44 +2,37 @@
 # Stop any existing service
 pkill -f "uvicorn ai_service.api:app" || true
 pkill -f "run_ai_service.py" || true
 # Create data directory if it doesn't exist
 mkdir -p ai_service/data
 # Set environment variables for testing
 # In production, replace these with your actual API keys
 export PINECONE_API_KEY="test-pinecone-api-key"
 export PINECONE_ENVIRONMENT="test-pinecone-environment"
 export OPENAI_API_KEY="test-openai-api-key"
 # Create empty files for local storage if they don't exist
 touch ai_service/data/chatbot.db
 touch ai_service/data/document_metadata.json
 touch ai_service/data/chats.json
-# For testing purposes, we'll use a simplified API
+# Start the AI service
-echo "Starting Simple API Service on port 5251..."
+echo "Starting AI Service on port 5251..."
 VENV_PATH="./venv"
 PYTHON_PATH="$VENV_PATH/bin/python"
 # Check if the virtual environment exists
 if [ -f "$PYTHON_PATH" ]; then
    echo "Using Python from virtual environment: $PYTHON_PATH"
-    # Use the simplified API for testing
+    nohup $PYTHON_PATH run_ai_service.py > ai_service.log 2>&1 &
    nohup $PYTHON_PATH simple_api.py > ai_service.log 2>&1 &
 else
    echo "Virtual environment not found at $VENV_PATH, using system Python"
-    nohup python simple_api.py > ai_service.log 2>&1 &
+    nohup python run_ai_service.py > ai_service.log 2>&1 &
 fi
 # Wait a moment for the service to start
 sleep 2
 # Check if the service is running
-if pgrep -f "simple_api.py" > /dev/null; then
+if pgrep -f "run_ai_service.py" > /dev/null; then
    echo "AI Service started successfully on port 5251"
    echo "Check ai_service.log for output"
-    echo "To stop the service, run: pkill -f \"simple_api.py\""
+    echo "To stop the service, run: pkill -f \"run_ai_service.py\""
    # Test the health endpoint
    echo -e "\nTesting health endpoint..."
@@ -60,7 +53,7 @@ response = requests.post(
    json={
        "user_id": "test_user",
        "title": "Test Chat",
-        "model_id": "gpt-3.5-turbo"
+        "model_id": "llama3.1"
    }
 )
@@ -4,10 +4,10 @@
 # Usage: ./remote_deploy.sh [server_ip] [user] [port] [remote_dir]
 # Default values
-SERVER_IP=${1:-"104.225.217.215"}
+SERVER_IP=${1:-"104.225.217.215"}  # IP address of the server where OpenWebUI is installed
-SERVER_USER=${2:-"root"}
+SERVER_USER=${2:-"root"}           # SSH username for the server
-SERVER_PORT=${3:-"22"}
+SERVER_PORT=${3:-"22"}             # SSH port for the server
-REMOTE_DIR=${4:-"/root/openwebui"}
+REMOTE_DIR=${4:-"/root/openwebui"} # Directory where OpenWebUI is installed
 LOCAL_DIR="."
 echo "Deploying to server: $SERVER_IP"
@@ -36,12 +36,22 @@ ssh -p $SERVER_PORT $SERVER_USER@$SERVER_IP "cd $REMOTE_DIR/ai_service_app && \
    python3 -m venv venv || true && \
    source venv/bin/activate && \
    pip install --upgrade pip && \
-    pip install -r requirements.txt && \
+    pip install -r ai_service/requirements.txt && \
-    pip install python-dotenv langchain-text-splitters"
+    pip install python-dotenv langchain-text-splitters requests"
 # Stop any existing service
 echo "Stopping any existing service..."
-ssh -p $SERVER_PORT $SERVER_USER@$SERVER_IP "pkill -f 'uvicorn ai_service.run:app' || true"
+ssh -p $SERVER_PORT $SERVER_USER@$SERVER_IP "pkill -f 'uvicorn ai_service.api:app' || true"
 # Set up environment file if it doesn't exist
 echo "Setting up environment file..."
 ssh -p $SERVER_PORT $SERVER_USER@$SERVER_IP "cd $REMOTE_DIR/ai_service_app && \
    if [ ! -f ai_service/.env ]; then \
        cp ai_service/.env.example ai_service/.env; \
        echo 'Created .env file from .env.example'; \
    else \
        echo '.env file already exists'; \
    fi"
 # Start the service
 echo "Starting the service..."
@@ -52,7 +62,7 @@ ssh -p $SERVER_PORT $SERVER_USER@$SERVER_IP "cd $REMOTE_DIR/ai_service_app && \
 # Check if the service is running
 echo "Checking if the service is running..."
 sleep 5
-ssh -p $SERVER_PORT $SERVER_USER@$SERVER_IP "ps aux | grep 'uvicorn ai_service.run:app' | grep -v grep"
+ssh -p $SERVER_PORT $SERVER_USER@$SERVER_IP "ps aux | grep 'uvicorn ai_service.api:app' | grep -v grep"
 if [ $? -eq 0 ]; then
    echo "Service is running!"
    echo "You can access the API at: http://$SERVER_IP:5251"
@@ -6,11 +6,6 @@ import uvicorn
 import os
 import sys
 # Set environment variables for testing
 os.environ['PINECONE_API_KEY'] = 'test-key'
 os.environ['PINECONE_ENVIRONMENT'] = 'test-env'
 os.environ['OPENAI_API_KEY'] = 'test-key'
 # Run the service
 if __name__ == "__main__":
    print("Starting AI service on 0.0.0.0:5251")