diff --git a/ai_service/.env.example b/ai_service/.env.example index 3803587..8fd3757 100644 --- a/ai_service/.env.example +++ b/ai_service/.env.example @@ -7,12 +7,9 @@ OPENWEBUI_URL=http://104.225.217.215:8080 OPENWEBUI_API_KEY=your-openwebui-api-key # Replace with the actual key in your .env file # Ollama configuration -OLLAMA_API_URL=http://localhost:11434 +OLLAMA_API_URL=http://104.225.217.215:8080/ollama DEFAULT_MODEL=llama3.1 -# Local storage -SQLITE_DB_PATH=ai_service/data/chatbot.db - # Document processing CHUNK_SIZE=1000 CHUNK_OVERLAP=200 diff --git a/ai_service/.env.production b/ai_service/.env.production deleted file mode 100644 index 5d2cafd..0000000 --- a/ai_service/.env.production +++ /dev/null @@ -1,33 +0,0 @@ -# API configuration -API_HOST=0.0.0.0 -API_PORT=5251 - -# Pinecone configuration -# Sign up at https://www.pinecone.io/ to get your API key -PINECONE_API_KEY=your-pinecone-api-key-here -PINECONE_ENVIRONMENT=your-pinecone-environment-here -PINECONE_INDEX_NAME=chatbot-index - -# Model configuration -# Sign up at https://platform.openai.com/ to get your API key -DEFAULT_MODEL=gpt-3.5-turbo -OPENAI_API_KEY=your-openai-api-key-here - -# Local storage -# Path to SQLite database (will be replaced with MySQL in production) -SQLITE_DB_PATH=ai_service/data/chatbot.db - -# Document processing -# Adjust these values based on your needs -CHUNK_SIZE=1000 -CHUNK_OVERLAP=200 - -# Embedding model -# Options: all-MiniLM-L6-v2 (default), paraphrase-MiniLM-L3-v2 (smaller/faster) -EMBEDDING_MODEL=all-MiniLM-L6-v2 - -# Production settings -# Set to 'production' in production environment -ENVIRONMENT=production -LOG_LEVEL=INFO -ENABLE_MOCK=false diff --git a/ai_service/config.py b/ai_service/config.py index f373100..3d5888d 100644 --- a/ai_service/config.py +++ b/ai_service/config.py @@ -23,15 +23,9 @@ class Config: OLLAMA_API_URL = os.environ.get('OLLAMA_API_URL', 'http://localhost:11434') DEFAULT_MODEL = os.environ.get('DEFAULT_MODEL', 'llama3.1') - # Local storage - SQLITE_DB_PATH = os.environ.get('SQLITE_DB_PATH', 'ai_service/data/chatbot.db') - # Document processing CHUNK_SIZE = int(os.environ.get('CHUNK_SIZE', 1000)) CHUNK_OVERLAP = int(os.environ.get('CHUNK_OVERLAP', 200)) - # Embedding model - EMBEDDING_MODEL = os.environ.get('EMBEDDING_MODEL', 'all-MiniLM-L6-v2') - config = Config() diff --git a/ai_service/data/chats.json b/ai_service/data/chats.json index b50e610..8ec329f 100644 --- a/ai_service/data/chats.json +++ b/ai_service/data/chats.json @@ -24,5 +24,57 @@ } ], "team_members": [] + }, + "79cb7d8f-f0ce-4cef-bdb1-02a1620bc947": { + "id": "79cb7d8f-f0ce-4cef-bdb1-02a1620bc947", + "title": "Test Chat", + "user_id": "test_user", + "model_id": "llama3.1", + "is_team_chat": false, + "created_at": "2025-05-09T15:36:12.454269", + "updated_at": "2025-05-09T15:36:13.909638", + "messages": [ + { + "id": "e0826a7c-e0b8-48ea-a435-4568da33dd7e", + "content": "Hello, AI!", + "user_id": "test_user", + "is_user_message": true, + "timestamp": "2025-05-09T15:36:12.488522" + }, + { + "id": "5d804015-e8fa-4d0e-a519-8d5ea27cedee", + "content": "Error generating response: 403 Client Error: Forbidden for url: http://104.225.217.215:8080/ollama/api/generate", + "user_id": null, + "is_user_message": false, + "timestamp": "2025-05-09T15:36:13.909623" + } + ], + "team_members": [] + }, + "62347aa7-db22-4558-9aa2-6600ba48fb8f": { + "id": "62347aa7-db22-4558-9aa2-6600ba48fb8f", + "title": "Test Chat", + "user_id": "test_user", + "model_id": "llama3.1", + "is_team_chat": false, + "created_at": "2025-05-09T15:37:55.028615", + "updated_at": "2025-05-09T15:38:26.192942", + "messages": [ + { + "id": "3cde3be3-9367-4b3d-a4d9-04e484b8b75a", + "content": "Hello, AI!", + "user_id": "test_user", + "is_user_message": true, + "timestamp": "2025-05-09T15:37:55.033282" + }, + { + "id": "39ce98e3-4a8f-4ce7-a2e8-9e5f42e79fcb", + "content": "Error generating response: HTTPConnectionPool(host='104.225.217.215', port=8080): Read timed out. (read timeout=30)", + "user_id": null, + "is_user_message": false, + "timestamp": "2025-05-09T15:38:26.192928" + } + ], + "team_members": [] } } \ No newline at end of file diff --git a/ai_service/deploy.sh b/ai_service/deploy.sh index 4cc0e7f..3f61ab9 100755 --- a/ai_service/deploy.sh +++ b/ai_service/deploy.sh @@ -14,7 +14,7 @@ export API_PORT=5251 export PYTHONPATH=$PYTHONPATH:$(pwd) # Run the application with uvicorn and nohup -nohup uvicorn ai_service.run:app --host $API_HOST --port $API_PORT > logs/ai_service.log 2>&1 & +nohup uvicorn ai_service.api:app --host $API_HOST --port $API_PORT > logs/ai_service.log 2>&1 & echo "AI Service started on port $API_PORT. Check ai_service.log for output." echo "To stop the application, find the process ID with 'ps aux | grep uvicorn' and kill it with 'kill '." diff --git a/ai_service/embeddings/document_service.py b/ai_service/embeddings/document_service.py index 17b87c4..be48c4d 100644 --- a/ai_service/embeddings/document_service.py +++ b/ai_service/embeddings/document_service.py @@ -30,10 +30,10 @@ class DocumentService: self.openwebui_api_key = config.OPENWEBUI_API_KEY # Ensure data directory exists - os.makedirs(os.path.dirname(config.SQLITE_DB_PATH), exist_ok=True) + os.makedirs('ai_service/data', exist_ok=True) # For now, we'll store document metadata in a simple JSON file - self.metadata_file = os.path.join(os.path.dirname(config.SQLITE_DB_PATH), 'document_metadata.json') + self.metadata_file = 'ai_service/data/document_metadata.json' self._load_metadata() def _load_metadata(self): diff --git a/ai_service/embeddings/embedding_service.py b/ai_service/embeddings/embedding_service.py deleted file mode 100644 index a7212b7..0000000 --- a/ai_service/embeddings/embedding_service.py +++ /dev/null @@ -1,214 +0,0 @@ -""" -Service for generating and managing embeddings. -""" - -import os -import random -import pinecone -import numpy as np -from typing import List, Dict, Any, Optional, Union -from sentence_transformers import SentenceTransformer - -from ai_service.config import config - -class EmbeddingService: - """Service for generating and managing embeddings.""" - - def __init__(self, use_mock=True): # Default to mock implementation - """Initialize the embedding service.""" - self.use_mock = use_mock - - if not self.use_mock: - # Use a smaller model for testing - self.model_name = "paraphrase-MiniLM-L3-v2" # Smaller model than the default - try: - self.model = SentenceTransformer(self.model_name) - print(f"Loaded embedding model: {self.model_name}") - except Exception as e: - print(f"Error loading embedding model: {str(e)}") - self.use_mock = True - print("Falling back to mock implementation") - else: - print("Using mock embedding implementation") - self.model_name = "mock-model" - self.model = None - - self._initialize_pinecone() - - def _initialize_pinecone(self): - """Initialize Pinecone client.""" - if not config.PINECONE_API_KEY or not config.PINECONE_ENVIRONMENT: - print("Warning: Pinecone API key or environment not set. Vector storage will not be available.") - self.index = None - return - - try: - pinecone.init( - api_key=config.PINECONE_API_KEY, - environment=config.PINECONE_ENVIRONMENT - ) - - # Check if index exists, create if it doesn't - if config.PINECONE_INDEX_NAME not in pinecone.list_indexes(): - pinecone.create_index( - name=config.PINECONE_INDEX_NAME, - dimension=self.model.get_sentence_embedding_dimension(), - metric="cosine" - ) - - self.index = pinecone.Index(config.PINECONE_INDEX_NAME) - print(f"Connected to Pinecone index: {config.PINECONE_INDEX_NAME}") - except Exception as e: - print(f"Error connecting to Pinecone: {str(e)}") - self.index = None - - def generate_embedding(self, text: str) -> List[float]: - """ - Generate an embedding for a text. - - Args: - text: Text to embed. - - Returns: - Embedding vector. - """ - if self.use_mock: - # Generate a mock embedding vector (384 dimensions for consistency) - return [random.random() for _ in range(384)] - - embedding = self.model.encode(text) - return embedding.tolist() - - def generate_embeddings(self, texts: List[str]) -> List[List[float]]: - """ - Generate embeddings for multiple texts. - - Args: - texts: List of texts to embed. - - Returns: - List of embedding vectors. - """ - if self.use_mock: - # Generate mock embedding vectors - return [[random.random() for _ in range(384)] for _ in texts] - - embeddings = self.model.encode(texts) - return embeddings.tolist() - - def store_embeddings(self, ids: List[str], embeddings: List[List[float]], - metadata: Optional[List[Dict[str, Any]]] = None) -> bool: - """ - Store embeddings in Pinecone. - - Args: - ids: List of IDs for the embeddings. - embeddings: List of embedding vectors. - metadata: Optional list of metadata dictionaries. - - Returns: - True if storage was successful, False otherwise. - """ - if self.use_mock: - print(f"Mock: Stored {len(ids)} embeddings") - return True - - if self.index is None: - print("Warning: Pinecone index not available. Embeddings not stored.") - return False - - if metadata is None: - metadata = [{} for _ in ids] - - vectors = [ - (id, embedding, meta) - for id, embedding, meta in zip(ids, embeddings, metadata) - ] - - try: - self.index.upsert(vectors=vectors) - return True - except Exception as e: - print(f"Error storing embeddings in Pinecone: {str(e)}") - return False - - def search_similar(self, query_embedding: List[float], top_k: int = 5) -> List[Dict[str, Any]]: - """ - Search for similar embeddings in Pinecone. - - Args: - query_embedding: Query embedding vector. - top_k: Number of results to return. - - Returns: - List of similar items with their metadata. - """ - if self.use_mock: - # Generate mock search results - print(f"Mock: Searching for similar embeddings (top_k={top_k})") - mock_results = [] - for i in range(min(top_k, 3)): # Return at most 3 mock results - mock_results.append({ - 'id': f"mock_doc_{i}", - 'score': 0.9 - (i * 0.1), # Decreasing similarity scores - 'metadata': { - 'document_id': f"mock_doc_{i}", - 'chunk_index': i, - 'title': f"Mock Document {i}", - 'description': f"This is a mock document {i}", - 'chunk_text': f"This is the content of mock document {i}..." - } - }) - return mock_results - - if self.index is None: - print("Warning: Pinecone index not available. Search not performed.") - return [] - - try: - results = self.index.query( - vector=query_embedding, - top_k=top_k, - include_metadata=True - ) - - return [ - { - 'id': match['id'], - 'score': match['score'], - 'metadata': match.get('metadata', {}) - } - for match in results.get('matches', []) - ] - except Exception as e: - print(f"Error searching in Pinecone: {str(e)}") - return [] - - def delete_embeddings(self, ids: List[str]) -> bool: - """ - Delete embeddings from Pinecone. - - Args: - ids: List of IDs to delete. - - Returns: - True if deletion was successful, False otherwise. - """ - if self.use_mock: - print(f"Mock: Deleted {len(ids)} embeddings") - return True - - if self.index is None: - print("Warning: Pinecone index not available. Deletion not performed.") - return False - - try: - self.index.delete(ids=ids) - return True - except Exception as e: - print(f"Error deleting embeddings from Pinecone: {str(e)}") - return False - - -# Create a singleton instance -embedding_service = EmbeddingService() diff --git a/ai_service/models/chat_service.py b/ai_service/models/chat_service.py index a583c3e..54fa9a8 100644 --- a/ai_service/models/chat_service.py +++ b/ai_service/models/chat_service.py @@ -18,10 +18,10 @@ class ChatService: def __init__(self): """Initialize the chat service.""" # Ensure data directory exists - os.makedirs(os.path.dirname(config.SQLITE_DB_PATH), exist_ok=True) + os.makedirs('ai_service/data', exist_ok=True) # For now, we'll store chat data in a simple JSON file - self.chats_file = os.path.join(os.path.dirname(config.SQLITE_DB_PATH), 'chats.json') + self.chats_file = 'ai_service/data/chats.json' self._load_chats() def _load_chats(self): diff --git a/ai_service/models/model_service.py b/ai_service/models/model_service.py index 638feee..3b185fc 100644 --- a/ai_service/models/model_service.py +++ b/ai_service/models/model_service.py @@ -212,12 +212,17 @@ class ModelService: # Make the API call to Ollama try: + # Prepare headers with API key + headers = {"Content-Type": "application/json"} + if self.openwebui_api_key: + headers["Authorization"] = f"Bearer {self.openwebui_api_key}" + # Ollama API endpoint is /api/chat or /api/generate response = requests.post( f"{self.ollama_api_url}/api/generate", - headers={"Content-Type": "application/json"}, + headers=headers, json=request_json, - timeout=30 + timeout=60 ) response.raise_for_status() diff --git a/ai_service/requirements.production.txt b/ai_service/requirements.production.txt deleted file mode 100644 index 2007d95..0000000 --- a/ai_service/requirements.production.txt +++ /dev/null @@ -1,28 +0,0 @@ -# Core dependencies with fixed versions for stability -fastapi==0.103.1 -uvicorn[standard]==0.23.2 -pydantic==2.3.0 -python-dotenv==1.0.0 - -# AI/ML dependencies -pinecone-client==2.2.2 -langchain==0.0.267 -sentence-transformers==2.2.2 -numpy==1.26.4 -openai==1.3.0 - -# For local storage (will be replaced with MySQL in production) -sqlalchemy==2.0.20 -pymysql==1.1.0 -cryptography==41.0.3 # Required for PyMySQL - -# Utilities -tqdm==4.67.1 -requests==2.32.3 -tenacity==8.5.0 # For retrying API calls - -# Production dependencies -gunicorn==21.2.0 # Production WSGI server -python-json-logger==2.0.7 # Structured logging -prometheus-client==0.17.1 # Metrics -sentry-sdk==1.39.1 # Error tracking diff --git a/ai_service/requirements.txt b/ai_service/requirements.txt index 752109b..2ec847f 100644 --- a/ai_service/requirements.txt +++ b/ai_service/requirements.txt @@ -4,16 +4,8 @@ uvicorn==0.23.2 pydantic==2.3.0 python-dotenv==1.0.0 -# AI/ML dependencies -pinecone-client==2.2.2 -langchain==0.0.267 -sentence-transformers==2.2.2 -numpy==1.26.4 - -# For local storage -sqlalchemy==2.0.20 -sqlite3==0.0.0 # This is a placeholder, sqlite3 is part of Python's standard library +# Document processing +langchain-text-splitters==0.3.8 # Utilities -tqdm==4.67.1 requests==2.32.3 diff --git a/deploy_ai_service.sh b/deploy_ai_service.sh index 6275b72..e5802e7 100755 --- a/deploy_ai_service.sh +++ b/deploy_ai_service.sh @@ -2,44 +2,37 @@ # Stop any existing service pkill -f "uvicorn ai_service.api:app" || true +pkill -f "run_ai_service.py" || true # Create data directory if it doesn't exist mkdir -p ai_service/data -# Set environment variables for testing -# In production, replace these with your actual API keys -export PINECONE_API_KEY="test-pinecone-api-key" -export PINECONE_ENVIRONMENT="test-pinecone-environment" -export OPENAI_API_KEY="test-openai-api-key" - # Create empty files for local storage if they don't exist -touch ai_service/data/chatbot.db touch ai_service/data/document_metadata.json touch ai_service/data/chats.json -# For testing purposes, we'll use a simplified API -echo "Starting Simple API Service on port 5251..." +# Start the AI service +echo "Starting AI Service on port 5251..." VENV_PATH="./venv" PYTHON_PATH="$VENV_PATH/bin/python" # Check if the virtual environment exists if [ -f "$PYTHON_PATH" ]; then echo "Using Python from virtual environment: $PYTHON_PATH" - # Use the simplified API for testing - nohup $PYTHON_PATH simple_api.py > ai_service.log 2>&1 & + nohup $PYTHON_PATH run_ai_service.py > ai_service.log 2>&1 & else echo "Virtual environment not found at $VENV_PATH, using system Python" - nohup python simple_api.py > ai_service.log 2>&1 & + nohup python run_ai_service.py > ai_service.log 2>&1 & fi # Wait a moment for the service to start sleep 2 # Check if the service is running -if pgrep -f "simple_api.py" > /dev/null; then +if pgrep -f "run_ai_service.py" > /dev/null; then echo "AI Service started successfully on port 5251" echo "Check ai_service.log for output" - echo "To stop the service, run: pkill -f \"simple_api.py\"" + echo "To stop the service, run: pkill -f \"run_ai_service.py\"" # Test the health endpoint echo -e "\nTesting health endpoint..." @@ -60,7 +53,7 @@ response = requests.post( json={ "user_id": "test_user", "title": "Test Chat", - "model_id": "gpt-3.5-turbo" + "model_id": "llama3.1" } ) diff --git a/remote_deploy.sh b/remote_deploy.sh index 3c3ad9a..1a42a8a 100755 --- a/remote_deploy.sh +++ b/remote_deploy.sh @@ -4,10 +4,10 @@ # Usage: ./remote_deploy.sh [server_ip] [user] [port] [remote_dir] # Default values -SERVER_IP=${1:-"104.225.217.215"} -SERVER_USER=${2:-"root"} -SERVER_PORT=${3:-"22"} -REMOTE_DIR=${4:-"/root/openwebui"} +SERVER_IP=${1:-"104.225.217.215"} # IP address of the server where OpenWebUI is installed +SERVER_USER=${2:-"root"} # SSH username for the server +SERVER_PORT=${3:-"22"} # SSH port for the server +REMOTE_DIR=${4:-"/root/openwebui"} # Directory where OpenWebUI is installed LOCAL_DIR="." echo "Deploying to server: $SERVER_IP" @@ -36,12 +36,22 @@ ssh -p $SERVER_PORT $SERVER_USER@$SERVER_IP "cd $REMOTE_DIR/ai_service_app && \ python3 -m venv venv || true && \ source venv/bin/activate && \ pip install --upgrade pip && \ - pip install -r requirements.txt && \ - pip install python-dotenv langchain-text-splitters" + pip install -r ai_service/requirements.txt && \ + pip install python-dotenv langchain-text-splitters requests" # Stop any existing service echo "Stopping any existing service..." -ssh -p $SERVER_PORT $SERVER_USER@$SERVER_IP "pkill -f 'uvicorn ai_service.run:app' || true" +ssh -p $SERVER_PORT $SERVER_USER@$SERVER_IP "pkill -f 'uvicorn ai_service.api:app' || true" + +# Set up environment file if it doesn't exist +echo "Setting up environment file..." +ssh -p $SERVER_PORT $SERVER_USER@$SERVER_IP "cd $REMOTE_DIR/ai_service_app && \ + if [ ! -f ai_service/.env ]; then \ + cp ai_service/.env.example ai_service/.env; \ + echo 'Created .env file from .env.example'; \ + else \ + echo '.env file already exists'; \ + fi" # Start the service echo "Starting the service..." @@ -52,7 +62,7 @@ ssh -p $SERVER_PORT $SERVER_USER@$SERVER_IP "cd $REMOTE_DIR/ai_service_app && \ # Check if the service is running echo "Checking if the service is running..." sleep 5 -ssh -p $SERVER_PORT $SERVER_USER@$SERVER_IP "ps aux | grep 'uvicorn ai_service.run:app' | grep -v grep" +ssh -p $SERVER_PORT $SERVER_USER@$SERVER_IP "ps aux | grep 'uvicorn ai_service.api:app' | grep -v grep" if [ $? -eq 0 ]; then echo "Service is running!" echo "You can access the API at: http://$SERVER_IP:5251" diff --git a/run_ai_service.py b/run_ai_service.py index 5798ef7..3aaf3cb 100644 --- a/run_ai_service.py +++ b/run_ai_service.py @@ -6,11 +6,6 @@ import uvicorn import os import sys -# Set environment variables for testing -os.environ['PINECONE_API_KEY'] = 'test-key' -os.environ['PINECONE_ENVIRONMENT'] = 'test-env' -os.environ['OPENAI_API_KEY'] = 'test-key' - # Run the service if __name__ == "__main__": print("Starting AI service on 0.0.0.0:5251")