From ac98999507740d5bf53a0334c017766ad75c321b Mon Sep 17 00:00:00 2001 From: Iyeoluwa Akinrinola Date: Fri, 9 May 2025 15:41:16 +0100 Subject: [PATCH] Initial commit for deployment --- .env.example | 17 + .gitignore | 109 ++++++ DEPLOYMENT.md | 135 ++++++++ OLLAMA_INTEGRATION.md | 87 +++++ PRODUCTION_READINESS.md | 136 ++++++++ README.md | 61 ++++ ai_service/.env.example | 18 + ai_service/.env.production | 33 ++ ai_service/README.md | 94 +++++ ai_service/__init__.py | 6 + ai_service/ai_service/data/chats.json | 28 ++ ai_service/api.py | 377 +++++++++++++++++++++ ai_service/config.py | 37 ++ ai_service/data/chats.json | 28 ++ ai_service/data/document_metadata.json | 0 ai_service/deploy.sh | 20 ++ ai_service/embeddings/document_service.py | 261 ++++++++++++++ ai_service/embeddings/embedding_service.py | 214 ++++++++++++ ai_service/models/chat_service.py | 309 +++++++++++++++++ ai_service/models/model_parameters.py | 170 ++++++++++ ai_service/models/model_service.py | 243 +++++++++++++ ai_service/requirements.production.txt | 28 ++ ai_service/requirements.txt | 19 ++ ai_service/run.py | 21 ++ ai_service_workflow.md | 173 ++++++++++ app/__init__.py | 34 ++ app/api/__init__.py | 0 app/api/api.py | 110 ++++++ app/api/routes.py | 100 ++++++ app/config/__init__.py | 0 app/config/config.py | 79 +++++ app/database/__init__.py | 0 app/database/db.py | 36 ++ app/models/__init__.py | 0 app/models/chat.py | 67 ++++ app/models/document.py | 59 ++++ app/models/user.py | 24 ++ app/services/__init__.py | 0 app/services/chat_service.py | 227 +++++++++++++ app/services/chatbot_service.py | 105 ++++++ app/services/document_service.py | 165 +++++++++ app/services/model_service.py | 95 ++++++ app/utils/__init__.py | 0 deploy.sh | 15 + deploy_ai_service.sh | 102 ++++++ deploy_production.sh | 82 +++++ remote_deploy.sh | 65 ++++ requirements-deploy.txt | 10 + requirements.txt | 22 ++ run.py | 14 + run_ai_service.py | 22 ++ simple_api.py | 144 ++++++++ test_chat_with_params.py | 69 ++++ 
test_ollama.py | 73 ++++ 54 files changed, 4343 insertions(+) create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 DEPLOYMENT.md create mode 100644 OLLAMA_INTEGRATION.md create mode 100644 PRODUCTION_READINESS.md create mode 100644 README.md create mode 100644 ai_service/.env.example create mode 100644 ai_service/.env.production create mode 100644 ai_service/README.md create mode 100644 ai_service/__init__.py create mode 100644 ai_service/ai_service/data/chats.json create mode 100644 ai_service/api.py create mode 100644 ai_service/config.py create mode 100644 ai_service/data/chats.json create mode 100644 ai_service/data/document_metadata.json create mode 100755 ai_service/deploy.sh create mode 100644 ai_service/embeddings/document_service.py create mode 100644 ai_service/embeddings/embedding_service.py create mode 100644 ai_service/models/chat_service.py create mode 100644 ai_service/models/model_parameters.py create mode 100644 ai_service/models/model_service.py create mode 100644 ai_service/requirements.production.txt create mode 100644 ai_service/requirements.txt create mode 100644 ai_service/run.py create mode 100644 ai_service_workflow.md create mode 100644 app/__init__.py create mode 100644 app/api/__init__.py create mode 100644 app/api/api.py create mode 100644 app/api/routes.py create mode 100644 app/config/__init__.py create mode 100644 app/config/config.py create mode 100644 app/database/__init__.py create mode 100644 app/database/db.py create mode 100644 app/models/__init__.py create mode 100644 app/models/chat.py create mode 100644 app/models/document.py create mode 100644 app/models/user.py create mode 100644 app/services/__init__.py create mode 100644 app/services/chat_service.py create mode 100644 app/services/chatbot_service.py create mode 100644 app/services/document_service.py create mode 100644 app/services/model_service.py create mode 100644 app/utils/__init__.py create mode 100755 deploy.sh create mode 100755 
deploy_ai_service.sh create mode 100644 deploy_production.sh create mode 100755 remote_deploy.sh create mode 100644 requirements-deploy.txt create mode 100644 requirements.txt create mode 100644 run.py create mode 100644 run_ai_service.py create mode 100644 simple_api.py create mode 100644 test_chat_with_params.py create mode 100644 test_ollama.py diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..7f119a6 --- /dev/null +++ b/.env.example @@ -0,0 +1,17 @@ +# Flask configuration +FLASK_APP=run.py +FLASK_ENV=development +FLASK_CONFIG=development +SECRET_KEY=your-secret-key-here + +# Database configuration +DATABASE_URL=mysql+pymysql://username:password@localhost/chatbot + +# Pinecone configuration +PINECONE_API_KEY=your-pinecone-api-key +PINECONE_ENVIRONMENT=your-pinecone-environment +PINECONE_INDEX_NAME=chatbot-index + +# Model configuration +DEFAULT_MODEL=gpt-3.5-turbo +OPENAI_API_KEY=your-openai-api-key diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4e06a0b --- /dev/null +++ b/.gitignore @@ -0,0 +1,109 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ 
+venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# IDE files +.idea/ +.vscode/ +*.swp +*.swo + +# Project specific +uploads/ +*.db diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md new file mode 100644 index 0000000..3f5e300 --- /dev/null +++ b/DEPLOYMENT.md @@ -0,0 +1,135 @@ +# Deployment Instructions + +This document provides instructions for deploying the chatbot application with Ollama and OpenWebUI integration. + +## Prerequisites + +- Python 3.8 or higher +- pip +- virtualenv or venv +- Access to OpenWebUI at http://104.225.217.215:8080 + +## Deployment Steps + +1. **Clone the repository** + + ```bash + git clone your-repository-url + cd your-repository-directory + ``` + +2. **Create and activate a virtual environment** + + ```bash + python -m venv venv + source venv/bin/activate # On Windows: venv\Scripts\activate + ``` + +3. **Install dependencies** + + ```bash + pip install -r requirements.txt + pip install python-dotenv langchain-text-splitters + ``` + +4. **Create a .env file** + + Copy the .env.example file to .env and update the values: + + ```bash + cp ai_service/.env.example ai_service/.env + # Edit the .env file with appropriate values + ``` + + Make sure to include the OpenWebUI configuration: + + ``` + # OpenWebUI configuration + OPENWEBUI_URL=http://104.225.217.215:8080 + OPENWEBUI_API_KEY=your-openwebui-api-key + + # Ollama configuration + OLLAMA_API_URL=http://104.225.217.215:8080/ollama + DEFAULT_MODEL=llama3.1 + ``` + +5. **Run the deployment script** + + For local deployment: + ```bash + python -m ai_service.run + ``` + + For server deployment: + ```bash + ./ai_service/deploy.sh + ``` + + This will start the application on port 5251 using uvicorn with nohup. + + For remote deployment from your local machine: + ```bash + ./remote_deploy.sh 157.157.221.29 user 22 /home/user/ds_zagres_ai + ``` + +6. 
**Verify the application is running** + + ```bash + curl http://localhost:5251/health + ``` + + You should see a response like: + ```json + { + "status": "healthy" + } + ``` + +## Managing the Deployed Application + +- **View logs** + + ```bash + tail -f app.log + ``` + +- **Stop the application** + + ```bash + ps aux | grep uvicorn # Find the process ID (PID) + kill PID # Replace PID with the actual process ID + ``` + +- **Restart the application** + + ```bash + ./deploy.sh + ``` + +## API Endpoints + +- `GET /health` - Health check endpoint +- `POST /chats` - Create a new chat +- `POST /chats/{chat_id}/messages` - Send a message to the chatbot +- `GET /chats/{chat_id}` - Get chat history + +## Ollama and OpenWebUI Integration + +The chatbot now uses Ollama models via OpenWebUI. The following models are available: + +- **gemma3**: Google Gemma 3 model +- **llama3.3**: Meta Llama 3 70B model +- **llama3.1**: Meta Llama 3 8B model +- **mistral**: Mistral AI model +- **deepseek**: DeepSeek model + +### Document Training + +To use RAG with your documents: + +1. Go to the OpenWebUI interface at http://104.225.217.215:8080/ +2. Navigate to the Knowledge section +3. Upload your documents +4. OpenWebUI will automatically process them for RAG + +When using the chatbot API, set `use_rag=True` in your chat requests to enable RAG. diff --git a/OLLAMA_INTEGRATION.md b/OLLAMA_INTEGRATION.md new file mode 100644 index 0000000..13df107 --- /dev/null +++ b/OLLAMA_INTEGRATION.md @@ -0,0 +1,87 @@ +# Ollama and OpenWebUI Integration + +This document explains how to use the chatbot with Ollama and OpenWebUI. + +## Overview + +The chatbot has been updated to use Ollama models via OpenWebUI. 
The following models are now available: + +- **gemma3**: Google Gemma 3 model +- **llama3.3**: Meta Llama 3 70B model +- **llama3.1**: Meta Llama 3 8B model +- **mistral**: Mistral AI model +- **deepseek**: DeepSeek model + +## OpenWebUI + +OpenWebUI is running at: http://104.225.217.215:8080/ + +### Features + +1. **Document Training**: OpenWebUI provides built-in RAG capabilities. When you upload a document through OpenWebUI, it automatically processes it for RAG. + +2. **Knowledge Database**: OpenWebUI stores documents in its knowledge database, which can be accessed during chat sessions. + +## Using the Integration + +### Configuration + +1. Update your `.env` file with the following settings: + +``` +# OpenWebUI configuration +OPENWEBUI_URL=http://104.225.217.215:8080 +OPENWEBUI_API_KEY=your-openwebui-api-key + +# Ollama configuration +OLLAMA_API_URL=http://localhost:11434 +DEFAULT_MODEL=llama3.1 +``` + +### Document Processing + +To use RAG with your documents: + +1. Go to the OpenWebUI interface at http://104.225.217.215:8080/ +2. Navigate to the Knowledge section +3. Upload your documents +4. OpenWebUI will automatically process them for RAG + +### Chat with RAG + +When using the chatbot API: + +1. Set `use_rag=True` in your chat requests to enable RAG +2. The system will use OpenWebUI's knowledge database to enhance responses + +## API Usage + +The API endpoints remain the same, but now they use Ollama models via OpenWebUI: + +```python +# Example: Get a response with RAG +response = chat_service.get_chat_response( + chat_id="your-chat-id", + message="Tell me about the documents I uploaded", + user_id="user123", + use_rag=True, + model_id="llama3.1" # Use one of the Ollama models +) +``` + +## Troubleshooting + +If you encounter issues: + +1. Make sure OpenWebUI is accessible at http://104.225.217.215:8080/ +2. Check that you have the correct API key if authentication is enabled +3. 
Verify that the documents are properly uploaded to OpenWebUI's knowledge database + +## Direct Usage + +For direct usage without the API, you can: + +1. Go to http://104.225.217.215:8080/ +2. Select the model you want to use +3. Upload documents in the Knowledge section +4. Chat with the model and it will use the knowledge database automatically diff --git a/PRODUCTION_READINESS.md b/PRODUCTION_READINESS.md new file mode 100644 index 0000000..5577b93 --- /dev/null +++ b/PRODUCTION_READINESS.md @@ -0,0 +1,136 @@ +# Production Readiness Checklist + +This document outlines what's currently implemented, what's missing, and what improvements can be made to make the AI service production-ready. + +## Current Implementation + +### Core Functionality +- ✅ Document processing and chunking +- ✅ Embedding generation and storage +- ✅ Document search +- ✅ Chat functionality +- ✅ Model switching +- ✅ Team chat support +- ✅ RAG (Retrieval-Augmented Generation) +- ✅ Customizable model parameters + +### API Endpoints +- ✅ Health check endpoint +- ✅ Document management endpoints +- ✅ Model information endpoints +- ✅ Chat management endpoints +- ✅ Message sending endpoint + +## Missing Components + +### Authentication and Authorization +- ❌ User authentication +- ❌ API key validation +- ❌ Role-based access control +- ❌ Document access permissions + +### Database Integration +- ❌ MySQL database integration (currently using JSON files and SQLite) +- ❌ Database migration scripts +- ❌ Connection pooling + +### Monitoring and Observability +- ❌ Structured logging +- ❌ Metrics collection +- ❌ Alerting system +- ❌ Detailed health checks + +### Testing +- ❌ Unit tests +- ❌ Integration tests +- ❌ Load tests +- ❌ CI/CD pipeline + +### Documentation +- ❌ API documentation +- ❌ Developer guide +- ❌ Deployment guide +- ❌ User guide + +## Improvement Opportunities + +### Performance +- Implement caching for frequently accessed data +- Add connection pooling for database connections +- Optimize 
embedding generation and search +- Implement batch processing for document ingestion + +### Scalability +- Add support for distributed deployment +- Implement horizontal scaling +- Add load balancing +- Implement message queues for asynchronous processing + +### Security +- Add input validation and sanitization +- Implement rate limiting +- Add CORS configuration +- Implement secure storage for API keys + +### User Experience +- Add progress tracking for document processing +- Implement streaming responses for chat +- Add support for file attachments +- Implement chat history export + +### AI Capabilities +- Add support for more AI models +- Implement fine-tuning capabilities +- Add support for function calling +- Implement conversation summarization + +## Document Storage + +Currently, documents are stored in two places: + +1. **Document Metadata**: Stored in a JSON file at `ai_service/data/document_metadata.json` +2. **Document Embeddings**: Stored in Pinecone vector database + +For production, you should: +- Replace the JSON file storage with MySQL database +- Implement proper document versioning +- Add document access controls +- Implement backup and recovery procedures + +## API Keys and Configuration + +The system is designed to use environment variables for configuration, including API keys. The following keys need to be set: + +1. **Pinecone API Key**: For vector storage + - Sign up at https://www.pinecone.io/ + - Set `PINECONE_API_KEY` and `PINECONE_ENVIRONMENT` in `.env` + +2. **OpenAI API Key**: For AI model access + - Sign up at https://platform.openai.com/ + - Set `OPENAI_API_KEY` in `.env` + +A template `.env.production` file has been created with placeholders for these keys. + +## Deployment + +A production deployment script (`deploy_production.sh`) has been created to: +- Set up the virtual environment +- Install dependencies +- Check for API keys +- Start the service with proper logging +- Verify the service is running + +To deploy: +1. 
Copy `.env.production` to `.env` and add your API keys +2. Run `./deploy_production.sh` +3. Monitor the service with `tail -f ai_service.log` + +## Next Steps + +1. Implement authentication and authorization +2. Set up MySQL database integration +3. Add comprehensive testing +4. Implement monitoring and observability +5. Create detailed documentation +6. Address security concerns +7. Optimize performance and scalability diff --git a/README.md b/README.md new file mode 100644 index 0000000..f634351 --- /dev/null +++ b/README.md @@ -0,0 +1,61 @@ +# Chatbot Application + +A chatbot application with document training, private/team chat options, and model switching capability. + +## Features + +- Document training through library page +- Private chat functionality +- Team chat functionality (multiple users can see each other's interactions) +- Model switching capability + +## Technology Stack + +- **Backend**: Flask with FastAPI +- **Database**: MySQL +- **Vector Database**: Pinecone +- **Embeddings**: Sentence Transformers / OpenAI Embeddings +- **Chat Models**: Various LLMs (configurable) + +## Project Structure + +``` +app/ +├── api/ # API endpoints (Flask and FastAPI) +├── config/ # Configuration settings +├── database/ # Database connection and utilities +├── models/ # Database models +├── services/ # Business logic services +└── utils/ # Utility functions +tests/ # Test cases +``` + +## Setup Instructions + +1. Clone the repository +2. Create a virtual environment: + ``` + python -m venv venv + source venv/bin/activate # On Windows: venv\Scripts\activate + ``` +3. Install dependencies: + ``` + pip install -r requirements.txt + ``` +4. Copy `.env.example` to `.env` and update the values +5. Initialize the database: + ``` + flask db init + flask db migrate + flask db upgrade + ``` +6. 
Run the application: + ``` + python run.py + ``` + +## API Documentation + +Once the application is running, you can access the API documentation at: +- FastAPI Swagger UI: http://localhost:5000/docs +- FastAPI ReDoc: http://localhost:5000/redoc diff --git a/ai_service/.env.example b/ai_service/.env.example new file mode 100644 index 0000000..3803587 --- /dev/null +++ b/ai_service/.env.example @@ -0,0 +1,18 @@ +# API configuration +API_HOST=0.0.0.0 +API_PORT=5251 + +# OpenWebUI configuration +OPENWEBUI_URL=http://104.225.217.215:8080 +OPENWEBUI_API_KEY=your-openwebui-api-key # Replace with the actual key in your .env file + +# Ollama configuration +OLLAMA_API_URL=http://localhost:11434 +DEFAULT_MODEL=llama3.1 + +# Local storage +SQLITE_DB_PATH=ai_service/data/chatbot.db + +# Document processing +CHUNK_SIZE=1000 +CHUNK_OVERLAP=200 diff --git a/ai_service/.env.production b/ai_service/.env.production new file mode 100644 index 0000000..5d2cafd --- /dev/null +++ b/ai_service/.env.production @@ -0,0 +1,33 @@ +# API configuration +API_HOST=0.0.0.0 +API_PORT=5251 + +# Pinecone configuration +# Sign up at https://www.pinecone.io/ to get your API key +PINECONE_API_KEY=your-pinecone-api-key-here +PINECONE_ENVIRONMENT=your-pinecone-environment-here +PINECONE_INDEX_NAME=chatbot-index + +# Model configuration +# Sign up at https://platform.openai.com/ to get your API key +DEFAULT_MODEL=gpt-3.5-turbo +OPENAI_API_KEY=your-openai-api-key-here + +# Local storage +# Path to SQLite database (will be replaced with MySQL in production) +SQLITE_DB_PATH=ai_service/data/chatbot.db + +# Document processing +# Adjust these values based on your needs +CHUNK_SIZE=1000 +CHUNK_OVERLAP=200 + +# Embedding model +# Options: all-MiniLM-L6-v2 (default), paraphrase-MiniLM-L3-v2 (smaller/faster) +EMBEDDING_MODEL=all-MiniLM-L6-v2 + +# Production settings +# Set to 'production' in production environment +ENVIRONMENT=production +LOG_LEVEL=INFO +ENABLE_MOCK=false diff --git a/ai_service/README.md 
b/ai_service/README.md new file mode 100644 index 0000000..eafe1e5 --- /dev/null +++ b/ai_service/README.md @@ -0,0 +1,94 @@ +# AI Service for Chatbot Application + +This is the AI service component for the chatbot application. It provides APIs for document processing, embeddings, and chat functionality. + +## Features + +- Document processing and embedding +- Retrieval-augmented generation (RAG) +- Chat functionality with model switching +- Team chat support + +## Project Structure + +``` +ai_service/ +├── embeddings/ # Embedding and document processing services +├── models/ # Model and chat services +├── utils/ # Utility functions +├── data/ # Data storage +├── config.py # Configuration settings +├── api.py # FastAPI application +└── run.py # Script to run the service +``` + +## Setup Instructions + +1. Create a virtual environment: + ``` + python -m venv venv + source venv/bin/activate # On Windows: venv\Scripts\activate + ``` + +2. Install dependencies: + ``` + pip install -r requirements.txt + ``` + +3. Copy `.env.example` to `.env` and update the values: + ``` + cp .env.example .env + # Edit the .env file with appropriate values + ``` + +4. Run the service: + ``` + python run.py + ``` + +## API Documentation + +Once the service is running, you can access the API documentation at: +- Swagger UI: http://localhost:5251/docs +- ReDoc: http://localhost:5251/redoc + +## Deployment + +To deploy the service: + +1. Make the deployment script executable: + ``` + chmod +x deploy.sh + ``` + +2. Run the deployment script: + ``` + ./deploy.sh + ``` + +This will start the service on port 5251 using uvicorn with nohup. 
+ +## API Endpoints + +### Document Endpoints + +- `POST /documents` - Process a document for embedding +- `GET /documents` - Get all documents +- `GET /documents/{doc_id}` - Get a document by ID +- `DELETE /documents/{doc_id}` - Delete a document +- `POST /documents/search` - Search for documents + +### Model Endpoints + +- `GET /models` - Get available models +- `GET /models/{model_id}` - Get information about a model + +### Chat Endpoints + +- `POST /chats` - Create a new chat +- `GET /chats/user/{user_id}` - Get all chats for a user +- `GET /chats/{chat_id}` - Get a chat by ID +- `POST /chats/{chat_id}/messages` - Send a message to a chat +- `POST /chats/{chat_id}/members/{user_id}` - Add a user to a team chat +- `DELETE /chats/{chat_id}/members/{user_id}` - Remove a user from a team chat +- `DELETE /chats/{chat_id}` - Delete a chat diff --git a/ai_service/__init__.py b/ai_service/__init__.py new file mode 100644 index 0000000..2fbe56a --- /dev/null +++ b/ai_service/__init__.py @@ -0,0 +1,6 @@ +""" +AI service package. 
+""" + +# Import for easier access +from ai_service.api import app diff --git a/ai_service/ai_service/data/chats.json b/ai_service/ai_service/data/chats.json new file mode 100644 index 0000000..6e17500 --- /dev/null +++ b/ai_service/ai_service/data/chats.json @@ -0,0 +1,28 @@ +{ + "e2b1bdc2-a384-4775-9c14-42b221e5554f": { + "id": "e2b1bdc2-a384-4775-9c14-42b221e5554f", + "title": "Test Chat", + "user_id": "test_user", + "model_id": "gpt-3.5-turbo", + "is_team_chat": false, + "created_at": "2025-05-06T11:50:43.558931", + "updated_at": "2025-05-06T11:51:20.982846", + "messages": [ + { + "id": "865a1e57-c71f-4bab-a4ba-56d630a38631", + "content": "Hello, AI!", + "user_id": "test_user", + "is_user_message": true, + "timestamp": "2025-05-06T11:51:20.051537" + }, + { + "id": "eb755b4f-fe55-4bec-b77f-ed20941df360", + "content": "Error generating response: 401 Client Error: Unauthorized for url: https://api.openai.com/v1/chat/completions", + "user_id": null, + "is_user_message": false, + "timestamp": "2025-05-06T11:51:20.982829" + } + ], + "team_members": [] + } +} \ No newline at end of file diff --git a/ai_service/api.py b/ai_service/api.py new file mode 100644 index 0000000..dd7d589 --- /dev/null +++ b/ai_service/api.py @@ -0,0 +1,377 @@ +""" +FastAPI application for the AI service. 
+""" + +from fastapi import FastAPI, HTTPException, Depends, Body, Query, Path +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel, Field +from typing import List, Dict, Any, Optional + +from ai_service.config import config +from ai_service.embeddings.document_service import document_service +from ai_service.models.model_service import model_service +from ai_service.models.chat_service import chat_service +from ai_service.models.model_parameters import ModelParameters + +# Create FastAPI app +app = FastAPI( + title="AI Service API", + description="API for the AI service", + version="1.0.0" +) + +# Add CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], # Allow all origins + allow_credentials=True, + allow_methods=["*"], # Allow all methods + allow_headers=["*"], # Allow all headers +) + +# Define API models +class DocumentRequest(BaseModel): + """Request model for document processing.""" + content: str = Field(..., description="Document content") + title: str = Field(..., description="Document title") + description: Optional[str] = Field(None, description="Document description") + metadata: Optional[Dict[str, Any]] = Field(None, description="Additional metadata") + +class DocumentResponse(BaseModel): + """Response model for document processing.""" + id: str = Field(..., description="Document ID") + title: str = Field(..., description="Document title") + description: str = Field(..., description="Document description") + chunk_count: int = Field(..., description="Number of chunks") + metadata: Dict[str, Any] = Field(..., description="Additional metadata") + +class SearchRequest(BaseModel): + """Request model for document search.""" + query: str = Field(..., description="Search query") + top_k: int = Field(5, description="Number of results to return") + +class SearchResult(BaseModel): + """Model for a search result.""" + id: str = Field(..., description="Result ID") + score: float = Field(..., 
description="Similarity score") + metadata: Dict[str, Any] = Field(..., description="Result metadata") + +class ModelInfo(BaseModel): + """Model for model information.""" + id: str = Field(..., description="Model ID") + name: str = Field(..., description="Model name") + description: str = Field(..., description="Model description") + provider: str = Field(..., description="Model provider") + max_tokens: int = Field(..., description="Maximum tokens") + is_default: bool = Field(..., description="Whether this is the default model") + +class ChatRequest(BaseModel): + """Request model for creating a chat.""" + user_id: str = Field(..., description="User ID") + title: Optional[str] = Field(None, description="Chat title") + model_id: Optional[str] = Field(None, description="Model ID") + is_team_chat: bool = Field(False, description="Whether this is a team chat") + +class MessageRequest(BaseModel): + """Request model for sending a message.""" + message: str = Field(..., description="Message content") + user_id: str = Field(..., description="User ID") + use_rag: bool = Field(False, description="Whether to use RAG") + + # Model parameters + temperature: Optional[float] = Field(None, description="Controls randomness: higher values mean more random completions") + max_tokens: Optional[int] = Field(None, description="Maximum number of tokens to generate") + top_p: Optional[float] = Field(None, description="Nucleus sampling parameter") + frequency_penalty: Optional[float] = Field(None, description="Penalizes repeated tokens") + presence_penalty: Optional[float] = Field(None, description="Penalizes repeated topics") + stop_sequences: Optional[List[str]] = Field(None, description="Sequences where the API will stop generating") + system_prompt: Optional[str] = Field(None, description="System prompt to guide the model's behavior") + + # Additional advanced parameters + min_p: Optional[float] = Field(None, description="Minimum probability threshold for token selection") + top_k: 
Optional[int] = Field(None, description="Only sample from the top k tokens") + repeat_penalty: Optional[float] = Field(None, description="Penalty for repeating tokens") + function_calling: Optional[bool] = Field(None, description="Whether to enable function calling") + +class Message(BaseModel): + """Model for a message.""" + id: str = Field(..., description="Message ID") + content: str = Field(..., description="Message content") + user_id: Optional[str] = Field(None, description="User ID") + is_user_message: bool = Field(..., description="Whether this is a user message") + timestamp: str = Field(..., description="Message timestamp") + +class Chat(BaseModel): + """Model for a chat.""" + id: str = Field(..., description="Chat ID") + title: str = Field(..., description="Chat title") + user_id: str = Field(..., description="User ID") + model_id: str = Field(..., description="Model ID") + is_team_chat: bool = Field(..., description="Whether this is a team chat") + created_at: str = Field(..., description="Creation timestamp") + updated_at: str = Field(..., description="Update timestamp") + messages: List[Message] = Field(..., description="Chat messages") + team_members: List[str] = Field(..., description="Team members") + +# Define API endpoints +@app.get("/health") +async def health_check(): + """ + Health check endpoint. + + Returns: + Health status. + """ + return {"status": "healthy"} + +# Document endpoints +@app.post("/documents", response_model=DocumentResponse) +async def process_document(request: DocumentRequest): + """ + Process a document for embedding. + + Args: + request: Document processing request. + + Returns: + Processed document information. 
@app.get("/documents", response_model=List[DocumentResponse])
async def get_all_documents():
    """
    Get all documents.

    Returns:
        Whatever ``document_service.get_all_documents()`` returns.
    """
    return document_service.get_all_documents()


@app.get("/documents/{doc_id}", response_model=DocumentResponse)
async def get_document(doc_id: str):
    """
    Get a document by ID.

    Args:
        doc_id: Document ID.

    Returns:
        Document information.

    Raises:
        HTTPException: 404 when the document service has no such document.
    """
    found = document_service.get_document(doc_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Document not found")


@app.delete("/documents/{doc_id}")
async def delete_document(doc_id: str):
    """
    Delete a document.

    Args:
        doc_id: Document ID.

    Returns:
        Deletion status.

    Raises:
        HTTPException: 404 when the document service reports failure.
    """
    if not document_service.delete_document(doc_id):
        raise HTTPException(status_code=404, detail="Document not found")
    return {"status": "success", "message": "Document deleted"}


@app.post("/documents/search", response_model=List[SearchResult])
async def search_documents(request: SearchRequest):
    """
    Search for documents.

    Args:
        request: Search request carrying ``query`` and ``top_k``.

    Returns:
        Search results from the document service.
    """
    return document_service.search_documents(
        query=request.query,
        top_k=request.top_k,
    )


# Model endpoints
@app.get("/models", response_model=List[ModelInfo])
async def get_available_models():
    """
    Get available models.

    Returns:
        List of model information.
    """
    return model_service.get_available_models()


@app.get("/models/{model_id}", response_model=ModelInfo)
async def get_model_info(model_id: str):
    """
    Get information about a model.

    Args:
        model_id: Model ID.

    Returns:
        Model information.

    Raises:
        HTTPException: 404 when the model service returns nothing.
    """
    info = model_service.get_model_info(model_id)
    if info:
        return info
    raise HTTPException(status_code=404, detail="Model not found")


# Chat endpoints
@app.post("/chats", response_model=Chat)
async def create_chat(request: ChatRequest):
    """
    Create a new chat.

    Args:
        request: Chat creation request.

    Returns:
        The created chat, re-read from the chat service.
    """
    new_id = chat_service.create_chat(
        user_id=request.user_id,
        title=request.title,
        model_id=request.model_id,
        is_team_chat=request.is_team_chat,
    )
    return chat_service.get_chat(new_id)


@app.get("/chats/user/{user_id}", response_model=List[Chat])
async def get_user_chats(user_id: str):
    """
    Get all chats for a user.

    Args:
        user_id: User ID.

    Returns:
        List of chats.
    """
    return chat_service.get_user_chats(user_id)


@app.get("/chats/{chat_id}", response_model=Chat)
async def get_chat(chat_id: str):
    """
    Get a chat by ID.

    Args:
        chat_id: Chat ID.

    Returns:
        Chat information.

    Raises:
        HTTPException: 404 when the chat does not exist.
    """
    found = chat_service.get_chat(chat_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Chat not found")


@app.post("/chats/{chat_id}/messages", response_model=Message)
async def send_message(chat_id: str, request: MessageRequest):
    """
    Send a message to a chat.

    Args:
        chat_id: Chat ID.
        request: Message request with optional model parameters.

    Returns:
        Bot response message.

    Raises:
        HTTPException: 404 when the chat does not exist; 400 when the chat
            service rejects the request with a ``ValueError``.
    """
    # Decide "not found" explicitly instead of inferring it from any ValueError:
    # other ValueErrors (e.g. rejected parameter values) are client errors,
    # not missing chats.
    if chat_service.get_chat(chat_id) is None:
        raise HTTPException(
            status_code=404,
            detail=f"Chat with ID {chat_id} not found",
        )

    try:
        # Forward every tunable from the request unchanged.
        return chat_service.get_chat_response(
            chat_id=chat_id,
            message=request.message,
            user_id=request.user_id,
            use_rag=request.use_rag,
            temperature=request.temperature,
            max_tokens=request.max_tokens,
            top_p=request.top_p,
            frequency_penalty=request.frequency_penalty,
            presence_penalty=request.presence_penalty,
            stop_sequences=request.stop_sequences,
            system_prompt=request.system_prompt,
            min_p=request.min_p,
            top_k=request.top_k,
            repeat_penalty=request.repeat_penalty,
            function_calling=request.function_calling,
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e


@app.post("/chats/{chat_id}/members/{user_id}")
async def add_team_member(chat_id: str, user_id: str):
    """
    Add a user to a team chat.

    Args:
        chat_id: Chat ID.
        user_id: User ID.

    Returns:
        Addition status.

    Raises:
        HTTPException: 400 when the chat service reports failure.
    """
    if not chat_service.add_team_member(chat_id, user_id):
        raise HTTPException(status_code=400, detail="Failed to add team member")
    return {"status": "success", "message": "Team member added"}


@app.delete("/chats/{chat_id}/members/{user_id}")
async def remove_team_member(chat_id: str, user_id: str):
    """
    Remove a user from a team chat.

    Args:
        chat_id: Chat ID.
        user_id: User ID.

    Returns:
        Removal status.

    Raises:
        HTTPException: 400 when the chat service reports failure.
    """
    if not chat_service.remove_team_member(chat_id, user_id):
        raise HTTPException(status_code=400, detail="Failed to remove team member")
    return {"status": "success", "message": "Team member removed"}
class Config:
    """Base configuration.

    Every setting is read from ``os.environ`` once, when this class body is
    executed, and falls back to the default given here.
    """

    # API configuration
    API_HOST = os.environ.get('API_HOST', '0.0.0.0')
    API_PORT = int(os.environ.get('API_PORT', 5251))

    # OpenWebUI configuration
    # NOTE(review): the default targets a fixed public IP over plain HTTP;
    # deployments should set OPENWEBUI_URL explicitly.
    OPENWEBUI_URL = os.environ.get('OPENWEBUI_URL', 'http://104.225.217.215:8080')
    OPENWEBUI_API_KEY = os.environ.get('OPENWEBUI_API_KEY', '')

    # Ollama configuration
    OLLAMA_API_URL = os.environ.get('OLLAMA_API_URL', 'http://localhost:11434')
    DEFAULT_MODEL = os.environ.get('DEFAULT_MODEL', 'llama3.1')

    # Local storage
    SQLITE_DB_PATH = os.environ.get('SQLITE_DB_PATH', 'ai_service/data/chatbot.db')

    # Document processing
    CHUNK_SIZE = int(os.environ.get('CHUNK_SIZE', 1000))
    CHUNK_OVERLAP = int(os.environ.get('CHUNK_OVERLAP', 200))

    # Embedding model
    EMBEDDING_MODEL = os.environ.get('EMBEDDING_MODEL', 'all-MiniLM-L6-v2')

    # Pinecone vector store. The embedding service reads these three names;
    # an empty value means "not configured".
    PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY', '')
    PINECONE_ENVIRONMENT = os.environ.get('PINECONE_ENVIRONMENT', '')
    PINECONE_INDEX_NAME = os.environ.get('PINECONE_INDEX_NAME', '')
#!/bin/bash
# Start the AI service under uvicorn in the background, logging to logs/ai_service.log.

# Stop at the first failing step so uvicorn is never launched when the
# virtual environment could not be activated.
set -e

# Create a directory for the AI service logs
mkdir -p logs

# Activate virtual environment
source venv/bin/activate

# Export environment variables
export API_HOST=0.0.0.0
export API_PORT=5251

# Make sure the Python path includes the current directory
# (the ${VAR:+...} form avoids a leading ':' when PYTHONPATH is empty)
export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}$(pwd)"

# Run the application with uvicorn and nohup
nohup uvicorn ai_service.run:app --host "$API_HOST" --port "$API_PORT" > logs/ai_service.log 2>&1 &

echo "AI Service started on port $API_PORT (PID $!). Check logs/ai_service.log for output."
echo "To stop the application, find the process ID with 'ps aux | grep uvicorn' and kill it with 'kill <PID>'."
class DocumentService:
    """Service for document processing and chunking.

    Documents are uploaded to OpenWebUI when possible; a metadata record for
    each document is kept in ``self.documents`` and mirrored to a JSON file
    next to the configured SQLite path.
    """

    def __init__(self):
        """Initialize the document service from the shared ``config``."""
        self.chunk_size = config.CHUNK_SIZE
        self.chunk_overlap = config.CHUNK_OVERLAP
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.chunk_size,
            chunk_overlap=self.chunk_overlap,
            length_function=len,
        )

        # OpenWebUI configuration
        self.openwebui_url = config.OPENWEBUI_URL
        self.openwebui_api_key = config.OPENWEBUI_API_KEY

        # Ensure the data directory exists. A bare file name has an empty
        # dirname, and os.makedirs('') raises, so only create real paths.
        data_dir = os.path.dirname(config.SQLITE_DB_PATH)
        if data_dir:
            os.makedirs(data_dir, exist_ok=True)

        # For now, document metadata lives in a simple JSON file
        self.metadata_file = os.path.join(data_dir, 'document_metadata.json')
        self._load_metadata()

    def _request_headers(self) -> Dict[str, str]:
        """Build the HTTP headers for OpenWebUI, adding the bearer token if one is set."""
        headers = {"Content-Type": "application/json"}
        if self.openwebui_api_key:
            headers["Authorization"] = f"Bearer {self.openwebui_api_key}"
        return headers

    def _load_metadata(self):
        """Load document metadata from file into ``self.documents``.

        A missing or empty file yields an empty mapping without complaint;
        unreadable or malformed content is reported and also yields an empty
        mapping.
        """
        self.documents = {}
        if not os.path.exists(self.metadata_file):
            return

        try:
            with open(self.metadata_file, 'r', encoding='utf-8') as f:
                raw = f.read()
            if not raw.strip():
                # A zero-byte file simply means nothing has been stored yet.
                return
            loaded = json.loads(raw)
        except (OSError, ValueError) as e:
            print(f"Error loading document metadata: {str(e)}")
            return

        if isinstance(loaded, dict):
            self.documents = loaded
        else:
            print("Error loading document metadata: expected a JSON object")

    def _save_metadata(self):
        """Save document metadata to file.

        The data is written to a sibling temporary file and then renamed over
        the real one, so a failure mid-write cannot truncate existing data.
        Failures are reported, not raised.
        """
        tmp_path = f"{self.metadata_file}.tmp"
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(self.documents, f, indent=2)
            os.replace(tmp_path, self.metadata_file)
        except (OSError, TypeError, ValueError) as e:
            print(f"Error saving document metadata: {str(e)}")

    def _upload_to_openwebui(self, content: str, title: str,
                             description: Optional[str]) -> str:
        """POST the document to OpenWebUI and return the ``id`` from its reply ('' if absent)."""
        payload = {
            "filename": f"{title}.txt",
            "content": base64.b64encode(content.encode('utf-8')).decode('utf-8'),
            "description": description or title,
        }
        response = requests.post(
            f"{self.openwebui_url}/api/knowledge/upload",
            headers=self._request_headers(),
            json=payload,
            timeout=60,
        )
        response.raise_for_status()
        result = response.json()
        if not isinstance(result, dict):
            raise ValueError("unexpected OpenWebUI upload response")
        return result.get('id', '')

    def process_document(self, content: str, title: str,
                         description: Optional[str] = None,
                         metadata: Optional[Dict[str, Any]] = None) -> str:
        """
        Process a document for embedding.

        The document is uploaded to OpenWebUI. If that fails, the content is
        only split locally and the record is flagged with
        ``openwebui_upload_failed``. Either way a metadata record is stored.

        Args:
            content: Document content.
            title: Document title.
            description: Optional document description.
            metadata: Optional additional metadata.

        Returns:
            Document ID.
        """
        # Generate a unique ID for the document
        doc_id = str(uuid.uuid4())

        try:
            openwebui_doc_id = self._upload_to_openwebui(content, title, description)
        except (requests.RequestException, ValueError) as e:
            print(f"Error uploading document to OpenWebUI: {str(e)}")
            print("Falling back to local document processing")

            # Split the document into chunks for local reference
            chunks = self.text_splitter.split_text(content)
            record = {
                'id': doc_id,
                'title': title,
                'description': description or '',
                'chunk_count': len(chunks),
                'openwebui_upload_failed': True,
                'metadata': metadata or {},
            }
        else:
            record = {
                'id': doc_id,
                'title': title,
                'description': description or '',
                'openwebui_id': openwebui_doc_id,
                'metadata': metadata or {},
            }

        self.documents[doc_id] = record
        self._save_metadata()
        return doc_id

    def get_document(self, doc_id: str) -> Optional[Dict[str, Any]]:
        """
        Get document metadata.

        Args:
            doc_id: Document ID.

        Returns:
            Document metadata if found, None otherwise.
        """
        return self.documents.get(doc_id)

    def get_all_documents(self) -> List[Dict[str, Any]]:
        """
        Get all document metadata, enriched with OpenWebUI information when available.

        Returns:
            List of document metadata. Records whose ``openwebui_id`` matches a
            document reported by OpenWebUI gain ``openwebui_status`` and
            ``openwebui_info`` keys. The returned dicts are copies, so the
            enrichment is never written back to the stored metadata.
        """
        documents = [dict(doc) for doc in self.documents.values()]

        try:
            response = requests.get(
                f"{self.openwebui_url}/api/knowledge",
                headers=self._request_headers(),
                timeout=30,
            )
            remote_docs = response.json() if response.status_code == 200 else []
        except (requests.RequestException, ValueError) as e:
            print(f"Error getting documents from OpenWebUI: {str(e)}")
            return documents

        # Index remote documents by id once; the first occurrence wins.
        remote_by_id = {}
        for remote in remote_docs:
            if isinstance(remote, dict) and remote.get('id'):
                remote_by_id.setdefault(remote['id'], remote)

        for doc in documents:
            remote = remote_by_id.get(doc.get('openwebui_id'))
            if remote is not None:
                doc['openwebui_status'] = 'active'
                doc['openwebui_info'] = remote

        return documents

    def delete_document(self, doc_id: str) -> bool:
        """
        Delete a document's metadata and, if it was uploaded, ask OpenWebUI to delete it too.

        A failure on the OpenWebUI side is reported but does not prevent the
        local record from being removed.

        Args:
            doc_id: Document ID.

        Returns:
            True if the local record existed and was removed, False otherwise.
        """
        if doc_id not in self.documents:
            return False

        openwebui_id = self.documents[doc_id].get('openwebui_id')
        if openwebui_id:
            try:
                response = requests.delete(
                    f"{self.openwebui_url}/api/knowledge/{openwebui_id}",
                    headers=self._request_headers(),
                    timeout=30,
                )
                # Any non-error status (e.g. 204) counts as success.
                if not response.ok:
                    print(f"Warning: Failed to delete document from OpenWebUI: {response.text}")
            except requests.RequestException as e:
                print(f"Error deleting document from OpenWebUI: {str(e)}")

        del self.documents[doc_id]
        self._save_metadata()
        return True

    def search_documents(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        """
        Placeholder search: always returns an empty list.

        Retrieval is not performed here; per the original note, RAG is handled
        by OpenWebUI when ``use_rag=True`` is passed to the model service.

        Args:
            query: Search query (unused).
            top_k: Number of results to return (unused).

        Returns:
            An empty list.
        """
        return []
class EmbeddingService:
    """Service for generating and managing embeddings.

    In mock mode (the default) no model is loaded and no vector store is
    contacted: embeddings are random vectors and storage calls are no-ops.
    """

    # Length of the vectors produced in mock mode.
    MOCK_DIMENSION = 384

    def __init__(self, use_mock=True):  # Default to mock implementation
        """Initialize the embedding service.

        Args:
            use_mock: When true, skip loading the sentence-transformer model.
                If loading the real model fails, the service switches itself
                to mock mode.
        """
        self.use_mock = use_mock
        # Always define these so later code can test them safely.
        self.model = None
        self.index = None

        if not self.use_mock:
            # Use a smaller model for testing
            self.model_name = "paraphrase-MiniLM-L3-v2"
            try:
                self.model = SentenceTransformer(self.model_name)
                print(f"Loaded embedding model: {self.model_name}")
            except Exception as e:
                print(f"Error loading embedding model: {str(e)}")
                self.use_mock = True
                print("Falling back to mock implementation")
        else:
            print("Using mock embedding implementation")
            self.model_name = "mock-model"

        self._initialize_pinecone()

    def _initialize_pinecone(self):
        """Connect to the Pinecone index, creating it if needed.

        Leaves ``self.index`` as ``None`` in mock mode (every storage method
        short-circuits there), when the Pinecone settings are absent, or when
        the connection fails.
        """
        self.index = None
        if self.use_mock or self.model is None:
            return

        # The settings may not be defined on the config object at all.
        api_key = getattr(config, 'PINECONE_API_KEY', None)
        environment = getattr(config, 'PINECONE_ENVIRONMENT', None)
        index_name = getattr(config, 'PINECONE_INDEX_NAME', None)

        if not api_key or not environment:
            print("Warning: Pinecone API key or environment not set. Vector storage will not be available.")
            return
        if not index_name:
            print("Warning: Pinecone index name not set. Vector storage will not be available.")
            return

        try:
            pinecone.init(api_key=api_key, environment=environment)

            # Check if index exists, create if it doesn't
            if index_name not in pinecone.list_indexes():
                pinecone.create_index(
                    name=index_name,
                    dimension=self.model.get_sentence_embedding_dimension(),
                    metric="cosine",
                )

            self.index = pinecone.Index(index_name)
            print(f"Connected to Pinecone index: {index_name}")
        except Exception as e:
            print(f"Error connecting to Pinecone: {str(e)}")
            self.index = None

    def _mock_vector(self) -> List[float]:
        """Return one random vector of ``MOCK_DIMENSION`` floats."""
        return [random.random() for _ in range(self.MOCK_DIMENSION)]

    def generate_embedding(self, text: str) -> List[float]:
        """
        Generate an embedding for a text.

        Args:
            text: Text to embed.

        Returns:
            Embedding vector (random values in mock mode).
        """
        if self.use_mock:
            return self._mock_vector()

        return self.model.encode(text).tolist()

    def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
        """
        Generate embeddings for multiple texts.

        Args:
            texts: List of texts to embed.

        Returns:
            List of embedding vectors, one per input text.
        """
        if self.use_mock:
            return [self._mock_vector() for _ in texts]

        return self.model.encode(texts).tolist()

    def store_embeddings(self, ids: List[str], embeddings: List[List[float]],
                         metadata: Optional[List[Dict[str, Any]]] = None) -> bool:
        """
        Store embeddings in Pinecone.

        Args:
            ids: List of IDs for the embeddings.
            embeddings: List of embedding vectors.
            metadata: Optional list of metadata dictionaries.

        Returns:
            True if storage was successful (always in mock mode), False otherwise.
        """
        if self.use_mock:
            print(f"Mock: Stored {len(ids)} embeddings")
            return True

        if self.index is None:
            print("Warning: Pinecone index not available. Embeddings not stored.")
            return False

        if metadata is None:
            metadata = [{} for _ in ids]

        # zip() would silently drop the unmatched tail; refuse instead.
        if not (len(ids) == len(embeddings) == len(metadata)):
            print("Error storing embeddings in Pinecone: ids, embeddings and metadata differ in length")
            return False

        vectors = list(zip(ids, embeddings, metadata))

        try:
            self.index.upsert(vectors=vectors)
            return True
        except Exception as e:
            print(f"Error storing embeddings in Pinecone: {str(e)}")
            return False

    def search_similar(self, query_embedding: List[float], top_k: int = 5) -> List[Dict[str, Any]]:
        """
        Search for similar embeddings in Pinecone.

        Args:
            query_embedding: Query embedding vector.
            top_k: Number of results to return.

        Returns:
            List of dicts with ``id``, ``score`` and ``metadata``. Mock mode
            returns at most three canned results.
        """
        if self.use_mock:
            print(f"Mock: Searching for similar embeddings (top_k={top_k})")
            return [
                {
                    'id': f"mock_doc_{i}",
                    'score': 0.9 - (i * 0.1),  # Decreasing similarity scores
                    'metadata': {
                        'document_id': f"mock_doc_{i}",
                        'chunk_index': i,
                        'title': f"Mock Document {i}",
                        'description': f"This is a mock document {i}",
                        'chunk_text': f"This is the content of mock document {i}..."
                    }
                }
                for i in range(min(top_k, 3))  # Return at most 3 mock results
            ]

        if self.index is None:
            print("Warning: Pinecone index not available. Search not performed.")
            return []

        try:
            results = self.index.query(
                vector=query_embedding,
                top_k=top_k,
                include_metadata=True,
            )
            return [
                {
                    'id': match['id'],
                    'score': match['score'],
                    'metadata': match.get('metadata', {}),
                }
                for match in results.get('matches', [])
            ]
        except Exception as e:
            print(f"Error searching in Pinecone: {str(e)}")
            return []

    def delete_embeddings(self, ids: List[str]) -> bool:
        """
        Delete embeddings from Pinecone.

        Args:
            ids: List of IDs to delete.

        Returns:
            True if deletion was successful (always in mock mode), False otherwise.
        """
        if self.use_mock:
            print(f"Mock: Deleted {len(ids)} embeddings")
            return True

        if self.index is None:
            print("Warning: Pinecone index not available. Deletion not performed.")
            return False

        try:
            self.index.delete(ids=ids)
            return True
        except Exception as e:
            print(f"Error deleting embeddings from Pinecone: {str(e)}")
            return False
class ChatService:
    """Service for chat functionality.

    Chats are held in ``self.chats`` (chat ID -> chat dict) and mirrored to a
    JSON file next to the configured SQLite path after every change.
    """

    def __init__(self):
        """Initialize the chat service and load any persisted chats."""
        # Ensure the data directory exists. A bare file name has an empty
        # dirname, and os.makedirs('') raises, so only create real paths.
        data_dir = os.path.dirname(config.SQLITE_DB_PATH)
        if data_dir:
            os.makedirs(data_dir, exist_ok=True)

        # For now, chat data lives in a simple JSON file
        self.chats_file = os.path.join(data_dir, 'chats.json')
        self._load_chats()

    def _load_chats(self):
        """Load chats from file; a missing, empty or unreadable file yields no chats."""
        self.chats = {}
        if not os.path.exists(self.chats_file):
            return

        try:
            with open(self.chats_file, 'r', encoding='utf-8') as f:
                raw = f.read()
            if not raw.strip():
                return
            loaded = json.loads(raw)
        except (OSError, ValueError) as e:
            print(f"Error loading chats: {str(e)}")
            return

        if isinstance(loaded, dict):
            self.chats = loaded
        else:
            print("Error loading chats: expected a JSON object")

    def _save_chats(self):
        """Save chats to file.

        Writes to a sibling temporary file and renames it over the real one,
        so an interrupted write cannot destroy the existing chat history.
        Failures are reported, not raised.
        """
        tmp_path = f"{self.chats_file}.tmp"
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(self.chats, f, indent=2)
            os.replace(tmp_path, self.chats_file)
        except (OSError, TypeError, ValueError) as e:
            print(f"Error saving chats: {str(e)}")

    def create_chat(self, user_id: str, title: Optional[str] = None,
                    model_id: Optional[str] = None, is_team_chat: bool = False) -> str:
        """
        Create a new chat.

        Args:
            user_id: ID of the user creating the chat.
            title: Optional title; defaults to "Chat N", where N is the number
                of stored chats plus one.
            model_id: Optional model ID; defaults to ``config.DEFAULT_MODEL``.
            is_team_chat: Whether this is a team chat. The creator becomes the
                first team member.

        Returns:
            ID of the created chat.
        """
        chat_id = str(uuid.uuid4())
        # One clock read so created_at and updated_at start out identical.
        now = datetime.utcnow().isoformat()

        self.chats[chat_id] = {
            'id': chat_id,
            'title': title or f"Chat {len(self.chats) + 1}",
            'user_id': user_id,
            'model_id': model_id or config.DEFAULT_MODEL,
            'is_team_chat': is_team_chat,
            'created_at': now,
            'updated_at': now,
            'messages': [],
            'team_members': [user_id] if is_team_chat else []
        }

        self._save_chats()
        return chat_id

    def add_message(self, chat_id: str, content: str, user_id: str,
                    is_user_message: bool = True) -> Dict[str, Any]:
        """
        Add a message to a chat.

        Args:
            chat_id: ID of the chat.
            content: Message content.
            user_id: ID of the user sending the message. Stored only for user
                messages; bot messages record ``None``.
            is_user_message: Whether this is a user message (vs. bot message).

        Returns:
            Added message.

        Raises:
            ValueError: If the chat does not exist.
        """
        if chat_id not in self.chats:
            raise ValueError(f"Chat with ID {chat_id} not found")

        now = datetime.utcnow().isoformat()
        message = {
            'id': str(uuid.uuid4()),
            'content': content,
            'user_id': user_id if is_user_message else None,
            'is_user_message': is_user_message,
            'timestamp': now
        }

        chat = self.chats[chat_id]
        chat['messages'].append(message)
        chat['updated_at'] = datetime.utcnow().isoformat()

        self._save_chats()
        return message

    def get_chat(self, chat_id: str) -> Optional[Dict[str, Any]]:
        """
        Get a chat by ID.

        Args:
            chat_id: ID of the chat.

        Returns:
            Chat data if found, None otherwise.
        """
        return self.chats.get(chat_id)

    def get_user_chats(self, user_id: str) -> List[Dict[str, Any]]:
        """
        Get all chats for a user.

        A private chat is included when the user owns it; a team chat is
        included when the user is listed in its ``team_members``.

        Args:
            user_id: ID of the user.

        Returns:
            List of chat data, most recently updated first.
        """
        def is_visible(chat: Dict[str, Any]) -> bool:
            if chat.get('is_team_chat'):
                return user_id in chat.get('team_members', [])
            return chat.get('user_id') == user_id

        visible = [chat for chat in self.chats.values() if is_visible(chat)]
        visible.sort(key=lambda chat: chat.get('updated_at', ''), reverse=True)
        return visible

    def add_team_member(self, chat_id: str, user_id: str) -> bool:
        """
        Add a user to a team chat.

        Args:
            chat_id: ID of the team chat.
            user_id: ID of the user to add.

        Returns:
            True if the chat exists and is a team chat (including when the user
            was already a member), False otherwise.
        """
        chat = self.chats.get(chat_id)
        if chat is None or not chat.get('is_team_chat'):
            return False

        members = chat.setdefault('team_members', [])
        if user_id not in members:
            members.append(user_id)
            self._save_chats()

        return True

    def remove_team_member(self, chat_id: str, user_id: str) -> bool:
        """
        Remove a user from a team chat.

        Args:
            chat_id: ID of the team chat.
            user_id: ID of the user to remove.

        Returns:
            True if the chat exists and is a team chat (including when the user
            was not a member), False otherwise.
        """
        chat = self.chats.get(chat_id)
        if chat is None or not chat.get('is_team_chat'):
            return False

        members = chat.setdefault('team_members', [])
        if user_id in members:
            members.remove(user_id)
            self._save_chats()

        return True

    def delete_chat(self, chat_id: str) -> bool:
        """
        Delete a chat.

        Args:
            chat_id: ID of the chat to delete.

        Returns:
            True if the chat existed and was removed, False otherwise.
        """
        if chat_id not in self.chats:
            return False

        del self.chats[chat_id]
        self._save_chats()
        return True

    def get_chat_response(self, chat_id: str, message: str, user_id: str,
                         use_rag: bool = False, temperature: Optional[float] = None,
                         max_tokens: Optional[int] = None, top_p: Optional[float] = None,
                         frequency_penalty: Optional[float] = None, presence_penalty: Optional[float] = None,
                         stop_sequences: Optional[List[str]] = None, system_prompt: Optional[str] = None,
                         min_p: Optional[float] = None, top_k: Optional[int] = None,
                         repeat_penalty: Optional[float] = None, function_calling: Optional[bool] = None) -> Dict[str, Any]:
        """
        Get a response from the chatbot.

        Stores the user message, asks the model service for a reply using the
        chat's model and up to the last ten messages as context, then stores
        and returns the reply.

        Args:
            chat_id: ID of the chat.
            message: User message.
            user_id: ID of the user sending the message.
            use_rag: Whether to use RAG (Retrieval Augmented Generation).
            temperature: Controls randomness in the response.
            max_tokens: Maximum number of tokens to generate.
            top_p: Nucleus sampling parameter.
            frequency_penalty: Penalizes repeated tokens.
            presence_penalty: Penalizes repeated topics.
            stop_sequences: Sequences where the API will stop generating.
            system_prompt: System prompt to guide the model's behavior.
            min_p: Minimum probability threshold for token selection.
            top_k: Only sample from the top k tokens.
            repeat_penalty: Penalty for repeating tokens.
            function_calling: Whether to enable function calling.

        Returns:
            Bot response message.

        Raises:
            ValueError: If the chat does not exist.
        """
        if chat_id not in self.chats:
            raise ValueError(f"Chat with ID {chat_id} not found")

        chat = self.chats[chat_id]

        # Add user message to chat
        self.add_message(chat_id, message, user_id, is_user_message=True)

        # Use last 10 messages as context.
        # NOTE(review): this window already contains the message stored just
        # above, and the same text is also passed as `prompt` below - confirm
        # the model service does not send it twice.
        context = [
            {
                "role": "user" if entry['is_user_message'] else "assistant",
                "content": entry['content'],
            }
            for entry in chat['messages'][-10:]
        ]

        # NOTE(review): None values are passed through explicitly, so the
        # defaults declared on ModelParameters are not applied here - confirm
        # that is intended.
        model_params = ModelParameters(
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            presence_penalty=presence_penalty,
            stop_sequences=stop_sequences,
            system_prompt=system_prompt,
            min_p=min_p,
            top_k=top_k,
            repeat_penalty=repeat_penalty,
            function_calling=function_calling
        )

        # Get response from model
        response_text = model_service.generate_response(
            model_id=chat['model_id'],
            prompt=message,
            context=context,
            use_rag=use_rag,
            model_params=model_params
        )

        # Add bot response to chat
        return self.add_message(
            chat_id=chat_id,
            content=response_text,
            user_id=user_id,
            is_user_message=False
        )
class ModelParameters(BaseModel):
    """Sampling and prompting options accepted by the model backends.

    Every field is optional. Fields left as ``None`` are omitted by
    :meth:`to_dict`, so only explicitly set (or defaulted) values are
    forwarded to a provider.
    """

    # --- Basic parameters -------------------------------------------------
    temperature: Optional[float] = Field(
        default=0.7,
        ge=0.0,
        le=2.0,
        description="Controls randomness: 0 is deterministic, higher values are more random",
    )
    max_tokens: Optional[int] = Field(
        default=1000,
        gt=0,
        description="Maximum number of tokens to generate",
    )

    # --- Sampling parameters ----------------------------------------------
    top_p: Optional[float] = Field(
        default=1.0,
        ge=0.0,
        le=1.0,
        description="Nucleus sampling: consider tokens with top_p probability mass",
    )
    top_k: Optional[int] = Field(
        default=None,
        gt=0,
        description="Only sample from the top k tokens",
    )

    # --- Repetition control -----------------------------------------------
    frequency_penalty: Optional[float] = Field(
        default=0.0,
        ge=-2.0,
        le=2.0,
        description="Penalizes repeated tokens",
    )
    presence_penalty: Optional[float] = Field(
        default=0.0,
        ge=-2.0,
        le=2.0,
        description="Penalizes repeated topics",
    )

    # --- Advanced parameters ----------------------------------------------
    stop_sequences: Optional[List[str]] = Field(
        default=None,
        description="Sequences where the API will stop generating",
    )
    min_p: Optional[float] = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="Minimum probability threshold for token selection",
    )
    repeat_penalty: Optional[float] = Field(
        default=None,
        ge=0.0,
        description="Penalty for repeating tokens",
    )
    presence_penalty_tokens: Optional[int] = Field(
        default=None,
        gt=0,
        description="Number of tokens to consider for presence penalty",
    )

    # --- System prompt ----------------------------------------------------
    system_prompt: Optional[str] = Field(
        default=None,
        description="System prompt to guide the model's behavior",
    )

    # --- Function calling -------------------------------------------------
    function_calling: Optional[bool] = Field(
        default=None,
        description="Whether to enable function calling",
    )

    # --- Model-specific extras (merged last by to_dict) -------------------
    extra_params: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Additional model-specific parameters",
    )

    @validator('temperature', 'top_p', 'frequency_penalty', 'presence_penalty', pre=True)
    def validate_float_params(cls, v):
        """Coerce the raw value to ``float``; ``None`` and bools pass through untouched."""
        if v is None or isinstance(v, bool):
            # bool is an int subclass; leave it for the field validation to judge.
            return v
        return float(v)

    @validator('max_tokens', 'top_k', pre=True)
    def validate_int_params(cls, v):
        """Coerce the raw value to ``int``; ``None`` and bools pass through untouched."""
        if v is None or isinstance(v, bool):
            return v
        return int(v)

    def to_dict(self) -> Dict[str, Any]:
        """
        Flatten the parameters into a plain dictionary.

        Returns:
            The fields whose value is not ``None`` (``extra_params`` itself is
            left out), updated with the entries of ``extra_params`` so that
            extras override same-named fields.
        """
        flattened = {
            name: value
            for name, value in self.dict().items()
            if value is not None and name != 'extra_params'
        }

        if self.extra_params:
            flattened.update(self.extra_params)

        return flattened

    def for_provider(self, provider: str) -> Dict[str, Any]:
        """
        Build the parameter dictionary using a provider's naming rules.

        Args:
            provider: Provider name (e.g., 'openai', 'ollama', 'anthropic').

        Returns:
            ``to_dict()`` adjusted for the provider; unknown providers (and
            'cohere') receive it unchanged.
        """
        params = self.to_dict()

        if provider == 'ollama':
            # Only this subset is forwarded to Ollama.
            ollama_keys = ['temperature', 'top_p', 'top_k', 'max_tokens', 'stop_sequences']
            params = {name: value for name, value in params.items() if name in ollama_keys}

        if provider in ('openai', 'ollama'):
            # Both call the stop list 'stop' rather than 'stop_sequences'.
            if 'stop_sequences' in params:
                params['stop'] = params.pop('stop_sequences')
        elif provider == 'anthropic':
            # Temperature is capped at 1.0 for Anthropic; 'stop_sequences' keeps its name.
            if 'temperature' in params:
                params['temperature'] = min(params['temperature'], 1.0)

        return params
class ModelService:
    """Service for model management and interaction.

    Holds the catalogue of models the service exposes and turns a prompt
    (plus optional history and sampling parameters) into a reply, either via
    OpenWebUI (when RAG is requested) or directly via Ollama.
    """

    # Available models
    AVAILABLE_MODELS = {
        'gemma3': {
            'name': 'Gemma 3',
            'description': 'Google Gemma 3 model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        },
        'llama3.3': {
            'name': 'Llama 3 (70B)',
            'description': 'Meta Llama 3 70B model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        },
        'llama3.1': {
            'name': 'Llama 3 (8B)',
            'description': 'Meta Llama 3 8B model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        },
        'mistral': {
            'name': 'Mistral',
            'description': 'Mistral AI model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        },
        'deepseek': {
            'name': 'DeepSeek',
            'description': 'DeepSeek model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        }
    }

    # System prompt used when the caller does not supply one.
    DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant."

    def __init__(self):
        """Initialize the model service from the module-level ``config``."""
        self.default_model = config.DEFAULT_MODEL
        self.ollama_api_url = config.OLLAMA_API_URL
        self.openwebui_url = config.OPENWEBUI_URL
        self.openwebui_api_key = config.OPENWEBUI_API_KEY

    def get_available_models(self) -> List[Dict[str, Any]]:
        """
        Get a list of available models.

        Returns:
            List of model information dictionaries, in catalogue order.
        """
        return [self.get_model_info(model_id) for model_id in self.AVAILABLE_MODELS]

    def get_model_info(self, model_id: str) -> Optional[Dict[str, Any]]:
        """
        Get information about a specific model.

        Args:
            model_id: ID of the model.

        Returns:
            Model information dictionary if found, None otherwise.
        """
        if model_id not in self.AVAILABLE_MODELS:
            return None

        return {
            'id': model_id,
            'is_default': model_id == self.default_model,
            **self.AVAILABLE_MODELS[model_id]
        }

    def generate_response(self, model_id: str, prompt: str,
                          context: Optional[List[Dict[str, str]]] = None,
                          use_rag: bool = False,
                          model_params: Optional["ModelParameters"] = None) -> str:
        """
        Generate a response from the model.

        Args:
            model_id: ID of the model to use; unknown IDs fall back to the
                configured default model.
            prompt: User prompt.
            context: Optional conversation context (role/content messages).
            use_rag: Whether to use RAG (Retrieval Augmented Generation) by
                routing the request through OpenWebUI first.
            model_params: Optional model parameters.

        Returns:
            Generated response. Backend failures never raise: they are
            reported as an ``"Error ..."`` string, as before.
        """
        if model_id not in self.AVAILABLE_MODELS:
            # The default may legitimately be a model outside the catalogue,
            # so nothing here indexes AVAILABLE_MODELS with it.
            model_id = self.default_model

        params = model_params.to_dict() if model_params else {}
        system_prompt = model_params.system_prompt if model_params else None
        messages = self._build_messages(prompt, context, system_prompt)

        if use_rag:
            try:
                return self._call_openwebui(model_id, messages, params)
            except Exception as e:
                print(f"Error calling OpenWebUI API: {str(e)}")
                # Fall back to direct Ollama call without RAG
                print("Falling back to direct Ollama call without RAG")

        try:
            return self._call_ollama(model_id, messages, params)
        except Exception as e:
            print(f"Error calling Ollama API: {str(e)}")
            return f"Error generating response: {str(e)}"

    @staticmethod
    def _build_messages(prompt: str,
                        context: Optional[List[Dict[str, str]]] = None,
                        system_prompt: Optional[str] = None) -> List[Dict[str, str]]:
        """Assemble system prompt, history and the user turn into one message list."""
        messages = [{
            "role": "system",
            "content": system_prompt or ModelService.DEFAULT_SYSTEM_PROMPT
        }]

        if context:
            messages.extend(context)

        # NOTE(review): the chat service appears to store the user message
        # before building the context, so the history can already end with
        # this prompt; appending it again would send the turn twice.
        last = messages[-1]
        already_present = last.get("role") == "user" and last.get("content") == prompt
        if not already_present:
            messages.append({"role": "user", "content": prompt})

        return messages

    @staticmethod
    def _ollama_options(params: Dict[str, Any]) -> Dict[str, Any]:
        """Translate generic parameters into Ollama's ``options`` object."""
        options = {key: params[key] for key in ('temperature', 'top_p', 'top_k') if key in params}
        if 'max_tokens' in params:
            # Ollama names the generation limit 'num_predict'.
            options['num_predict'] = params['max_tokens']
        return options

    @staticmethod
    def _extract_content(result: Any) -> Optional[str]:
        """Pull the reply text out of any of the supported response shapes.

        Recognises ``{"message": {"content": ...}}`` (Ollama ``/api/chat``),
        ``{"choices": [{"message": {"content": ...}}]}`` (OpenAI-compatible)
        and ``{"response": ...}`` (Ollama ``/api/generate``). Returns None
        when none of them matches.
        """
        if not isinstance(result, dict):
            return None

        message = result.get('message')
        if isinstance(message, dict) and 'content' in message:
            return message['content']

        choices = result.get('choices')
        if isinstance(choices, list) and choices and isinstance(choices[0], dict):
            message = choices[0].get('message')
            if isinstance(message, dict) and 'content' in message:
                return message['content']

        if 'response' in result:
            return result['response']

        return None

    def _call_openwebui(self, model_id: str, messages: List[Dict[str, str]],
                        params: Dict[str, Any]) -> str:
        """Send the conversation to OpenWebUI with its knowledge base enabled."""
        payload = {
            "model": model_id,
            "messages": messages,
            "use_knowledge": True,  # Enable RAG
            "stream": False
        }
        for key in ('temperature', 'max_tokens', 'top_p'):
            if key in params:
                payload[key] = params[key]

        headers = {"Content-Type": "application/json"}
        if self.openwebui_api_key:
            headers["Authorization"] = f"Bearer {self.openwebui_api_key}"

        response = requests.post(
            f"{self.openwebui_url}/api/chat/completions",
            headers=headers,
            json=payload,
            timeout=60  # Longer timeout for RAG
        )
        response.raise_for_status()

        content = self._extract_content(response.json())
        if content is None:
            return "Error: Unexpected response format from OpenWebUI"
        return content

    def _call_ollama(self, model_id: str, messages: List[Dict[str, str]],
                     params: Dict[str, Any]) -> str:
        """Send the conversation to Ollama's chat endpoint."""
        payload = {
            "model": model_id,
            "messages": messages,
            "stream": False
        }
        options = self._ollama_options(params)
        if options:
            payload["options"] = options

        # The payload is message-based, so it must go to /api/chat;
        # /api/generate expects a single 'prompt' string instead.
        response = requests.post(
            f"{self.ollama_api_url}/api/chat",
            headers={"Content-Type": "application/json"},
            json=payload,
            timeout=30
        )
        response.raise_for_status()

        content = self._extract_content(response.json())
        if content is None:
            return "Error: Unexpected response format from Ollama"
        return content
production) +sqlalchemy==2.0.20 +pymysql==1.1.0 +cryptography==41.0.3 # Required for PyMySQL + +# Utilities +tqdm==4.67.1 +requests==2.32.3 +tenacity==8.5.0 # For retrying API calls + +# Production dependencies +gunicorn==21.2.0 # Production WSGI server +python-json-logger==2.0.7 # Structured logging +prometheus-client==0.17.1 # Metrics +sentry-sdk==1.39.1 # Error tracking diff --git a/ai_service/requirements.txt b/ai_service/requirements.txt new file mode 100644 index 0000000..752109b --- /dev/null +++ b/ai_service/requirements.txt @@ -0,0 +1,19 @@ +# Core dependencies +fastapi==0.103.1 +uvicorn==0.23.2 +pydantic==2.3.0 +python-dotenv==1.0.0 + +# AI/ML dependencies +pinecone-client==2.2.2 +langchain==0.0.267 +sentence-transformers==2.2.2 +numpy==1.26.4 + +# For local storage +sqlalchemy==2.0.20 +# sqlite3 is part of Python's standard library, so it must not be listed as a pip requirement + +# Utilities +tqdm==4.67.1 +requests==2.32.3 diff --git a/ai_service/run.py b/ai_service/run.py new file mode 100644 index 0000000..c363a4f --- /dev/null +++ b/ai_service/run.py @@ -0,0 +1,21 @@ +""" +Script to run the AI service. +""" + +import uvicorn +import os +import sys + +# Add the parent directory to the path so we can import ai_service +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +from ai_service.config import config + +if __name__ == "__main__": + print(f"Starting AI service on {config.API_HOST}:{config.API_PORT}") + uvicorn.run( + "ai_service.api:app", + host=config.API_HOST, + port=config.API_PORT, + reload=True + ) diff --git a/ai_service_workflow.md b/ai_service_workflow.md new file mode 100644 index 0000000..81ec340 --- /dev/null +++ b/ai_service_workflow.md @@ -0,0 +1,173 @@ +# AI Service Workflow and Architecture + +## Overview + +The AI Service is a modular, API-driven system that provides document processing, embedding, and chat functionality with multiple AI models.
It's designed to support a chatbot application with document training, private/team chat options, and model switching capabilities. + +## System Architecture + +``` +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ │ │ │ │ │ +│ Client Apps │────▶│ AI Service API │────▶│ Vector Store │ +│ │ │ │ │ (Pinecone) │ +└─────────────────┘ └────────┬────────┘ └─────────────────┘ + │ + ▼ + ┌─────────────────┐ ┌─────────────────┐ + │ │ │ │ + │ AI Models │────▶│ Local Storage │ + │ │ │ │ + └─────────────────┘ └─────────────────┘ +``` + +## Core Components + +1. **Document Service**: Processes documents, splits them into chunks, and stores embeddings +2. **Embedding Service**: Generates vector embeddings for text using sentence transformers +3. **Model Service**: Manages different AI models and generates responses +4. **Chat Service**: Handles chat creation, message history, and team chat functionality + +## API Endpoints Workflow + +### Health Check + +- **Endpoint**: `GET /health` +- **Purpose**: Simple health check to verify the service is running +- **Response**: `{"status": "healthy"}` + +### Document Management Workflow + +1. **Process Document** + - **Endpoint**: `POST /documents` + - **Purpose**: Process a document for embedding + - **Workflow**: + - Client submits document content, title, and optional metadata + - Document is split into chunks + - Embeddings are generated for each chunk + - Embeddings are stored in Pinecone + - Document metadata is stored locally + - **Response**: Document metadata including ID and chunk count + +2. **Get All Documents** + - **Endpoint**: `GET /documents` + - **Purpose**: Retrieve all processed documents + - **Response**: List of document metadata + +3. **Get Document by ID** + - **Endpoint**: `GET /documents/{doc_id}` + - **Purpose**: Retrieve a specific document's metadata + - **Response**: Document metadata + +4. 
**Delete Document** + - **Endpoint**: `DELETE /documents/{doc_id}` + - **Purpose**: Remove a document and its embeddings + - **Workflow**: + - Document chunks are deleted from Pinecone + - Document metadata is removed from local storage + - **Response**: Success status + +5. **Search Documents** + - **Endpoint**: `POST /documents/search` + - **Purpose**: Semantic search across document embeddings + - **Workflow**: + - Query text is converted to an embedding + - Similar embeddings are found in Pinecone + - Results are returned with metadata and similarity scores + - **Response**: List of search results with metadata + +### Model Management Workflow + +1. **Get Available Models** + - **Endpoint**: `GET /models` + - **Purpose**: List all available AI models + - **Response**: List of model information (ID, name, description, etc.) + +2. **Get Model Information** + - **Endpoint**: `GET /models/{model_id}` + - **Purpose**: Get details about a specific model + - **Response**: Model information + +### Chat Workflow + +1. **Create Chat** + - **Endpoint**: `POST /chats` + - **Purpose**: Create a new chat session + - **Workflow**: + - Client provides user ID, optional title, and model ID + - System generates a unique chat ID + - Chat metadata is stored locally + - **Response**: Created chat information + +2. **Get User Chats** + - **Endpoint**: `GET /chats/user/{user_id}` + - **Purpose**: Get all chats for a specific user + - **Response**: List of chat information + +3. **Get Chat by ID** + - **Endpoint**: `GET /chats/{chat_id}` + - **Purpose**: Get a specific chat's information and messages + - **Response**: Chat information including message history + +4. 
**Send Message** + - **Endpoint**: `POST /chats/{chat_id}/messages` + - **Purpose**: Send a message and get an AI response + - **Workflow**: + - Client sends message with user ID and optional model parameters + - User message is added to chat history + - If RAG is enabled, relevant documents are retrieved + - AI model generates a response based on chat history and context + - Bot response is added to chat history + - **Response**: Bot response message + +5. **Team Chat Management** + - **Add Team Member**: `POST /chats/{chat_id}/members/{user_id}` + - **Remove Team Member**: `DELETE /chats/{chat_id}/members/{user_id}` + - **Purpose**: Manage team chat participants + - **Response**: Success status + +6. **Delete Chat** + - **Endpoint**: `DELETE /chats/{chat_id}` + - **Purpose**: Remove a chat and its messages + - **Response**: Success status + +## Retrieval-Augmented Generation (RAG) Workflow + +When RAG is enabled in a chat message request: + +1. User message is processed +2. Message is converted to an embedding +3. Similar document chunks are retrieved from Pinecone +4. Retrieved chunks are added as context to the prompt +5. AI model generates a response using both the chat history and document context +6. Response is returned to the user + +## Model Parameters + +The API supports customizing AI model behavior through parameters: + +- `temperature`: Controls randomness (0.0 to 2.0) +- `max_tokens`: Maximum response length +- `top_p`: Nucleus sampling parameter (0.0 to 1.0) +- `frequency_penalty`: Penalizes repeated tokens (-2.0 to 2.0) +- `presence_penalty`: Penalizes repeated topics (-2.0 to 2.0) +- `stop_sequences`: Sequences where generation stops +- `system_prompt`: Custom system prompt to guide the model + +## Deployment + +The service is deployed using uvicorn: + +```bash +nohup uvicorn ai_service.api:app --host 0.0.0.0 --port 5251 & +``` + +## Example Usage Flow + +1. Process documents for knowledge base +2. Create a new chat session +3.
def create_app(config_class=Config):
    """
    Application factory for the Flask chatbot app.

    Args:
        config_class: Object whose attributes are loaded into ``app.config``.

    Returns:
        The configured Flask application, with the route blueprint
        registered and the database module initialised.
    """
    application = Flask(__name__)
    application.config.from_object(config_class)

    # Imported inside the factory rather than at module import time.
    from app.api import routes as flask_routes
    application.register_blueprint(flask_routes.bp)

    # For now, we'll use only Flask routes and disable FastAPI integration
    # until we resolve the integration issues

    # ``db`` here is the app.database.db module; its init_app binds SQLAlchemy.
    from app.database import db
    db.init_app(application)

    return application
@router.post("/chat/{chat_id}/message", response_model=MessageResponse)
async def send_message(chat_id: int, request: MessageRequest):
    """
    Forward a user message to the chatbot and return its reply.

    Args:
        chat_id: ID of the chat.
        request: Message request.

    Returns:
        The last message of the chat after the bot has answered.

    Raises:
        HTTPException: 404 when the chatbot service raises ``ValueError``.
    """
    try:
        # Called for its effect on the chat; the reply is read back below.
        chatbot_service.get_response(chat_id, request.message)

        history = chatbot_service.get_chat_messages(chat_id)
        return history[-1]

    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))
@bp.route('/api/chat', methods=['POST'])
def create_chat():
    """
    Create a new chat.

    The JSON body is optional; ``user_id`` defaults to ``'default_user'``
    when the body is missing, is not JSON, or is not an object.

    Returns:
        JSON response with chat ID.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        # A missing or malformed body must not crash the handler.
        payload = {}

    user_id = payload.get('user_id', 'default_user')
    chat_id = chatbot_service.create_chat(user_id)

    return jsonify({
        'chat_id': chat_id,
        'messages': []
    })

# The rule must declare chat_id: without the URL variable Flask has nothing
# to pass to the view, which requires the argument.
@bp.route('/api/chat/<int:chat_id>/message', methods=['POST'])
def send_message(chat_id):
    """
    Send a message to the chatbot.

    Args:
        chat_id: ID of the chat.

    Returns:
        JSON response with bot response. Aborts with 400 when the body has
        no ``message`` and with 404 when the chatbot service raises
        ``ValueError``.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'message' not in payload:
        abort(400, description="Message is required")

    try:
        # Called for its effect on the chat; the reply is read back below.
        chatbot_service.get_response(chat_id, payload['message'])

        # Get the last message (bot response)
        messages = chatbot_service.get_chat_messages(chat_id)
        return jsonify(messages[-1])

    except ValueError as e:
        abort(404, description=str(e))

@bp.route('/api/chat/<int:chat_id>', methods=['GET'])
def get_chat(chat_id):
    """
    Get a chat by ID.

    Args:
        chat_id: ID of the chat.

    Returns:
        JSON response with chat messages; 404 when the chatbot service
        raises ``ValueError``.
    """
    try:
        messages = chatbot_service.get_chat_messages(chat_id)

        return jsonify({
            'chat_id': chat_id,
            'messages': messages
        })

    except ValueError as e:
        abort(404, description=str(e))
class Config:
    """Settings shared by every environment; values come from the process environment."""

    # Flask configuration
    SECRET_KEY = os.environ.get('SECRET_KEY', 'dev-key-for-development-only')
    DEBUG = False
    TESTING = False

    # Database configuration
    SQLALCHEMY_DATABASE_URI = os.environ.get('DATABASE_URL', 'sqlite:///chatbot.db')
    SQLALCHEMY_TRACK_MODIFICATIONS = False
    # Only the literal string "true" (any case) enables initialisation.
    INITIALIZE_DATABASE = os.environ.get('INITIALIZE_DATABASE', 'False').lower() == 'true'

    # Pinecone configuration
    PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY', '')
    PINECONE_ENVIRONMENT = os.environ.get('PINECONE_ENVIRONMENT', '')
    PINECONE_INDEX_NAME = os.environ.get('PINECONE_INDEX_NAME', 'chatbot-index')

    # Model configuration
    DEFAULT_MODEL = os.environ.get('DEFAULT_MODEL', 'gpt-3.5-turbo')
    OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', '')


class DevelopmentConfig(Config):
    """Settings for local development: debug mode on."""

    DEBUG = True


class TestingConfig(Config):
    """Settings for the test suite: testing flag on, in-memory SQLite."""

    TESTING = True
    SQLALCHEMY_DATABASE_URI = 'sqlite:///:memory:'


class ProductionConfig(Config):
    """Settings for production; ``init_app`` verifies the environment."""

    @classmethod
    def init_app(cls, app):
        """Fail fast when a required environment variable is unset or empty.

        Args:
            app: Application being initialised (not used by the check).

        Raises:
            RuntimeError: Listing every missing variable.
        """
        required_vars = (
            'SECRET_KEY',
            'DATABASE_URL',
            'PINECONE_API_KEY',
            'PINECONE_ENVIRONMENT',
            'OPENAI_API_KEY',
        )

        missing_vars = []
        for name in required_vars:
            if not os.environ.get(name):
                missing_vars.append(name)

        if not missing_vars:
            return

        raise RuntimeError(
            f"Missing required environment variables: {', '.join(missing_vars)}"
        )


# Lookup table from environment name to configuration class.
config = {
    'development': DevelopmentConfig,
    'testing': TestingConfig,
    'production': ProductionConfig,
    'default': DevelopmentConfig
}
def init_app(app):
    """
    Bind the shared SQLAlchemy instance to a Flask application.

    Tables are created only when the app's ``INITIALIZE_DATABASE`` setting
    is truthy.

    Args:
        app: Flask application instance.
    """
    db.init_app(app)

    if not app.config.get('INITIALIZE_DATABASE', False):
        return

    # Importing the model modules registers their tables on the metadata.
    from app.models import user, chat, document

    # create_all() only adds tables that do not exist yet.
    with app.app_context():
        db.create_all()
class Chat(db.Model):
    """Chat model representing a chat session."""

    __tablename__ = 'chats'

    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.String(100), nullable=True)
    is_team_chat = db.Column(db.Boolean, default=False)
    model_name = db.Column(db.String(50), nullable=False)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'), nullable=False)

    # Relationships: messages and memberships are deleted together with the chat.
    messages = db.relationship('Message', backref='chat', lazy='dynamic', cascade='all, delete-orphan')
    team_members = db.relationship('TeamChatMember', backref='chat', lazy='dynamic', cascade='all, delete-orphan')

    def __repr__(self):
        # Identify the row; an empty repr makes logs and debuggers useless.
        return f'<Chat {self.id}>'


class Message(db.Model):
    """Message model representing a single message in a chat."""

    __tablename__ = 'messages'

    id = db.Column(db.Integer, primary_key=True)
    content = db.Column(db.Text, nullable=False)
    is_user_message = db.Column(db.Boolean, default=True)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)

    # Foreign keys (user_id is nullable, unlike chat_id)
    chat_id = db.Column(db.Integer, db.ForeignKey('chats.id'), nullable=False)
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'), nullable=True)

    def __repr__(self):
        # Identify the row; an empty repr makes logs and debuggers useless.
        return f'<Message {self.id}>'
class Document(db.Model):
    """Document model representing a document in the library."""

    __tablename__ = 'documents'

    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.String(255), nullable=False)
    description = db.Column(db.Text, nullable=True)
    file_path = db.Column(db.String(255), nullable=True)
    content_type = db.Column(db.String(50), nullable=False)
    status = db.Column(db.String(20), default='pending')  # pending, processing, completed, error
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Foreign keys
    uploaded_by = db.Column(db.Integer, db.ForeignKey('users.id'), nullable=False)

    # Relationships: chunks are deleted together with their document.
    chunks = db.relationship('DocumentChunk', backref='document', lazy='dynamic', cascade='all, delete-orphan')

    def __repr__(self):
        # Identify the row; an empty repr makes logs and debuggers useless.
        return f'<Document {self.id}>'
class User(db.Model):
    """User model representing application users."""

    __tablename__ = 'users'

    id = db.Column(db.Integer, primary_key=True)
    username = db.Column(db.String(64), unique=True, nullable=False, index=True)
    email = db.Column(db.String(120), unique=True, nullable=False, index=True)
    password_hash = db.Column(db.String(128), nullable=False)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships
    chats = db.relationship('Chat', backref='user', lazy='dynamic')

    def __repr__(self):
        # Identify the row; an empty repr makes logs and debuggers useless.
        return f'<User {self.username}>'
def add_message(self, chat_id: int, content: str,
                is_user_message: bool = True, user_id: Optional[int] = None) -> Message:
    """
    Add a message to a chat.

    The message is persisted and, when the chat exists, the chat's
    ``updated_at`` is moved to the message's creation time so the chat
    sorts as most recently active.

    Args:
        chat_id: ID of the chat.
        content: Message content.
        is_user_message: Whether this is a user message (vs. bot message).
        user_id: ID of the user sending the message (required for user messages).

    Returns:
        Created message.
    """
    message = Message(
        chat_id=chat_id,
        content=content,
        is_user_message=is_user_message,
        # Bot messages are never attributed to a user.
        user_id=user_id if is_user_message else None
    )

    db.session.add(message)

    # Update chat's updated_at timestamp
    chat = Chat.query.get(chat_id)
    if chat:
        # Insert-time column defaults are only populated once the INSERT has
        # been emitted. Flush explicitly instead of relying on an implicit
        # autoflush, which does not happen when the chat is already loaded
        # in the session; otherwise created_at may still be None here.
        db.session.flush()
        if message.created_at is not None:
            chat.updated_at = message.created_at

    db.session.commit()

    return message
class ChatbotService:
    """Database-free chatbot backend that keeps all conversations in memory."""

    def __init__(self):
        """Start with no chats and the chat ID counter at zero."""
        # chat ID -> {'user_id': <creator>, 'messages': [<message dict>, ...]}
        self.chat_history = {}
        # Most recently issued chat ID; bumped before each new chat is stored.
        self.current_chat_id = 0

    def create_chat(self, user_id: str) -> int:
        """
        Open a new, empty chat session.

        Args:
            user_id: ID of the user the chat belongs to.

        Returns:
            The newly allocated chat ID.
        """
        self.current_chat_id += 1
        new_id = self.current_chat_id
        self.chat_history[new_id] = {'user_id': user_id, 'messages': []}
        return new_id

    def _messages_or_raise(self, chat_id: int) -> List[Dict[str, Any]]:
        """Return the stored message list of a chat, or raise ValueError if unknown."""
        if chat_id not in self.chat_history:
            raise ValueError(f"Chat with ID {chat_id} not found")
        return self.chat_history[chat_id]['messages']

    def add_message(self, chat_id: int, content: str, is_user: bool = True) -> Dict[str, Any]:
        """
        Append one message to an existing chat.

        Args:
            chat_id: ID of the chat to append to.
            content: Text of the message.
            is_user: True for a user message, False for a bot message.

        Returns:
            The message dictionary that was stored.

        Raises:
            ValueError: If no chat with ``chat_id`` exists.
        """
        thread = self._messages_or_raise(chat_id)
        entry = {
            'content': content,
            'is_user': is_user,
            'timestamp': self._get_timestamp(),
        }
        thread.append(entry)
        return entry

    def get_chat_messages(self, chat_id: int) -> List[Dict[str, Any]]:
        """
        Return the messages of a chat in insertion order.

        Args:
            chat_id: ID of the chat.

        Returns:
            The chat's message list (the stored list itself, not a copy).

        Raises:
            ValueError: If no chat with ``chat_id`` exists.
        """
        return self._messages_or_raise(chat_id)

    def get_response(self, chat_id: int, message: str) -> str:
        """
        Store a user message and answer it.

        Args:
            chat_id: ID of the chat.
            message: Text sent by the user.

        Returns:
            The bot's reply, which is also stored in the chat.

        Raises:
            ValueError: If no chat with ``chat_id`` exists.
        """
        self.add_message(chat_id, message, is_user=True)

        # Placeholder behaviour: echo the input back.
        reply = f"You said: {message}"

        self.add_message(chat_id, reply, is_user=False)
        return reply

    def _get_timestamp(self) -> str:
        """Return the current UTC time as an ISO-8601 string."""
        from datetime import datetime
        now = datetime.utcnow()
        return now.isoformat()


# Module-wide shared instance
chatbot_service = ChatbotService()
def process_document(self, document_id: int) -> bool:
    """
    Process a document for embedding.

    The document's ``status`` moves from its current value to
    ``'processing'`` and then ``'completed'``; on any failure it is set
    to ``'error'``. Parsing and chunking are not implemented yet.

    Args:
        document_id: ID of the document to process.

    Returns:
        True if processing was successful, False otherwise (including
        when the document does not exist).
    """
    document = Document.query.get(document_id)
    if not document:
        return False

    try:
        # Update status to processing
        document.status = 'processing'
        db.session.commit()

        # TODO: Implement document parsing and chunking
        # This will be implemented in the next step

        # Update status to completed
        document.status = 'completed'
        db.session.commit()
        return True

    except Exception as e:
        # A failed flush/commit leaves the session unusable until it is
        # rolled back, so discard the broken transaction before trying to
        # record the error status.
        db.session.rollback()
        # Log the error
        print(f"Error processing document {document_id}: {str(e)}")

        try:
            # Update status to error
            document.status = 'error'
            db.session.commit()
        except Exception as status_error:
            # Recording the status is best effort; never let it mask the
            # original failure or leave the session in a failed state.
            db.session.rollback()
            print(f"Error recording failure status for document {document_id}: {str(status_error)}")

        return False
class ModelService:
    """Service for model management and interaction."""

    # Available models, keyed by model ID. Each entry carries the display
    # name, a description, the provider and the token limit.
    AVAILABLE_MODELS = {
        'gpt-3.5-turbo': {
            'name': 'GPT-3.5 Turbo',
            'description': 'OpenAI GPT-3.5 Turbo model',
            'provider': 'openai',
            'max_tokens': 4096
        },
        'gpt-4': {
            'name': 'GPT-4',
            'description': 'OpenAI GPT-4 model',
            'provider': 'openai',
            'max_tokens': 8192
        },
        # Add more models as needed
    }

    def __init__(self, config: Optional['Config'] = None):
        """
        Initialize the model service.

        Args:
            config: Configuration object. A fresh ``Config()`` is created
                when omitted.
        """
        self.config = config or Config()
        self.default_model = self.config.DEFAULT_MODEL

    def _describe(self, model_id: str) -> Dict[str, Any]:
        """Build the public info dictionary for a model ID known to AVAILABLE_MODELS."""
        return {
            'id': model_id,
            'is_default': model_id == self.default_model,
            **self.AVAILABLE_MODELS[model_id]
        }

    def get_available_models(self) -> List[Dict[str, Any]]:
        """
        Get a list of available models.

        Returns:
            List of model information dictionaries, in registry order.
        """
        return [self._describe(model_id) for model_id in self.AVAILABLE_MODELS]

    def get_model_info(self, model_id: str) -> Optional[Dict[str, Any]]:
        """
        Get information about a specific model.

        Args:
            model_id: ID of the model.

        Returns:
            Model information dictionary if found, None otherwise.
        """
        if model_id not in self.AVAILABLE_MODELS:
            return None

        return self._describe(model_id)

    def generate_response(self, model_id: str, prompt: str,
                          context: Optional[List[Dict[str, str]]] = None) -> str:
        """
        Generate a response from the model.

        Unknown model IDs fall back to the configured default model.

        Args:
            model_id: ID of the model to use.
            prompt: User prompt.
            context: Optional conversation context.

        Returns:
            Generated response.
        """
        # TODO: Implement actual model integration
        # This is a placeholder that will be implemented in the next steps

        if model_id not in self.AVAILABLE_MODELS:
            model_id = self.default_model

        # The configured default is not guaranteed to be registered in
        # AVAILABLE_MODELS; fall back to the raw ID instead of raising KeyError.
        model_info = self.AVAILABLE_MODELS.get(model_id)
        display_name = model_info['name'] if model_info else str(model_id)

        # Placeholder response
        return f"This is a placeholder response from {display_name}. The actual model integration will be implemented in the next steps."
+echo "To stop the application, find the process ID with 'ps aux | grep uvicorn' and kill it with 'kill '." diff --git a/deploy_ai_service.sh b/deploy_ai_service.sh new file mode 100755 index 0000000..6275b72 --- /dev/null +++ b/deploy_ai_service.sh @@ -0,0 +1,102 @@ +#!/bin/bash + +# Stop any existing service +pkill -f "uvicorn ai_service.api:app" || true + +# Create data directory if it doesn't exist +mkdir -p ai_service/data + +# Set environment variables for testing +# In production, replace these with your actual API keys +export PINECONE_API_KEY="test-pinecone-api-key" +export PINECONE_ENVIRONMENT="test-pinecone-environment" +export OPENAI_API_KEY="test-openai-api-key" + +# Create empty files for local storage if they don't exist +touch ai_service/data/chatbot.db +touch ai_service/data/document_metadata.json +touch ai_service/data/chats.json + +# For testing purposes, we'll use a simplified API +echo "Starting Simple API Service on port 5251..." +VENV_PATH="./venv" +PYTHON_PATH="$VENV_PATH/bin/python" + +# Check if the virtual environment exists +if [ -f "$PYTHON_PATH" ]; then + echo "Using Python from virtual environment: $PYTHON_PATH" + # Use the simplified API for testing + nohup $PYTHON_PATH simple_api.py > ai_service.log 2>&1 & +else + echo "Virtual environment not found at $VENV_PATH, using system Python" + nohup python simple_api.py > ai_service.log 2>&1 & +fi + +# Wait a moment for the service to start +sleep 2 + +# Check if the service is running +if pgrep -f "simple_api.py" > /dev/null; then + echo "AI Service started successfully on port 5251" + echo "Check ai_service.log for output" + echo "To stop the service, run: pkill -f \"simple_api.py\"" + + # Test the health endpoint + echo -e "\nTesting health endpoint..." + curl -s http://localhost:5251/health + echo -e "\n" + + # Test creating a chat and sending a message + echo "Testing chat creation and message sending..." 
#!/bin/bash

# Production deployment script for AI Service
# This script deploys the AI service in a production environment:
# it prepares the virtualenv, installs dependencies, validates
# ai_service/.env, restarts uvicorn on port 5251 and checks /health.

# Exit on error
set -e

# Work from the directory containing this script so every relative path
# below (venv, ai_service/.env, ai_service.log) resolves the same way no
# matter where the script is invoked from.
cd "$(dirname "$0")"

echo "Starting AI Service deployment..."

# Check if virtual environment exists
if [ ! -d "venv" ]; then
    echo "Creating virtual environment..."
    python3 -m venv venv
fi

# Activate virtual environment
source venv/bin/activate

# Install dependencies
echo "Installing dependencies..."
pip install --upgrade pip
pip install -r ai_service/requirements.txt

# Check if .env file exists, if not copy from .env.production
if [ ! -f "ai_service/.env" ]; then
    echo "Creating .env file from .env.production..."
    cp ai_service/.env.production ai_service/.env
    echo "Please edit ai_service/.env to add your API keys before continuing."
    echo "Then run this script again."
    exit 1
fi

# Create data directory if it doesn't exist
mkdir -p ai_service/data

# Print the value assigned to variable $1 in ai_service/.env (empty if absent).
# The match is anchored to "NAME=" so comments and longer variable names are
# ignored, and "-f2-" keeps values that themselves contain '='.
env_value() {
    grep -E "^$1=" ai_service/.env | head -n 1 | cut -d '=' -f2-
}

# Check if Pinecone API key is set
PINECONE_API_KEY=$(env_value PINECONE_API_KEY)
if [ "$PINECONE_API_KEY" = "your-pinecone-api-key-here" ]; then
    echo "Warning: Pinecone API key not set. Vector storage will not be available."
    echo "Edit ai_service/.env to set your Pinecone API key."
fi

# Check if OpenAI API key is set
OPENAI_API_KEY=$(env_value OPENAI_API_KEY)
if [ "$OPENAI_API_KEY" = "your-openai-api-key-here" ]; then
    echo "Warning: OpenAI API key not set. AI responses will be placeholders."
    echo "Edit ai_service/.env to set your OpenAI API key."
fi

# Stop any existing service
echo "Stopping any existing AI service..."
pkill -f "uvicorn ai_service.run:app" || true

# Start the service with nohup
echo "Starting AI service..."
nohup uvicorn ai_service.run:app --host 0.0.0.0 --port 5251 > ai_service.log 2>&1 &

# Wait for service to start
sleep 2

# Check if service is running
if pgrep -f "uvicorn ai_service.run:app" > /dev/null; then
    echo "AI service started successfully!"
    echo "Service is running on http://0.0.0.0:5251"
    echo "Logs are available in ai_service.log"
else
    echo "Failed to start AI service. Check ai_service.log for details."
    exit 1
fi

# Test the service. The process can be alive before it accepts requests,
# so poll the health endpoint a few times instead of failing on one miss.
echo "Testing service health..."
healthy=false
for _ in 1 2 3 4 5 6 7 8 9 10; do
    if curl -s http://localhost:5251/health | grep -q "healthy"; then
        healthy=true
        break
    fi
    sleep 1
done

if [ "$healthy" = true ]; then
    echo "Service is healthy!"
else
    echo "Service health check failed. Check ai_service.log for details."
    exit 1
fi

echo "Deployment complete!"
#!/bin/bash

# Remote deployment script for the AI service
# Usage: ./remote_deploy.sh [server_ip] [user] [port] [remote_dir]
#
# Copies the working tree to <remote_dir>/ai_service_app on the server,
# installs dependencies into a virtualenv there, restarts the service
# through ai_service/deploy.sh and verifies that uvicorn is running.

# Default values
SERVER_IP=${1:-"104.225.217.215"}
SERVER_USER=${2:-"root"}
SERVER_PORT=${3:-"22"}
REMOTE_DIR=${4:-"/root/openwebui"}
LOCAL_DIR="."

SSH_TARGET="$SERVER_USER@$SERVER_IP"
APP_DIR="$REMOTE_DIR/ai_service_app"

# Run a command string on the server.
remote() {
    ssh -p "$SERVER_PORT" "$SSH_TARGET" "$@"
}

echo "Deploying to server: $SERVER_IP"
echo "Remote directory: $REMOTE_DIR"

# Check if the server is reachable
echo "Checking if server is reachable..."
if ! ssh -q -o BatchMode=yes -o ConnectTimeout=5 -p "$SERVER_PORT" "$SSH_TARGET" exit; then
    echo "Error: Cannot connect to server $SERVER_IP"
    exit 1
fi

# Create a subdirectory for our AI service in the OpenWebUI directory
echo "Creating AI service directory in OpenWebUI..."
remote "mkdir -p '$APP_DIR'"

# Sync files to the server
echo "Syncing files to server..."
rsync -avz -e "ssh -p $SERVER_PORT" --exclude 'venv' --exclude '__pycache__' --exclude '*.pyc' --exclude '.git' \
    "$LOCAL_DIR/" "$SSH_TARGET:$APP_DIR/"

# Install dependencies on the server.
# The venv creation is grouped so that "|| true" only forgives that one step;
# ungrouped, it would also swallow a failed "cd" and run pip in the wrong place.
echo "Installing dependencies on the server..."
if ! remote "cd '$APP_DIR' && \
    { python3 -m venv venv || true; } && \
    source venv/bin/activate && \
    pip install --upgrade pip && \
    pip install -r requirements.txt && \
    pip install python-dotenv langchain-text-splitters"; then
    echo "Error: Failed to install dependencies on the server"
    exit 1
fi

# Stop any existing service.
# The [u] bracket keeps the pattern from matching the remote shell that is
# executing this very command line (pkill -f matches full command lines).
echo "Stopping any existing service..."
remote "pkill -f '[u]vicorn ai_service.run:app' || true"

# Start the service
echo "Starting the service..."
remote "cd '$APP_DIR' && \
    source venv/bin/activate && \
    bash ai_service/deploy.sh"

# Check if the service is running
echo "Checking if the service is running..."
sleep 5
if remote "ps aux | grep '[u]vicorn ai_service.run:app'"; then
    echo "Service is running!"
    echo "You can access the API at: http://$SERVER_IP:5251"
    echo "Check logs with: ssh -p $SERVER_PORT $SERVER_USER@$SERVER_IP 'tail -f $REMOTE_DIR/ai_service_app/logs/ai_service.log'"
else
    echo "Error: Service failed to start. Check logs on the server."
    exit 1
fi

echo "Deployment completed successfully!"
# Provide placeholder credentials for local testing, but never clobber real
# values that the caller has already exported into the environment.
os.environ.setdefault('PINECONE_API_KEY', 'test-key')
os.environ.setdefault('PINECONE_ENVIRONMENT', 'test-env')
os.environ.setdefault('OPENAI_API_KEY', 'test-key')

# Run the service
if __name__ == "__main__":
    print("Starting AI service on 0.0.0.0:5251")
    # The app is passed as an import string because uvicorn requires that
    # form when reload is enabled.
    uvicorn.run(
        "ai_service.api:app",
        host="0.0.0.0",
        port=5251,
        reload=True
    )
# Create FastAPI app
app = FastAPI(
    title="Simple AI Service API",
    description="Simple API for testing deployment",
    version="1.0.0"
)

# Add CORS middleware
# Allowed origins come from the comma-separated CORS_ALLOW_ORIGINS variable
# and default to "*" (any origin), as before.
_cors_origins = [
    origin.strip()
    for origin in os.environ.get("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    # Credentialed cross-origin requests are only valid for explicitly listed
    # origins; combining them with the "*" wildcard lets any site act with
    # the user's cookies, so credentials are enabled only for explicit lists.
    allow_credentials=_cors_origins != ["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.post("/chats/{chat_id}/messages", response_model=Message)
async def send_message(chat_id: str, request: MessageRequest):
    """Store a user message in a chat and answer with a canned bot message.

    Responds with 404 when the chat does not exist. The reply echoes the
    user's text and mentions the temperature, max_tokens and system prompt
    settings when they were supplied. Both messages are appended to the
    chat and the chat's ``updated_at`` is refreshed.
    """
    conversation = chats.get(chat_id)
    if conversation is None:
        raise HTTPException(status_code=404, detail="Chat not found")

    history = conversation["messages"]

    # Record what the user sent
    history.append({
        "id": str(uuid.uuid4()),
        "content": request.message,
        "user_id": request.user_id,
        "is_user_message": True,
        "timestamp": datetime.utcnow().isoformat()
    })

    # Describe the generation parameters that were explicitly provided
    notes = []
    if request.temperature is not None:
        notes.append(f" (temperature={request.temperature})")
    if request.max_tokens is not None:
        notes.append(f" (max_tokens={request.max_tokens})")
    if request.system_prompt is not None:
        notes.append(" (using custom system prompt)")
    params_text = "".join(notes)

    # Build and record the bot's reply
    reply = {
        "id": str(uuid.uuid4()),
        "content": f"This is a test response to: '{request.message}'{params_text}",
        "user_id": None,
        "is_user_message": False,
        "timestamp": datetime.utcnow().isoformat()
    }
    history.append(reply)

    conversation["updated_at"] = datetime.utcnow().isoformat()

    return reply
def test_available_models():
    """Print the name, ID and description of every model the service reports."""
    catalogue = model_service.get_available_models()
    print("Available models:")
    for entry in catalogue:
        print(f"- {entry['name']} ({entry['id']}): {entry['description']}")
    print()


def test_generate_response():
    """Ask a fixed question without retrieval and print the answer."""
    # A specific model is used here rather than config.DEFAULT_MODEL
    model_id = "llama3.1"
    prompt = "What is the capital of France?"

    print(f"Testing model: {model_id}")
    print(f"Prompt: {prompt}")

    answer = model_service.generate_response(
        model_id=model_id,
        prompt=prompt,
        use_rag=False
    )

    print("Response:")
    print(answer)
    print()


def test_rag_response():
    """Ask about the knowledge base with retrieval enabled and print the answer."""
    # A specific model is used here rather than config.DEFAULT_MODEL
    model_id = "llama3.1"
    prompt = "Tell me about the documents in the knowledge base."

    print(f"Testing RAG with model: {model_id}")
    print(f"Prompt: {prompt}")

    answer = model_service.generate_response(
        model_id=model_id,
        prompt=prompt,
        use_rag=True
    )

    print("Response with RAG:")
    print(answer)
    print()


if __name__ == "__main__":
    print("Testing Ollama integration")
    print(f"OpenWebUI URL: {config.OPENWEBUI_URL}")

    # Route Ollama API calls through OpenWebUI
    model_service.ollama_api_url = f"{config.OPENWEBUI_URL}/ollama"
    print(f"Using Ollama API URL: {model_service.ollama_api_url}")

    print(f"Default model: {config.DEFAULT_MODEL}")
    print()

    for check in (test_available_models, test_generate_response, test_rag_response):
        check()