From 4c0cff7ccacdfc99e36ed3cef97e24dcf317c3d3 Mon Sep 17 00:00:00 2001
From: Iyeoluwa Akinrinola <iyeoluwa@connou.app>
Date: Fri, 9 May 2025 17:38:58 +0100
Subject: [PATCH] Using the ai_service as backend for openwebui

---
 DEPLOYMENT.md               | 135 ----------------------------
 OPENWEBUI_SETUP.md          | 104 +++++++++++++++++++++
 PRODUCTION_READINESS.md     | 136 ----------------------------
 README.md                   |  93 ++++++++++++-------
 ai_service/README.md        |  43 ++++-----
 ai_service/api.py           | 126 +++-----------------------
 ai_service/openwebui_api.py | 175 ++++++++++++++++++++++++++++++++++++
 ai_service/requirements.txt |  11 ---
 ai_service_workflow.md      | 173 -----------------------------------
 deploy.sh                   |  15 ----
 deploy_production.sh        |  82 -----------------
 openwebui_config.md         |  83 +++++++++++++++++
 remote_deploy.sh            |   3 +-
 requirements.txt            |  21 ++---
 run.py                      |  14 ---
 15 files changed, 466 insertions(+), 748 deletions(-)
 delete mode 100644 DEPLOYMENT.md
 create mode 100644 OPENWEBUI_SETUP.md
 delete mode 100644 PRODUCTION_READINESS.md
 create mode 100644 ai_service/openwebui_api.py
 delete mode 100644 ai_service/requirements.txt
 delete mode 100644 ai_service_workflow.md
 delete mode 100755 deploy.sh
 delete mode 100644 deploy_production.sh
 create mode 100644 openwebui_config.md
 delete mode 100644 run.py
diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md
deleted file mode 100644
index 3f5e300..0000000
--- a/DEPLOYMENT.md
+++ /dev/null
@@ -1,135 +0,0 @@
-# Deployment Instructions
-
-This document provides instructions for deploying the chatbot application with Ollama and OpenWebUI integration.
-
-## Prerequisites
-
-- Python 3.8 or higher
-- pip
-- virtualenv or venv
-- Access to OpenWebUI at http://104.225.217.215:8080
-
-## Deployment Steps
-
-1. **Clone the repository**
-
-   ```bash
-   git clone <repository-url>
-   cd <repository-directory>
-   ```
-
-2. **Create and activate a virtual environment**
-
-   ```bash
-   python -m venv venv
-   source venv/bin/activate  # On Windows: venv\Scripts\activate
-   ```
-
-3. **Install dependencies**
-
-   ```bash
-   pip install -r requirements.txt
-   pip install python-dotenv langchain-text-splitters
-   ```
-
-4. **Create a .env file**
-
-   Copy the .env.example file to .env and update the values:
-
-   ```bash
-   cp ai_service/.env.example ai_service/.env
-   # Edit the .env file with appropriate values
-   ```
-
-   Make sure to include the OpenWebUI configuration:
-
-   ```
-   # OpenWebUI configuration
-   OPENWEBUI_URL=http://104.225.217.215:8080
-   OPENWEBUI_API_KEY=GdCU4ieYDqHsLfH2
-
-   # Ollama configuration
-   OLLAMA_API_URL=http://104.225.217.215:8080/ollama
-   DEFAULT_MODEL=llama3.1
-   ```
-
-5. **Run the deployment script**
-
-   For local deployment:
-   ```bash
-   python -m ai_service.run
-   ```
-
-   For server deployment:
-   ```bash
-   ./ai_service/deploy.sh
-   ```
-
-   This will start the application on port 5251 using uvicorn with nohup.
-
-   For remote deployment from your local machine:
-   ```bash
-   ./remote_deploy.sh 157.157.221.29 user 22 /home/user/ds_zagres_ai
-   ```
-
-6. **Verify the application is running**
-
-   ```bash
-   curl http://localhost:5251/api/health
-   ```
-
-   You should see a response like:
-   ```json
-   {
-     "status": "healthy"
-   }
-   ```
-
-## Managing the Deployed Application
-
-- **View logs**
-
-  ```bash
-  tail -f app.log
-  ```
-
-- **Stop the application**
-
-  ```bash
-  ps aux | grep uvicorn  # Find the process ID (PID)
-  kill <PID>             # Replace <PID> with the actual process ID
-  ```
-
-- **Restart the application**
-
-  ```bash
-  ./deploy.sh
-  ```
-
-## API Endpoints
-
-- `GET /health` - Health check endpoint
-- `POST /chats` - Create a new chat
-- `POST /chats/{chat_id}/messages` - Send a message to the chatbot
-- `GET /chats/{chat_id}` - Get chat history
-
-## Ollama and OpenWebUI Integration
-
-The chatbot now uses Ollama models via OpenWebUI. The following models are available:
-
-- **gemma3**: Google Gemma 3 model
-- **llama3.3**: Meta Llama 3 70B model
-- **llama3.1**: Meta Llama 3 8B model
-- **mistral**: Mistral AI model
-- **deepseek**: DeepSeek model
-
-### Document Training
-
-To use RAG with your documents:
-
-1. Go to the OpenWebUI interface at http://104.225.217.215:8080/
-2. Navigate to the Knowledge section
-3. Upload your documents
-4. OpenWebUI will automatically process them for RAG
-
-When using the chatbot API, set `use_rag=True` in your chat requests to enable RAG.
diff --git a/OPENWEBUI_SETUP.md b/OPENWEBUI_SETUP.md
new file mode 100644
index 0000000..54f6081
--- /dev/null
+++ b/OPENWEBUI_SETUP.md
@@ -0,0 +1,104 @@
+# Setting Up OpenWebUI with Custom Backend
+
+This guide explains how to configure OpenWebUI to use your custom AI service as its backend.
+
+## Overview
+
+OpenWebUI is designed to work with Ollama directly, but it can be configured to use a custom backend service instead. This allows you to add your own business logic, authentication, and other features while still using OpenWebUI's user-friendly interface.
+
+## Prerequisites
+
+1. OpenWebUI installed and running (at http://104.225.217.215:8080)
+2. Your AI service deployed and running (at http://your-server-ip:5251)
+
+## Configuration Steps
+
+### 1. Locate OpenWebUI's Environment Configuration
+
+If you're running OpenWebUI using Docker, you can modify the environment variables in your docker-compose.yml file or when running the container:
+
+```yaml
+version: '3'
+services:
+  openwebui:
+    image: ghcr.io/open-webui/open-webui:main
+    environment:
+      - OLLAMA_API_BASE_URL=http://your-server-ip:5251/ollama
+      - OPENAI_API_BASE_URL=http://your-server-ip:5251/api
+    ports:
+      - "8080:8080"
+```
+
+If you're running OpenWebUI directly, you can modify the .env file in the OpenWebUI installation directory.
+
+### 2. Set the API Base URLs
+
+Add or modify the following environment variables:
+
+```
+OLLAMA_API_BASE_URL=http://your-server-ip:5251/ollama
+OPENAI_API_BASE_URL=http://your-server-ip:5251/api
+```
+
+Replace `your-server-ip` with the IP address or hostname of the server where your AI service is running.
+
+### 3. Restart OpenWebUI
+
+After updating the environment variables, restart the OpenWebUI service to apply the changes.
+
+If you're using Docker:
+
+```bash
+docker restart openwebui
+```
+
+If you're running OpenWebUI directly:
+
+```bash
+# Stop the current process
+pkill -f "openwebui"
+
+# Start OpenWebUI again
+cd /path/to/openwebui
+npm start
+```
+
+## Testing the Integration
+
+To test if the integration is working correctly:
+
+1. Open OpenWebUI in your browser (http://104.225.217.215:8080)
+2. Try to create a new chat
+3. Select one of the models (gemma3, llama3.3, llama3.1, mistral, or deepseek)
+4. Send a message and check if you get a response
+
+If everything is configured correctly, OpenWebUI will send requests to your AI service, which will then forward them to the Ollama API.
+
+## Troubleshooting
+
+If you encounter issues with the integration:
+
+1. Check the logs of your AI service:
+   ```bash
+   tail -f logs/ai_service.log
+   ```
+
+2. Check the logs of OpenWebUI:
+   ```bash
+   docker logs openwebui
+   ```
+
+3. Verify that your AI service is running and accessible:
+   ```bash
+   curl http://your-server-ip:5251/health
+   ```
+
+4. Verify that the OpenWebUI environment variables are set correctly:
+   ```bash
+   docker exec openwebui env | grep URL
+   ```
+
+## Advanced Configuration
+
+For advanced configuration options, refer to the OpenWebUI documentation:
+https://docs.openwebui.com/getting-started/env-configuration
diff --git a/PRODUCTION_READINESS.md b/PRODUCTION_READINESS.md
deleted file mode 100644
index 5577b93..0000000
--- a/PRODUCTION_READINESS.md
+++ /dev/null
@@ -1,136 +0,0 @@
-# Production Readiness Checklist
-
-This document outlines what's currently implemented, what's missing, and what improvements can be made to make the AI service production-ready.
-
-## Current Implementation
-
-### Core Functionality
-- ✅ Document processing and chunking
-- ✅ Embedding generation and storage
-- ✅ Document search
-- ✅ Chat functionality
-- ✅ Model switching
-- ✅ Team chat support
-- ✅ RAG (Retrieval-Augmented Generation)
-- ✅ Customizable model parameters
-
-### API Endpoints
-- ✅ Health check endpoint
-- ✅ Document management endpoints
-- ✅ Model information endpoints
-- ✅ Chat management endpoints
-- ✅ Message sending endpoint
-
-## Missing Components
-
-### Authentication and Authorization
-- ❌ User authentication
-- ❌ API key validation
-- ❌ Role-based access control
-- ❌ Document access permissions
-
-### Database Integration
-- ❌ MySQL database integration (currently using JSON files and SQLite)
-- ❌ Database migration scripts
-- ❌ Connection pooling
-
-### Monitoring and Observability
-- ❌ Structured logging
-- ❌ Metrics collection
-- ❌ Alerting system
-- ❌ Detailed health checks
-
-### Testing
-- ❌ Unit tests
-- ❌ Integration tests
-- ❌ Load tests
-- ❌ CI/CD pipeline
-
-### Documentation
-- ❌ API documentation
-- ❌ Developer guide
-- ❌ Deployment guide
-- ❌ User guide
-
-## Improvement Opportunities
-
-### Performance
-- Implement caching for frequently accessed data
-- Add connection pooling for database connections
-- Optimize embedding generation and search
-- Implement batch processing for document ingestion
-
-### Scalability
-- Add support for distributed deployment
-- Implement horizontal scaling
-- Add load balancing
-- Implement message queues for asynchronous processing
-
-### Security
-- Add input validation and sanitization
-- Implement rate limiting
-- Add CORS configuration
-- Implement secure storage for API keys
-
-### User Experience
-- Add progress tracking for document processing
-- Implement streaming responses for chat
-- Add support for file attachments
-- Implement chat history export
-
-### AI Capabilities
-- Add support for more AI models
-- Implement fine-tuning capabilities
-- Add support for function calling
-- Implement conversation summarization
-
-## Document Storage
-
-Currently, documents are stored in two places:
-
-1. **Document Metadata**: Stored in a JSON file at `ai_service/data/document_metadata.json`
-2. **Document Embeddings**: Stored in Pinecone vector database
-
-For production, you should:
-- Replace the JSON file storage with MySQL database
-- Implement proper document versioning
-- Add document access controls
-- Implement backup and recovery procedures
-
-## API Keys and Configuration
-
-The system is designed to use environment variables for configuration, including API keys. The following keys need to be set:
-
-1. **Pinecone API Key**: For vector storage
-   - Sign up at https://www.pinecone.io/
-   - Set `PINECONE_API_KEY` and `PINECONE_ENVIRONMENT` in `.env`
-
-2. **OpenAI API Key**: For AI model access
-   - Sign up at https://platform.openai.com/
-   - Set `OPENAI_API_KEY` in `.env`
-
-A template `.env.production` file has been created with placeholders for these keys.
-
-## Deployment
-
-A production deployment script (`deploy_production.sh`) has been created to:
-- Set up the virtual environment
-- Install dependencies
-- Check for API keys
-- Start the service with proper logging
-- Verify the service is running
-
-To deploy:
-1. Copy `.env.production` to `.env` and add your API keys
-2. Run `./deploy_production.sh`
-3. Monitor the service with `tail -f ai_service.log`
-
-## Next Steps
-
-1. Implement authentication and authorization
-2. Set up MySQL database integration
-3. Add comprehensive testing
-4. Implement monitoring and observability
-5. Create detailed documentation
-6. Address security concerns
-7. Optimize performance and scalability
diff --git a/README.md b/README.md
index f634351..f9c11a3 100644
--- a/README.md
+++ b/README.md
@@ -1,61 +1,90 @@
-# Chatbot Application
+# OpenWebUI Backend Service
 
-A chatbot application with document training, private/team chat options, and model switching capability.
+A backend service for OpenWebUI that provides OpenWebUI-compatible API endpoints for chat functionality and model switching.
 
 ## Features
 
-- Document training through library page
-- Private chat functionality
-- Team chat functionality (multiple users can see each other's interactions)
-- Model switching capability
+- OpenWebUI-compatible API endpoints
+- Ollama API proxy
+- Chat functionality with model switching
+- Support for multiple LLM models (gemma3, llama3.3, llama3.1, mistral, deepseek)
 
 ## Technology Stack
 
-- **Backend**: Flask with FastAPI
-- **Database**: MySQL
-- **Vector Database**: Pinecone
-- **Embeddings**: Sentence Transformers / OpenAI Embeddings
-- **Chat Models**: Various LLMs (configurable)
+- **Backend**: FastAPI
+- **Chat Models**: Ollama models via OpenWebUI
 
 ## Project Structure
 
 ```
-app/
-├── api/            # API endpoints (Flask and FastAPI)
-├── config/         # Configuration settings
-├── database/       # Database connection and utilities
-├── models/         # Database models
-├── services/       # Business logic services
-└── utils/          # Utility functions
-tests/              # Test cases
+ai_service/
+├── models/           # Model and chat services
+│   ├── model_service.py
+│   ├── chat_service.py
+│   └── model_parameters.py
+├── embeddings/       # Document processing for RAG
+│   └── document_service.py
+├── openwebui_api.py  # OpenWebUI-compatible API endpoints
+├── config.py         # Configuration settings
+├── api.py            # FastAPI application
+└── deploy.sh         # Deployment script
+run_ai_service.py     # Script to run the service
+deploy_ai_service.sh  # Local deployment script
+remote_deploy.sh      # Remote deployment script
 ```
 
 ## Setup Instructions
 
-1. Clone the repository
-2. Create a virtual environment:
+1. Create a virtual environment:
    ```
    python -m venv venv
    source venv/bin/activate  # On Windows: venv\Scripts\activate
    ```
-3. Install dependencies:
+
+2. Install dependencies:
    ```
    pip install -r requirements.txt
    ```
-4. Copy `.env.example` to `.env` and update the values
-5. Initialize the database:
+
+3. Copy `.env.example` to `.env` and update the values:
    ```
-   flask db init
-   flask db migrate
-   flask db upgrade
+   cp ai_service/.env.example ai_service/.env
+   # Edit the .env file with appropriate values
    ```
-6. Run the application:
+
+4. Run the service:
    ```
-   python run.py
+   python run_ai_service.py
+   ```
+
+## Deployment
+
+To deploy the service:
+
+1. Local deployment:
+   ```
+   ./deploy_ai_service.sh
+   ```
+
+2. Remote deployment:
+   ```
+   ./remote_deploy.sh [server_ip] [user] [port] [remote_dir]
    ```
 
 ## API Documentation
 
-Once the application is running, you can access the API documentation at:
-- FastAPI Swagger UI: http://localhost:5000/docs
-- FastAPI ReDoc: http://localhost:5000/redoc
+Once the service is running, you can access the API documentation at:
+- Swagger UI: http://localhost:5251/docs
+- ReDoc: http://localhost:5251/redoc
+
+## OpenWebUI Configuration
+
+To configure OpenWebUI to use this service as its backend:
+
+1. Set the following environment variables in OpenWebUI:
+   ```
+   OLLAMA_API_BASE_URL=http://your-server-ip:5251/ollama
+   OPENAI_API_BASE_URL=http://your-server-ip:5251/api
+   ```
+
+2. Restart OpenWebUI to apply the changes.
diff --git a/ai_service/README.md b/ai_service/README.md
index eafe1e5..5f634f6 100644
--- a/ai_service/README.md
+++ b/ai_service/README.md
@@ -1,25 +1,28 @@
-# AI Service for Chatbot Application
+# OpenWebUI Backend Service
 
-This is the AI service component for the chatbot application. It provides APIs for document processing, embeddings, and chat functionality.
+This is a backend service for OpenWebUI that provides OpenWebUI-compatible API endpoints for chat functionality and model switching.
 
 ## Features
 
-- Document processing and embedding
-- Retrieval-augmented generation (RAG)
+- OpenWebUI-compatible API endpoints
+- Ollama API proxy
 - Chat functionality with model switching
-- Team chat support
+- Support for multiple LLM models (gemma3, llama3.3, llama3.1, mistral, deepseek)
 
 ## Project Structure
 
 ```
 ai_service/
-├── embeddings/       # Embedding and document processing services
 ├── models/           # Model and chat services
-├── utils/            # Utility functions
-├── data/             # Data storage
+│   ├── model_service.py
+│   ├── chat_service.py
+│   └── model_parameters.py
+├── embeddings/       # Document processing for RAG
+│   └── document_service.py
+├── openwebui_api.py  # OpenWebUI-compatible API endpoints
 ├── config.py         # Configuration settings
 ├── api.py            # FastAPI application
-└── run.py            # Script to run the service
+└── deploy.sh         # Deployment script
 ```
 
 ## Setup Instructions
@@ -43,7 +46,7 @@ ai_service/
 
 4. Run the service:
    ```
-   python run.py
+   python ../run_ai_service.py
    ```
 
 ## API Documentation
@@ -70,20 +73,20 @@ This will start the service on port 5251 using uvicorn with nohup.
 
 ## API Endpoints
 
-### Document Endpoints
+### Health Check
 
-- `POST /documents` - Process a document for embedding
-- `GET /documents` - Get all documents
-- `GET /documents/{doc_id}` - Get a document by ID
-- `DELETE /documents/{doc_id}` - Delete a document
-- `POST /documents/search` - Search for documents
+- `GET /health` - Check if the service is running
 
-### Model Endpoints
+### OpenWebUI-Compatible Endpoints
 
-- `GET /models` - Get available models
-- `GET /models/{model_id}` - Get information about a model
+- `GET /api/models` - Get available models in OpenWebUI format
+- `POST /api/chat/completions` - OpenAI-compatible chat completions endpoint
 
-### Chat Endpoints
+### Ollama API Proxy
+
+- `POST /ollama/api/generate` - Proxy to Ollama's generate endpoint
+
+### Original API Endpoints
 
 - `POST /chats` - Create a new chat
 - `GET /chats/user/{user_id}` - Get all chats for a user
diff --git a/ai_service/api.py b/ai_service/api.py
index dd7d589..8c7f472 100644
--- a/ai_service/api.py
+++ b/ai_service/api.py
@@ -1,22 +1,21 @@
 """
 FastAPI application for the AI service.
+This service acts as a backend for OpenWebUI, providing OpenWebUI-compatible API endpoints.
 """
 
-from fastapi import FastAPI, HTTPException, Depends, Body, Query, Path
+from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, Field
-from typing import List, Dict, Any, Optional
+from typing import List, Optional
 
-from ai_service.config import config
-from ai_service.embeddings.document_service import document_service
 from ai_service.models.model_service import model_service
 from ai_service.models.chat_service import chat_service
-from ai_service.models.model_parameters import ModelParameters
+from ai_service.openwebui_api import router as openwebui_router
 
 # Create FastAPI app
 app = FastAPI(
     title="AI Service API",
-    description="API for the AI service",
+    description="Backend API for OpenWebUI",
     version="1.0.0"
 )
 
@@ -29,32 +28,16 @@ app.add_middleware(
     allow_headers=["*"],  # Allow all headers
 )
 
-# Define API models
-class DocumentRequest(BaseModel):
-    """Request model for document processing."""
-    content: str = Field(..., description="Document content")
-    title: str = Field(..., description="Document title")
-    description: Optional[str] = Field(None, description="Document description")
-    metadata: Optional[Dict[str, Any]] = Field(None, description="Additional metadata")
+# Include OpenWebUI-compatible API routes
+app.include_router(openwebui_router, prefix="/api")
 
-class DocumentResponse(BaseModel):
-    """Response model for document processing."""
-    id: str = Field(..., description="Document ID")
-    title: str = Field(..., description="Document title")
-    description: str = Field(..., description="Document description")
-    chunk_count: int = Field(..., description="Number of chunks")
-    metadata: Dict[str, Any] = Field(..., description="Additional metadata")
+# Include Ollama proxy routes
+app.include_router(openwebui_router, prefix="/ollama")
 
-class SearchRequest(BaseModel):
-    """Request model for document search."""
-    query: str = Field(..., description="Search query")
-    top_k: int = Field(5, description="Number of results to return")
-
-class SearchResult(BaseModel):
-    """Model for a search result."""
-    id: str = Field(..., description="Result ID")
-    score: float = Field(..., description="Similarity score")
-    metadata: Dict[str, Any] = Field(..., description="Result metadata")
+# Define API models for health check
+class HealthResponse(BaseModel):
+    """Response body of the ``/health`` endpoint: a single status string."""
+    status: str = Field(..., description="Health status")
 
 class ModelInfo(BaseModel):
     """Model for model information."""
@@ -114,7 +97,7 @@ class Chat(BaseModel):
     team_members: List[str] = Field(..., description="Team members")
 
 # Define API endpoints
-@app.get("/health")
+@app.get("/health", response_model=HealthResponse)
 async def health_check():
     """
     Health check endpoint.
@@ -124,88 +107,7 @@ async def health_check():
     """
     return {"status": "healthy"}
 
-# Document endpoints
-@app.post("/documents", response_model=DocumentResponse)
-async def process_document(request: DocumentRequest):
-    """
-    Process a document for embedding.
 
-    Args:
-        request: Document processing request.
-
-    Returns:
-        Processed document information.
-    """
-    doc_id = document_service.process_document(
-        content=request.content,
-        title=request.title,
-        description=request.description,
-        metadata=request.metadata
-    )
-
-    return document_service.get_document(doc_id)
-
-@app.get("/documents", response_model=List[DocumentResponse])
-async def get_all_documents():
-    """
-    Get all documents.
-
-    Returns:
-        List of document information.
-    """
-    return document_service.get_all_documents()
-
-@app.get("/documents/{doc_id}", response_model=DocumentResponse)
-async def get_document(doc_id: str):
-    """
-    Get a document by ID.
-
-    Args:
-        doc_id: Document ID.
-
-    Returns:
-        Document information.
-    """
-    doc = document_service.get_document(doc_id)
-    if not doc:
-        raise HTTPException(status_code=404, detail="Document not found")
-
-    return doc
-
-@app.delete("/documents/{doc_id}")
-async def delete_document(doc_id: str):
-    """
-    Delete a document.
-
-    Args:
-        doc_id: Document ID.
-
-    Returns:
-        Deletion status.
-    """
-    success = document_service.delete_document(doc_id)
-    if not success:
-        raise HTTPException(status_code=404, detail="Document not found")
-
-    return {"status": "success", "message": "Document deleted"}
-
-@app.post("/documents/search", response_model=List[SearchResult])
-async def search_documents(request: SearchRequest):
-    """
-    Search for documents.
-
-    Args:
-        request: Search request.
-
-    Returns:
-        Search results.
-    """
-    results = document_service.search_documents(
-        query=request.query,
-        top_k=request.top_k
-    )
-
-    return results
 
 # Model endpoints
 @app.get("/models", response_model=List[ModelInfo])
diff --git a/ai_service/openwebui_api.py b/ai_service/openwebui_api.py
new file mode 100644
index 0000000..3b5801e
--- /dev/null
+++ b/ai_service/openwebui_api.py
@@ -0,0 +1,175 @@
+"""
+OpenWebUI-compatible API endpoints for the AI service.
+"""
+
+from fastapi import APIRouter, Depends, HTTPException, Header, Request
+from fastapi.responses import StreamingResponse
+from typing import List, Dict, Any, Optional, Union
+import json
+import time
+import uuid
+
+from ai_service.models.model_service import model_service
+from ai_service.models.chat_service import chat_service
+from ai_service.models.model_parameters import ModelParameters
+
+# Create router
+router = APIRouter()
+
+# Models endpoint
+@router.get("/models", response_model=List[Dict[str, Any]])
+async def get_models():
+    """
+    List the models known to ``model_service`` as OpenAI-style model objects.
+
+    Returns:
+        A list with one dict per model; ``id`` and ``root`` both carry the
+        model's ``"id"`` and ``created`` is the time the entry was built.
+    """
+    return [
+        {
+            "id": entry["id"],
+            "object": "model",
+            "created": int(time.time()),
+            "owned_by": "user",
+            "permission": [],
+            "root": entry["id"],
+            "parent": None
+        }
+        for entry in model_service.get_available_models()
+    ]
+
+# Chat completions endpoint (OpenAI-compatible)
+@router.post("/chat/completions")
+async def chat_completions(request: Request):
+    """
+    OpenAI-compatible chat completions endpoint.
+
+    Only the most recent ``user`` message is forwarded to ``chat_service``.
+    With ``stream`` set, the full answer is sent as a single SSE chunk.
+
+    Raises:
+        HTTPException: 400 when ``messages`` has no non-empty user message.
+    """
+    body = await request.json()
+    model_id = body.get("model", "llama3.1")
+    stream = body.get("stream", False)
+    stop = body.get("stop")
+
+    # Validate before creating chat state so bad requests leave no empty chats.
+    user_message = next(
+        (m.get("content") for m in reversed(body.get("messages", []))
+         if m.get("role") == "user"),
+        None,
+    )
+    if not user_message:
+        raise HTTPException(status_code=400, detail="No user message found")
+
+    # A real implementation would take the user from authentication.
+    user_id = "openwebui-user"
+
+    # NOTE(review): chat_id is never passed to create_chat(); confirm it resolves.
+    chat_id = str(uuid.uuid4())
+    chat_service.create_chat(user_id=user_id, title="API Chat", model_id=model_id)
+
+    response = chat_service.get_chat_response(
+        chat_id=chat_id,
+        message=user_message,
+        user_id=user_id,
+        temperature=body.get("temperature"),
+        max_tokens=body.get("max_tokens"),
+        top_p=body.get("top_p"),
+        frequency_penalty=body.get("frequency_penalty"),
+        presence_penalty=body.get("presence_penalty"),
+        stop_sequences=stop if isinstance(stop, list) else ([stop] if stop else None)
+    )
+
+    completion_id = f"chatcmpl-{str(uuid.uuid4())[:8]}"
+    created = int(time.time())
+    content = response.get("content", "")
+
+    if stream:
+        # Streaming clients read choices[].delta from "chat.completion.chunk"
+        # events, so the non-streaming payload cannot be reused here.
+        chunk = {
+            "id": completion_id,
+            "object": "chat.completion.chunk",
+            "created": created,
+            "model": model_id,
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {"role": "assistant", "content": content},
+                    "finish_reason": "stop"
+                }
+            ]
+        }
+
+        async def generate_stream():
+            yield f"data: {json.dumps(chunk)}\n\n"
+            yield "data: [DONE]\n\n"
+
+        return StreamingResponse(generate_stream(), media_type="text/event-stream")
+
+    return {
+        "id": completion_id,
+        "object": "chat.completion",
+        "created": created,
+        "model": model_id,
+        "choices": [
+            {
+                "index": 0,
+                "message": {"role": "assistant", "content": content},
+                "finish_reason": "stop"
+            }
+        ],
+        # Token usage is not tracked; -1 marks the counts as unknown.
+        "usage": {"prompt_tokens": -1, "completion_tokens": -1, "total_tokens": -1}
+    }
+
+# Health check endpoint
+@router.get("/health")
+async def health_check():
+    """
+    Liveness probe for the router; always returns a static "healthy" status.
+    """
+    return {"status": "healthy"}
+
+# Ollama API proxy endpoints
+# The router is mounted under "/ollama", so this path is served as
+# "/ollama/api/generate"; repeating "/ollama" here would double the prefix.
+@router.post("/api/generate")
+async def ollama_generate(request: Request):
+    """
+    Answer an Ollama-style generate request through ``chat_service``.
+
+    Reads ``model`` and ``prompt`` from the JSON body and returns the whole
+    answer in one non-streamed object with ``done`` set to True.
+    """
+    body = await request.json()
+    model_id = body.get("model", "llama3.1")
+    prompt = body.get("prompt", "")
+
+    # A real implementation would take the user from authentication.
+    user_id = "openwebui-user"
+
+    # NOTE(review): chat_id is never passed to create_chat(); confirm it resolves.
+    chat_id = str(uuid.uuid4())
+    chat_service.create_chat(user_id=user_id, title="API Chat", model_id=model_id)
+
+    response = chat_service.get_chat_response(
+        chat_id=chat_id,
+        message=prompt,
+        user_id=user_id
+    )
+
+    # time.strftime() has no "%f" directive (it is datetime-only), so the
+    # timestamp is emitted with second precision.
+    ollama_response = {
+        "model": model_id,
+        "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
+        "response": response.get("content", ""),
+        "done": True
+    }
+
+    return ollama_response
diff --git a/ai_service/requirements.txt b/ai_service/requirements.txt
deleted file mode 100644
index 2ec847f..0000000
--- a/ai_service/requirements.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# Core dependencies
-fastapi==0.103.1
-uvicorn==0.23.2
-pydantic==2.3.0
-python-dotenv==1.0.0
-
-# Document processing
-langchain-text-splitters==0.3.8
-
-# Utilities
-requests==2.32.3
diff --git a/ai_service_workflow.md b/ai_service_workflow.md
deleted file mode 100644
index 81ec340..0000000
--- a/ai_service_workflow.md
+++ /dev/null
@@ -1,173 +0,0 @@
-# AI Service Workflow and Architecture
-
-## Overview
-
-The AI Service is a modular, API-driven system that provides document processing, embedding, and chat functionality with multiple AI models. It's designed to support a chatbot application with document training, private/team chat options, and model switching capabilities.
-
-## System Architecture
-
-```
-┌─────────────────┐     ┌─────────────────┐     ┌─────────────────┐
-│                 │     │                 │     │                 │
-│  Client Apps    │────▶│  AI Service API │────▶│  Vector Store   │
-│                 │     │                 │     │   (Pinecone)    │
-└─────────────────┘     └────────┬────────┘     └─────────────────┘
-                                 │
-                                 ▼
-                        ┌─────────────────┐     ┌─────────────────┐
-                        │                 │     │                 │
-                        │   AI Models     │────▶│  Local Storage  │
-                        │                 │     │                 │
-                        └─────────────────┘     └─────────────────┘
-```
-
-## Core Components
-
-1. **Document Service**: Processes documents, splits them into chunks, and stores embeddings
-2. **Embedding Service**: Generates vector embeddings for text using sentence transformers
-3. **Model Service**: Manages different AI models and generates responses
-4. **Chat Service**: Handles chat creation, message history, and team chat functionality
-
-## API Endpoints Workflow
-
-### Health Check
-
-- **Endpoint**: `GET /health`
-- **Purpose**: Simple health check to verify the service is running
-- **Response**: `{"status": "healthy"}`
-
-### Document Management Workflow
-
-1. **Process Document**
-   - **Endpoint**: `POST /documents`
-   - **Purpose**: Process a document for embedding
-   - **Workflow**:
-     - Client submits document content, title, and optional metadata
-     - Document is split into chunks
-     - Embeddings are generated for each chunk
-     - Embeddings are stored in Pinecone
-     - Document metadata is stored locally
-   - **Response**: Document metadata including ID and chunk count
-
-2. **Get All Documents**
-   - **Endpoint**: `GET /documents`
-   - **Purpose**: Retrieve all processed documents
-   - **Response**: List of document metadata
-
-3. **Get Document by ID**
-   - **Endpoint**: `GET /documents/{doc_id}`
-   - **Purpose**: Retrieve a specific document's metadata
-   - **Response**: Document metadata
-
-4. **Delete Document**
-   - **Endpoint**: `DELETE /documents/{doc_id}`
-   - **Purpose**: Remove a document and its embeddings
-   - **Workflow**:
-     - Document chunks are deleted from Pinecone
-     - Document metadata is removed from local storage
-   - **Response**: Success status
-
-5. **Search Documents**
-   - **Endpoint**: `POST /documents/search`
-   - **Purpose**: Semantic search across document embeddings
-   - **Workflow**:
-     - Query text is converted to an embedding
-     - Similar embeddings are found in Pinecone
-     - Results are returned with metadata and similarity scores
-   - **Response**: List of search results with metadata
-
-### Model Management Workflow
-
-1. **Get Available Models**
-   - **Endpoint**: `GET /models`
-   - **Purpose**: List all available AI models
-   - **Response**: List of model information (ID, name, description, etc.)
-
-2. **Get Model Information**
-   - **Endpoint**: `GET /models/{model_id}`
-   - **Purpose**: Get details about a specific model
-   - **Response**: Model information
-
-### Chat Workflow
-
-1. **Create Chat**
-   - **Endpoint**: `POST /chats`
-   - **Purpose**: Create a new chat session
-   - **Workflow**:
-     - Client provides user ID, optional title, and model ID
-     - System generates a unique chat ID
-     - Chat metadata is stored locally
-   - **Response**: Created chat information
-
-2. **Get User Chats**
-   - **Endpoint**: `GET /chats/user/{user_id}`
-   - **Purpose**: Get all chats for a specific user
-   - **Response**: List of chat information
-
-3. **Get Chat by ID**
-   - **Endpoint**: `GET /chats/{chat_id}`
-   - **Purpose**: Get a specific chat's information and messages
-   - **Response**: Chat information including message history
-
-4. **Send Message**
-   - **Endpoint**: `POST /chats/{chat_id}/messages`
-   - **Purpose**: Send a message and get AI response
-   - **Workflow**:
-     - Client sends message with user ID and optional model parameters
-     - User message is added to chat history
-     - If RAG is enabled, relevant documents are retrieved
-     - AI model generates a response based on chat history and context
-     - Bot response is added to chat history
-   - **Response**: Bot response message
-
-5. **Team Chat Management**
-   - **Add Team Member**: `POST /chats/{chat_id}/members/{user_id}`
-   - **Remove Team Member**: `DELETE /chats/{chat_id}/members/{user_id}`
-   - **Purpose**: Manage team chat participants
-   - **Response**: Success status
-
-6. **Delete Chat**
-   - **Endpoint**: `DELETE /chats/{chat_id}`
-   - **Purpose**: Remove a chat and its messages
-   - **Response**: Success status
-
-## Retrieval-Augmented Generation (RAG) Workflow
-
-When RAG is enabled in a chat message request:
-
-1. User message is processed
-2. Message is converted to an embedding
-3. Similar document chunks are retrieved from Pinecone
-4. Retrieved chunks are added as context to the prompt
-5. AI model generates a response using both the chat history and document context
-6. Response is returned to the user
-
-## Model Parameters
-
-The API supports customizing AI model behavior through parameters:
-
-- `temperature`: Controls randomness (0.0-2.0)
-- `max_tokens`: Maximum response length
-- `top_p`: Nucleus sampling parameter (0.0-1.0)
-- `frequency_penalty`: Penalizes repeated tokens (-2.0-2.0)
-- `presence_penalty`: Penalizes repeated topics (-2.0-2.0)
-- `stop_sequences`: Sequences where generation stops
-- `system_prompt`: Custom system prompt to guide the model
-
-## Deployment
-
-The service is deployed using uvicorn:
-
-```bash
-nohup uvicorn ai_service.run:app --host 0.0.0.0 --port 5251 &
-```
-
-## Example Usage Flow
-
-1. Process documents for knowledge base
-2. Create a new chat session
-3. Send messages with or without RAG
-4. Optionally add team members for collaborative chats
-5. Switch models as needed for different capabilities
-
-This architecture provides a flexible, scalable foundation for building AI-powered chat applications with document training capabilities.
diff --git a/deploy.sh b/deploy.sh
deleted file mode 100755
index 4cbe9ef..0000000
--- a/deploy.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/bash
-
-# Activate virtual environment
-source venv/bin/activate
-
-# Export environment variables
-export FLASK_APP=run.py
-export FLASK_ENV=production
-export FLASK_CONFIG=production
-
-# Run the application with uvicorn and nohup
-nohup uvicorn run:app --host 0.0.0.0 --port 5251 > app.log 2>&1 &
-
-echo "Application started on port 5251. Check app.log for output."
-echo "To stop the application, find the process ID with 'ps aux | grep uvicorn' and kill it with 'kill <PID>'."
diff --git a/deploy_production.sh b/deploy_production.sh
deleted file mode 100644
index 9a13258..0000000
--- a/deploy_production.sh
+++ /dev/null
@@ -1,82 +0,0 @@
-#!/bin/bash
-
-# Production deployment script for AI Service
-# This script deploys the AI service in a production environment
-
-# Exit on error
-set -e
-
-echo "Starting AI Service deployment..."
-
-# Check if virtual environment exists
-if [ ! -d "venv" ]; then
-    echo "Creating virtual environment..."
-    python3 -m venv venv
-fi
-
-# Activate virtual environment
-source venv/bin/activate
-
-# Install dependencies
-echo "Installing dependencies..."
-pip install --upgrade pip
-pip install -r ai_service/requirements.txt
-
-# Check if .env file exists, if not copy from .env.production
-if [ ! -f "ai_service/.env" ]; then
-    echo "Creating .env file from .env.production..."
-    cp ai_service/.env.production ai_service/.env
-    echo "Please edit ai_service/.env to add your API keys before continuing."
-    echo "Then run this script again."
-    exit 1
-fi
-
-# Create data directory if it doesn't exist
-mkdir -p ai_service/data
-
-# Check if Pinecone API key is set
-PINECONE_API_KEY=$(grep PINECONE_API_KEY ai_service/.env | cut -d '=' -f2)
-if [ "$PINECONE_API_KEY" = "your-pinecone-api-key-here" ]; then
-    echo "Warning: Pinecone API key not set. Vector storage will not be available."
-    echo "Edit ai_service/.env to set your Pinecone API key."
-fi
-
-# Check if OpenAI API key is set
-OPENAI_API_KEY=$(grep OPENAI_API_KEY ai_service/.env | cut -d '=' -f2)
-if [ "$OPENAI_API_KEY" = "your-openai-api-key-here" ]; then
-    echo "Warning: OpenAI API key not set. AI responses will be placeholders."
-    echo "Edit ai_service/.env to set your OpenAI API key."
-fi
-
-# Stop any existing service
-echo "Stopping any existing AI service..."
-pkill -f "uvicorn ai_service.run:app" || true
-
-# Start the service with nohup
-echo "Starting AI service..."
-cd $(dirname "$0")
-nohup uvicorn ai_service.run:app --host 0.0.0.0 --port 5251 > ai_service.log 2>&1 &
-
-# Wait for service to start
-sleep 2
-
-# Check if service is running
-if pgrep -f "uvicorn ai_service.run:app" > /dev/null; then
-    echo "AI service started successfully!"
-    echo "Service is running on http://0.0.0.0:5251"
-    echo "Logs are available in ai_service.log"
-else
-    echo "Failed to start AI service. Check ai_service.log for details."
-    exit 1
-fi
-
-# Test the service
-echo "Testing service health..."
-if curl -s http://localhost:5251/health | grep -q "healthy"; then
-    echo "Service is healthy!"
-else
-    echo "Service health check failed. Check ai_service.log for details."
-    exit 1
-fi
-
-echo "Deployment complete!"
diff --git a/openwebui_config.md b/openwebui_config.md
new file mode 100644
index 0000000..e197e9e
--- /dev/null
+++ b/openwebui_config.md
@@ -0,0 +1,83 @@
+# OpenWebUI Configuration Guide
+
+This guide explains how to configure OpenWebUI to use your AI service as its backend.
+
+## Overview
+
+Your AI service now implements the necessary API endpoints to be compatible with OpenWebUI. This allows OpenWebUI to use your AI service as its backend instead of connecting directly to Ollama.
+
+## Configuration Steps
+
+### 1. Update OpenWebUI Environment Variables
+
+You need to modify the OpenWebUI environment variables to point to your AI service. This can be done by editing the `.env` file in the OpenWebUI installation directory or by setting environment variables when running the OpenWebUI container.
+
+Add or modify the following environment variables:
+
+```
+OLLAMA_API_BASE_URL=http://your-server-ip:5251/ollama
+OPENAI_API_BASE_URL=http://your-server-ip:5251/api
+```
+
+Replace `your-server-ip` with the IP address or hostname of the server where your AI service is running.
+
+### 2. Restart OpenWebUI
+
+After updating the environment variables, restart the OpenWebUI service to apply the changes.
+
+If you're running OpenWebUI using Docker:
+
+```bash
+docker restart openwebui
+```
+
+If you're running OpenWebUI directly:
+
+```bash
+# Stop the current process
+pkill -f "openwebui"
+
+# Start OpenWebUI again
+cd /path/to/openwebui
+npm start
+```
+
+## Testing the Integration
+
+To test if the integration is working correctly:
+
+1. Open OpenWebUI in your browser (http://104.225.217.215:8080)
+2. Try to create a new chat
+3. Select one of the models (gemma3, llama3.3, llama3.1, mistral, or deepseek)
+4. Send a message and check if you get a response
+
+If everything is configured correctly, OpenWebUI will send requests to your AI service, which will then forward them to the Ollama API.
+
+## Troubleshooting
+
+If you encounter issues with the integration:
+
+1. Check the logs of your AI service:
+   ```bash
+   tail -f logs/ai_service.log
+   ```
+
+2. Check the logs of OpenWebUI:
+   ```bash
+   docker logs openwebui
+   ```
+
+3. Verify that your AI service is running and accessible:
+   ```bash
+   curl http://your-server-ip:5251/health
+   ```
+
+4. Verify that the OpenWebUI environment variables are set correctly:
+   ```bash
+   docker exec openwebui env | grep URL
+   ```
+
+## Advanced Configuration
+
+For advanced configuration options, refer to the OpenWebUI documentation:
+https://docs.openwebui.com/getting-started/env-configuration
diff --git a/remote_deploy.sh b/remote_deploy.sh
index 1a42a8a..8d0bbfe 100755
--- a/remote_deploy.sh
+++ b/remote_deploy.sh
@@ -36,8 +36,7 @@ ssh -p $SERVER_PORT $SERVER_USER@$SERVER_IP "cd $REMOTE_DIR/ai_service_app && \
     python3 -m venv venv || true && \
     source venv/bin/activate && \
     pip install --upgrade pip && \
-    pip install -r ai_service/requirements.txt && \
-    pip install python-dotenv langchain-text-splitters requests"
+    pip install -r requirements.txt"
 
 # Stop any existing service
 echo "Stopping any existing service..."
diff --git a/requirements.txt b/requirements.txt
index 8a5cdb5..94d7144 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,22 +1,11 @@
-# Flask and FastAPI integration
-flask==2.3.3
+# Core dependencies
 fastapi==0.103.1
 uvicorn==0.23.2
+pydantic>=2.5.2,<3.0.0  # Updated to resolve dependency conflicts
+python-dotenv==1.0.0
 
-# Database
-sqlalchemy==2.0.20
-flask-sqlalchemy==3.0.5
-pymysql==1.1.0
-cryptography==41.0.3  # Required for PyMySQL
-alembic==1.12.0
-
-# Document Processing
+# Document processing
 langchain-text-splitters==0.3.8
-sentence-transformers==2.2.2
 
 # Utilities
-python-dotenv==1.0.0
-pydantic==2.3.0
-
-# Testing
-pytest==7.4.0
+requests==2.32.3
diff --git a/run.py b/run.py
deleted file mode 100644
index 3a154ae..0000000
--- a/run.py
+++ /dev/null
@@ -1,14 +0,0 @@
-"""
-Application entry point.
-"""
-
-import os
-from app import create_app
-from app.config.config import config
-
-# Get configuration from environment or use default
-config_name = os.environ.get('FLASK_CONFIG', 'default')
-app = create_app(config[config_name])
-
-if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=5000)