Using the ai_service as backend for openwebui

2025-05-09 17:38:58 +01:00
parent 9535052645
commit 4c0cff7cca
15 changed files with 466 additions and 748 deletions
@@ -1,25 +1,28 @@
-# AI Service for Chatbot Application
+# OpenWebUI Backend Service

-This is the AI service component for the chatbot application. It provides APIs for document processing, embeddings, and chat functionality.
+This backend service for OpenWebUI exposes OpenWebUI-compatible API endpoints for chat and model switching.

 ## Features

- Document processing and embedding
- Retrieval-augmented generation (RAG)
+- OpenWebUI-compatible API endpoints
+- Ollama API proxy
 - Chat functionality with model switching
- Team chat support
+- Support for multiple LLMs (gemma3, llama3.3, llama3.1, mistral, deepseek)

 ## Project Structure

 ```
 ai_service/
-├── embeddings/       # Embedding and document processing services
 ├── models/           # Model and chat services
-├── utils/            # Utility functions
-├── data/             # Data storage
+│   ├── model_service.py
+│   ├── chat_service.py
+│   └── model_parameters.py
+├── embeddings/       # Document processing for RAG
+│   └── document_service.py
+├── openwebui_api.py  # OpenWebUI-compatible API endpoints
 ├── config.py         # Configuration settings
 ├── api.py            # FastAPI application
-└── run.py            # Script to run the service
+└── deploy.sh         # Deployment script
 ```

 ## Setup Instructions
@@ -43,7 +46,7 @@ ai_service/

 4. Run the service:
   ```
-   python run.py
+   python ../run_ai_service.py
   ```

 ## API Documentation
@@ -70,20 +73,20 @@ This will start the service on port 5251 using uvicorn with nohup.

 ## API Endpoints

-### Document Endpoints
+### Health Check

- `POST /documents` - Process a document for embedding
- `GET /documents` - Get all documents
- `GET /documents/{doc_id}` - Get a document by ID
- `DELETE /documents/{doc_id}` - Delete a document
- `POST /documents/search` - Search for documents
+- `GET /health` - Check if the service is running

-### Model Endpoints
+### OpenWebUI-Compatible Endpoints

- `GET /models` - Get available models
- `GET /models/{model_id}` - Get information about a model
+- `GET /api/models` - Get available models in OpenWebUI format
+- `POST /api/chat/completions` - OpenAI-compatible chat completions endpoint

-### Chat Endpoints
+### Ollama API Proxy
+
+- `POST /ollama/api/generate` - Proxy to Ollama's generate endpoint
+
+### Original API Endpoints

 - `POST /chats` - Create a new chat
 - `GET /chats/user/{user_id}` - Get all chats for a user
@@ -1,22 +1,21 @@
 """
 FastAPI application for the AI service.
+This service acts as a backend for OpenWebUI, providing OpenWebUI-compatible API endpoints.
 """

-from fastapi import FastAPI, HTTPException, Depends, Body, Query, Path
+from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, Field
-from typing import List, Dict, Any, Optional
+from typing import List, Optional

-from ai_service.config import config
-from ai_service.embeddings.document_service import document_service
 from ai_service.models.model_service import model_service
 from ai_service.models.chat_service import chat_service
-from ai_service.models.model_parameters import ModelParameters
+from ai_service.openwebui_api import router as openwebui_router

 # Create FastAPI app
 app = FastAPI(
    title="AI Service API",
-    description="API for the AI service",
+    description="Backend API for OpenWebUI",
    version="1.0.0"
 )

@@ -29,32 +28,16 @@ app.add_middleware(
    allow_headers=["*"],  # Allow all headers
 )

-# Define API models
-class DocumentRequest(BaseModel):
-    """Request model for document processing."""
-    content: str = Field(..., description="Document content")
-    title: str = Field(..., description="Document title")
-    description: Optional[str] = Field(None, description="Document description")
-    metadata: Optional[Dict[str, Any]] = Field(None, description="Additional metadata")
+# Include OpenWebUI-compatible API routes
+app.include_router(openwebui_router, prefix="/api")

-class DocumentResponse(BaseModel):
-    """Response model for document processing."""
-    id: str = Field(..., description="Document ID")
-    title: str = Field(..., description="Document title")
-    description: str = Field(..., description="Document description")
-    chunk_count: int = Field(..., description="Number of chunks")
-    metadata: Dict[str, Any] = Field(..., description="Additional metadata")
+# Include Ollama proxy routes
+app.include_router(openwebui_router, prefix="/ollama")

-class SearchRequest(BaseModel):
-    """Request model for document search."""
-    query: str = Field(..., description="Search query")
-    top_k: int = Field(5, description="Number of results to return")
-
-class SearchResult(BaseModel):
-    """Model for a search result."""
-    id: str = Field(..., description="Result ID")
-    score: float = Field(..., description="Similarity score")
-    metadata: Dict[str, Any] = Field(..., description="Result metadata")
+# Define API models for health check
+class HealthResponse(BaseModel):
+    """Response model for the health check endpoint."""
+    # Required field (no default); the /health handler returns "healthy" here.
+    status: str = Field(..., description="Health status")

 class ModelInfo(BaseModel):
    """Model for model information."""
@@ -114,7 +97,7 @@ class Chat(BaseModel):
    team_members: List[str] = Field(..., description="Team members")

 # Define API endpoints
-@app.get("/health")
+@app.get("/health", response_model=HealthResponse)
 async def health_check():
    """
    Health check endpoint.
@@ -124,88 +107,7 @@ async def health_check():
    """
    return {"status": "healthy"}

-# Document endpoints
-@app.post("/documents", response_model=DocumentResponse)
-async def process_document(request: DocumentRequest):
-    """
-    Process a document for embedding.

-    Args:
-        request: Document processing request.
-
-    Returns:
-        Processed document information.
-    """
-    doc_id = document_service.process_document(
-        content=request.content,
-        title=request.title,
-        description=request.description,
-        metadata=request.metadata
-    )
-
-    return document_service.get_document(doc_id)
-
-@app.get("/documents", response_model=List[DocumentResponse])
-async def get_all_documents():
-    """
-    Get all documents.
-
-    Returns:
-        List of document information.
-    """
-    return document_service.get_all_documents()
-
-@app.get("/documents/{doc_id}", response_model=DocumentResponse)
-async def get_document(doc_id: str):
-    """
-    Get a document by ID.
-
-    Args:
-        doc_id: Document ID.
-
-    Returns:
-        Document information.
-    """
-    doc = document_service.get_document(doc_id)
-    if not doc:
-        raise HTTPException(status_code=404, detail="Document not found")
-
-    return doc
-
-@app.delete("/documents/{doc_id}")
-async def delete_document(doc_id: str):
-    """
-    Delete a document.
-
-    Args:
-        doc_id: Document ID.
-
-    Returns:
-        Deletion status.
-    """
-    success = document_service.delete_document(doc_id)
-    if not success:
-        raise HTTPException(status_code=404, detail="Document not found")
-
-    return {"status": "success", "message": "Document deleted"}
-
-@app.post("/documents/search", response_model=List[SearchResult])
-async def search_documents(request: SearchRequest):
-    """
-    Search for documents.
-
-    Args:
-        request: Search request.
-
-    Returns:
-        Search results.
-    """
-    results = document_service.search_documents(
-        query=request.query,
-        top_k=request.top_k
-    )
-
-    return results

 # Model endpoints
@app.get("/models", response_model=List[ModelInfo])
@@ -0,0 +1,175 @@
+"""
+OpenWebUI-compatible API endpoints for the AI service.
+"""
+
+from fastapi import APIRouter, Depends, HTTPException, Header, Request
+from fastapi.responses import StreamingResponse
+from typing import List, Dict, Any, Optional, Union
+import json
+import time
+import uuid
+
+from ai_service.models.model_service import model_service
+from ai_service.models.chat_service import chat_service
+from ai_service.models.model_parameters import ModelParameters
+
+# Router collecting the OpenWebUI-compatible routes defined in this module;
+# api.py mounts it on the application with app.include_router().
+router = APIRouter()
+
+# Models endpoint
+@router.get("/models", response_model=List[Dict[str, Any]])
+async def get_models():
+    """
+    List the available models as OpenWebUI-style model descriptors.
+
+    Returns:
+        One dict per model reported by the model service, carrying the
+        model ID plus a fixed set of descriptor fields.
+    """
+    def describe(entry):
+        # "created" is stamped with the current time for every entry; the
+        # model service is not asked for a real creation date.
+        model_id = entry["id"]
+        return {
+            "id": model_id,
+            "object": "model",
+            "created": int(time.time()),
+            "owned_by": "user",
+            "permission": [],
+            "root": model_id,
+            "parent": None
+        }
+
+    return [describe(entry) for entry in model_service.get_available_models()]
+
+# Chat completions endpoint (OpenAI-compatible)
+@router.post("/chat/completions")
+async def chat_completions(request: Request):
+    """
+    OpenAI-compatible chat completions endpoint.
+
+    Only the most recent message with role "user" is forwarded to the chat
+    service; earlier messages in the request are not passed on.
+
+    Args:
+        request: Raw request whose JSON body follows the OpenAI chat
+            completions shape ("model", "messages", "stream", sampling
+            parameters, "stop").
+
+    Returns:
+        A "chat.completion" dict, or, when "stream" is truthy, a
+        StreamingResponse of server-sent events carrying
+        "chat.completion.chunk" objects followed by "[DONE]".
+
+    Raises:
+        HTTPException: 400 if the body is not a JSON object or contains no
+            non-empty user message.
+    """
+    # Parse request body; malformed payloads get a 400 rather than an
+    # unhandled 500.
+    try:
+        body = await request.json()
+    except ValueError as exc:  # json.JSONDecodeError is a ValueError
+        raise HTTPException(status_code=400, detail="Request body must be valid JSON") from exc
+    if not isinstance(body, dict):
+        raise HTTPException(status_code=400, detail="Request body must be a JSON object")
+
+    # Extract parameters
+    model_id = body.get("model", "llama3.1")
+    messages = body.get("messages") or []  # tolerate an explicit null
+    stream = body.get("stream", False)
+    stop = body.get("stop")
+
+    # "stop" may be a single string or a list; the chat service receives a
+    # list (or None when nothing was supplied).
+    if isinstance(stop, list):
+        stop_sequences = stop
+    elif stop:
+        stop_sequences = [stop]
+    else:
+        stop_sequences = None
+
+    # Extract the user's message (last user message in the array)
+    user_message = next(
+        (
+            msg.get("content")
+            for msg in reversed(messages)
+            if isinstance(msg, dict) and msg.get("role") == "user"
+        ),
+        None,
+    )
+
+    # Validate before touching the chat service so a rejected request does
+    # not leave an unused chat behind.
+    if not user_message:
+        raise HTTPException(status_code=400, detail="No user message found")
+
+    # Create a unique chat ID
+    chat_id = str(uuid.uuid4())
+
+    # Create a user ID (in a real implementation, this would come from authentication)
+    user_id = "openwebui-user"
+
+    # Create a new chat
+    # NOTE(review): create_chat's return value is discarded and the UUID
+    # generated above is what get_chat_response receives - confirm the chat
+    # service does not expect the ID of the chat it just created.
+    chat_service.create_chat(user_id=user_id, title="API Chat", model_id=model_id)
+
+    # Get chat response
+    response = chat_service.get_chat_response(
+        chat_id=chat_id,
+        message=user_message,
+        user_id=user_id,
+        temperature=body.get("temperature"),
+        max_tokens=body.get("max_tokens"),
+        top_p=body.get("top_p"),
+        frequency_penalty=body.get("frequency_penalty"),
+        presence_penalty=body.get("presence_penalty"),
+        stop_sequences=stop_sequences
+    )
+    content = response.get("content", "")
+
+    completion_id = f"chatcmpl-{str(uuid.uuid4())[:8]}"
+    created = int(time.time())
+
+    # Handle streaming if requested
+    if stream:
+        def make_chunk(delta, finish_reason):
+            # Streaming clients read choices[].delta from
+            # "chat.completion.chunk" objects, not choices[].message.
+            return {
+                "id": completion_id,
+                "object": "chat.completion.chunk",
+                "created": created,
+                "model": model_id,
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": delta,
+                        "finish_reason": finish_reason
+                    }
+                ]
+            }
+
+        # The whole answer is already available, so it is sent as a single
+        # content chunk followed by the terminating chunk.
+        events = (
+            make_chunk({"role": "assistant", "content": content}, None),
+            make_chunk({}, "stop"),
+        )
+
+        async def generate_stream():
+            # Yield the response in the SSE format
+            for event in events:
+                yield f"data: {json.dumps(event)}\n\n"
+            yield "data: [DONE]\n\n"
+
+        return StreamingResponse(generate_stream(), media_type="text/event-stream")
+
+    # Format response in OpenAI-compatible format
+    return {
+        "id": completion_id,
+        "object": "chat.completion",
+        "created": created,
+        "model": model_id,
+        "choices": [
+            {
+                "index": 0,
+                "message": {
+                    "role": "assistant",
+                    "content": content
+                },
+                "finish_reason": "stop"
+            }
+        ],
+        "usage": {
+            "prompt_tokens": -1,  # We don't track tokens
+            "completion_tokens": -1,
+            "total_tokens": -1
+        }
+    }
+
+# Health check endpoint
+@router.get("/health")
+async def health_check():
+    """
+    Report that the service is up.
+
+    Returns:
+        A dict whose "status" key is "healthy".
+    """
+    payload = {"status": "healthy"}
+    return payload
+
+# Ollama API proxy endpoints
+# api.py mounts this router under the "/ollama" prefix, so the "/api/generate"
+# route is what produces the documented POST /ollama/api/generate. The
+# original "/ollama/api/generate" route is kept so any caller already using
+# the doubled path keeps working.
+@router.post("/api/generate")
+@router.post("/ollama/api/generate")
+async def ollama_generate(request: Request):
+    """
+    Ollama-style generate endpoint.
+
+    The prompt is answered through chat_service; the result is wrapped in
+    an Ollama-shaped, non-streaming response.
+
+    Args:
+        request: Raw request whose JSON body carries "model" and "prompt".
+
+    Returns:
+        A dict with "model", "created_at" (UTC timestamp), "response" and
+        "done".
+
+    Raises:
+        HTTPException: 400 if the body is not a JSON object.
+    """
+    # Local import: "%f" (microseconds) is a datetime.strftime directive and
+    # is not among the directives documented for time.strftime.
+    from datetime import datetime, timezone
+
+    # Parse request body; malformed payloads get a 400 rather than an
+    # unhandled 500.
+    try:
+        body = await request.json()
+    except ValueError as exc:  # json.JSONDecodeError is a ValueError
+        raise HTTPException(status_code=400, detail="Request body must be valid JSON") from exc
+    if not isinstance(body, dict):
+        raise HTTPException(status_code=400, detail="Request body must be a JSON object")
+
+    # Extract parameters
+    model_id = body.get("model", "llama3.1")
+    prompt = body.get("prompt", "")
+
+    # Create a unique chat ID
+    chat_id = str(uuid.uuid4())
+
+    # Create a user ID (in a real implementation, this would come from authentication)
+    user_id = "openwebui-user"
+
+    # Create a new chat
+    # NOTE(review): create_chat's return value is discarded and the UUID
+    # generated above is what get_chat_response receives - confirm the chat
+    # service does not expect the ID of the chat it just created.
+    chat_service.create_chat(user_id=user_id, title="API Chat", model_id=model_id)
+
+    # Get chat response
+    response = chat_service.get_chat_response(
+        chat_id=chat_id,
+        message=prompt,
+        user_id=user_id
+    )
+
+    # Format response in Ollama-compatible format
+    created_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
+    ollama_response = {
+        "model": model_id,
+        "created_at": created_at,
+        "response": response.get("content", ""),
+        "done": True
+    }
+
+    return ollama_response
@@ -1,11 +0,0 @@
-# Core dependencies
-fastapi==0.103.1
-uvicorn==0.23.2
-pydantic==2.3.0
-python-dotenv==1.0.0
-
-# Document processing
-langchain-text-splitters==0.3.8
-
-# Utilities
-requests==2.32.3