Use ai_service as the backend for OpenWebUI
This commit is contained in:
+23
-20
@@ -1,25 +1,28 @@
|
||||
# AI Service for Chatbot Application
|
||||
# OpenWebUI Backend Service
|
||||
|
||||
This is the AI service component for the chatbot application. It provides APIs for document processing, embeddings, and chat functionality.
|
||||
This is a backend service for OpenWebUI that provides OpenWebUI-compatible API endpoints for chat functionality and model switching.
|
||||
|
||||
## Features
|
||||
|
||||
- Document processing and embedding
|
||||
- Retrieval-augmented generation (RAG)
|
||||
- OpenWebUI-compatible API endpoints
|
||||
- Ollama API proxy
|
||||
- Chat functionality with model switching
|
||||
- Team chat support
|
||||
- Support for multiple LLM models (gemma3, llama3.3, llama3.1, mistral, deepseek)
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
ai_service/
|
||||
├── embeddings/ # Embedding and document processing services
|
||||
├── models/ # Model and chat services
|
||||
├── utils/ # Utility functions
|
||||
├── data/ # Data storage
|
||||
│ ├── model_service.py
|
||||
│ ├── chat_service.py
|
||||
│ └── model_parameters.py
|
||||
├── embeddings/ # Document processing for RAG
|
||||
│ └── document_service.py
|
||||
├── openwebui_api.py # OpenWebUI-compatible API endpoints
|
||||
├── config.py # Configuration settings
|
||||
├── api.py # FastAPI application
|
||||
└── run.py # Script to run the service
|
||||
└── deploy.sh # Deployment script
|
||||
```
|
||||
|
||||
## Setup Instructions
|
||||
@@ -43,7 +46,7 @@ ai_service/
|
||||
|
||||
4. Run the service:
|
||||
```
|
||||
python run.py
|
||||
python ../run_ai_service.py
|
||||
```
|
||||
|
||||
## API Documentation
|
||||
@@ -70,20 +73,20 @@ This will start the service on port 5251 using uvicorn with nohup.
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### Document Endpoints
|
||||
### Health Check
|
||||
|
||||
- `POST /documents` - Process a document for embedding
|
||||
- `GET /documents` - Get all documents
|
||||
- `GET /documents/{doc_id}` - Get a document by ID
|
||||
- `DELETE /documents/{doc_id}` - Delete a document
|
||||
- `POST /documents/search` - Search for documents
|
||||
- `GET /health` - Check if the service is running
|
||||
|
||||
### Model Endpoints
|
||||
### OpenWebUI-Compatible Endpoints
|
||||
|
||||
- `GET /models` - Get available models
|
||||
- `GET /models/{model_id}` - Get information about a model
|
||||
- `GET /api/models` - Get available models in OpenWebUI format
|
||||
- `POST /api/chat/completions` - OpenAI-compatible chat completions endpoint
|
||||
|
||||
### Chat Endpoints
|
||||
### Ollama API Proxy
|
||||
|
||||
- `POST /ollama/api/generate` - Proxy to Ollama's generate endpoint
|
||||
|
||||
### Original API Endpoints
|
||||
|
||||
- `POST /chats` - Create a new chat
|
||||
- `GET /chats/user/{user_id}` - Get all chats for a user
|
||||
|
||||
+14
-112
@@ -1,22 +1,21 @@
|
||||
"""
|
||||
FastAPI application for the AI service.
|
||||
This service acts as a backend for OpenWebUI, providing OpenWebUI-compatible API endpoints.
|
||||
"""
|
||||
|
||||
from fastapi import FastAPI, HTTPException, Depends, Body, Query, Path
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Dict, Any, Optional
|
||||
from typing import List, Optional
|
||||
|
||||
from ai_service.config import config
|
||||
from ai_service.embeddings.document_service import document_service
|
||||
from ai_service.models.model_service import model_service
|
||||
from ai_service.models.chat_service import chat_service
|
||||
from ai_service.models.model_parameters import ModelParameters
|
||||
from ai_service.openwebui_api import router as openwebui_router
|
||||
|
||||
# Create FastAPI app
|
||||
app = FastAPI(
|
||||
title="AI Service API",
|
||||
description="API for the AI service",
|
||||
description="Backend API for OpenWebUI",
|
||||
version="1.0.0"
|
||||
)
|
||||
|
||||
@@ -29,32 +28,16 @@ app.add_middleware(
|
||||
allow_headers=["*"], # Allow all headers
|
||||
)
|
||||
|
||||
# Define API models
|
||||
class DocumentRequest(BaseModel):
    """Payload describing a document submitted for processing.

    ``content`` and ``title`` are required; ``description`` and ``metadata``
    are optional and default to ``None``.
    """

    content: str = Field(default=..., description="Document content")
    title: str = Field(default=..., description="Document title")
    description: Optional[str] = Field(default=None, description="Document description")
    metadata: Optional[Dict[str, Any]] = Field(default=None, description="Additional metadata")
|
||||
# Include OpenWebUI-compatible API routes
|
||||
app.include_router(openwebui_router, prefix="/api")
|
||||
|
||||
class DocumentResponse(BaseModel):
    """Shape of a processed document as returned by the document endpoints.

    Every field is required.
    """

    id: str = Field(default=..., description="Document ID")
    title: str = Field(default=..., description="Document title")
    description: str = Field(default=..., description="Document description")
    chunk_count: int = Field(default=..., description="Number of chunks")
    metadata: Dict[str, Any] = Field(default=..., description="Additional metadata")
|
||||
# Include Ollama proxy routes
|
||||
# NOTE(review): this mounts the same router that is already mounted under
# "/api", so every route it defines is exposed under both prefixes. The
# router's generate route is declared as "/ollama/api/generate", which with
# this prefix resolves to "/ollama/ollama/api/generate" -- confirm whether
# "/ollama/api/generate" was the intended path.
app.include_router(openwebui_router, prefix="/ollama")
|
||||
|
||||
class SearchRequest(BaseModel):
    """Payload for a document search.

    ``query`` is required; ``top_k`` defaults to 5.
    """

    query: str = Field(default=..., description="Search query")
    top_k: int = Field(default=5, description="Number of results to return")
|
||||
|
||||
class SearchResult(BaseModel):
    """One entry in a document search response.

    Every field is required.
    """

    id: str = Field(default=..., description="Result ID")
    score: float = Field(default=..., description="Similarity score")
    metadata: Dict[str, Any] = Field(default=..., description="Result metadata")
|
||||
# Define API models for health check
|
||||
class HealthResponse(BaseModel):
    """Body returned by the health check; carries a single required ``status``."""

    status: str = Field(default=..., description="Health status")
|
||||
|
||||
class ModelInfo(BaseModel):
|
||||
"""Model for model information."""
|
||||
@@ -114,7 +97,7 @@ class Chat(BaseModel):
|
||||
team_members: List[str] = Field(..., description="Team members")
|
||||
|
||||
# Define API endpoints
|
||||
@app.get("/health")
|
||||
@app.get("/health", response_model=HealthResponse)
|
||||
async def health_check():
|
||||
"""
|
||||
Health check endpoint.
|
||||
@@ -124,88 +107,7 @@ async def health_check():
|
||||
"""
|
||||
return {"status": "healthy"}
|
||||
|
||||
# Document endpoints
|
||||
@app.post("/documents", response_model=DocumentResponse)
async def process_document(request: DocumentRequest):
    """
    Hand a document to the document service and return its stored record.

    Args:
        request: Content, title and optional description/metadata.

    Returns:
        The record ``document_service.get_document`` yields for the new ID.
    """
    payload = {
        "content": request.content,
        "title": request.title,
        "description": request.description,
        "metadata": request.metadata,
    }
    new_id = document_service.process_document(**payload)
    stored = document_service.get_document(new_id)
    return stored
|
||||
|
||||
@app.get("/documents", response_model=List[DocumentResponse])
async def get_all_documents():
    """
    List every document known to the document service.

    Returns:
        The result of ``document_service.get_all_documents()``.
    """
    documents = document_service.get_all_documents()
    return documents
|
||||
|
||||
@app.get("/documents/{doc_id}", response_model=DocumentResponse)
async def get_document(doc_id: str):
    """
    Look up a single document.

    Args:
        doc_id: Identifier of the document to fetch.

    Returns:
        The document record.

    Raises:
        HTTPException: 404 when the service returns a falsy result.
    """
    found = document_service.get_document(doc_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Document not found")
|
||||
|
||||
@app.delete("/documents/{doc_id}")
async def delete_document(doc_id: str):
    """
    Remove a document.

    Args:
        doc_id: Identifier of the document to delete.

    Returns:
        A status/message dict confirming the deletion.

    Raises:
        HTTPException: 404 when the service reports the delete did not succeed.
    """
    if document_service.delete_document(doc_id):
        return {"status": "success", "message": "Document deleted"}
    raise HTTPException(status_code=404, detail="Document not found")
|
||||
|
||||
@app.post("/documents/search", response_model=List[SearchResult])
async def search_documents(request: SearchRequest):
    """
    Run a document search.

    Args:
        request: Query text and the number of results wanted.

    Returns:
        The result of ``document_service.search_documents``.
    """
    return document_service.search_documents(query=request.query, top_k=request.top_k)
|
||||
|
||||
# Model endpoints
|
||||
@app.get("/models", response_model=List[ModelInfo])
|
||||
|
||||
@@ -0,0 +1,175 @@
|
||||
"""
|
||||
OpenWebUI-compatible API endpoints for the AI service.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Header, Request
|
||||
from fastapi.responses import StreamingResponse
|
||||
from typing import List, Dict, Any, Optional, Union
|
||||
import json
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from ai_service.models.model_service import model_service
|
||||
from ai_service.models.chat_service import chat_service
|
||||
from ai_service.models.model_parameters import ModelParameters
|
||||
|
||||
# Create router
|
||||
router = APIRouter()
|
||||
|
||||
# Models endpoint
|
||||
@router.get("/models", response_model=List[Dict[str, Any]])
async def get_models():
    """
    List the available models, reshaped into OpenWebUI-style model entries.

    Each entry reuses the model's ``id`` for both ``id`` and ``root`` and is
    stamped with the current Unix time as ``created``.
    """
    return [
        {
            "id": entry["id"],
            "object": "model",
            "created": int(time.time()),
            "owned_by": "user",
            "permission": [],
            "root": entry["id"],
            "parent": None,
        }
        for entry in model_service.get_available_models()
    ]
|
||||
|
||||
# Chat completions endpoint (OpenAI-compatible)
|
||||
@router.post("/chat/completions")
async def chat_completions(request: Request):
    """
    OpenAI-compatible chat completions endpoint.

    Reads the JSON request body, forwards the most recent ``user`` message to
    ``chat_service`` and wraps the reply in an OpenAI-style payload.

    Args:
        request: Incoming HTTP request whose JSON body carries ``model``,
            ``messages`` and optional sampling parameters.

    Returns:
        A ``chat.completion`` dict, or, when ``stream`` is truthy, a
        ``StreamingResponse`` of server-sent ``chat.completion.chunk`` events
        terminated by ``data: [DONE]``.

    Raises:
        HTTPException: 400 if the body is not a JSON object or contains no
            user message.
    """
    # A malformed body is a client error, not a server error.
    try:
        body = await request.json()
    except ValueError as exc:
        raise HTTPException(status_code=400, detail="Invalid JSON body") from exc
    if not isinstance(body, dict):
        raise HTTPException(status_code=400, detail="Request body must be a JSON object")

    # Extract parameters
    model_id = body.get("model", "llama3.1")
    stream = body.get("stream", False)
    stop = body.get("stop")
    if isinstance(stop, list):
        stop_sequences = stop
    elif stop:
        # A single stop string is normalised to a one-element list.
        stop_sequences = [stop]
    else:
        stop_sequences = None

    messages = body.get("messages")
    if not isinstance(messages, list):
        messages = []

    # The prompt is the most recent message with role "user".
    user_message = next(
        (
            msg.get("content")
            for msg in reversed(messages)
            if isinstance(msg, dict) and msg.get("role") == "user"
        ),
        None,
    )

    # Validate before touching chat_service so a bad request does not leave
    # an orphan chat behind.
    if not user_message:
        raise HTTPException(status_code=400, detail="No user message found")

    # Create a unique chat ID
    chat_id = str(uuid.uuid4())

    # Create a user ID (in a real implementation, this would come from authentication)
    user_id = "openwebui-user"

    # NOTE(review): the value returned by create_chat() is discarded and the
    # locally generated chat_id is never passed to it, so whether
    # get_chat_response() can resolve this chat_id depends on chat_service
    # internals not visible here -- confirm.
    chat_service.create_chat(user_id=user_id, title="API Chat", model_id=model_id)

    # Get chat response
    response = chat_service.get_chat_response(
        chat_id=chat_id,
        message=user_message,
        user_id=user_id,
        temperature=body.get("temperature"),
        max_tokens=body.get("max_tokens"),
        top_p=body.get("top_p"),
        frequency_penalty=body.get("frequency_penalty"),
        presence_penalty=body.get("presence_penalty"),
        stop_sequences=stop_sequences,
    )
    content = response.get("content", "")

    completion_id = f"chatcmpl-{str(uuid.uuid4())[:8]}"
    created = int(time.time())

    if stream:
        # Streaming clients expect "chat.completion.chunk" objects whose
        # choices carry a "delta"; the full reply is sent as one content
        # chunk followed by an empty chunk that carries the finish reason.
        def _chunk(delta, finish_reason):
            return {
                "id": completion_id,
                "object": "chat.completion.chunk",
                "created": created,
                "model": model_id,
                "choices": [
                    {"index": 0, "delta": delta, "finish_reason": finish_reason}
                ],
            }

        async def generate_stream():
            first = _chunk({"role": "assistant", "content": content}, None)
            yield f"data: {json.dumps(first)}\n\n"
            yield f"data: {json.dumps(_chunk({}, 'stop'))}\n\n"
            yield "data: [DONE]\n\n"

        return StreamingResponse(generate_stream(), media_type="text/event-stream")

    # Format response in OpenAI-compatible format
    return {
        "id": completion_id,
        "object": "chat.completion",
        "created": created,
        "model": model_id,
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": content},
                "finish_reason": "stop",
            }
        ],
        "usage": {
            "prompt_tokens": -1,  # We don't track tokens
            "completion_tokens": -1,
            "total_tokens": -1,
        },
    }
|
||||
|
||||
# Health check endpoint
|
||||
@router.get("/health")
async def health_check():
    """
    Liveness probe: always answers with a ``healthy`` status.
    """
    return dict(status="healthy")
|
||||
|
||||
# Ollama API proxy endpoints
|
||||
# NOTE(review): api.py mounts this router under the "/api" and "/ollama"
# prefixes, so this route resolves to "/api/ollama/api/generate" and
# "/ollama/ollama/api/generate" -- confirm the intended public path.
@router.post("/ollama/api/generate")
async def ollama_generate(request: Request):
    """
    Ollama-style generate endpoint backed by ``chat_service``.

    Args:
        request: Incoming HTTP request whose JSON body carries ``model`` and
            ``prompt``.

    Returns:
        A dict with ``model``, ``created_at`` (UTC timestamp with
        microseconds), ``response`` and ``done``.

    Raises:
        HTTPException: 400 if the body is not a JSON object.
    """
    # Imported locally so this block does not depend on a file-level import.
    from datetime import datetime, timezone

    # A malformed body is a client error, not a server error.
    try:
        body = await request.json()
    except ValueError as exc:
        raise HTTPException(status_code=400, detail="Invalid JSON body") from exc
    if not isinstance(body, dict):
        raise HTTPException(status_code=400, detail="Request body must be a JSON object")

    # Extract parameters
    model_id = body.get("model", "llama3.1")
    prompt = body.get("prompt", "")

    # Create a unique chat ID
    chat_id = str(uuid.uuid4())

    # Create a user ID (in a real implementation, this would come from authentication)
    user_id = "openwebui-user"

    # NOTE(review): the value returned by create_chat() is discarded and the
    # locally generated chat_id is never passed to it -- confirm that
    # get_chat_response() can resolve this chat_id.
    chat_service.create_chat(user_id=user_id, title="API Chat", model_id=model_id)

    # Get chat response
    response = chat_service.get_chat_response(
        chat_id=chat_id,
        message=prompt,
        user_id=user_id,
    )

    # time.strftime() has no "%f" directive, so the timestamp is built with
    # datetime, which supports microseconds.
    created_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")

    # Format response in Ollama-compatible format
    return {
        "model": model_id,
        "created_at": created_at,
        "response": response.get("content", ""),
        "done": True,
    }
|
||||
@@ -1,11 +0,0 @@
|
||||
# Core dependencies
|
||||
fastapi==0.103.1
|
||||
uvicorn==0.23.2
|
||||
pydantic==2.3.0
|
||||
python-dotenv==1.0.0
|
||||
|
||||
# Document processing
|
||||
langchain-text-splitters==0.3.8
|
||||
|
||||
# Utilities
|
||||
requests==2.32.3
|
||||
Reference in New Issue
Block a user