Files
ds_zagres_ai/ai_service/api.py
T

496 lines
17 KiB
Python
Raw Normal View History

2025-05-09 15:41:16 +01:00
"""
FastAPI application for the AI service.

This service acts as a backend for OpenWebUI, providing OpenWebUI-compatible API endpoints.

The service supports document-based question answering using OpenWebUI's knowledge database:
- Set use_rag=True in API requests to enable Retrieval Augmented Generation
- When enabled, the service will use OpenWebUI's knowledge database to find relevant information
- Documents uploaded to OpenWebUI will be used to augment the model's responses
"""
from fastapi import FastAPI, HTTPException
2025-05-09 15:41:16 +01:00
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from typing import List, Optional
2025-05-12 16:30:35 +01:00
import uuid
2025-05-20 02:18:46 +01:00
import asyncio
2025-05-12 16:30:35 +01:00
from datetime import datetime, timezone
2025-05-09 15:41:16 +01:00
from ai_service.models.model_service import model_service
from ai_service.models.chat_service import chat_service
from ai_service.openwebui_api import router as openwebui_router
2025-05-12 16:10:45 +01:00
from ai_service.config import config
2025-05-20 02:18:46 +01:00
from ai_service import bot_manager
2025-05-09 15:41:16 +01:00
# Application instance served by the ASGI server.
app = FastAPI(
    title="AI Service API",
    description="Backend API for OpenWebUI",
    version="1.0.0",
)

# CORS: every origin, method and header is accepted, with credentials enabled.
# NOTE(review): FastAPI's CORS documentation says wildcard values should not be
# combined with allow_credentials=True -- confirm whether an explicit origin
# list is needed for this deployment.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# The same router is mounted twice: under /api for the OpenWebUI-compatible
# routes and under /ollama for the Ollama proxy routes.
app.include_router(openwebui_router, prefix="/api")
app.include_router(openwebui_router, prefix="/ollama")
2025-05-20 02:18:46 +01:00
# API startup event
2025-05-16 13:23:35 +01:00
@app.on_event("startup")
async def startup_event():
    """
    Print the service settings and start the OpenWebUI bot when it is enabled.

    Registered as the application's "startup" handler. The bot is started with
    the values read from ``config``; the outcome is only reported on stdout.
    """
    banner = "=" * 50

    print(banner)
    print(f"Starting AI Service API on {config.API_HOST}:{config.API_PORT}")
    print(f"OpenWebUI URL: {config.OPENWEBUI_URL}")
    print(f"Default model: {config.DEFAULT_MODEL}")

    # Nothing more to do when the bot is switched off in the configuration.
    if not config.BOT_ENABLED:
        print("OpenWebUI bot is disabled. Set BOT_ENABLED=true in .env to enable it.")
        print(banner)
        return

    print(banner)
    print("Starting OpenWebUI bot...")

    # All bot settings come straight from the configuration object.
    bot_settings = dict(
        openwebui_url=config.OPENWEBUI_URL,
        api_key=config.OPENWEBUI_API_KEY,
        model_id=config.DEFAULT_MODEL,
        system_prompt=config.BOT_SYSTEM_PROMPT,
        temperature=config.BOT_TEMPERATURE,
        max_tokens=config.BOT_MAX_TOKENS,
        top_p=config.BOT_TOP_P,
        triggers=config.BOT_TRIGGERS,
        respond_to_all=config.BOT_RESPOND_TO_ALL,
    )
    started = await bot_manager.start_bot(**bot_settings)

    print(
        "Bot started successfully!"
        if started
        else "Failed to start bot. Check the logs for details."
    )
    print(banner)
# API shutdown event
@app.on_event("shutdown")
async def shutdown_event():
    """
    Stop the OpenWebUI bot, if it is running, while the API shuts down.

    Registered as the application's "shutdown" handler. Progress and the
    outcome of stopping the bot are reported on stdout only.
    """
    banner = "=" * 50

    print(banner)
    print("Shutting down AI Service API...")

    bot_is_running = bot_manager.is_bot_running()
    if bot_is_running:
        print("Stopping OpenWebUI bot...")
        stopped = await bot_manager.stop_bot()
        print(
            "Bot stopped successfully!"
            if stopped
            else "Failed to stop bot. Check the logs for details."
        )

    print(banner)
2025-05-16 13:23:35 +01:00
# Define API models for health check
class HealthResponse(BaseModel):
    """Payload returned by the health check endpoint."""

    # Required free-form status string.
    status: str = Field(default=..., description="Health status")
2025-05-09 15:41:16 +01:00
class ModelInfo(BaseModel):
    """Description of a single model exposed by the service."""

    # Every field is required (Ellipsis default).
    id: str = Field(default=..., description="Model ID")
    name: str = Field(default=..., description="Model name")
    description: str = Field(default=..., description="Model description")
    provider: str = Field(default=..., description="Model provider")
    max_tokens: int = Field(default=..., description="Maximum tokens")
    is_default: bool = Field(
        default=...,
        description="Whether this is the default model",
    )
class ChatRequest(BaseModel):
    """Body accepted when a new chat is created."""

    # Only user_id is required; the remaining fields have defaults.
    user_id: str = Field(default=..., description="User ID")
    title: Optional[str] = Field(default=None, description="Chat title")
    model_id: Optional[str] = Field(default=None, description="Model ID")
    is_team_chat: bool = Field(
        default=False,
        description="Whether this is a team chat",
    )
class MessageRequest(BaseModel):
    """Body accepted when a message is posted to a chat."""

    # Required message text and sender, plus the RAG switch (off by default).
    message: str = Field(default=..., description="Message content")
    user_id: str = Field(default=..., description="User ID")
    use_rag: bool = Field(default=False, description="Whether to use RAG")

    # Optional generation parameters; None means "not supplied by the client".
    temperature: Optional[float] = Field(
        default=None,
        description="Controls randomness: higher values mean more random completions",
    )
    max_tokens: Optional[int] = Field(
        default=None,
        description="Maximum number of tokens to generate",
    )
    top_p: Optional[float] = Field(
        default=None,
        description="Nucleus sampling parameter",
    )
    frequency_penalty: Optional[float] = Field(
        default=None,
        description="Penalizes repeated tokens",
    )
    presence_penalty: Optional[float] = Field(
        default=None,
        description="Penalizes repeated topics",
    )
    stop_sequences: Optional[List[str]] = Field(
        default=None,
        description="Sequences where the API will stop generating",
    )
    system_prompt: Optional[str] = Field(
        default=None,
        description="System prompt to guide the model's behavior",
    )

    # Additional advanced parameters, also optional.
    min_p: Optional[float] = Field(
        default=None,
        description="Minimum probability threshold for token selection",
    )
    top_k: Optional[int] = Field(
        default=None,
        description="Only sample from the top k tokens",
    )
    repeat_penalty: Optional[float] = Field(
        default=None,
        description="Penalty for repeating tokens",
    )
    function_calling: Optional[bool] = Field(
        default=None,
        description="Whether to enable function calling",
    )
class Message(BaseModel):
    """A single message belonging to a chat."""

    id: str = Field(default=..., description="Message ID")
    content: str = Field(default=..., description="Message content")
    # user_id is the only optional field and defaults to None.
    user_id: Optional[str] = Field(default=None, description="User ID")
    is_user_message: bool = Field(
        default=...,
        description="Whether this is a user message",
    )
    timestamp: str = Field(default=..., description="Message timestamp")
class Chat(BaseModel):
    """A chat together with its messages and team members."""

    # Every field is required (Ellipsis default).
    id: str = Field(default=..., description="Chat ID")
    title: str = Field(default=..., description="Chat title")
    user_id: str = Field(default=..., description="User ID")
    model_id: str = Field(default=..., description="Model ID")
    is_team_chat: bool = Field(
        default=...,
        description="Whether this is a team chat",
    )
    created_at: str = Field(default=..., description="Creation timestamp")
    updated_at: str = Field(default=..., description="Update timestamp")
    messages: List[Message] = Field(default=..., description="Chat messages")
    team_members: List[str] = Field(default=..., description="Team members")
# Define API endpoints
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """
    Report that the service is up.

    Returns:
        A mapping with a single "status" key set to "healthy".
    """
    payload = {"status": "healthy"}
    return payload
2025-05-12 16:10:45 +01:00
@app.get("/config")
async def get_config():
    """
    Return the service configuration currently in effect.

    Returns:
        A mapping with the API, OpenWebUI and Ollama settings, a nested "bot"
        section and the IDs of the available models.
    """
    # System prompts longer than 50 characters are cut and suffixed with "...".
    full_prompt = config.BOT_SYSTEM_PROMPT
    if len(full_prompt) > 50:
        prompt_preview = full_prompt[:50] + "..."
    else:
        prompt_preview = full_prompt

    bot_section = {
        "enabled": config.BOT_ENABLED,
        "running": bot_manager.is_bot_running(),
        "model_id": config.DEFAULT_MODEL,
        "system_prompt": prompt_preview,
        "temperature": config.BOT_TEMPERATURE,
        "max_tokens": config.BOT_MAX_TOKENS,
        "top_p": config.BOT_TOP_P,
        "triggers": config.BOT_TRIGGERS,
        "respond_to_all": config.BOT_RESPOND_TO_ALL,
    }
    model_ids = list(model_service.AVAILABLE_MODELS.keys())

    return {
        "api_host": config.API_HOST,
        "api_port": config.API_PORT,
        "openwebui_url": config.OPENWEBUI_URL,
        "ollama_api_url": config.OLLAMA_API_URL,
        "default_model": config.DEFAULT_MODEL,
        "api_timeout": config.API_TIMEOUT,
        "bot": bot_section,
        "available_models": model_ids,
    }
2025-05-20 02:18:46 +01:00
@app.get("/bot/status")
async def get_bot_status():
    """
    Report whether the OpenWebUI bot is enabled and running, plus its settings.

    Returns:
        A mapping with "enabled", "running" and a nested "config" section.
    """
    # System prompts longer than 50 characters are cut and suffixed with "...".
    full_prompt = config.BOT_SYSTEM_PROMPT
    if len(full_prompt) > 50:
        prompt_preview = full_prompt[:50] + "..."
    else:
        prompt_preview = full_prompt

    bot_config = {
        "model_id": config.DEFAULT_MODEL,
        "system_prompt": prompt_preview,
        "temperature": config.BOT_TEMPERATURE,
        "max_tokens": config.BOT_MAX_TOKENS,
        "top_p": config.BOT_TOP_P,
        "triggers": config.BOT_TRIGGERS,
        "respond_to_all": config.BOT_RESPOND_TO_ALL,
    }

    return {
        "enabled": config.BOT_ENABLED,
        "running": bot_manager.is_bot_running(),
        "config": bot_config,
    }
2025-05-16 15:52:20 +01:00
2025-05-20 02:18:46 +01:00
@app.post("/bot/start")
async def start_bot(
    model_id: Optional[str] = None,
    system_prompt: Optional[str] = None,
    temperature: Optional[float] = None,
    max_tokens: Optional[int] = None,
    top_p: Optional[float] = None,
    respond_to_all: Optional[bool] = None,
):
    """
    Start the OpenWebUI bot with optional configuration overrides.

    Args:
        model_id: ID of the model to use (default: config.DEFAULT_MODEL).
        system_prompt: System prompt for the bot (default: config.BOT_SYSTEM_PROMPT).
        temperature: Temperature for response generation (default: config.BOT_TEMPERATURE).
        max_tokens: Maximum number of tokens to generate (default: config.BOT_MAX_TOKENS).
        top_p: Top-p sampling parameter (default: config.BOT_TOP_P).
        respond_to_all: Whether to respond to all messages (default: config.BOT_RESPOND_TO_ALL).

    Returns:
        A status mapping: "already_running" when the bot is already up,
        "success" together with the effective configuration when it started,
        or "error" when the bot manager reported a failure.
    """
    if bot_manager.is_bot_running():
        return {"status": "already_running", "message": "Bot is already running"}

    # Resolve each setting once so the values handed to the bot and the values
    # reported back to the caller cannot drift apart.
    # model_id and system_prompt fall back on any falsy value (including an
    # empty string); the numeric and boolean settings fall back only on None,
    # so an explicit 0 or False is honoured.
    effective = {
        "model_id": model_id or config.DEFAULT_MODEL,
        "system_prompt": system_prompt or config.BOT_SYSTEM_PROMPT,
        "temperature": temperature if temperature is not None else config.BOT_TEMPERATURE,
        "max_tokens": max_tokens if max_tokens is not None else config.BOT_MAX_TOKENS,
        "top_p": top_p if top_p is not None else config.BOT_TOP_P,
        "respond_to_all": (
            respond_to_all if respond_to_all is not None else config.BOT_RESPOND_TO_ALL
        ),
    }

    success = await bot_manager.start_bot(
        openwebui_url=config.OPENWEBUI_URL,
        api_key=config.OPENWEBUI_API_KEY,
        triggers=config.BOT_TRIGGERS,
        **effective,
    )

    if not success:
        return {"status": "error", "message": "Failed to start bot"}

    # The response carries only a preview of long prompts: the first 50
    # characters followed by "...".
    reported = dict(effective)
    prompt = effective["system_prompt"]
    if len(prompt) > 50:
        reported["system_prompt"] = prompt[:50] + "..."

    return {
        "status": "success",
        "message": "Bot started successfully",
        "config": reported,
    }
@app.post("/bot/stop")
async def stop_bot():
    """
    Stop the OpenWebUI bot if it is currently running.

    Returns:
        A status mapping: "not_running" when there is nothing to stop,
        otherwise "success" or "error" depending on the bot manager's result.
    """
    bot_is_running = bot_manager.is_bot_running()
    if not bot_is_running:
        return {"status": "not_running", "message": "Bot is not running"}

    stopped = await bot_manager.stop_bot()
    if not stopped:
        return {"status": "error", "message": "Failed to stop bot"}
    return {"status": "success", "message": "Bot stopped successfully"}
2025-05-09 15:41:16 +01:00
# Model endpoints
@app.get("/models", response_model=List[ModelInfo])
async def get_available_models():
    """
    List the models known to the model service.

    Returns:
        Whatever ``model_service.get_available_models()`` returns, serialised
        as a list of ModelInfo.
    """
    available = model_service.get_available_models()
    # Debug output of the raw model list.
    print(f"API models: {available}")
    return available
2025-05-09 15:41:16 +01:00
@app.get("/models/{model_id}", response_model=ModelInfo)
async def get_model_info(model_id: str):
    """
    Look up a single model by its ID.

    Args:
        model_id: Model ID taken from the URL path.

    Returns:
        The model information supplied by the model service.

    Raises:
        HTTPException: 404 when the model service returns a falsy result.
    """
    found = model_service.get_model_info(model_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Model not found")
# Chat endpoints
@app.post("/chats", response_model=Chat)
async def create_chat(request: ChatRequest):
    """
    Create a chat and return it.

    Args:
        request: Chat creation request.

    Returns:
        The chat fetched from the chat service using the newly created ID.
    """
    creation_args = {
        "user_id": request.user_id,
        "title": request.title,
        "model_id": request.model_id,
        "is_team_chat": request.is_team_chat,
    }
    new_chat_id = chat_service.create_chat(**creation_args)
    # Read the chat back so the response contains the stored representation.
    created_chat = chat_service.get_chat(new_chat_id)
    return created_chat
@app.get("/chats/user/{user_id}", response_model=List[Chat])
async def get_user_chats(user_id: str):
    """
    List every chat the chat service holds for a user.

    Args:
        user_id: User ID taken from the URL path.

    Returns:
        The chats returned by ``chat_service.get_user_chats``.
    """
    chats_for_user = chat_service.get_user_chats(user_id)
    return chats_for_user
@app.get("/chats/{chat_id}", response_model=Chat)
async def get_chat(chat_id: str):
    """
    Fetch a single chat by its ID.

    Args:
        chat_id: Chat ID taken from the URL path.

    Returns:
        The chat supplied by the chat service.

    Raises:
        HTTPException: 404 when the chat service returns a falsy result.
    """
    found = chat_service.get_chat(chat_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Chat not found")
@app.post("/chats/{chat_id}/messages", response_model=Message)
async def send_message(chat_id: str, request: MessageRequest):
    """
    Post a message to a chat and return the bot's reply.

    Args:
        chat_id: Chat ID taken from the URL path.
        request: Message request with optional model parameters.

    Returns:
        The response produced by the chat service. If an unexpected error
        occurs, a synthetic non-user message containing the error text is
        returned instead.

    Raises:
        HTTPException: 404 when the chat service raises ValueError.
    """
    try:
        print(f"Processing message for chat {chat_id} from user {request.user_id}")
        print(f"Message: {request.message[:50]}...")  # first 50 characters only
        print(f"Using RAG: {request.use_rag}")
        print(f"Model parameters: temperature={request.temperature}, max_tokens={request.max_tokens}")

        # Forward every model parameter from the request unchanged.
        generation_options = {
            "temperature": request.temperature,
            "max_tokens": request.max_tokens,
            "top_p": request.top_p,
            "frequency_penalty": request.frequency_penalty,
            "presence_penalty": request.presence_penalty,
            "stop_sequences": request.stop_sequences,
            "system_prompt": request.system_prompt,
            "min_p": request.min_p,
            "top_k": request.top_k,
            "repeat_penalty": request.repeat_penalty,
            "function_calling": request.function_calling,
        }
        # NOTE(review): this is a synchronous call inside an async handler; if
        # it performs network I/O it will block the event loop -- confirm.
        reply = chat_service.get_chat_response(
            chat_id=chat_id,
            message=request.message,
            user_id=request.user_id,
            use_rag=request.use_rag,
            **generation_options,
        )

        reply_length = len(reply.get("content", ""))
        print(f"Response received. Length: {reply_length}")
        return reply
    except ValueError as e:
        # Any ValueError from the chat service is reported as a missing chat.
        error_msg = f"Chat not found: {str(e)}"
        print(f"ERROR: {error_msg}")
        raise HTTPException(status_code=404, detail=error_msg)
    except Exception as e:
        error_msg = f"Error processing message: {str(e)}"
        print(f"ERROR: {error_msg}")
        # Deliberately answer with a well-formed message rather than raising,
        # so the client always receives a response it can display.
        fallback_message = {
            "id": str(uuid.uuid4()),
            "content": error_msg,
            "user_id": None,
            "is_user_message": False,
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }
        return fallback_message
2025-05-09 15:41:16 +01:00
@app.post("/chats/{chat_id}/members/{user_id}")
async def add_team_member(chat_id: str, user_id: str):
    """
    Add a user to a team chat.

    Args:
        chat_id: Chat ID taken from the URL path.
        user_id: ID of the user to add, taken from the URL path.

    Returns:
        A success status mapping.

    Raises:
        HTTPException: 400 when the chat service reports a falsy result.
    """
    added = chat_service.add_team_member(chat_id, user_id)
    if added:
        return {"status": "success", "message": "Team member added"}
    raise HTTPException(status_code=400, detail="Failed to add team member")
@app.delete("/chats/{chat_id}/members/{user_id}")
async def remove_team_member(chat_id: str, user_id: str):
    """
    Remove a user from a team chat.

    Args:
        chat_id: Chat ID taken from the URL path.
        user_id: ID of the user to remove, taken from the URL path.

    Returns:
        A success status mapping.

    Raises:
        HTTPException: 400 when the chat service reports a falsy result.
    """
    removed = chat_service.remove_team_member(chat_id, user_id)
    if removed:
        return {"status": "success", "message": "Team member removed"}
    raise HTTPException(status_code=400, detail="Failed to remove team member")
@app.delete("/chats/{chat_id}")
async def delete_chat(chat_id: str):
    """
    Delete a chat by its ID.

    Args:
        chat_id: Chat ID taken from the URL path.

    Returns:
        A success status mapping.

    Raises:
        HTTPException: 404 when the chat service reports a falsy result.
    """
    deleted = chat_service.delete_chat(chat_id)
    if deleted:
        return {"status": "success", "message": "Chat deleted"}
    raise HTTPException(status_code=404, detail="Chat not found")
2025-05-16 13:23:35 +01:00