ds_zagres_ai/ai_service/api.py

"""
FastAPI application for the AI service.
This service acts as a backend for OpenWebUI, providing OpenWebUI-compatible API endpoints.

The service supports document-based question answering using OpenWebUI's knowledge database:
- Set use_rag=True in API requests to enable Retrieval Augmented Generation
- When enabled, the service will use OpenWebUI's knowledge database to find relevant information
- Documents uploaded to OpenWebUI will be used to augment the model's responses
"""

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from typing import List, Optional
import uuid
from datetime import datetime, timezone

from ai_service.models.model_service import model_service
from ai_service.models.chat_service import chat_service
from ai_service.openwebui_api import router as openwebui_router
from ai_service.openwebui_channels import openwebui_channels
from ai_service.config import config

# Create FastAPI app
app = FastAPI(
    title="AI Service API",
    description="Backend API for OpenWebUI",
    version="1.0.0"
)

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allow all origins
    allow_credentials=True,
    allow_methods=["*"],  # Allow all methods
    allow_headers=["*"],  # Allow all headers
)

# Include OpenWebUI-compatible API routes
app.include_router(openwebui_router, prefix="/api")

# Include Ollama proxy routes
app.include_router(openwebui_router, prefix="/ollama")

# Register webhook for channel messages on startup
@app.on_event("startup")
async def startup_event():
    """
    Register webhook for channel messages on startup.
    """
    # Get the public URL of this service
    service_url = f"http://{config.API_HOST}:{config.API_PORT}"
    if config.PUBLIC_URL:
        service_url = config.PUBLIC_URL

    # Register webhook
    webhook_url = f"{service_url}/webhooks/channel-message"
    print(f"Registering webhook for channel messages: {webhook_url}")

    success = openwebui_channels.register_webhook(webhook_url)
    if success:
        print("Successfully registered webhook for channel messages")
    else:
        print("Failed to register webhook for channel messages")

# Define API models for health check
class HealthResponse(BaseModel):
    """Response model for health check."""
    status: str = Field(..., description="Health status")

class ModelInfo(BaseModel):
    """Model for model information."""
    id: str = Field(..., description="Model ID")
    name: str = Field(..., description="Model name")
    description: str = Field(..., description="Model description")
    provider: str = Field(..., description="Model provider")
    max_tokens: int = Field(..., description="Maximum tokens")
    is_default: bool = Field(..., description="Whether this is the default model")

class ChatRequest(BaseModel):
    """Request model for creating a chat."""
    user_id: str = Field(..., description="User ID")
    title: Optional[str] = Field(None, description="Chat title")
    model_id: Optional[str] = Field(None, description="Model ID")
    is_team_chat: bool = Field(False, description="Whether this is a team chat")

class MessageRequest(BaseModel):
    """Request model for sending a message."""
    message: str = Field(..., description="Message content")
    user_id: str = Field(..., description="User ID")
    use_rag: bool = Field(False, description="Whether to use RAG")

    # Model parameters
    temperature: Optional[float] = Field(None, description="Controls randomness: higher values mean more random completions")
    max_tokens: Optional[int] = Field(None, description="Maximum number of tokens to generate")
    top_p: Optional[float] = Field(None, description="Nucleus sampling parameter")
    frequency_penalty: Optional[float] = Field(None, description="Penalizes repeated tokens")
    presence_penalty: Optional[float] = Field(None, description="Penalizes repeated topics")
    stop_sequences: Optional[List[str]] = Field(None, description="Sequences where the API will stop generating")
    system_prompt: Optional[str] = Field(None, description="System prompt to guide the model's behavior")

    # Additional advanced parameters
    min_p: Optional[float] = Field(None, description="Minimum probability threshold for token selection")
    top_k: Optional[int] = Field(None, description="Only sample from the top k tokens")
    repeat_penalty: Optional[float] = Field(None, description="Penalty for repeating tokens")
    function_calling: Optional[bool] = Field(None, description="Whether to enable function calling")

class Message(BaseModel):
    """Model for a message."""
    id: str = Field(..., description="Message ID")
    content: str = Field(..., description="Message content")
    user_id: Optional[str] = Field(None, description="User ID")
    is_user_message: bool = Field(..., description="Whether this is a user message")
    timestamp: str = Field(..., description="Message timestamp")

class Chat(BaseModel):
    """Model for a chat."""
    id: str = Field(..., description="Chat ID")
    title: str = Field(..., description="Chat title")
    user_id: str = Field(..., description="User ID")
    model_id: str = Field(..., description="Model ID")
    is_team_chat: bool = Field(..., description="Whether this is a team chat")
    created_at: str = Field(..., description="Creation timestamp")
    updated_at: str = Field(..., description="Update timestamp")
    messages: List[Message] = Field(..., description="Chat messages")
    team_members: List[str] = Field(..., description="Team members")

# Define API endpoints
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """
    Health check endpoint.

    Returns:
        Health status.
    """
    return {"status": "healthy"}

@app.get("/test-ollama")
async def test_ollama_connection():
    """
    Test the connection to the Ollama API.

    Returns:
        Connection status and available models from Ollama.
    """
    import requests

    try:
        # Try to connect to Ollama API
        response = requests.get(f"{config.OLLAMA_API_URL}/api/tags", timeout=config.API_TIMEOUT)
        response.raise_for_status()

        # Return the models from Ollama
        return {
            "status": "success",
            "message": "Successfully connected to Ollama API",
            "ollama_url": config.OLLAMA_API_URL,
            "models": response.json()
        }
    except requests.exceptions.Timeout as e:
        return {
            "status": "error",
            "message": f"Timeout connecting to Ollama API: {str(e)}. The request exceeded the {config.API_TIMEOUT} second timeout.",
            "ollama_url": config.OLLAMA_API_URL
        }
    except requests.exceptions.ConnectionError as e:
        return {
            "status": "error",
            "message": f"Connection error to Ollama API: {str(e)}. Please check if Ollama is running at {config.OLLAMA_API_URL}.",
            "ollama_url": config.OLLAMA_API_URL
        }
    except Exception as e:
        return {
            "status": "error",
            "message": f"Failed to connect to Ollama API: {str(e)}",
            "ollama_url": config.OLLAMA_API_URL
        }

@app.post("/test-chat")
async def test_chat_completion():
    """
    Test the chat completion with a simple prompt.

    Returns:
        Model response.
    """
    try:
        # Use the model service directly
        response = model_service.generate_response(
            model_id=config.DEFAULT_MODEL,
            prompt="Hello, how are you?",
            context=[],
            use_rag=False
        )

        return {
            "status": "success",
            "model": config.DEFAULT_MODEL,
            "response": response,
            "ollama_url": config.OLLAMA_API_URL
        }
    except Exception as e:
        return {
            "status": "error",
            "message": f"Failed to get chat completion: {str(e)}",
            "ollama_url": config.OLLAMA_API_URL
        }

@app.post("/test-rag")
async def test_rag_completion(query: str = "What information do you have in your knowledge database?"):
    """
    Test the RAG (Retrieval Augmented Generation) functionality with a query.

    This endpoint tests the integration with OpenWebUI's knowledge database.

    Args:
        query: The question to ask about documents in the knowledge database.

    Returns:
        Model response using RAG.
    """
    try:
        # Use the model service directly with RAG enabled
        response = model_service.generate_response(
            model_id=config.DEFAULT_MODEL,
            prompt=query,
            context=[],
            use_rag=True  # Enable RAG
        )

        return {
            "status": "success",
            "model": config.DEFAULT_MODEL,
            "query": query,
            "use_rag": True,
            "response": response,
            "openwebui_url": config.OPENWEBUI_URL
        }
    except Exception as e:
        return {
            "status": "error",
            "message": f"Failed to get RAG completion: {str(e)}",
            "openwebui_url": config.OPENWEBUI_URL
        }

@app.post("/test-ollama-direct")
async def test_ollama_direct():
    """
    Test the Ollama API directly with a simple chat request.

    Returns:
        Raw Ollama API response.
    """
    import requests

    try:
        # Prepare a simple chat request
        request_json = {
            "model": config.DEFAULT_MODEL,
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Hello, how are you?"}
            ],
            "stream": False
        }

        # Make the API call to Ollama
        print(f"Sending direct request to Ollama API at: {config.OLLAMA_API_URL}/api/chat")
        response = requests.post(
            f"{config.OLLAMA_API_URL}/api/chat",
            headers={"Content-Type": "application/json"},
            json=request_json,
            timeout=config.API_TIMEOUT
        )

        response.raise_for_status()
        result = response.json()

        return {
            "status": "success",
            "ollama_url": config.OLLAMA_API_URL,
            "request": request_json,
            "response": result
        }
    except requests.exceptions.Timeout as e:
        return {
            "status": "error",
            "message": f"Timeout connecting to Ollama API: {str(e)}. The request exceeded the {config.API_TIMEOUT} second timeout.",
            "ollama_url": config.OLLAMA_API_URL
        }
    except requests.exceptions.ConnectionError as e:
        return {
            "status": "error",
            "message": f"Connection error to Ollama API: {str(e)}. Please check if Ollama is running at {config.OLLAMA_API_URL}.",
            "ollama_url": config.OLLAMA_API_URL
        }
    except Exception as e:
        return {
            "status": "error",
            "message": f"Failed to connect to Ollama API: {str(e)}",
            "ollama_url": config.OLLAMA_API_URL
        }

@app.get("/config")
async def get_config():
    """
    Get the current configuration.

    Returns:
        Current configuration settings.
    """
    return {
        "api_host": config.API_HOST,
        "api_port": config.API_PORT,
        "openwebui_url": config.OPENWEBUI_URL,
        "ollama_api_url": config.OLLAMA_API_URL,
        "default_model": config.DEFAULT_MODEL,
        "api_timeout": config.API_TIMEOUT,
        "available_models": list(model_service.AVAILABLE_MODELS.keys())
    }


# Model endpoints
@app.get("/models", response_model=List[ModelInfo])
async def get_available_models():
    """
    Get available models.

    Returns:
        List of model information.
    """
    models = model_service.get_available_models()
    # Debug log
    print(f"API models: {models}")
    return models

@app.get("/models/{model_id}", response_model=ModelInfo)
async def get_model_info(model_id: str):
    """
    Get information about a model.

    Args:
        model_id: Model ID.

    Returns:
        Model information.
    """
    model_info = model_service.get_model_info(model_id)
    if not model_info:
        raise HTTPException(status_code=404, detail="Model not found")

    return model_info

# Chat endpoints
@app.post("/chats", response_model=Chat)
async def create_chat(request: ChatRequest):
    """
    Create a new chat.

    Args:
        request: Chat creation request.

    Returns:
        Created chat.
    """
    chat_id = chat_service.create_chat(
        user_id=request.user_id,
        title=request.title,
        model_id=request.model_id,
        is_team_chat=request.is_team_chat
    )

    return chat_service.get_chat(chat_id)

@app.get("/chats/user/{user_id}", response_model=List[Chat])
async def get_user_chats(user_id: str):
    """
    Get all chats for a user.

    Args:
        user_id: User ID.

    Returns:
        List of chats.
    """
    return chat_service.get_user_chats(user_id)

@app.get("/chats/{chat_id}", response_model=Chat)
async def get_chat(chat_id: str):
    """
    Get a chat by ID.

    Args:
        chat_id: Chat ID.

    Returns:
        Chat information.
    """
    chat = chat_service.get_chat(chat_id)
    if not chat:
        raise HTTPException(status_code=404, detail="Chat not found")

    return chat

@app.post("/chats/{chat_id}/messages", response_model=Message)
async def send_message(chat_id: str, request: MessageRequest):
    """
    Send a message to a chat.

    Args:
        chat_id: Chat ID.
        request: Message request with optional model parameters.

    Returns:
        Bot response message.
    """
    try:
        print(f"Processing message for chat {chat_id} from user {request.user_id}")
        print(f"Message: {request.message[:50]}...")  # Print first 50 chars of message
        print(f"Using RAG: {request.use_rag}")
        print(f"Model parameters: temperature={request.temperature}, max_tokens={request.max_tokens}")

        # Extract model parameters from the request
        response = chat_service.get_chat_response(
            chat_id=chat_id,
            message=request.message,
            user_id=request.user_id,
            use_rag=request.use_rag,
            temperature=request.temperature,
            max_tokens=request.max_tokens,
            top_p=request.top_p,
            frequency_penalty=request.frequency_penalty,
            presence_penalty=request.presence_penalty,
            stop_sequences=request.stop_sequences,
            system_prompt=request.system_prompt,
            min_p=request.min_p,
            top_k=request.top_k,
            repeat_penalty=request.repeat_penalty,
            function_calling=request.function_calling
        )

        print(f"Response received. Length: {len(response.get('content', ''))}")
        return response

    except ValueError as e:
        error_msg = f"Chat not found: {str(e)}"
        print(f"ERROR: {error_msg}")
        raise HTTPException(status_code=404, detail=error_msg)
    except Exception as e:
        error_msg = f"Error processing message: {str(e)}"
        print(f"ERROR: {error_msg}")
        # Return an error message instead of raising an exception
        # This ensures the client gets a proper response
        return {
            "id": str(uuid.uuid4()),
            "content": f"Error processing message: {str(e)}",
            "user_id": None,
            "is_user_message": False,
            "timestamp": datetime.now(timezone.utc).isoformat()
        }

@app.post("/chats/{chat_id}/members/{user_id}")
async def add_team_member(chat_id: str, user_id: str):
    """
    Add a user to a team chat.

    Args:
        chat_id: Chat ID.
        user_id: User ID.

    Returns:
        Addition status.
    """
    success = chat_service.add_team_member(chat_id, user_id)
    if not success:
        raise HTTPException(status_code=400, detail="Failed to add team member")

    return {"status": "success", "message": "Team member added"}

@app.delete("/chats/{chat_id}/members/{user_id}")
async def remove_team_member(chat_id: str, user_id: str):
    """
    Remove a user from a team chat.

    Args:
        chat_id: Chat ID.
        user_id: User ID.

    Returns:
        Removal status.
    """
    success = chat_service.remove_team_member(chat_id, user_id)
    if not success:
        raise HTTPException(status_code=400, detail="Failed to remove team member")

    return {"status": "success", "message": "Team member removed"}

@app.delete("/chats/{chat_id}")
async def delete_chat(chat_id: str):
    """
    Delete a chat.

    Args:
        chat_id: Chat ID.

    Returns:
        Deletion status.
    """
    success = chat_service.delete_chat(chat_id)
    if not success:
        raise HTTPException(status_code=404, detail="Chat not found")

    return {"status": "success", "message": "Chat deleted"}

# OpenWebUI Channels endpoints
@app.get("/channels")
async def get_openwebui_channels():
    """
    Get all OpenWebUI channels.

    Returns:
        List of channels.
    """
    channels = openwebui_channels.get_channels()
    return channels

@app.get("/channels/{channel_id}")
async def get_openwebui_channel(channel_id: str):
    """
    Get an OpenWebUI channel by ID.

    Args:
        channel_id: Channel ID.

    Returns:
        Channel information.
    """
    channel = openwebui_channels.get_channel(channel_id)
    if not channel:
        raise HTTPException(status_code=404, detail="Channel not found")

    return channel

@app.post("/channels")
async def create_openwebui_channel(name: str, description: str = "", is_private: bool = False):
    """
    Create a new OpenWebUI channel.

    Args:
        name: Channel name.
        description: Channel description.
        is_private: Whether the channel is private.

    Returns:
        Created channel.
    """
    channel = openwebui_channels.create_channel(name, description, is_private)
    if not channel:
        raise HTTPException(status_code=400, detail="Failed to create channel")

    return channel

# Webhook endpoint for OpenWebUI channel messages
class ChannelMessageWebhook(BaseModel):
    """Model for channel message webhook."""
    channel_id: str = Field(..., description="Channel ID")
    message: str = Field(..., description="Message content")
    user_id: str = Field(..., description="User ID")
    timestamp: Optional[str] = Field(None, description="Message timestamp")

@app.post("/webhooks/channel-message")
async def channel_message_webhook(request: ChannelMessageWebhook):
    """
    Webhook endpoint for receiving messages from OpenWebUI channels.

    This endpoint is called by OpenWebUI when a message is sent in a channel.
    The AI service will process the message and respond in the channel.

    Args:
        request: Channel message webhook request.

    Returns:
        Processing status.
    """
    try:
        print(f"Received channel message webhook: {request.channel_id}, {request.user_id}, {request.message}")

        # Find the chat associated with this OpenWebUI channel
        chat_id = None
        for cid, chat in chat_service.chats.items():
            if chat.get('is_team_chat') and chat.get('openwebui_channel_id') == request.channel_id:
                chat_id = cid
                break

        if not chat_id:
            print(f"No chat found for OpenWebUI channel {request.channel_id}")
            return {"status": "error", "message": "No chat found for this channel"}

        # Skip messages from the AI assistant to avoid loops
        if request.user_id == "ai-assistant":
            return {"status": "skipped", "message": "Skipping AI assistant message"}

        # Check if we should respond to all messages or only to mentions
        if not config.AI_RESPOND_TO_ALL:
            # Check if the message mentions the AI using configured triggers
            message_lower = request.message.lower()

            is_triggered = False
            for trigger in config.AI_TRIGGERS:
                if trigger.lower() in message_lower:
                    is_triggered = True
                    break

            # If no trigger is found, skip processing
            if not is_triggered:
                print(f"No AI mention found in message, skipping: {request.message[:50]}...")
                return {"status": "skipped", "message": "No AI mention found in message"}

        # Extract the actual message content (remove the trigger)
        # This is a simple approach - for more complex cases, you might want more sophisticated parsing
        processed_message = request.message
        message_lower = request.message.lower()

        # Only try to remove triggers if we're not responding to all messages
        if not config.AI_RESPOND_TO_ALL:
            for trigger in config.AI_TRIGGERS:
                if trigger.lower() in message_lower:
                    # Remove the trigger from the message
                    processed_message = request.message.replace(trigger, "").strip()
                    break

        # Process the message and generate a response
        response = chat_service.get_chat_response(
            chat_id=chat_id,
            message=processed_message,
            user_id=request.user_id
        )

        return {"status": "success", "message": "Message processed", "response": response}

    except Exception as e:
        print(f"Error processing channel message webhook: {str(e)}")
        return {"status": "error", "message": f"Error processing message: {str(e)}"}