Initial commit for deployment
This commit is contained in:
@@ -0,0 +1,18 @@
|
||||
# API configuration
|
||||
API_HOST=0.0.0.0
|
||||
API_PORT=5251
|
||||
|
||||
# OpenWebUI configuration
|
||||
OPENWEBUI_URL=http://104.225.217.215:8080
|
||||
OPENWEBUI_API_KEY=your-openwebui-api-key # Replace with the actual key in your .env file
|
||||
|
||||
# Ollama configuration
|
||||
OLLAMA_API_URL=http://localhost:11434
|
||||
DEFAULT_MODEL=llama3.1
|
||||
|
||||
# Local storage
|
||||
SQLITE_DB_PATH=ai_service/data/chatbot.db
|
||||
|
||||
# Document processing
|
||||
CHUNK_SIZE=1000
|
||||
CHUNK_OVERLAP=200
|
||||
@@ -0,0 +1,33 @@
|
||||
# API configuration
|
||||
API_HOST=0.0.0.0
|
||||
API_PORT=5251
|
||||
|
||||
# Pinecone configuration
|
||||
# Sign up at https://www.pinecone.io/ to get your API key
|
||||
PINECONE_API_KEY=your-pinecone-api-key-here
|
||||
PINECONE_ENVIRONMENT=your-pinecone-environment-here
|
||||
PINECONE_INDEX_NAME=chatbot-index
|
||||
|
||||
# Model configuration
|
||||
# Sign up at https://platform.openai.com/ to get your API key
|
||||
DEFAULT_MODEL=gpt-3.5-turbo
|
||||
OPENAI_API_KEY=your-openai-api-key-here
|
||||
|
||||
# Local storage
|
||||
# Path to SQLite database (will be replaced with MySQL in production)
|
||||
SQLITE_DB_PATH=ai_service/data/chatbot.db
|
||||
|
||||
# Document processing
|
||||
# Adjust these values based on your needs
|
||||
CHUNK_SIZE=1000
|
||||
CHUNK_OVERLAP=200
|
||||
|
||||
# Embedding model
|
||||
# Options: all-MiniLM-L6-v2 (default), paraphrase-MiniLM-L3-v2 (smaller/faster)
|
||||
EMBEDDING_MODEL=all-MiniLM-L6-v2
|
||||
|
||||
# Production settings
|
||||
# Set to 'production' in production environment
|
||||
ENVIRONMENT=production
|
||||
LOG_LEVEL=INFO
|
||||
ENABLE_MOCK=false
|
||||
@@ -0,0 +1,94 @@
|
||||
# AI Service for Chatbot Application
|
||||
|
||||
This is the AI service component for the chatbot application. It provides APIs for document processing, embeddings, and chat functionality.
|
||||
|
||||
## Features
|
||||
|
||||
- Document processing and embedding
|
||||
- Retrieval-augmented generation (RAG)
|
||||
- Chat functionality with model switching
|
||||
- Team chat support
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
ai_service/
|
||||
├── embeddings/ # Embedding and document processing services
|
||||
├── models/ # Model and chat services
|
||||
├── utils/ # Utility functions
|
||||
├── data/ # Data storage
|
||||
├── config.py # Configuration settings
|
||||
├── api.py # FastAPI application
|
||||
└── run.py # Script to run the service
|
||||
```
|
||||
|
||||
## Setup Instructions
|
||||
|
||||
1. Create a virtual environment:
|
||||
```
|
||||
python -m venv venv
|
||||
source venv/bin/activate # On Windows: venv\Scripts\activate
|
||||
```
|
||||
|
||||
2. Install dependencies:
|
||||
```
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
3. Copy `.env.example` to `.env` and update the values:
|
||||
```
|
||||
cp .env.example .env
|
||||
# Edit the .env file with appropriate values
|
||||
```
|
||||
|
||||
4. Run the service:
|
||||
```
|
||||
python run.py
|
||||
```
|
||||
|
||||
## API Documentation
|
||||
|
||||
Once the service is running, you can access the API documentation at:
|
||||
- Swagger UI: http://localhost:5251/docs
|
||||
- ReDoc: http://localhost:5251/redoc
|
||||
|
||||
## Deployment
|
||||
|
||||
To deploy the service:
|
||||
|
||||
1. Make the deployment script executable:
|
||||
```
|
||||
chmod +x deploy.sh
|
||||
```
|
||||
|
||||
2. Run the deployment script:
|
||||
```
|
||||
./deploy.sh
|
||||
```
|
||||
|
||||
This starts the service in the background on port 5251 using uvicorn under nohup; output is written to `logs/ai_service.log`.
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### Document Endpoints
|
||||
|
||||
- `POST /documents` - Process a document for embedding
|
||||
- `GET /documents` - Get all documents
|
||||
- `GET /documents/{doc_id}` - Get a document by ID
|
||||
- `DELETE /documents/{doc_id}` - Delete a document
|
||||
- `POST /documents/search` - Search for documents
|
||||
|
||||
### Model Endpoints
|
||||
|
||||
- `GET /models` - Get available models
|
||||
- `GET /models/{model_id}` - Get information about a model
|
||||
|
||||
### Chat Endpoints
|
||||
|
||||
- `POST /chats` - Create a new chat
|
||||
- `GET /chats/user/{user_id}` - Get all chats for a user
|
||||
- `GET /chats/{chat_id}` - Get a chat by ID
|
||||
- `POST /chats/{chat_id}/messages` - Send a message to a chat
|
||||
- `POST /chats/{chat_id}/members/{user_id}` - Add a user to a team chat
|
||||
- `DELETE /chats/{chat_id}/members/{user_id}` - Remove a user from a team chat
|
||||
- `DELETE /chats/{chat_id}` - Delete a chat
|
||||
@@ -0,0 +1,6 @@
|
||||
"""
|
||||
AI service package.
|
||||
"""
|
||||
|
||||
# Import for easier access
|
||||
from ai_service.api import app
|
||||
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"e2b1bdc2-a384-4775-9c14-42b221e5554f": {
|
||||
"id": "e2b1bdc2-a384-4775-9c14-42b221e5554f",
|
||||
"title": "Test Chat",
|
||||
"user_id": "test_user",
|
||||
"model_id": "gpt-3.5-turbo",
|
||||
"is_team_chat": false,
|
||||
"created_at": "2025-05-06T11:50:43.558931",
|
||||
"updated_at": "2025-05-06T11:51:20.982846",
|
||||
"messages": [
|
||||
{
|
||||
"id": "865a1e57-c71f-4bab-a4ba-56d630a38631",
|
||||
"content": "Hello, AI!",
|
||||
"user_id": "test_user",
|
||||
"is_user_message": true,
|
||||
"timestamp": "2025-05-06T11:51:20.051537"
|
||||
},
|
||||
{
|
||||
"id": "eb755b4f-fe55-4bec-b77f-ed20941df360",
|
||||
"content": "Error generating response: 401 Client Error: Unauthorized for url: https://api.openai.com/v1/chat/completions",
|
||||
"user_id": null,
|
||||
"is_user_message": false,
|
||||
"timestamp": "2025-05-06T11:51:20.982829"
|
||||
}
|
||||
],
|
||||
"team_members": []
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,377 @@
|
||||
"""
|
||||
FastAPI application for the AI service.
|
||||
"""
|
||||
|
||||
from fastapi import FastAPI, HTTPException, Depends, Body, Query, Path
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
from ai_service.config import config
|
||||
from ai_service.embeddings.document_service import document_service
|
||||
from ai_service.models.model_service import model_service
|
||||
from ai_service.models.chat_service import chat_service
|
||||
from ai_service.models.model_parameters import ModelParameters
|
||||
|
||||
# Create FastAPI app
app = FastAPI(
    title="AI Service API",
    description="API for the AI service",
    version="1.0.0"
)

# Add CORS middleware
import os  # needed only for the CORS allow-list lookup below

# Comma-separated list of allowed origins, e.g.
#   CORS_ALLOW_ORIGINS=https://app.example.com,https://admin.example.com
# When the variable is unset or empty the service keeps its original behavior
# and accepts every origin ("*").
_cors_allow_origins = [
    origin.strip()
    for origin in os.environ.get("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

# NOTE(review): FastAPI's CORS documentation advises against combining wildcard
# origins with allow_credentials=True. The default is left unchanged for
# backward compatibility; set CORS_ALLOW_ORIGINS to an explicit list in
# production deployments.
app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_allow_origins,
    allow_credentials=True,
    allow_methods=["*"],  # Allow all methods
    allow_headers=["*"],  # Allow all headers
)
|
||||
|
||||
# Define API models
|
||||
class DocumentRequest(BaseModel):
    """Body of ``POST /documents``: the text to ingest plus descriptive fields."""

    # Required fields
    content: str = Field(default=..., description="Document content")
    title: str = Field(default=..., description="Document title")

    # Optional fields
    description: Optional[str] = Field(default=None, description="Document description")
    metadata: Optional[Dict[str, Any]] = Field(default=None, description="Additional metadata")
|
||||
|
||||
class DocumentResponse(BaseModel):
    """Document record returned by the document endpoints. Every field is required."""

    id: str = Field(default=..., description="Document ID")
    title: str = Field(default=..., description="Document title")
    description: str = Field(default=..., description="Document description")
    chunk_count: int = Field(default=..., description="Number of chunks")
    metadata: Dict[str, Any] = Field(default=..., description="Additional metadata")
|
||||
|
||||
class SearchRequest(BaseModel):
    """Body of ``POST /documents/search``."""

    query: str = Field(default=..., description="Search query")
    # Number of hits requested; 5 when the client omits it.
    top_k: int = Field(default=5, description="Number of results to return")
|
||||
|
||||
class SearchResult(BaseModel):
    """One hit in a document search response."""

    id: str = Field(default=..., description="Result ID")
    score: float = Field(default=..., description="Similarity score")
    metadata: Dict[str, Any] = Field(default=..., description="Result metadata")
|
||||
|
||||
class ModelInfo(BaseModel):
    """Description of one model as returned by the ``/models`` endpoints."""

    # Identity
    id: str = Field(default=..., description="Model ID")
    name: str = Field(default=..., description="Model name")
    description: str = Field(default=..., description="Model description")
    provider: str = Field(default=..., description="Model provider")

    # Capabilities / selection
    max_tokens: int = Field(default=..., description="Maximum tokens")
    is_default: bool = Field(default=..., description="Whether this is the default model")
|
||||
|
||||
class ChatRequest(BaseModel):
    """Body of ``POST /chats``; only ``user_id`` is mandatory."""

    user_id: str = Field(default=..., description="User ID")
    title: Optional[str] = Field(default=None, description="Chat title")
    model_id: Optional[str] = Field(default=None, description="Model ID")
    is_team_chat: bool = Field(default=False, description="Whether this is a team chat")
|
||||
|
||||
class MessageRequest(BaseModel):
    """Body of ``POST /chats/{chat_id}/messages``.

    Besides the message itself, the request may carry per-call generation
    settings. Each setting defaults to ``None`` when the client omits it.
    """

    # --- Message ---
    message: str = Field(default=..., description="Message content")
    user_id: str = Field(default=..., description="User ID")
    use_rag: bool = Field(default=False, description="Whether to use RAG")

    # --- Model parameters ---
    temperature: Optional[float] = Field(
        default=None,
        description="Controls randomness: higher values mean more random completions",
    )
    max_tokens: Optional[int] = Field(
        default=None, description="Maximum number of tokens to generate"
    )
    top_p: Optional[float] = Field(default=None, description="Nucleus sampling parameter")
    frequency_penalty: Optional[float] = Field(
        default=None, description="Penalizes repeated tokens"
    )
    presence_penalty: Optional[float] = Field(
        default=None, description="Penalizes repeated topics"
    )
    stop_sequences: Optional[List[str]] = Field(
        default=None, description="Sequences where the API will stop generating"
    )
    system_prompt: Optional[str] = Field(
        default=None, description="System prompt to guide the model's behavior"
    )

    # --- Additional advanced parameters ---
    min_p: Optional[float] = Field(
        default=None, description="Minimum probability threshold for token selection"
    )
    top_k: Optional[int] = Field(default=None, description="Only sample from the top k tokens")
    repeat_penalty: Optional[float] = Field(
        default=None, description="Penalty for repeating tokens"
    )
    function_calling: Optional[bool] = Field(
        default=None, description="Whether to enable function calling"
    )
|
||||
|
||||
class Message(BaseModel):
    """A single chat message as exposed by the API."""

    id: str = Field(default=..., description="Message ID")
    content: str = Field(default=..., description="Message content")
    # Optional: the stored sample chats carry a null user_id on non-user messages.
    user_id: Optional[str] = Field(default=None, description="User ID")
    is_user_message: bool = Field(default=..., description="Whether this is a user message")
    timestamp: str = Field(default=..., description="Message timestamp")
|
||||
|
||||
class Chat(BaseModel):
    """A chat with its full message history and team membership."""

    # Identity and ownership
    id: str = Field(default=..., description="Chat ID")
    title: str = Field(default=..., description="Chat title")
    user_id: str = Field(default=..., description="User ID")
    model_id: str = Field(default=..., description="Model ID")
    is_team_chat: bool = Field(default=..., description="Whether this is a team chat")

    # Timestamps are carried as strings
    created_at: str = Field(default=..., description="Creation timestamp")
    updated_at: str = Field(default=..., description="Update timestamp")

    # Contents
    messages: List[Message] = Field(default=..., description="Chat messages")
    team_members: List[str] = Field(default=..., description="Team members")
|
||||
|
||||
# Define API endpoints
|
||||
@app.get("/health")
async def health_check():
    """
    Liveness probe.

    Returns:
        A constant payload, ``{"status": "healthy"}``.
    """
    status_payload = {"status": "healthy"}
    return status_payload
|
||||
|
||||
# Document endpoints
|
||||
@app.post("/documents", response_model=DocumentResponse)
async def process_document(request: DocumentRequest):
    """
    Ingest a document and return its stored record.

    Args:
        request: Document content, title and optional description/metadata.

    Returns:
        The record that the document service holds for the newly created ID.
    """
    new_doc_id = document_service.process_document(
        metadata=request.metadata,
        description=request.description,
        title=request.title,
        content=request.content,
    )
    stored_record = document_service.get_document(new_doc_id)
    return stored_record
|
||||
|
||||
@app.get("/documents", response_model=List[DocumentResponse])
async def get_all_documents():
    """
    List every known document.

    Returns:
        Whatever ``document_service.get_all_documents()`` yields, serialized
        as a list of ``DocumentResponse``.
    """
    all_records = document_service.get_all_documents()
    return all_records
|
||||
|
||||
@app.get("/documents/{doc_id}", response_model=DocumentResponse)
async def get_document(doc_id: str):
    """
    Look up one document.

    Args:
        doc_id: Document ID taken from the URL path.

    Returns:
        The document record.

    Raises:
        HTTPException: 404 when the service returns nothing for ``doc_id``.
    """
    record = document_service.get_document(doc_id)
    if record:
        return record
    raise HTTPException(status_code=404, detail="Document not found")
|
||||
|
||||
@app.delete("/documents/{doc_id}")
async def delete_document(doc_id: str):
    """
    Remove a document.

    Args:
        doc_id: Document ID taken from the URL path.

    Returns:
        A status/message dict confirming the deletion.

    Raises:
        HTTPException: 404 when the service reports that nothing was deleted.
    """
    was_deleted = document_service.delete_document(doc_id)
    if was_deleted:
        return {"status": "success", "message": "Document deleted"}
    raise HTTPException(status_code=404, detail="Document not found")
|
||||
|
||||
@app.post("/documents/search", response_model=List[SearchResult])
async def search_documents(request: SearchRequest):
    """
    Run a document search.

    Args:
        request: Query text and the number of hits wanted.

    Returns:
        The hits produced by the document service.
    """
    return document_service.search_documents(top_k=request.top_k, query=request.query)
|
||||
|
||||
# Model endpoints
|
||||
@app.get("/models", response_model=List[ModelInfo])
async def get_available_models():
    """
    List the models the service can use.

    Returns:
        Whatever ``model_service.get_available_models()`` yields, serialized
        as a list of ``ModelInfo``.
    """
    available = model_service.get_available_models()
    return available
|
||||
|
||||
@app.get("/models/{model_id}", response_model=ModelInfo)
async def get_model_info(model_id: str):
    """
    Describe one model.

    Args:
        model_id: Model ID taken from the URL path.

    Returns:
        The model description.

    Raises:
        HTTPException: 404 when the service returns nothing for ``model_id``.
    """
    details = model_service.get_model_info(model_id)
    if details:
        return details
    raise HTTPException(status_code=404, detail="Model not found")
|
||||
|
||||
# Chat endpoints
|
||||
@app.post("/chats", response_model=Chat)
async def create_chat(request: ChatRequest):
    """
    Open a new chat and return it.

    Args:
        request: Owner, optional title/model and the team-chat flag.

    Returns:
        The chat that the chat service holds for the newly created ID.
    """
    new_chat_id = chat_service.create_chat(
        is_team_chat=request.is_team_chat,
        model_id=request.model_id,
        title=request.title,
        user_id=request.user_id,
    )
    created = chat_service.get_chat(new_chat_id)
    return created
|
||||
|
||||
@app.get("/chats/user/{user_id}", response_model=List[Chat])
async def get_user_chats(user_id: str):
    """
    List the chats belonging to a user.

    Args:
        user_id: User ID taken from the URL path.

    Returns:
        Whatever ``chat_service.get_user_chats(user_id)`` yields, serialized
        as a list of ``Chat``.
    """
    chats_for_user = chat_service.get_user_chats(user_id)
    return chats_for_user
|
||||
|
||||
@app.get("/chats/{chat_id}", response_model=Chat)
async def get_chat(chat_id: str):
    """
    Fetch one chat.

    Args:
        chat_id: Chat ID taken from the URL path.

    Returns:
        The chat.

    Raises:
        HTTPException: 404 when the service returns nothing for ``chat_id``.
    """
    found = chat_service.get_chat(chat_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Chat not found")
|
||||
|
||||
@app.post("/chats/{chat_id}/messages", response_model=Message)
async def send_message(chat_id: str, request: MessageRequest):
    """
    Post a message to a chat and return the reply.

    Args:
        chat_id: Chat ID taken from the URL path.
        request: Message text, sender and optional generation settings.

    Returns:
        Bot response message.

    Raises:
        HTTPException: 404 carrying the error text when the chat service
            raises ``ValueError``.
    """
    # Per-request generation settings, forwarded unchanged (None when omitted).
    generation_options = {
        "temperature": request.temperature,
        "max_tokens": request.max_tokens,
        "top_p": request.top_p,
        "frequency_penalty": request.frequency_penalty,
        "presence_penalty": request.presence_penalty,
        "stop_sequences": request.stop_sequences,
        "system_prompt": request.system_prompt,
        "min_p": request.min_p,
        "top_k": request.top_k,
        "repeat_penalty": request.repeat_penalty,
        "function_calling": request.function_calling,
    }

    try:
        return chat_service.get_chat_response(
            chat_id=chat_id,
            message=request.message,
            user_id=request.user_id,
            use_rag=request.use_rag,
            **generation_options,
        )
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))
|
||||
|
||||
@app.post("/chats/{chat_id}/members/{user_id}")
async def add_team_member(chat_id: str, user_id: str):
    """
    Add a user to a team chat.

    Args:
        chat_id: Chat ID taken from the URL path.
        user_id: ID of the user to add.

    Returns:
        A status/message dict confirming the addition.

    Raises:
        HTTPException: 400 when the chat service reports failure.
    """
    was_added = chat_service.add_team_member(chat_id, user_id)
    if was_added:
        return {"status": "success", "message": "Team member added"}
    raise HTTPException(status_code=400, detail="Failed to add team member")
|
||||
|
||||
@app.delete("/chats/{chat_id}/members/{user_id}")
async def remove_team_member(chat_id: str, user_id: str):
    """
    Remove a user from a team chat.

    Args:
        chat_id: Chat ID taken from the URL path.
        user_id: ID of the user to remove.

    Returns:
        A status/message dict confirming the removal.

    Raises:
        HTTPException: 400 when the chat service reports failure.
    """
    was_removed = chat_service.remove_team_member(chat_id, user_id)
    if was_removed:
        return {"status": "success", "message": "Team member removed"}
    raise HTTPException(status_code=400, detail="Failed to remove team member")
|
||||
|
||||
@app.delete("/chats/{chat_id}")
async def delete_chat(chat_id: str):
    """
    Delete a chat.

    Args:
        chat_id: Chat ID taken from the URL path.

    Returns:
        A status/message dict confirming the deletion.

    Raises:
        HTTPException: 404 when the chat service reports that nothing was deleted.
    """
    was_deleted = chat_service.delete_chat(chat_id)
    if was_deleted:
        return {"status": "success", "message": "Chat deleted"}
    raise HTTPException(status_code=404, detail="Chat not found")
|
||||
@@ -0,0 +1,37 @@
|
||||
"""
|
||||
Configuration settings for the AI service.
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load environment variables from .env file
|
||||
load_dotenv()
|
||||
|
||||
class Config:
    """Base configuration.

    Every setting is read from the process environment once, when this class
    body is executed, and falls back to the default shown here.
    """

    # API configuration
    API_HOST = os.environ.get('API_HOST', '0.0.0.0')
    API_PORT = int(os.environ.get('API_PORT', 5251))

    # OpenWebUI configuration
    OPENWEBUI_URL = os.environ.get('OPENWEBUI_URL', 'http://104.225.217.215:8080')
    OPENWEBUI_API_KEY = os.environ.get('OPENWEBUI_API_KEY', '')

    # Ollama configuration
    OLLAMA_API_URL = os.environ.get('OLLAMA_API_URL', 'http://localhost:11434')
    DEFAULT_MODEL = os.environ.get('DEFAULT_MODEL', 'llama3.1')

    # Local storage
    SQLITE_DB_PATH = os.environ.get('SQLITE_DB_PATH', 'ai_service/data/chatbot.db')

    # Document processing
    CHUNK_SIZE = int(os.environ.get('CHUNK_SIZE', 1000))
    CHUNK_OVERLAP = int(os.environ.get('CHUNK_OVERLAP', 200))

    # Embedding model
    EMBEDDING_MODEL = os.environ.get('EMBEDDING_MODEL', 'all-MiniLM-L6-v2')

    # Pinecone configuration. The embedding service reads these three
    # attributes; without them attribute access on `config` raises
    # AttributeError. Empty key/environment values mean "not configured".
    PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY', '')
    PINECONE_ENVIRONMENT = os.environ.get('PINECONE_ENVIRONMENT', '')
    PINECONE_INDEX_NAME = os.environ.get('PINECONE_INDEX_NAME', 'chatbot-index')


config = Config()
|
||||
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"48b04d66-3ef4-440b-8508-ced930aa42a9": {
|
||||
"id": "48b04d66-3ef4-440b-8508-ced930aa42a9",
|
||||
"title": "Test Chat",
|
||||
"user_id": "test_user",
|
||||
"model_id": "gpt-3.5-turbo",
|
||||
"is_team_chat": false,
|
||||
"created_at": "2025-05-05T20:29:54.936964",
|
||||
"updated_at": "2025-05-05T20:29:55.394906",
|
||||
"messages": [
|
||||
{
|
||||
"id": "9daafee5-f00c-4493-96ea-96492f97482e",
|
||||
"content": "Tell me about artificial intelligence",
|
||||
"user_id": "test_user",
|
||||
"is_user_message": true,
|
||||
"timestamp": "2025-05-05T20:29:54.971667"
|
||||
},
|
||||
{
|
||||
"id": "0337e0b1-1e18-4d09-8b8b-c5ee295870a0",
|
||||
"content": "Error generating response: 401 Client Error: Unauthorized for url: https://api.openai.com/v1/chat/completions",
|
||||
"user_id": null,
|
||||
"is_user_message": false,
|
||||
"timestamp": "2025-05-05T20:29:55.394891"
|
||||
}
|
||||
],
|
||||
"team_members": []
|
||||
}
|
||||
}
|
||||
Executable
+20
@@ -0,0 +1,20 @@
|
||||
#!/bin/bash
#
# Start the AI service in the background with uvicorn under nohup.
# Run this from the directory that contains venv/ and the ai_service package.

# Stop at the first failing step so uvicorn is never launched from a
# half-prepared environment (e.g. when the virtual environment is missing).
set -e

# Create a directory for the AI service logs
mkdir -p logs

# Activate virtual environment
source venv/bin/activate

# Export environment variables; values already set by the caller are kept,
# otherwise the original defaults apply.
export API_HOST="${API_HOST:-0.0.0.0}"
export API_PORT="${API_PORT:-5251}"

# Make sure the Python path includes the current directory
# (no leading ':' is emitted when PYTHONPATH was empty).
export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}$(pwd)"

# Run the application with uvicorn and nohup
nohup uvicorn ai_service.run:app --host "$API_HOST" --port "$API_PORT" > logs/ai_service.log 2>&1 &

echo "AI Service started on port $API_PORT (PID $!). Check logs/ai_service.log for output."
echo "To stop the application, find the process ID with 'ps aux | grep uvicorn' and kill it with 'kill <PID>'."
|
||||
@@ -0,0 +1,261 @@
|
||||
"""
|
||||
Service for document processing and chunking.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import uuid
|
||||
import requests
|
||||
import base64
|
||||
from typing import List, Dict, Any, Optional
|
||||
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
||||
|
||||
from ai_service.config import config
|
||||
|
||||
class DocumentService:
    """Service for document processing and chunking.

    Documents are uploaded to OpenWebUI; a small metadata record per document
    is kept in ``self.documents`` (keyed by document ID) and persisted to a
    JSON file stored next to the configured SQLite database path.
    """

    def __init__(self):
        """Initialize the text splitter, OpenWebUI settings and metadata store."""
        self.chunk_size = config.CHUNK_SIZE
        self.chunk_overlap = config.CHUNK_OVERLAP
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.chunk_size,
            chunk_overlap=self.chunk_overlap,
            length_function=len
        )

        # OpenWebUI configuration
        self.openwebui_url = config.OPENWEBUI_URL
        self.openwebui_api_key = config.OPENWEBUI_API_KEY

        # Ensure data directory exists. dirname() is '' for a bare file name
        # and os.makedirs('') raises, so only create a real directory.
        data_dir = os.path.dirname(config.SQLITE_DB_PATH)
        if data_dir:
            os.makedirs(data_dir, exist_ok=True)

        # For now, we'll store document metadata in a simple JSON file
        self.metadata_file = os.path.join(data_dir, 'document_metadata.json')
        self._load_metadata()

    def _auth_headers(self) -> Dict[str, str]:
        """Build the JSON request headers, adding a bearer token when a key is set."""
        headers = {"Content-Type": "application/json"}
        if self.openwebui_api_key:
            headers["Authorization"] = f"Bearer {self.openwebui_api_key}"
        return headers

    def _load_metadata(self):
        """Load document metadata from file into ``self.documents``.

        A missing, unreadable or malformed file results in an empty store.
        Records that lack ``chunk_count`` get ``0`` so that every record
        carries the fields the API response model requires.
        """
        self.documents = {}
        if not os.path.exists(self.metadata_file):
            return

        try:
            with open(self.metadata_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except Exception as e:
            print(f"Error loading document metadata: {str(e)}")
            return

        if not isinstance(loaded, dict):
            # The rest of the class indexes the store by document ID.
            print("Error loading document metadata: expected a JSON object")
            return

        for record in loaded.values():
            if isinstance(record, dict):
                record.setdefault('chunk_count', 0)
        self.documents = loaded

    def _save_metadata(self):
        """Save document metadata to file; failures are reported, not raised."""
        try:
            with open(self.metadata_file, 'w', encoding='utf-8') as f:
                json.dump(self.documents, f, indent=2)
        except Exception as e:
            print(f"Error saving document metadata: {str(e)}")

    def process_document(self, content: str, title: str,
                         description: Optional[str] = None,
                         metadata: Optional[Dict[str, Any]] = None) -> str:
        """
        Process a document for embedding.

        The document is uploaded to OpenWebUI. If the upload fails for any
        reason, the document is still registered locally and flagged with
        ``openwebui_upload_failed``.

        Args:
            content: Document content.
            title: Document title.
            description: Optional document description.
            metadata: Optional additional metadata.

        Returns:
            Document ID.
        """
        # Generate a unique ID for the document
        doc_id = str(uuid.uuid4())

        # Split locally up front: the stored record must carry a chunk count
        # whether or not the upload succeeds. This is the local splitter's
        # count; OpenWebUI's own chunking may differ.
        chunks = self.text_splitter.split_text(content)

        record = {
            'id': doc_id,
            'title': title,
            'description': description or '',
            'chunk_count': len(chunks),
            'metadata': metadata or {}
        }

        # Upload the document to OpenWebUI for RAG processing
        try:
            document_data = {
                "filename": f"{title}.txt",
                "content": base64.b64encode(content.encode('utf-8')).decode('utf-8'),
                "description": description or title
            }

            response = requests.post(
                f"{self.openwebui_url}/api/knowledge/upload",
                headers=self._auth_headers(),
                json=document_data,
                timeout=60
            )
            response.raise_for_status()
            result = response.json()

            # Remember the OpenWebUI document ID for later lookups/deletion
            record['openwebui_id'] = result.get('id', '')

        except Exception as e:
            # Deliberately broad: any upload problem falls back to local-only.
            print(f"Error uploading document to OpenWebUI: {str(e)}")
            print("Falling back to local document processing")
            record['openwebui_upload_failed'] = True

        # Store document metadata and persist it
        self.documents[doc_id] = record
        self._save_metadata()

        return doc_id

    def get_document(self, doc_id: str) -> Optional[Dict[str, Any]]:
        """
        Get document metadata.

        Args:
            doc_id: Document ID.

        Returns:
            Document metadata if found, None otherwise.
        """
        return self.documents.get(doc_id)

    def get_all_documents(self) -> List[Dict[str, Any]]:
        """
        Get all document metadata.

        Records that were uploaded to OpenWebUI are annotated with
        ``openwebui_status`` and ``openwebui_info`` when OpenWebUI lists them.
        A failure to reach OpenWebUI is reported and otherwise ignored.

        Returns:
            List of document metadata.
        """
        # Get documents from local storage
        local_documents = list(self.documents.values())

        # Try to get documents from OpenWebUI as well
        try:
            response = requests.get(
                f"{self.openwebui_url}/api/knowledge",
                headers=self._auth_headers(),
                timeout=30
            )

            if response.status_code == 200:
                openwebui_docs = response.json()

                # Update local documents with OpenWebUI information
                for doc in local_documents:
                    if 'openwebui_id' in doc:
                        for openwebui_doc in openwebui_docs:
                            if openwebui_doc.get('id') == doc['openwebui_id']:
                                doc['openwebui_status'] = 'active'
                                doc['openwebui_info'] = openwebui_doc
                                break

        except Exception as e:
            print(f"Error getting documents from OpenWebUI: {str(e)}")

        return local_documents

    def delete_document(self, doc_id: str) -> bool:
        """
        Delete a document and its chunks.

        The local record is removed even when the OpenWebUI deletion fails;
        such failures are only reported.

        Args:
            doc_id: Document ID.

        Returns:
            True if deletion was successful, False otherwise.
        """
        if doc_id not in self.documents:
            return False

        # Check if document was uploaded to OpenWebUI
        doc = self.documents[doc_id]
        openwebui_id = doc.get('openwebui_id')

        if openwebui_id:
            try:
                response = requests.delete(
                    f"{self.openwebui_url}/api/knowledge/{openwebui_id}",
                    headers=self._auth_headers(),
                    timeout=30
                )

                if response.status_code != 200:
                    print(f"Warning: Failed to delete document from OpenWebUI: {response.text}")

            except Exception as e:
                print(f"Error deleting document from OpenWebUI: {str(e)}")

        # Delete document metadata
        del self.documents[doc_id]

        # Save metadata to file
        self._save_metadata()

        return True

    def search_documents(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        """
        Search for documents similar to a query.

        Args:
            query: Search query.
            top_k: Number of results to return.

        Returns:
            List of similar document chunks with their metadata. Currently
            always an empty list (see the note in the body).
        """
        # Note: We don't need to implement this method anymore since
        # RAG is handled directly by OpenWebUI when use_rag=True in the model service

        # Return empty results - this is just a placeholder
        # The actual RAG functionality is in the model_service.generate_response method
        return []
|
||||
|
||||
|
||||
# Create a singleton instance
|
||||
document_service = DocumentService()
|
||||
@@ -0,0 +1,214 @@
|
||||
"""
|
||||
Service for generating and managing embeddings.
|
||||
"""
|
||||
|
||||
import os
|
||||
import random
|
||||
import pinecone
|
||||
import numpy as np
|
||||
from typing import List, Dict, Any, Optional, Union
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
from ai_service.config import config
|
||||
|
||||
class EmbeddingService:
|
||||
"""Service for generating and managing embeddings."""
|
||||
|
||||
def __init__(self, use_mock=True):  # Default to mock implementation
    """Initialize the embedding service.

    Args:
        use_mock: When True (the default), no model is loaded and mock
            vectors are generated. When False, the sentence-transformer
            model is loaded; if loading fails the service switches itself
            to mock mode.
    """
    self.use_mock = use_mock

    # Start from the mock state so that self.model and self.model_name
    # always exist, including when model loading fails below.
    self.model = None
    self.model_name = "mock-model"

    if not self.use_mock:
        # Use a smaller model for testing
        real_model_name = "paraphrase-MiniLM-L3-v2"  # Smaller model than the default
        try:
            self.model = SentenceTransformer(real_model_name)
            self.model_name = real_model_name
            print(f"Loaded embedding model: {self.model_name}")
        except Exception as e:
            print(f"Error loading embedding model: {str(e)}")
            self.use_mock = True
            print("Falling back to mock implementation")
    else:
        print("Using mock embedding implementation")

    self._initialize_pinecone()
|
||||
|
||||
def _initialize_pinecone(self):
    """Initialize Pinecone client.

    Sets ``self.index`` to the Pinecone index, creating the index first when
    it does not exist. ``self.index`` is ``None`` when Pinecone is not
    configured or when connecting fails.
    """
    # Read the settings defensively: a config object without Pinecone
    # attributes is treated as "not configured" instead of raising.
    api_key = getattr(config, 'PINECONE_API_KEY', None)
    environment = getattr(config, 'PINECONE_ENVIRONMENT', None)
    index_name = getattr(config, 'PINECONE_INDEX_NAME', None)

    if not api_key or not environment:
        print("Warning: Pinecone API key or environment not set. Vector storage will not be available.")
        self.index = None
        return

    try:
        pinecone.init(
            api_key=api_key,
            environment=environment
        )

        # Check if index exists, create if it doesn't
        if index_name not in pinecone.list_indexes():
            model = getattr(self, 'model', None)
            if model is not None:
                dimension = model.get_sentence_embedding_dimension()
            else:
                # No model is loaded in mock mode; 384 is the size of the
                # mock vectors this service generates.
                dimension = 384

            pinecone.create_index(
                name=index_name,
                dimension=dimension,
                metric="cosine"
            )

        self.index = pinecone.Index(index_name)
        print(f"Connected to Pinecone index: {index_name}")
    except Exception as e:
        print(f"Error connecting to Pinecone: {str(e)}")
        self.index = None
|
||||
|
||||
def generate_embedding(self, text: str) -> List[float]:
|
||||
"""
|
||||
Generate an embedding for a text.
|
||||
|
||||
Args:
|
||||
text: Text to embed.
|
||||
|
||||
Returns:
|
||||
Embedding vector.
|
||||
"""
|
||||
if self.use_mock:
|
||||
# Generate a mock embedding vector (384 dimensions for consistency)
|
||||
return [random.random() for _ in range(384)]
|
||||
|
||||
embedding = self.model.encode(text)
|
||||
return embedding.tolist()
|
||||
|
||||
def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
|
||||
"""
|
||||
Generate embeddings for multiple texts.
|
||||
|
||||
Args:
|
||||
texts: List of texts to embed.
|
||||
|
||||
Returns:
|
||||
List of embedding vectors.
|
||||
"""
|
||||
if self.use_mock:
|
||||
# Generate mock embedding vectors
|
||||
return [[random.random() for _ in range(384)] for _ in texts]
|
||||
|
||||
embeddings = self.model.encode(texts)
|
||||
return embeddings.tolist()
|
||||
|
||||
def store_embeddings(self, ids: List[str], embeddings: List[List[float]],
|
||||
metadata: Optional[List[Dict[str, Any]]] = None) -> bool:
|
||||
"""
|
||||
Store embeddings in Pinecone.
|
||||
|
||||
Args:
|
||||
ids: List of IDs for the embeddings.
|
||||
embeddings: List of embedding vectors.
|
||||
metadata: Optional list of metadata dictionaries.
|
||||
|
||||
Returns:
|
||||
True if storage was successful, False otherwise.
|
||||
"""
|
||||
if self.use_mock:
|
||||
print(f"Mock: Stored {len(ids)} embeddings")
|
||||
return True
|
||||
|
||||
if self.index is None:
|
||||
print("Warning: Pinecone index not available. Embeddings not stored.")
|
||||
return False
|
||||
|
||||
if metadata is None:
|
||||
metadata = [{} for _ in ids]
|
||||
|
||||
vectors = [
|
||||
(id, embedding, meta)
|
||||
for id, embedding, meta in zip(ids, embeddings, metadata)
|
||||
]
|
||||
|
||||
try:
|
||||
self.index.upsert(vectors=vectors)
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"Error storing embeddings in Pinecone: {str(e)}")
|
||||
return False
|
||||
|
||||
def search_similar(self, query_embedding: List[float], top_k: int = 5) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Search for similar embeddings in Pinecone.
|
||||
|
||||
Args:
|
||||
query_embedding: Query embedding vector.
|
||||
top_k: Number of results to return.
|
||||
|
||||
Returns:
|
||||
List of similar items with their metadata.
|
||||
"""
|
||||
if self.use_mock:
|
||||
# Generate mock search results
|
||||
print(f"Mock: Searching for similar embeddings (top_k={top_k})")
|
||||
mock_results = []
|
||||
for i in range(min(top_k, 3)): # Return at most 3 mock results
|
||||
mock_results.append({
|
||||
'id': f"mock_doc_{i}",
|
||||
'score': 0.9 - (i * 0.1), # Decreasing similarity scores
|
||||
'metadata': {
|
||||
'document_id': f"mock_doc_{i}",
|
||||
'chunk_index': i,
|
||||
'title': f"Mock Document {i}",
|
||||
'description': f"This is a mock document {i}",
|
||||
'chunk_text': f"This is the content of mock document {i}..."
|
||||
}
|
||||
})
|
||||
return mock_results
|
||||
|
||||
if self.index is None:
|
||||
print("Warning: Pinecone index not available. Search not performed.")
|
||||
return []
|
||||
|
||||
try:
|
||||
results = self.index.query(
|
||||
vector=query_embedding,
|
||||
top_k=top_k,
|
||||
include_metadata=True
|
||||
)
|
||||
|
||||
return [
|
||||
{
|
||||
'id': match['id'],
|
||||
'score': match['score'],
|
||||
'metadata': match.get('metadata', {})
|
||||
}
|
||||
for match in results.get('matches', [])
|
||||
]
|
||||
except Exception as e:
|
||||
print(f"Error searching in Pinecone: {str(e)}")
|
||||
return []
|
||||
|
||||
def delete_embeddings(self, ids: List[str]) -> bool:
|
||||
"""
|
||||
Delete embeddings from Pinecone.
|
||||
|
||||
Args:
|
||||
ids: List of IDs to delete.
|
||||
|
||||
Returns:
|
||||
True if deletion was successful, False otherwise.
|
||||
"""
|
||||
if self.use_mock:
|
||||
print(f"Mock: Deleted {len(ids)} embeddings")
|
||||
return True
|
||||
|
||||
if self.index is None:
|
||||
print("Warning: Pinecone index not available. Deletion not performed.")
|
||||
return False
|
||||
|
||||
try:
|
||||
self.index.delete(ids=ids)
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"Error deleting embeddings from Pinecone: {str(e)}")
|
||||
return False
|
||||
|
||||
|
||||
# Create a singleton instance shared by importers of this module.
# It is constructed with the default use_mock=True, so it runs in mock mode.
embedding_service = EmbeddingService()
|
||||
@@ -0,0 +1,309 @@
|
||||
"""
|
||||
Service for chat functionality.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
from ai_service.config import config
|
||||
from ai_service.models.model_service import model_service
|
||||
from ai_service.models.model_parameters import ModelParameters
|
||||
|
||||
class ChatService:
    """Service for chat functionality.

    Chats are kept in memory in ``self.chats`` (chat ID -> chat dict) and
    persisted as one JSON file next to the configured SQLite database path.
    """

    # Number of most recent stored messages sent to the model as context.
    CONTEXT_MESSAGES = 10

    def __init__(self):
        """Initialize the chat service and load any previously saved chats."""
        # dirname() is '' when the configured path has no directory part;
        # os.makedirs('') would raise, so fall back to the current directory.
        data_dir = os.path.dirname(config.SQLITE_DB_PATH) or '.'

        # Ensure data directory exists
        os.makedirs(data_dir, exist_ok=True)

        # For now, we'll store chat data in a simple JSON file
        self.chats_file = os.path.join(data_dir, 'chats.json')
        self._load_chats()

    def _load_chats(self):
        """Load chats from file into ``self.chats``.

        ``self.chats`` ends up empty when the file is missing, unreadable,
        or does not contain a JSON object.
        """
        self.chats = {}

        if not os.path.exists(self.chats_file):
            return

        try:
            with open(self.chats_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except Exception as e:
            print(f"Error loading chats: {str(e)}")
            return

        if isinstance(loaded, dict):
            self.chats = loaded
        else:
            print("Error loading chats: expected a JSON object at the top level")

    def _save_chats(self):
        """Save chats to file.

        The data is written to a temporary file that then replaces the real
        file, so a failed dump cannot leave ``chats.json`` truncated.
        """
        tmp_path = f"{self.chats_file}.tmp"
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(self.chats, f, indent=2)
            os.replace(tmp_path, self.chats_file)
        except Exception as e:
            print(f"Error saving chats: {str(e)}")
            # Best-effort cleanup of the partial temporary file.
            try:
                os.remove(tmp_path)
            except OSError:
                pass

    def create_chat(self, user_id: str, title: Optional[str] = None,
                    model_id: Optional[str] = None, is_team_chat: bool = False) -> str:
        """
        Create a new chat.

        Args:
            user_id: ID of the user creating the chat.
            title: Optional title for the chat.
            model_id: Optional model ID to use for this chat.
            is_team_chat: Whether this is a team chat.

        Returns:
            ID of the created chat.
        """
        # Generate a unique ID for the chat
        chat_id = str(uuid.uuid4())

        # Read the clock once so created_at and updated_at start out equal.
        # utcnow() is kept so the stored timestamp format does not change.
        now = datetime.utcnow().isoformat()

        # Create chat data
        self.chats[chat_id] = {
            'id': chat_id,
            'title': title or f"Chat {len(self.chats) + 1}",
            'user_id': user_id,
            'model_id': model_id or config.DEFAULT_MODEL,
            'is_team_chat': is_team_chat,
            'created_at': now,
            'updated_at': now,
            'messages': [],
            'team_members': [user_id] if is_team_chat else []
        }

        # Save chats to file
        self._save_chats()

        return chat_id

    def add_message(self, chat_id: str, content: str, user_id: str,
                    is_user_message: bool = True) -> Dict[str, Any]:
        """
        Add a message to a chat.

        Args:
            chat_id: ID of the chat.
            content: Message content.
            user_id: ID of the user sending the message.
            is_user_message: Whether this is a user message (vs. bot message).

        Returns:
            Added message.

        Raises:
            ValueError: If no chat with ``chat_id`` exists.
        """
        if chat_id not in self.chats:
            raise ValueError(f"Chat with ID {chat_id} not found")

        chat = self.chats[chat_id]
        now = datetime.utcnow().isoformat()

        # Create message data; bot messages carry no user ID.
        message = {
            'id': str(uuid.uuid4()),
            'content': content,
            'user_id': user_id if is_user_message else None,
            'is_user_message': is_user_message,
            'timestamp': now
        }

        # Add message to chat and update the chat timestamp
        chat['messages'].append(message)
        chat['updated_at'] = now

        # Save chats to file
        self._save_chats()

        return message

    def get_chat(self, chat_id: str) -> Optional[Dict[str, Any]]:
        """
        Get a chat by ID.

        Args:
            chat_id: ID of the chat.

        Returns:
            Chat data if found, None otherwise.
        """
        return self.chats.get(chat_id)

    def get_user_chats(self, user_id: str) -> List[Dict[str, Any]]:
        """
        Get all chats for a user.

        Args:
            user_id: ID of the user.

        Returns:
            The user's private chats plus team chats they belong to,
            newest ``updated_at`` first.
        """
        def visible_to_user(chat: Dict[str, Any]) -> bool:
            if chat['is_team_chat']:
                # Team chats are visible to their members
                return user_id in chat['team_members']
            # Private chats are visible to their owner only
            return chat['user_id'] == user_id

        user_chats = [chat for chat in self.chats.values() if visible_to_user(chat)]

        # Sort by updated_at (newest first)
        user_chats.sort(key=lambda x: x['updated_at'], reverse=True)

        return user_chats

    def add_team_member(self, chat_id: str, user_id: str) -> bool:
        """
        Add a user to a team chat.

        Args:
            chat_id: ID of the team chat.
            user_id: ID of the user to add.

        Returns:
            True if the chat exists and is a team chat (also when the user
            was already a member), False otherwise.
        """
        chat = self.chats.get(chat_id)
        if chat is None or not chat['is_team_chat']:
            return False

        if user_id not in chat['team_members']:
            chat['team_members'].append(user_id)
            self._save_chats()

        return True

    def remove_team_member(self, chat_id: str, user_id: str) -> bool:
        """
        Remove a user from a team chat.

        Args:
            chat_id: ID of the team chat.
            user_id: ID of the user to remove.

        Returns:
            True if the chat exists and is a team chat (also when the user
            was not a member), False otherwise.
        """
        chat = self.chats.get(chat_id)
        if chat is None or not chat['is_team_chat']:
            return False

        if user_id in chat['team_members']:
            chat['team_members'].remove(user_id)
            self._save_chats()

        return True

    def delete_chat(self, chat_id: str) -> bool:
        """
        Delete a chat.

        Args:
            chat_id: ID of the chat to delete.

        Returns:
            True if deletion was successful, False if the chat does not exist.
        """
        if chat_id not in self.chats:
            return False

        del self.chats[chat_id]
        self._save_chats()

        return True

    def get_chat_response(self, chat_id: str, message: str, user_id: str,
                          use_rag: bool = False, temperature: Optional[float] = None,
                          max_tokens: Optional[int] = None, top_p: Optional[float] = None,
                          frequency_penalty: Optional[float] = None, presence_penalty: Optional[float] = None,
                          stop_sequences: Optional[List[str]] = None, system_prompt: Optional[str] = None,
                          min_p: Optional[float] = None, top_k: Optional[int] = None,
                          repeat_penalty: Optional[float] = None, function_calling: Optional[bool] = None) -> Dict[str, Any]:
        """
        Get a response from the chatbot.

        Args:
            chat_id: ID of the chat.
            message: User message.
            user_id: ID of the user sending the message.
            use_rag: Whether to use RAG (Retrieval Augmented Generation).
            temperature: Controls randomness in the response.
            max_tokens: Maximum number of tokens to generate.
            top_p: Nucleus sampling parameter.
            frequency_penalty: Penalizes repeated tokens.
            presence_penalty: Penalizes repeated topics.
            stop_sequences: Sequences where the API will stop generating.
            system_prompt: System prompt to guide the model's behavior.
            min_p: Minimum probability threshold for token selection.
            top_k: Only sample from the top k tokens.
            repeat_penalty: Penalty for repeating tokens.
            function_calling: Whether to enable function calling.

        Returns:
            Bot response message.

        Raises:
            ValueError: If no chat with ``chat_id`` exists.
        """
        if chat_id not in self.chats:
            raise ValueError(f"Chat with ID {chat_id} not found")

        chat = self.chats[chat_id]

        # Build the context from the history BEFORE storing the new message.
        # The message is also passed as `prompt`, and the model service
        # appends it to the conversation itself, so including it here would
        # send it to the model twice.
        context = [
            {
                "role": "user" if msg['is_user_message'] else "assistant",
                "content": msg['content']
            }
            for msg in chat['messages'][-self.CONTEXT_MESSAGES:]
        ]

        # Add user message to chat
        self.add_message(chat_id, message, user_id, is_user_message=True)

        # Create model parameters
        model_params = ModelParameters(
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            presence_penalty=presence_penalty,
            stop_sequences=stop_sequences,
            system_prompt=system_prompt,
            min_p=min_p,
            top_k=top_k,
            repeat_penalty=repeat_penalty,
            function_calling=function_calling
        )

        # Get response from model
        response_text = model_service.generate_response(
            model_id=chat['model_id'],
            prompt=message,
            context=context,
            use_rag=use_rag,
            model_params=model_params
        )

        # Add bot response to chat
        return self.add_message(
            chat_id=chat_id,
            content=response_text,
            user_id=user_id,
            is_user_message=False
        )
|
||||
|
||||
|
||||
# Create a singleton instance shared by importers of this module.
# Constructing it creates the data directory and loads saved chats at import time.
chat_service = ChatService()
|
||||
@@ -0,0 +1,170 @@
|
||||
"""
|
||||
Model parameters for AI models.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, Optional, List
|
||||
from pydantic import BaseModel, Field, validator
|
||||
|
||||
|
||||
class ModelParameters(BaseModel):
    """Parameters for AI model generation.

    Every field is optional. Fields left as None are omitted by ``to_dict``
    and ``for_provider``.
    """

    # Basic parameters
    temperature: Optional[float] = Field(
        0.7,
        description="Controls randomness: 0 is deterministic, higher values are more random",
        ge=0.0,
        le=2.0
    )

    max_tokens: Optional[int] = Field(
        1000,
        description="Maximum number of tokens to generate",
        gt=0
    )

    # Sampling parameters
    top_p: Optional[float] = Field(
        1.0,
        description="Nucleus sampling: consider tokens with top_p probability mass",
        ge=0.0,
        le=1.0
    )

    top_k: Optional[int] = Field(
        None,
        description="Only sample from the top k tokens",
        gt=0
    )

    # Repetition control
    frequency_penalty: Optional[float] = Field(
        0.0,
        description="Penalizes repeated tokens",
        ge=-2.0,
        le=2.0
    )

    presence_penalty: Optional[float] = Field(
        0.0,
        description="Penalizes repeated topics",
        ge=-2.0,
        le=2.0
    )

    # Advanced parameters
    stop_sequences: Optional[List[str]] = Field(
        None,
        description="Sequences where the API will stop generating"
    )

    min_p: Optional[float] = Field(
        None,
        description="Minimum probability threshold for token selection",
        ge=0.0,
        le=1.0
    )

    repeat_penalty: Optional[float] = Field(
        None,
        description="Penalty for repeating tokens",
        ge=0.0
    )

    presence_penalty_tokens: Optional[int] = Field(
        None,
        description="Number of tokens to consider for presence penalty",
        gt=0
    )

    # System prompt
    system_prompt: Optional[str] = Field(
        None,
        description="System prompt to guide the model's behavior"
    )

    # Function calling
    function_calling: Optional[bool] = Field(
        None,
        description="Whether to enable function calling"
    )

    # Additional parameters that might be model-specific
    extra_params: Optional[Dict[str, Any]] = Field(
        None,
        description="Additional model-specific parameters"
    )

    @validator('temperature', 'top_p', 'frequency_penalty', 'presence_penalty', pre=True)
    def validate_float_params(cls, v):
        """Coerce float parameters before field validation.

        None and bool values are passed through unchanged; anything else is
        converted with ``float()``.
        """
        if v is not None and not isinstance(v, bool):  # Avoid converting bool to float
            return float(v)
        return v

    @validator('max_tokens', 'top_k', pre=True)
    def validate_int_params(cls, v):
        """Coerce integer parameters before field validation.

        None and bool values are passed through unchanged; anything else is
        converted with ``int()``.
        """
        if v is not None and not isinstance(v, bool):  # Avoid converting bool to int
            return int(v)
        return v

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert parameters to a dictionary, excluding None values.

        Entries of ``extra_params`` are merged in last, so they override
        regular fields with the same name.

        Returns:
            Dictionary of parameters.
        """
        # model_dump() is the pydantic v2 name for the deprecated dict();
        # fall back to dict() so this still works where only it exists.
        dump = getattr(self, 'model_dump', None) or self.dict

        result = {
            key: value
            for key, value in dump().items()
            if value is not None and key != 'extra_params'
        }

        # Add any extra parameters
        if self.extra_params:
            result.update(self.extra_params)

        return result

    def for_provider(self, provider: str) -> Dict[str, Any]:
        """
        Get parameters formatted for a specific provider.

        Args:
            provider: Provider name (e.g., 'openai', 'ollama', 'anthropic').

        Returns:
            Dictionary of parameters formatted for the provider. Unknown
            providers get the plain ``to_dict()`` output.
        """
        params = self.to_dict()

        # Handle provider-specific parameter naming
        if provider == 'openai':
            # OpenAI uses 'stop' instead of 'stop_sequences'
            if 'stop_sequences' in params:
                params['stop'] = params.pop('stop_sequences')

        elif provider == 'ollama':
            # Keep only options Ollama understands and use its own names:
            # the token limit is 'num_predict' and stop sequences are 'stop'.
            ollama_names = {
                'temperature': 'temperature',
                'top_p': 'top_p',
                'top_k': 'top_k',
                'min_p': 'min_p',
                'repeat_penalty': 'repeat_penalty',
                'max_tokens': 'num_predict',
                'stop_sequences': 'stop',
            }
            params = {
                ollama_names[key]: value
                for key, value in params.items()
                if key in ollama_names
            }

        elif provider == 'anthropic':
            # Anthropic uses 'stop_sequences' and different temperature scaling
            if 'temperature' in params:
                # Anthropic's temperature is typically 0-1
                params['temperature'] = min(params['temperature'], 1.0)

        # 'cohere' also uses 'stop_sequences', so it needs no conversion.
        # Add more provider-specific conversions as needed

        return params
|
||||
@@ -0,0 +1,243 @@
|
||||
"""
|
||||
Service for model management and interaction.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import requests
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
from ai_service.config import config
|
||||
from ai_service.embeddings.document_service import document_service
|
||||
from ai_service.models.model_parameters import ModelParameters
|
||||
|
||||
class ModelService:
    """Service for model management and interaction.

    Responses come from Ollama's chat endpoint. When RAG is requested the
    call goes to OpenWebUI first and falls back to Ollama on error.
    """

    # Available models
    AVAILABLE_MODELS = {
        'gemma3': {
            'name': 'Gemma 3',
            'description': 'Google Gemma 3 model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        },
        'llama3.3': {
            'name': 'Llama 3 (70B)',
            'description': 'Meta Llama 3 70B model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        },
        'llama3.1': {
            'name': 'Llama 3 (8B)',
            'description': 'Meta Llama 3 8B model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        },
        'mistral': {
            'name': 'Mistral',
            'description': 'Mistral AI model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        },
        'deepseek': {
            'name': 'DeepSeek',
            'description': 'DeepSeek model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        }
    }

    # System prompt used when the caller does not supply one.
    DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant."

    # ModelParameters.to_dict() key -> Ollama option name. Ollama reads
    # generation settings from the request's "options" object, and calls the
    # token limit "num_predict" and stop sequences "stop".
    _OLLAMA_OPTION_NAMES = {
        'temperature': 'temperature',
        'top_p': 'top_p',
        'top_k': 'top_k',
        'min_p': 'min_p',
        'repeat_penalty': 'repeat_penalty',
        'max_tokens': 'num_predict',
        'stop_sequences': 'stop',
    }

    def __init__(self):
        """Initialize the model service from the application config."""
        self.default_model = config.DEFAULT_MODEL
        self.ollama_api_url = config.OLLAMA_API_URL
        self.openwebui_url = config.OPENWEBUI_URL
        self.openwebui_api_key = config.OPENWEBUI_API_KEY

    def get_available_models(self) -> List[Dict[str, Any]]:
        """
        Get a list of available models.

        Returns:
            List of model information dictionaries.
        """
        return [self.get_model_info(model_id) for model_id in self.AVAILABLE_MODELS]

    def get_model_info(self, model_id: str) -> Optional[Dict[str, Any]]:
        """
        Get information about a specific model.

        Args:
            model_id: ID of the model.

        Returns:
            Model information dictionary if found, None otherwise.
        """
        if model_id not in self.AVAILABLE_MODELS:
            return None

        return {
            'id': model_id,
            'is_default': model_id == self.default_model,
            **self.AVAILABLE_MODELS[model_id]
        }

    def _build_messages(self, prompt: str,
                        context: Optional[List[Dict[str, str]]],
                        model_params: Optional["ModelParameters"]) -> List[Dict[str, str]]:
        """Return the message list: system prompt, then context, then the user prompt."""
        system_content = self.DEFAULT_SYSTEM_PROMPT
        # Use custom system prompt if provided, otherwise use default
        if model_params and model_params.system_prompt:
            system_content = model_params.system_prompt

        messages = [{"role": "system", "content": system_content}]
        if context:
            messages.extend(context)
        messages.append({"role": "user", "content": prompt})
        return messages

    @classmethod
    def _ollama_options(cls, params: Dict[str, Any]) -> Dict[str, Any]:
        """Translate a parameter dict into Ollama's "options" object."""
        return {
            cls._OLLAMA_OPTION_NAMES[key]: value
            for key, value in params.items()
            if key in cls._OLLAMA_OPTION_NAMES
        }

    @staticmethod
    def _extract_content(result: Any) -> Optional[str]:
        """Pull the generated text out of a JSON response body.

        Handles the OpenAI-style ``choices[0].message.content`` shape, the
        Ollama chat ``message.content`` shape and the Ollama generate
        ``response`` shape. Returns None when none of them matches.
        """
        if not isinstance(result, dict):
            return None

        choices = result.get('choices')
        if isinstance(choices, list) and choices:
            first = choices[0]
            message = first.get('message') if isinstance(first, dict) else None
            if isinstance(message, dict) and 'content' in message:
                return message['content']

        message = result.get('message')
        if isinstance(message, dict) and 'content' in message:
            return message['content']

        if 'response' in result:
            return result['response']

        return None

    def _call_openwebui(self, model_id: str, messages: List[Dict[str, str]],
                        params: Dict[str, Any]) -> str:
        """Send a chat completion request to OpenWebUI and return the reply text."""
        payload = {
            "model": model_id,
            "messages": messages,
            # NOTE(review): confirm the deployed OpenWebUI version honors this flag.
            "use_knowledge": True,  # Enable RAG
            "stream": False
        }

        # Map parameters to OpenWebUI format
        for key in ('temperature', 'max_tokens', 'top_p'):
            if key in params:
                payload[key] = params[key]

        headers = {"Content-Type": "application/json"}
        if self.openwebui_api_key:
            headers["Authorization"] = f"Bearer {self.openwebui_api_key}"

        # OpenWebUI API endpoint is /api/chat/completions
        response = requests.post(
            f"{self.openwebui_url}/api/chat/completions",
            headers=headers,
            json=payload,
            timeout=60  # Longer timeout for RAG
        )
        response.raise_for_status()

        content = self._extract_content(response.json())
        if content is None:
            return "Error: Unexpected response format from OpenWebUI"
        return content

    def _call_ollama(self, model_id: str, messages: List[Dict[str, str]],
                     params: Dict[str, Any]) -> str:
        """Send a chat request to Ollama and return the reply text."""
        payload = {
            "model": model_id,
            "messages": messages,
            "stream": False
        }

        options = self._ollama_options(params)
        if options:
            payload["options"] = options

        # /api/chat is the endpoint that accepts a "messages" list;
        # /api/generate expects a single "prompt" instead.
        response = requests.post(
            f"{self.ollama_api_url}/api/chat",
            headers={"Content-Type": "application/json"},
            json=payload,
            timeout=30
        )
        response.raise_for_status()

        content = self._extract_content(response.json())
        if content is None:
            return "Error: Unexpected response format from Ollama"
        return content

    def generate_response(self, model_id: str, prompt: str,
                          context: Optional[List[Dict[str, str]]] = None,
                          use_rag: bool = False,
                          model_params: Optional["ModelParameters"] = None) -> str:
        """
        Generate a response from the model.

        Args:
            model_id: ID of the model to use. Unknown IDs are replaced by
                the configured default model.
            prompt: User prompt.
            context: Optional conversation context.
            use_rag: Whether to use RAG (Retrieval Augmented Generation).
            model_params: Optional model parameters.

        Returns:
            Generated response, or an "Error ..." string when the backend
            call fails or returns an unexpected body.
        """
        if model_id not in self.AVAILABLE_MODELS:
            # The default is used as-is even when it is not listed in
            # AVAILABLE_MODELS; the backend decides whether it exists.
            model_id = self.default_model

        messages = self._build_messages(prompt, context, model_params)
        params = model_params.to_dict() if model_params else {}

        # If RAG is enabled, use OpenWebUI's knowledge database
        if use_rag:
            try:
                return self._call_openwebui(model_id, messages, params)
            except Exception as e:
                print(f"Error calling OpenWebUI API: {str(e)}")
                # Fall back to direct Ollama call without RAG
                print("Falling back to direct Ollama call without RAG")

        try:
            return self._call_ollama(model_id, messages, params)
        except Exception as e:
            print(f"Error calling Ollama API: {str(e)}")
            return f"Error generating response: {str(e)}"
|
||||
|
||||
|
||||
# Create a singleton instance shared by importers of this module.
# It reads its model and endpoint settings from config when constructed.
model_service = ModelService()
|
||||
@@ -0,0 +1,28 @@
|
||||
# Core dependencies with fixed versions for stability
|
||||
fastapi==0.103.1
|
||||
uvicorn[standard]==0.23.2
|
||||
pydantic==2.3.0
|
||||
python-dotenv==1.0.0
|
||||
|
||||
# AI/ML dependencies
|
||||
pinecone-client==2.2.2
|
||||
langchain==0.0.267
|
||||
sentence-transformers==2.2.2
|
||||
numpy==1.26.4
|
||||
openai==1.3.0
|
||||
|
||||
# For local storage (will be replaced with MySQL in production)
|
||||
sqlalchemy==2.0.20
|
||||
pymysql==1.1.0
|
||||
cryptography==41.0.3 # Required for PyMySQL
|
||||
|
||||
# Utilities
|
||||
tqdm==4.67.1
|
||||
requests==2.32.3
|
||||
tenacity==8.5.0 # For retrying API calls
|
||||
|
||||
# Production dependencies
|
||||
gunicorn==21.2.0  # Production process manager (FastAPI is ASGI, so run it with uvicorn workers)
|
||||
python-json-logger==2.0.7 # Structured logging
|
||||
prometheus-client==0.17.1 # Metrics
|
||||
sentry-sdk==1.39.1 # Error tracking
|
||||
@@ -0,0 +1,19 @@
|
||||
# Core dependencies
|
||||
fastapi==0.103.1
|
||||
uvicorn==0.23.2
|
||||
pydantic==2.3.0
|
||||
python-dotenv==1.0.0
|
||||
|
||||
# AI/ML dependencies
|
||||
pinecone-client==2.2.2
|
||||
langchain==0.0.267
|
||||
sentence-transformers==2.2.2
|
||||
numpy==1.26.4
|
||||
|
||||
# For local storage
|
||||
sqlalchemy==2.0.20
|
||||
# sqlite3 is part of Python's standard library, so it needs no pip requirement
|
||||
|
||||
# Utilities
|
||||
tqdm==4.67.1
|
||||
requests==2.32.3
|
||||
@@ -0,0 +1,21 @@
|
||||
"""
|
||||
Script to run the AI service.
|
||||
"""
|
||||
|
||||
import uvicorn
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Add the parent directory to the path so we can import ai_service
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||
|
||||
from ai_service.config import config
|
||||
|
||||
def main():
    """Start the AI service with uvicorn on the host and port from config."""
    host = config.API_HOST
    port = config.API_PORT
    print(f"Starting AI service on {host}:{port}")
    # The app is given as an import string, which uvicorn needs for reload.
    uvicorn.run("ai_service.api:app", host=host, port=port, reload=True)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user