Initial commit for deployment

This commit is contained in:
Iyeoluwa Akinrinola
2025-05-09 15:41:16 +01:00
commit ac98999507
54 changed files with 4343 additions and 0 deletions
+18
View File
@@ -0,0 +1,18 @@
# API configuration
API_HOST=0.0.0.0
API_PORT=5251
# OpenWebUI configuration
OPENWEBUI_URL=http://104.225.217.215:8080
OPENWEBUI_API_KEY=your-openwebui-api-key # Replace with the actual key in your .env file
# Ollama configuration
OLLAMA_API_URL=http://localhost:11434
DEFAULT_MODEL=llama3.1
# Local storage
SQLITE_DB_PATH=ai_service/data/chatbot.db
# Document processing
CHUNK_SIZE=1000
CHUNK_OVERLAP=200
+33
View File
@@ -0,0 +1,33 @@
# API configuration
API_HOST=0.0.0.0
API_PORT=5251
# Pinecone configuration
# Sign up at https://www.pinecone.io/ to get your API key
PINECONE_API_KEY=your-pinecone-api-key-here
PINECONE_ENVIRONMENT=your-pinecone-environment-here
PINECONE_INDEX_NAME=chatbot-index
# Model configuration
# Sign up at https://platform.openai.com/ to get your API key
DEFAULT_MODEL=gpt-3.5-turbo
OPENAI_API_KEY=your-openai-api-key-here
# Local storage
# Path to SQLite database (will be replaced with MySQL in production)
SQLITE_DB_PATH=ai_service/data/chatbot.db
# Document processing
# Adjust these values based on your needs
CHUNK_SIZE=1000
CHUNK_OVERLAP=200
# Embedding model
# Options: all-MiniLM-L6-v2 (default), paraphrase-MiniLM-L3-v2 (smaller/faster)
EMBEDDING_MODEL=all-MiniLM-L6-v2
# Production settings
# Set to 'production' in production environment
ENVIRONMENT=production
LOG_LEVEL=INFO
ENABLE_MOCK=false
+94
View File
@@ -0,0 +1,94 @@
# AI Service for Chatbot Application
This is the AI service component for the chatbot application. It provides APIs for document processing, embeddings, and chat functionality.
## Features
- Document processing and embedding
- Retrieval-augmented generation (RAG)
- Chat functionality with model switching
- Team chat support
## Project Structure
```
ai_service/
├── embeddings/ # Embedding and document processing services
├── models/ # Model and chat services
├── utils/ # Utility functions
├── data/ # Data storage
├── config.py # Configuration settings
├── api.py # FastAPI application
└── run.py # Script to run the service
```
## Setup Instructions
1. Create a virtual environment:
```
python -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
```
2. Install dependencies:
```
pip install -r requirements.txt
```
3. Copy `.env.example` to `.env` and update the values:
```
cp .env.example .env
# Edit the .env file with appropriate values
```
4. Run the service:
```
python run.py
```
## API Documentation
Once the service is running, you can access the API documentation at:
- Swagger UI: http://localhost:5251/docs
- ReDoc: http://localhost:5251/redoc
## Deployment
To deploy the service:
1. Make the deployment script executable:
```
chmod +x deploy.sh
```
2. Run the deployment script:
```
./deploy.sh
```
This will start the service on port 5251 using uvicorn with nohup.
## API Endpoints
### Document Endpoints
- `POST /documents` - Process a document for embedding
- `GET /documents` - Get all documents
- `GET /documents/{doc_id}` - Get a document by ID
- `DELETE /documents/{doc_id}` - Delete a document
- `POST /documents/search` - Search for documents
### Model Endpoints
- `GET /models` - Get available models
- `GET /models/{model_id}` - Get information about a model
### Chat Endpoints
- `POST /chats` - Create a new chat
- `GET /chats/user/{user_id}` - Get all chats for a user
- `GET /chats/{chat_id}` - Get a chat by ID
- `POST /chats/{chat_id}/messages` - Send a message to a chat
- `POST /chats/{chat_id}/members/{user_id}` - Add a user to a team chat
- `DELETE /chats/{chat_id}/members/{user_id}` - Remove a user from a team chat
- `DELETE /chats/{chat_id}` - Delete a chat
+6
View File
@@ -0,0 +1,6 @@
"""
AI service package.
"""
# Import for easier access
from ai_service.api import app
+28
View File
@@ -0,0 +1,28 @@
{
"e2b1bdc2-a384-4775-9c14-42b221e5554f": {
"id": "e2b1bdc2-a384-4775-9c14-42b221e5554f",
"title": "Test Chat",
"user_id": "test_user",
"model_id": "gpt-3.5-turbo",
"is_team_chat": false,
"created_at": "2025-05-06T11:50:43.558931",
"updated_at": "2025-05-06T11:51:20.982846",
"messages": [
{
"id": "865a1e57-c71f-4bab-a4ba-56d630a38631",
"content": "Hello, AI!",
"user_id": "test_user",
"is_user_message": true,
"timestamp": "2025-05-06T11:51:20.051537"
},
{
"id": "eb755b4f-fe55-4bec-b77f-ed20941df360",
"content": "Error generating response: 401 Client Error: Unauthorized for url: https://api.openai.com/v1/chat/completions",
"user_id": null,
"is_user_message": false,
"timestamp": "2025-05-06T11:51:20.982829"
}
],
"team_members": []
}
}
+377
View File
@@ -0,0 +1,377 @@
"""
FastAPI application for the AI service.
"""
from fastapi import FastAPI, HTTPException, Depends, Body, Query, Path
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional
from ai_service.config import config
from ai_service.embeddings.document_service import document_service
from ai_service.models.model_service import model_service
from ai_service.models.chat_service import chat_service
from ai_service.models.model_parameters import ModelParameters
# Create FastAPI app
app = FastAPI(
title="AI Service API",
description="API for the AI service",
version="1.0.0"
)
# Add CORS middleware
app.add_middleware(
CORSMiddleware,
allow_origins=["*"], # Allow all origins
allow_credentials=True,
allow_methods=["*"], # Allow all methods
allow_headers=["*"], # Allow all headers
)
# Define API models
class DocumentRequest(BaseModel):
"""Request model for document processing."""
content: str = Field(..., description="Document content")
title: str = Field(..., description="Document title")
description: Optional[str] = Field(None, description="Document description")
metadata: Optional[Dict[str, Any]] = Field(None, description="Additional metadata")
class DocumentResponse(BaseModel):
"""Response model for document processing."""
id: str = Field(..., description="Document ID")
title: str = Field(..., description="Document title")
description: str = Field(..., description="Document description")
chunk_count: int = Field(..., description="Number of chunks")
metadata: Dict[str, Any] = Field(..., description="Additional metadata")
class SearchRequest(BaseModel):
"""Request model for document search."""
query: str = Field(..., description="Search query")
top_k: int = Field(5, description="Number of results to return")
class SearchResult(BaseModel):
"""Model for a search result."""
id: str = Field(..., description="Result ID")
score: float = Field(..., description="Similarity score")
metadata: Dict[str, Any] = Field(..., description="Result metadata")
class ModelInfo(BaseModel):
"""Model for model information."""
id: str = Field(..., description="Model ID")
name: str = Field(..., description="Model name")
description: str = Field(..., description="Model description")
provider: str = Field(..., description="Model provider")
max_tokens: int = Field(..., description="Maximum tokens")
is_default: bool = Field(..., description="Whether this is the default model")
class ChatRequest(BaseModel):
"""Request model for creating a chat."""
user_id: str = Field(..., description="User ID")
title: Optional[str] = Field(None, description="Chat title")
model_id: Optional[str] = Field(None, description="Model ID")
is_team_chat: bool = Field(False, description="Whether this is a team chat")
class MessageRequest(BaseModel):
"""Request model for sending a message."""
message: str = Field(..., description="Message content")
user_id: str = Field(..., description="User ID")
use_rag: bool = Field(False, description="Whether to use RAG")
# Model parameters
temperature: Optional[float] = Field(None, description="Controls randomness: higher values mean more random completions")
max_tokens: Optional[int] = Field(None, description="Maximum number of tokens to generate")
top_p: Optional[float] = Field(None, description="Nucleus sampling parameter")
frequency_penalty: Optional[float] = Field(None, description="Penalizes repeated tokens")
presence_penalty: Optional[float] = Field(None, description="Penalizes repeated topics")
stop_sequences: Optional[List[str]] = Field(None, description="Sequences where the API will stop generating")
system_prompt: Optional[str] = Field(None, description="System prompt to guide the model's behavior")
# Additional advanced parameters
min_p: Optional[float] = Field(None, description="Minimum probability threshold for token selection")
top_k: Optional[int] = Field(None, description="Only sample from the top k tokens")
repeat_penalty: Optional[float] = Field(None, description="Penalty for repeating tokens")
function_calling: Optional[bool] = Field(None, description="Whether to enable function calling")
class Message(BaseModel):
"""Model for a message."""
id: str = Field(..., description="Message ID")
content: str = Field(..., description="Message content")
user_id: Optional[str] = Field(None, description="User ID")
is_user_message: bool = Field(..., description="Whether this is a user message")
timestamp: str = Field(..., description="Message timestamp")
class Chat(BaseModel):
"""Model for a chat."""
id: str = Field(..., description="Chat ID")
title: str = Field(..., description="Chat title")
user_id: str = Field(..., description="User ID")
model_id: str = Field(..., description="Model ID")
is_team_chat: bool = Field(..., description="Whether this is a team chat")
created_at: str = Field(..., description="Creation timestamp")
updated_at: str = Field(..., description="Update timestamp")
messages: List[Message] = Field(..., description="Chat messages")
team_members: List[str] = Field(..., description="Team members")
# Define API endpoints
@app.get("/health")
async def health_check():
"""
Health check endpoint.
Returns:
Health status.
"""
return {"status": "healthy"}
# Document endpoints
@app.post("/documents", response_model=DocumentResponse)
async def process_document(request: DocumentRequest):
"""
Process a document for embedding.
Args:
request: Document processing request.
Returns:
Processed document information.
"""
doc_id = document_service.process_document(
content=request.content,
title=request.title,
description=request.description,
metadata=request.metadata
)
return document_service.get_document(doc_id)
@app.get("/documents", response_model=List[DocumentResponse])
async def get_all_documents():
"""
Get all documents.
Returns:
List of document information.
"""
return document_service.get_all_documents()
@app.get("/documents/{doc_id}", response_model=DocumentResponse)
async def get_document(doc_id: str):
"""
Get a document by ID.
Args:
doc_id: Document ID.
Returns:
Document information.
"""
doc = document_service.get_document(doc_id)
if not doc:
raise HTTPException(status_code=404, detail="Document not found")
return doc
@app.delete("/documents/{doc_id}")
async def delete_document(doc_id: str):
"""
Delete a document.
Args:
doc_id: Document ID.
Returns:
Deletion status.
"""
success = document_service.delete_document(doc_id)
if not success:
raise HTTPException(status_code=404, detail="Document not found")
return {"status": "success", "message": "Document deleted"}
@app.post("/documents/search", response_model=List[SearchResult])
async def search_documents(request: SearchRequest):
"""
Search for documents.
Args:
request: Search request.
Returns:
Search results.
"""
results = document_service.search_documents(
query=request.query,
top_k=request.top_k
)
return results
# Model endpoints
@app.get("/models", response_model=List[ModelInfo])
async def get_available_models():
"""
Get available models.
Returns:
List of model information.
"""
return model_service.get_available_models()
@app.get("/models/{model_id}", response_model=ModelInfo)
async def get_model_info(model_id: str):
"""
Get information about a model.
Args:
model_id: Model ID.
Returns:
Model information.
"""
model_info = model_service.get_model_info(model_id)
if not model_info:
raise HTTPException(status_code=404, detail="Model not found")
return model_info
# Chat endpoints
@app.post("/chats", response_model=Chat)
async def create_chat(request: ChatRequest):
"""
Create a new chat.
Args:
request: Chat creation request.
Returns:
Created chat.
"""
chat_id = chat_service.create_chat(
user_id=request.user_id,
title=request.title,
model_id=request.model_id,
is_team_chat=request.is_team_chat
)
return chat_service.get_chat(chat_id)
@app.get("/chats/user/{user_id}", response_model=List[Chat])
async def get_user_chats(user_id: str):
"""
Get all chats for a user.
Args:
user_id: User ID.
Returns:
List of chats.
"""
return chat_service.get_user_chats(user_id)
@app.get("/chats/{chat_id}", response_model=Chat)
async def get_chat(chat_id: str):
"""
Get a chat by ID.
Args:
chat_id: Chat ID.
Returns:
Chat information.
"""
chat = chat_service.get_chat(chat_id)
if not chat:
raise HTTPException(status_code=404, detail="Chat not found")
return chat
@app.post("/chats/{chat_id}/messages", response_model=Message)
async def send_message(chat_id: str, request: MessageRequest):
"""
Send a message to a chat.
Args:
chat_id: Chat ID.
request: Message request with optional model parameters.
Returns:
Bot response message.
"""
try:
# Extract model parameters from the request
response = chat_service.get_chat_response(
chat_id=chat_id,
message=request.message,
user_id=request.user_id,
use_rag=request.use_rag,
temperature=request.temperature,
max_tokens=request.max_tokens,
top_p=request.top_p,
frequency_penalty=request.frequency_penalty,
presence_penalty=request.presence_penalty,
stop_sequences=request.stop_sequences,
system_prompt=request.system_prompt,
min_p=request.min_p,
top_k=request.top_k,
repeat_penalty=request.repeat_penalty,
function_calling=request.function_calling
)
return response
except ValueError as e:
raise HTTPException(status_code=404, detail=str(e))
@app.post("/chats/{chat_id}/members/{user_id}")
async def add_team_member(chat_id: str, user_id: str):
"""
Add a user to a team chat.
Args:
chat_id: Chat ID.
user_id: User ID.
Returns:
Addition status.
"""
success = chat_service.add_team_member(chat_id, user_id)
if not success:
raise HTTPException(status_code=400, detail="Failed to add team member")
return {"status": "success", "message": "Team member added"}
@app.delete("/chats/{chat_id}/members/{user_id}")
async def remove_team_member(chat_id: str, user_id: str):
"""
Remove a user from a team chat.
Args:
chat_id: Chat ID.
user_id: User ID.
Returns:
Removal status.
"""
success = chat_service.remove_team_member(chat_id, user_id)
if not success:
raise HTTPException(status_code=400, detail="Failed to remove team member")
return {"status": "success", "message": "Team member removed"}
@app.delete("/chats/{chat_id}")
async def delete_chat(chat_id: str):
"""
Delete a chat.
Args:
chat_id: Chat ID.
Returns:
Deletion status.
"""
success = chat_service.delete_chat(chat_id)
if not success:
raise HTTPException(status_code=404, detail="Chat not found")
return {"status": "success", "message": "Chat deleted"}
+37
View File
@@ -0,0 +1,37 @@
"""
Configuration settings for the AI service.
"""
import os
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
class Config:
"""Base configuration."""
# API configuration
API_HOST = os.environ.get('API_HOST', '0.0.0.0')
API_PORT = int(os.environ.get('API_PORT', 5251))
# OpenWebUI configuration
OPENWEBUI_URL = os.environ.get('OPENWEBUI_URL', 'http://104.225.217.215:8080')
OPENWEBUI_API_KEY = os.environ.get('OPENWEBUI_API_KEY', '')
# Ollama configuration
OLLAMA_API_URL = os.environ.get('OLLAMA_API_URL', 'http://localhost:11434')
DEFAULT_MODEL = os.environ.get('DEFAULT_MODEL', 'llama3.1')
# Local storage
SQLITE_DB_PATH = os.environ.get('SQLITE_DB_PATH', 'ai_service/data/chatbot.db')
# Document processing
CHUNK_SIZE = int(os.environ.get('CHUNK_SIZE', 1000))
CHUNK_OVERLAP = int(os.environ.get('CHUNK_OVERLAP', 200))
# Embedding model
EMBEDDING_MODEL = os.environ.get('EMBEDDING_MODEL', 'all-MiniLM-L6-v2')
config = Config()
+28
View File
@@ -0,0 +1,28 @@
{
"48b04d66-3ef4-440b-8508-ced930aa42a9": {
"id": "48b04d66-3ef4-440b-8508-ced930aa42a9",
"title": "Test Chat",
"user_id": "test_user",
"model_id": "gpt-3.5-turbo",
"is_team_chat": false,
"created_at": "2025-05-05T20:29:54.936964",
"updated_at": "2025-05-05T20:29:55.394906",
"messages": [
{
"id": "9daafee5-f00c-4493-96ea-96492f97482e",
"content": "Tell me about artificial intelligence",
"user_id": "test_user",
"is_user_message": true,
"timestamp": "2025-05-05T20:29:54.971667"
},
{
"id": "0337e0b1-1e18-4d09-8b8b-c5ee295870a0",
"content": "Error generating response: 401 Client Error: Unauthorized for url: https://api.openai.com/v1/chat/completions",
"user_id": null,
"is_user_message": false,
"timestamp": "2025-05-05T20:29:55.394891"
}
],
"team_members": []
}
}
+20
View File
@@ -0,0 +1,20 @@
#!/bin/bash
# Create a directory for the AI service logs
mkdir -p logs
# Activate virtual environment
source venv/bin/activate
# Export environment variables
export API_HOST=0.0.0.0
export API_PORT=5251
# Make sure the Python path includes the current directory
export PYTHONPATH=$PYTHONPATH:$(pwd)
# Run the application with uvicorn and nohup
nohup uvicorn ai_service.run:app --host $API_HOST --port $API_PORT > logs/ai_service.log 2>&1 &
echo "AI Service started on port $API_PORT. Check ai_service.log for output."
echo "To stop the application, find the process ID with 'ps aux | grep uvicorn' and kill it with 'kill <PID>'."
+261
View File
@@ -0,0 +1,261 @@
"""
Service for document processing and chunking.
"""
import os
import json
import uuid
import requests
import base64
from typing import List, Dict, Any, Optional
from langchain_text_splitters import RecursiveCharacterTextSplitter
from ai_service.config import config
class DocumentService:
"""Service for document processing and chunking."""
def __init__(self):
"""Initialize the document service."""
self.chunk_size = config.CHUNK_SIZE
self.chunk_overlap = config.CHUNK_OVERLAP
self.text_splitter = RecursiveCharacterTextSplitter(
chunk_size=self.chunk_size,
chunk_overlap=self.chunk_overlap,
length_function=len
)
# OpenWebUI configuration
self.openwebui_url = config.OPENWEBUI_URL
self.openwebui_api_key = config.OPENWEBUI_API_KEY
# Ensure data directory exists
os.makedirs(os.path.dirname(config.SQLITE_DB_PATH), exist_ok=True)
# For now, we'll store document metadata in a simple JSON file
self.metadata_file = os.path.join(os.path.dirname(config.SQLITE_DB_PATH), 'document_metadata.json')
self._load_metadata()
def _load_metadata(self):
"""Load document metadata from file."""
if os.path.exists(self.metadata_file):
try:
with open(self.metadata_file, 'r') as f:
self.documents = json.load(f)
except Exception as e:
print(f"Error loading document metadata: {str(e)}")
self.documents = {}
else:
self.documents = {}
def _save_metadata(self):
"""Save document metadata to file."""
try:
with open(self.metadata_file, 'w') as f:
json.dump(self.documents, f, indent=2)
except Exception as e:
print(f"Error saving document metadata: {str(e)}")
def process_document(self, content: str, title: str,
description: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None) -> str:
"""
Process a document for embedding.
Args:
content: Document content.
title: Document title.
description: Optional document description.
metadata: Optional additional metadata.
Returns:
Document ID.
"""
# Generate a unique ID for the document
doc_id = str(uuid.uuid4())
# Upload the document to OpenWebUI for RAG processing
try:
# Prepare headers
headers = {"Content-Type": "application/json"}
if self.openwebui_api_key:
headers["Authorization"] = f"Bearer {self.openwebui_api_key}"
# Prepare the document data
document_data = {
"filename": f"{title}.txt",
"content": base64.b64encode(content.encode('utf-8')).decode('utf-8'),
"description": description or title
}
# Upload to OpenWebUI
response = requests.post(
f"{self.openwebui_url}/api/knowledge/upload",
headers=headers,
json=document_data,
timeout=60
)
response.raise_for_status()
result = response.json()
# Get the OpenWebUI document ID
openwebui_doc_id = result.get('id', '')
# Store document metadata
self.documents[doc_id] = {
'id': doc_id,
'title': title,
'description': description or '',
'openwebui_id': openwebui_doc_id,
'metadata': metadata or {}
}
# Save metadata to file
self._save_metadata()
return doc_id
except Exception as e:
print(f"Error uploading document to OpenWebUI: {str(e)}")
# Fall back to local processing if OpenWebUI upload fails
print("Falling back to local document processing")
# Split the document into chunks for local reference
chunks = self.text_splitter.split_text(content)
# Store document metadata
self.documents[doc_id] = {
'id': doc_id,
'title': title,
'description': description or '',
'chunk_count': len(chunks),
'openwebui_upload_failed': True,
'metadata': metadata or {}
}
# Save metadata to file
self._save_metadata()
return doc_id
def get_document(self, doc_id: str) -> Optional[Dict[str, Any]]:
"""
Get document metadata.
Args:
doc_id: Document ID.
Returns:
Document metadata if found, None otherwise.
"""
return self.documents.get(doc_id)
def get_all_documents(self) -> List[Dict[str, Any]]:
"""
Get all document metadata.
Returns:
List of document metadata.
"""
# Get documents from local storage
local_documents = list(self.documents.values())
# Try to get documents from OpenWebUI as well
try:
# Prepare headers
headers = {"Content-Type": "application/json"}
if self.openwebui_api_key:
headers["Authorization"] = f"Bearer {self.openwebui_api_key}"
# Get documents from OpenWebUI
response = requests.get(
f"{self.openwebui_url}/api/knowledge",
headers=headers,
timeout=30
)
if response.status_code == 200:
openwebui_docs = response.json()
# Update local documents with OpenWebUI information
for doc in local_documents:
if 'openwebui_id' in doc:
for openwebui_doc in openwebui_docs:
if openwebui_doc.get('id') == doc['openwebui_id']:
doc['openwebui_status'] = 'active'
doc['openwebui_info'] = openwebui_doc
break
except Exception as e:
print(f"Error getting documents from OpenWebUI: {str(e)}")
return local_documents
def delete_document(self, doc_id: str) -> bool:
"""
Delete a document and its chunks.
Args:
doc_id: Document ID.
Returns:
True if deletion was successful, False otherwise.
"""
if doc_id not in self.documents:
return False
# Check if document was uploaded to OpenWebUI
doc = self.documents[doc_id]
openwebui_id = doc.get('openwebui_id')
if openwebui_id:
try:
# Prepare headers
headers = {"Content-Type": "application/json"}
if self.openwebui_api_key:
headers["Authorization"] = f"Bearer {self.openwebui_api_key}"
# Delete from OpenWebUI
response = requests.delete(
f"{self.openwebui_url}/api/knowledge/{openwebui_id}",
headers=headers,
timeout=30
)
if response.status_code != 200:
print(f"Warning: Failed to delete document from OpenWebUI: {response.text}")
except Exception as e:
print(f"Error deleting document from OpenWebUI: {str(e)}")
# Delete document metadata
del self.documents[doc_id]
# Save metadata to file
self._save_metadata()
return True
def search_documents(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
"""
Search for documents similar to a query.
Args:
query: Search query.
top_k: Number of results to return.
Returns:
List of similar document chunks with their metadata.
"""
# Note: We don't need to implement this method anymore since
# RAG is handled directly by OpenWebUI when use_rag=True in the model service
# Return empty results - this is just a placeholder
# The actual RAG functionality is in the model_service.generate_response method
return []
# Create a singleton instance
document_service = DocumentService()
+214
View File
@@ -0,0 +1,214 @@
"""
Service for generating and managing embeddings.
"""
import os
import random
import pinecone
import numpy as np
from typing import List, Dict, Any, Optional, Union
from sentence_transformers import SentenceTransformer
from ai_service.config import config
class EmbeddingService:
"""Service for generating and managing embeddings."""
def __init__(self, use_mock=True): # Default to mock implementation
"""Initialize the embedding service."""
self.use_mock = use_mock
if not self.use_mock:
# Use a smaller model for testing
self.model_name = "paraphrase-MiniLM-L3-v2" # Smaller model than the default
try:
self.model = SentenceTransformer(self.model_name)
print(f"Loaded embedding model: {self.model_name}")
except Exception as e:
print(f"Error loading embedding model: {str(e)}")
self.use_mock = True
print("Falling back to mock implementation")
else:
print("Using mock embedding implementation")
self.model_name = "mock-model"
self.model = None
self._initialize_pinecone()
def _initialize_pinecone(self):
"""Initialize Pinecone client."""
if not config.PINECONE_API_KEY or not config.PINECONE_ENVIRONMENT:
print("Warning: Pinecone API key or environment not set. Vector storage will not be available.")
self.index = None
return
try:
pinecone.init(
api_key=config.PINECONE_API_KEY,
environment=config.PINECONE_ENVIRONMENT
)
# Check if index exists, create if it doesn't
if config.PINECONE_INDEX_NAME not in pinecone.list_indexes():
pinecone.create_index(
name=config.PINECONE_INDEX_NAME,
dimension=self.model.get_sentence_embedding_dimension(),
metric="cosine"
)
self.index = pinecone.Index(config.PINECONE_INDEX_NAME)
print(f"Connected to Pinecone index: {config.PINECONE_INDEX_NAME}")
except Exception as e:
print(f"Error connecting to Pinecone: {str(e)}")
self.index = None
def generate_embedding(self, text: str) -> List[float]:
"""
Generate an embedding for a text.
Args:
text: Text to embed.
Returns:
Embedding vector.
"""
if self.use_mock:
# Generate a mock embedding vector (384 dimensions for consistency)
return [random.random() for _ in range(384)]
embedding = self.model.encode(text)
return embedding.tolist()
def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
"""
Generate embeddings for multiple texts.
Args:
texts: List of texts to embed.
Returns:
List of embedding vectors.
"""
if self.use_mock:
# Generate mock embedding vectors
return [[random.random() for _ in range(384)] for _ in texts]
embeddings = self.model.encode(texts)
return embeddings.tolist()
def store_embeddings(self, ids: List[str], embeddings: List[List[float]],
metadata: Optional[List[Dict[str, Any]]] = None) -> bool:
"""
Store embeddings in Pinecone.
Args:
ids: List of IDs for the embeddings.
embeddings: List of embedding vectors.
metadata: Optional list of metadata dictionaries.
Returns:
True if storage was successful, False otherwise.
"""
if self.use_mock:
print(f"Mock: Stored {len(ids)} embeddings")
return True
if self.index is None:
print("Warning: Pinecone index not available. Embeddings not stored.")
return False
if metadata is None:
metadata = [{} for _ in ids]
vectors = [
(id, embedding, meta)
for id, embedding, meta in zip(ids, embeddings, metadata)
]
try:
self.index.upsert(vectors=vectors)
return True
except Exception as e:
print(f"Error storing embeddings in Pinecone: {str(e)}")
return False
def search_similar(self, query_embedding: List[float], top_k: int = 5) -> List[Dict[str, Any]]:
"""
Search for similar embeddings in Pinecone.
Args:
query_embedding: Query embedding vector.
top_k: Number of results to return.
Returns:
List of similar items with their metadata.
"""
if self.use_mock:
# Generate mock search results
print(f"Mock: Searching for similar embeddings (top_k={top_k})")
mock_results = []
for i in range(min(top_k, 3)): # Return at most 3 mock results
mock_results.append({
'id': f"mock_doc_{i}",
'score': 0.9 - (i * 0.1), # Decreasing similarity scores
'metadata': {
'document_id': f"mock_doc_{i}",
'chunk_index': i,
'title': f"Mock Document {i}",
'description': f"This is a mock document {i}",
'chunk_text': f"This is the content of mock document {i}..."
}
})
return mock_results
if self.index is None:
print("Warning: Pinecone index not available. Search not performed.")
return []
try:
results = self.index.query(
vector=query_embedding,
top_k=top_k,
include_metadata=True
)
return [
{
'id': match['id'],
'score': match['score'],
'metadata': match.get('metadata', {})
}
for match in results.get('matches', [])
]
except Exception as e:
print(f"Error searching in Pinecone: {str(e)}")
return []
def delete_embeddings(self, ids: List[str]) -> bool:
"""
Delete embeddings from Pinecone.
Args:
ids: List of IDs to delete.
Returns:
True if deletion was successful, False otherwise.
"""
if self.use_mock:
print(f"Mock: Deleted {len(ids)} embeddings")
return True
if self.index is None:
print("Warning: Pinecone index not available. Deletion not performed.")
return False
try:
self.index.delete(ids=ids)
return True
except Exception as e:
print(f"Error deleting embeddings from Pinecone: {str(e)}")
return False
# Create a singleton instance
embedding_service = EmbeddingService()
+309
View File
@@ -0,0 +1,309 @@
"""
Service for chat functionality.
"""
import os
import json
import uuid
from datetime import datetime
from typing import List, Dict, Any, Optional
from ai_service.config import config
from ai_service.models.model_service import model_service
from ai_service.models.model_parameters import ModelParameters
class ChatService:
"""Service for chat functionality."""
def __init__(self):
"""Initialize the chat service."""
# Ensure data directory exists
os.makedirs(os.path.dirname(config.SQLITE_DB_PATH), exist_ok=True)
# For now, we'll store chat data in a simple JSON file
self.chats_file = os.path.join(os.path.dirname(config.SQLITE_DB_PATH), 'chats.json')
self._load_chats()
def _load_chats(self):
"""Load chats from file."""
if os.path.exists(self.chats_file):
try:
with open(self.chats_file, 'r') as f:
self.chats = json.load(f)
except Exception as e:
print(f"Error loading chats: {str(e)}")
self.chats = {}
else:
self.chats = {}
def _save_chats(self):
"""Save chats to file."""
try:
with open(self.chats_file, 'w') as f:
json.dump(self.chats, f, indent=2)
except Exception as e:
print(f"Error saving chats: {str(e)}")
def create_chat(self, user_id: str, title: Optional[str] = None,
model_id: Optional[str] = None, is_team_chat: bool = False) -> str:
"""
Create a new chat.
Args:
user_id: ID of the user creating the chat.
title: Optional title for the chat.
model_id: Optional model ID to use for this chat.
is_team_chat: Whether this is a team chat.
Returns:
ID of the created chat.
"""
# Generate a unique ID for the chat
chat_id = str(uuid.uuid4())
# Create chat data
self.chats[chat_id] = {
'id': chat_id,
'title': title or f"Chat {len(self.chats) + 1}",
'user_id': user_id,
'model_id': model_id or config.DEFAULT_MODEL,
'is_team_chat': is_team_chat,
'created_at': datetime.utcnow().isoformat(),
'updated_at': datetime.utcnow().isoformat(),
'messages': [],
'team_members': [user_id] if is_team_chat else []
}
# Save chats to file
self._save_chats()
return chat_id
def add_message(self, chat_id: str, content: str, user_id: str,
is_user_message: bool = True) -> Dict[str, Any]:
"""
Add a message to a chat.
Args:
chat_id: ID of the chat.
content: Message content.
user_id: ID of the user sending the message.
is_user_message: Whether this is a user message (vs. bot message).
Returns:
Added message.
"""
if chat_id not in self.chats:
raise ValueError(f"Chat with ID {chat_id} not found")
# Create message data
message = {
'id': str(uuid.uuid4()),
'content': content,
'user_id': user_id if is_user_message else None,
'is_user_message': is_user_message,
'timestamp': datetime.utcnow().isoformat()
}
# Add message to chat
self.chats[chat_id]['messages'].append(message)
# Update chat timestamp
self.chats[chat_id]['updated_at'] = datetime.utcnow().isoformat()
# Save chats to file
self._save_chats()
return message
def get_chat(self, chat_id: str) -> Optional[Dict[str, Any]]:
"""
Get a chat by ID.
Args:
chat_id: ID of the chat.
Returns:
Chat data if found, None otherwise.
"""
return self.chats.get(chat_id)
def get_user_chats(self, user_id: str) -> List[Dict[str, Any]]:
"""
Get all chats for a user.
Args:
user_id: ID of the user.
Returns:
List of chat data.
"""
user_chats = []
for chat_id, chat in self.chats.items():
# Include private chats owned by the user
if chat['user_id'] == user_id and not chat['is_team_chat']:
user_chats.append(chat)
# Include team chats where the user is a member
elif chat['is_team_chat'] and user_id in chat['team_members']:
user_chats.append(chat)
# Sort by updated_at (newest first)
user_chats.sort(key=lambda x: x['updated_at'], reverse=True)
return user_chats
def add_team_member(self, chat_id: str, user_id: str) -> bool:
"""
Add a user to a team chat.
Args:
chat_id: ID of the team chat.
user_id: ID of the user to add.
Returns:
True if addition was successful, False otherwise.
"""
if chat_id not in self.chats:
return False
chat = self.chats[chat_id]
if not chat['is_team_chat']:
return False
if user_id not in chat['team_members']:
chat['team_members'].append(user_id)
self._save_chats()
return True
def remove_team_member(self, chat_id: str, user_id: str) -> bool:
"""
Remove a user from a team chat.
Args:
chat_id: ID of the team chat.
user_id: ID of the user to remove.
Returns:
True if removal was successful, False otherwise.
"""
if chat_id not in self.chats:
return False
chat = self.chats[chat_id]
if not chat['is_team_chat']:
return False
if user_id in chat['team_members']:
chat['team_members'].remove(user_id)
self._save_chats()
return True
def delete_chat(self, chat_id: str) -> bool:
"""
Delete a chat.
Args:
chat_id: ID of the chat to delete.
Returns:
True if deletion was successful, False otherwise.
"""
if chat_id not in self.chats:
return False
del self.chats[chat_id]
self._save_chats()
return True
def get_chat_response(self, chat_id: str, message: str, user_id: str,
use_rag: bool = False, temperature: Optional[float] = None,
max_tokens: Optional[int] = None, top_p: Optional[float] = None,
frequency_penalty: Optional[float] = None, presence_penalty: Optional[float] = None,
stop_sequences: Optional[List[str]] = None, system_prompt: Optional[str] = None,
min_p: Optional[float] = None, top_k: Optional[int] = None,
repeat_penalty: Optional[float] = None, function_calling: Optional[bool] = None) -> Dict[str, Any]:
"""
Get a response from the chatbot.
Args:
chat_id: ID of the chat.
message: User message.
user_id: ID of the user sending the message.
use_rag: Whether to use RAG (Retrieval Augmented Generation).
temperature: Controls randomness in the response.
max_tokens: Maximum number of tokens to generate.
top_p: Nucleus sampling parameter.
frequency_penalty: Penalizes repeated tokens.
presence_penalty: Penalizes repeated topics.
stop_sequences: Sequences where the API will stop generating.
system_prompt: System prompt to guide the model's behavior.
min_p: Minimum probability threshold for token selection.
top_k: Only sample from the top k tokens.
repeat_penalty: Penalty for repeating tokens.
function_calling: Whether to enable function calling.
Returns:
Bot response message.
"""
if chat_id not in self.chats:
raise ValueError(f"Chat with ID {chat_id} not found")
chat = self.chats[chat_id]
# Add user message to chat
self.add_message(chat_id, message, user_id, is_user_message=True)
# Prepare conversation context for the model
context = []
for msg in chat['messages'][-10:]: # Use last 10 messages as context
role = "user" if msg['is_user_message'] else "assistant"
context.append({
"role": role,
"content": msg['content']
})
# Create model parameters
model_params = ModelParameters(
temperature=temperature,
max_tokens=max_tokens,
top_p=top_p,
frequency_penalty=frequency_penalty,
presence_penalty=presence_penalty,
stop_sequences=stop_sequences,
system_prompt=system_prompt,
min_p=min_p,
top_k=top_k,
repeat_penalty=repeat_penalty,
function_calling=function_calling
)
# Get response from model
model_id = chat['model_id']
response_text = model_service.generate_response(
model_id=model_id,
prompt=message,
context=context,
use_rag=use_rag,
model_params=model_params
)
# Add bot response to chat
response_message = self.add_message(
chat_id=chat_id,
content=response_text,
user_id=user_id,
is_user_message=False
)
return response_message
# Create a singleton instance
chat_service = ChatService()
+170
View File
@@ -0,0 +1,170 @@
"""
Model parameters for AI models.
"""
from typing import Dict, Any, Optional, List
from pydantic import BaseModel, Field, validator
class ModelParameters(BaseModel):
"""Parameters for AI model generation."""
# Basic parameters
temperature: Optional[float] = Field(
0.7,
description="Controls randomness: 0 is deterministic, higher values are more random",
ge=0.0,
le=2.0
)
max_tokens: Optional[int] = Field(
1000,
description="Maximum number of tokens to generate",
gt=0
)
# Sampling parameters
top_p: Optional[float] = Field(
1.0,
description="Nucleus sampling: consider tokens with top_p probability mass",
ge=0.0,
le=1.0
)
top_k: Optional[int] = Field(
None,
description="Only sample from the top k tokens",
gt=0
)
# Repetition control
frequency_penalty: Optional[float] = Field(
0.0,
description="Penalizes repeated tokens",
ge=-2.0,
le=2.0
)
presence_penalty: Optional[float] = Field(
0.0,
description="Penalizes repeated topics",
ge=-2.0,
le=2.0
)
# Advanced parameters
stop_sequences: Optional[List[str]] = Field(
None,
description="Sequences where the API will stop generating"
)
min_p: Optional[float] = Field(
None,
description="Minimum probability threshold for token selection",
ge=0.0,
le=1.0
)
repeat_penalty: Optional[float] = Field(
None,
description="Penalty for repeating tokens",
ge=0.0
)
presence_penalty_tokens: Optional[int] = Field(
None,
description="Number of tokens to consider for presence penalty",
gt=0
)
# System prompt
system_prompt: Optional[str] = Field(
None,
description="System prompt to guide the model's behavior"
)
# Function calling
function_calling: Optional[bool] = Field(
None,
description="Whether to enable function calling"
)
# Additional parameters that might be model-specific
extra_params: Optional[Dict[str, Any]] = Field(
None,
description="Additional model-specific parameters"
)
@validator('temperature', 'top_p', 'frequency_penalty', 'presence_penalty', pre=True)
def validate_float_params(cls, v):
"""Validate float parameters."""
if v is not None and not isinstance(v, bool): # Avoid converting bool to float
return float(v)
return v
@validator('max_tokens', 'top_k', pre=True)
def validate_int_params(cls, v):
"""Validate integer parameters."""
if v is not None and not isinstance(v, bool): # Avoid converting bool to int
return int(v)
return v
def to_dict(self) -> Dict[str, Any]:
"""
Convert parameters to a dictionary, excluding None values.
Returns:
Dictionary of parameters.
"""
result = {}
for key, value in self.dict().items():
if value is not None and key != 'extra_params':
result[key] = value
# Add any extra parameters
if self.extra_params:
result.update(self.extra_params)
return result
def for_provider(self, provider: str) -> Dict[str, Any]:
"""
Get parameters formatted for a specific provider.
Args:
provider: Provider name (e.g., 'openai', 'ollama', 'anthropic').
Returns:
Dictionary of parameters formatted for the provider.
"""
params = self.to_dict()
# Handle provider-specific parameter naming
if provider == 'openai':
# OpenAI uses 'stop' instead of 'stop_sequences'
if 'stop_sequences' in params:
params['stop'] = params.pop('stop_sequences')
elif provider == 'ollama':
# Ollama has specific parameter handling
# Remove parameters not supported by Ollama
params_to_keep = ['temperature', 'top_p', 'top_k', 'max_tokens', 'stop_sequences']
params = {k: v for k, v in params.items() if k in params_to_keep}
# Rename stop_sequences to stop if present
if 'stop_sequences' in params:
params['stop'] = params.pop('stop_sequences')
elif provider == 'anthropic':
# Anthropic uses 'stop_sequences' and different temperature scaling
if 'temperature' in params:
# Anthropic's temperature is typically 0-1
params['temperature'] = min(params['temperature'], 1.0)
elif provider == 'cohere':
# Cohere uses 'stop_sequences' and has some unique parameters
pass
# Add more provider-specific conversions as needed
return params
+243
View File
@@ -0,0 +1,243 @@
"""
Service for model management and interaction.
"""
import os
import json
import requests
from typing import List, Dict, Any, Optional
from ai_service.config import config
from ai_service.embeddings.document_service import document_service
from ai_service.models.model_parameters import ModelParameters
class ModelService:
"""Service for model management and interaction."""
# Available models
AVAILABLE_MODELS = {
'gemma3': {
'name': 'Gemma 3',
'description': 'Google Gemma 3 model via Ollama',
'provider': 'ollama',
'max_tokens': 8192
},
'llama3.3': {
'name': 'Llama 3 (70B)',
'description': 'Meta Llama 3 70B model via Ollama',
'provider': 'ollama',
'max_tokens': 8192
},
'llama3.1': {
'name': 'Llama 3 (8B)',
'description': 'Meta Llama 3 8B model via Ollama',
'provider': 'ollama',
'max_tokens': 8192
},
'mistral': {
'name': 'Mistral',
'description': 'Mistral AI model via Ollama',
'provider': 'ollama',
'max_tokens': 8192
},
'deepseek': {
'name': 'DeepSeek',
'description': 'DeepSeek model via Ollama',
'provider': 'ollama',
'max_tokens': 8192
}
}
def __init__(self):
"""Initialize the model service."""
self.default_model = config.DEFAULT_MODEL
self.ollama_api_url = config.OLLAMA_API_URL
self.openwebui_url = config.OPENWEBUI_URL
self.openwebui_api_key = config.OPENWEBUI_API_KEY
def get_available_models(self) -> List[Dict[str, Any]]:
"""
Get a list of available models.
Returns:
List of model information dictionaries.
"""
models = []
for model_id, model_info in self.AVAILABLE_MODELS.items():
model_data = {
'id': model_id,
'is_default': model_id == self.default_model,
**model_info
}
models.append(model_data)
return models
def get_model_info(self, model_id: str) -> Optional[Dict[str, Any]]:
"""
Get information about a specific model.
Args:
model_id: ID of the model.
Returns:
Model information dictionary if found, None otherwise.
"""
if model_id not in self.AVAILABLE_MODELS:
return None
return {
'id': model_id,
'is_default': model_id == self.default_model,
**self.AVAILABLE_MODELS[model_id]
}
def generate_response(self, model_id: str, prompt: str,
context: Optional[List[Dict[str, str]]] = None,
use_rag: bool = False,
model_params: Optional[ModelParameters] = None) -> str:
"""
Generate a response from the model.
Args:
model_id: ID of the model to use.
prompt: User prompt.
context: Optional conversation context.
use_rag: Whether to use RAG (Retrieval Augmented Generation).
model_params: Optional model parameters.
Returns:
Generated response.
"""
if model_id not in self.AVAILABLE_MODELS:
model_id = self.default_model
# Get the provider for this model
provider = self.AVAILABLE_MODELS[model_id].get('provider', 'ollama')
# Prepare the messages for the API call
messages = []
# Use custom system prompt if provided, otherwise use default
system_content = "You are a helpful assistant."
if model_params and model_params.system_prompt:
system_content = model_params.system_prompt
messages.append({
"role": "system",
"content": system_content
})
# Add conversation context if provided
if context:
messages.extend(context)
# If RAG is enabled, use OpenWebUI's knowledge database
if use_rag:
# We'll use OpenWebUI's built-in RAG capabilities
# This is handled by sending the request to OpenWebUI instead of Ollama directly
try:
# Prepare the request for OpenWebUI
openwebui_request = {
"model": model_id,
"messages": messages + [{"role": "user", "content": prompt}],
"use_knowledge": True, # Enable RAG
"stream": False
}
# Add model parameters if provided
if model_params:
params = model_params.to_dict()
# Map parameters to OpenWebUI format
if 'temperature' in params:
openwebui_request['temperature'] = params['temperature']
if 'max_tokens' in params:
openwebui_request['max_tokens'] = params['max_tokens']
if 'top_p' in params:
openwebui_request['top_p'] = params['top_p']
# Make the API call to OpenWebUI
headers = {"Content-Type": "application/json"}
if self.openwebui_api_key:
headers["Authorization"] = f"Bearer {self.openwebui_api_key}"
# OpenWebUI API endpoint is /api/chat/completions
response = requests.post(
f"{self.openwebui_url}/api/chat/completions",
headers=headers,
json=openwebui_request,
timeout=60 # Longer timeout for RAG
)
response.raise_for_status()
result = response.json()
# Extract the response content
if 'message' in result:
return result['message']['content']
else:
return "Error: Unexpected response format from OpenWebUI"
except Exception as e:
print(f"Error calling OpenWebUI API: {str(e)}")
# Fall back to direct Ollama call without RAG
print("Falling back to direct Ollama call without RAG")
# Continue to the Ollama API call below
# Add user prompt
messages.append({
"role": "user",
"content": prompt
})
# Prepare API request parameters for Ollama
request_json = {
"model": model_id,
"messages": messages,
"stream": False
}
# Add model parameters if provided
if model_params:
params = model_params.to_dict()
# Map parameters to Ollama format
if 'temperature' in params:
request_json['temperature'] = params['temperature']
if 'top_p' in params:
request_json['top_p'] = params['top_p']
if 'top_k' in params:
request_json['top_k'] = params['top_k']
if 'max_tokens' in params:
request_json['max_tokens'] = params['max_tokens']
# Make the API call to Ollama
try:
# Ollama API endpoint is /api/chat or /api/generate
response = requests.post(
f"{self.ollama_api_url}/api/generate",
headers={"Content-Type": "application/json"},
json=request_json,
timeout=30
)
response.raise_for_status()
result = response.json()
# Extract the response content from Ollama
# The response format depends on whether we're using /api/chat or /api/generate
if 'message' in result and 'content' in result['message']:
# Format for /api/chat
return result['message']['content']
elif 'response' in result:
# Format for /api/generate
return result['response']
else:
return "Error: Unexpected response format from Ollama"
except Exception as e:
print(f"Error calling Ollama API: {str(e)}")
return f"Error generating response: {str(e)}"
# Create a singleton instance
model_service = ModelService()
+28
View File
@@ -0,0 +1,28 @@
# Core dependencies with fixed versions for stability
fastapi==0.103.1
uvicorn[standard]==0.23.2
pydantic==2.3.0
python-dotenv==1.0.0
# AI/ML dependencies
pinecone-client==2.2.2
langchain==0.0.267
sentence-transformers==2.2.2
numpy==1.26.4
openai==1.3.0
# For local storage (will be replaced with MySQL in production)
sqlalchemy==2.0.20
pymysql==1.1.0
cryptography==41.0.3 # Required for PyMySQL
# Utilities
tqdm==4.67.1
requests==2.32.3
tenacity==8.5.0 # For retrying API calls
# Production dependencies
gunicorn==21.2.0 # Production WSGI server
python-json-logger==2.0.7 # Structured logging
prometheus-client==0.17.1 # Metrics
sentry-sdk==1.39.1 # Error tracking
+19
View File
@@ -0,0 +1,19 @@
# Core dependencies
fastapi==0.103.1
uvicorn==0.23.2
pydantic==2.3.0
python-dotenv==1.0.0
# AI/ML dependencies
pinecone-client==2.2.2
langchain==0.0.267
sentence-transformers==2.2.2
numpy==1.26.4
# For local storage
sqlalchemy==2.0.20
sqlite3==0.0.0 # This is a placeholder, sqlite3 is part of Python's standard library
# Utilities
tqdm==4.67.1
requests==2.32.3
+21
View File
@@ -0,0 +1,21 @@
"""
Script to run the AI service.
"""
import uvicorn
import os
import sys
# Add the parent directory to the path so we can import ai_service
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from ai_service.config import config
if __name__ == "__main__":
print(f"Starting AI service on {config.API_HOST}:{config.API_PORT}")
uvicorn.run(
"ai_service.api:app",
host=config.API_HOST,
port=config.API_PORT,
reload=True
)