Initial commit for deployment
This commit is contained in:
@@ -0,0 +1,309 @@
|
||||
"""
|
||||
Service for chat functionality.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
from ai_service.config import config
|
||||
from ai_service.models.model_service import model_service
|
||||
from ai_service.models.model_parameters import ModelParameters
|
||||
|
||||
class ChatService:
    """Service for chat functionality.

    Chats are kept in memory in ``self.chats`` (chat ID -> chat record) and
    the whole mapping is rewritten to a JSON file by every mutating method.
    """

    # Number of previously stored messages sent to the model as context.
    CONTEXT_WINDOW = 10

    def __init__(self):
        """Initialize the chat service and load any previously saved chats."""
        data_dir = os.path.dirname(config.SQLITE_DB_PATH)

        # dirname() is '' for a bare filename and os.makedirs('') raises,
        # so only create the directory when there is one to create.
        if data_dir:
            os.makedirs(data_dir, exist_ok=True)

        # For now, chat data is stored in a simple JSON file next to the DB.
        self.chats_file = os.path.join(data_dir, 'chats.json')
        self._load_chats()

    def _load_chats(self):
        """Load chats from file into ``self.chats``.

        A missing, unreadable or malformed file results in an empty store;
        the problem is printed rather than raised.
        """
        self.chats = {}
        if not os.path.exists(self.chats_file):
            return

        try:
            with open(self.chats_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            print(f"Error loading chats: {str(e)}")
            return

        # Every other method indexes self.chats by chat ID, so anything
        # other than a JSON object would break them later on.
        if isinstance(loaded, dict):
            self.chats = loaded
        else:
            print(
                "Error loading chats: expected a JSON object, "
                f"got {type(loaded).__name__}"
            )

    def _save_chats(self):
        """Save chats to file.

        The data is first written to a sibling temporary file and then moved
        over the real file with ``os.replace``, so a failure while
        serialising cannot truncate the chats that are already on disk.
        Failures are printed, not raised.
        """
        tmp_path = f"{self.chats_file}.tmp"
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(self.chats, f, indent=2)
            os.replace(tmp_path, self.chats_file)
        except (OSError, TypeError, ValueError) as e:
            print(f"Error saving chats: {str(e)}")
            # Best-effort cleanup of a half-written temporary file.
            try:
                os.remove(tmp_path)
            except OSError:
                pass

    @staticmethod
    def _build_context(messages: List[Dict[str, Any]],
                       limit: int = CONTEXT_WINDOW) -> List[Dict[str, str]]:
        """Convert the last ``limit`` stored messages to role/content dicts.

        Args:
            messages: Stored message records of a chat, oldest first.
            limit: Maximum number of trailing messages to include.

        Returns:
            List of ``{"role": ..., "content": ...}`` dicts, oldest first.
        """
        if limit <= 0:
            # messages[-0:] would return the whole list, not nothing.
            return []

        return [
            {
                "role": "user" if msg['is_user_message'] else "assistant",
                "content": msg['content'],
            }
            for msg in messages[-limit:]
        ]

    def create_chat(self, user_id: str, title: Optional[str] = None,
                    model_id: Optional[str] = None, is_team_chat: bool = False) -> str:
        """
        Create a new chat.

        Args:
            user_id: ID of the user creating the chat.
            title: Optional title for the chat.
            model_id: Optional model ID to use for this chat.
            is_team_chat: Whether this is a team chat.

        Returns:
            ID of the created chat.
        """
        # Generate a unique ID for the chat
        chat_id = str(uuid.uuid4())

        # One timestamp for both fields so a new chat has
        # created_at == updated_at.
        now = datetime.utcnow().isoformat()

        self.chats[chat_id] = {
            'id': chat_id,
            'title': title or f"Chat {len(self.chats) + 1}",
            'user_id': user_id,
            'model_id': model_id or config.DEFAULT_MODEL,
            'is_team_chat': is_team_chat,
            'created_at': now,
            'updated_at': now,
            'messages': [],
            # The creator is the first member of a team chat.
            'team_members': [user_id] if is_team_chat else [],
        }

        self._save_chats()

        return chat_id

    def add_message(self, chat_id: str, content: str, user_id: str,
                    is_user_message: bool = True) -> Dict[str, Any]:
        """
        Add a message to a chat.

        Args:
            chat_id: ID of the chat.
            content: Message content.
            user_id: ID of the user sending the message.
            is_user_message: Whether this is a user message (vs. bot message).

        Returns:
            Added message.

        Raises:
            ValueError: If no chat with ``chat_id`` exists.
        """
        if chat_id not in self.chats:
            raise ValueError(f"Chat with ID {chat_id} not found")

        now = datetime.utcnow().isoformat()
        message = {
            'id': str(uuid.uuid4()),
            'content': content,
            # Bot messages are stored without a user ID.
            'user_id': user_id if is_user_message else None,
            'is_user_message': is_user_message,
            'timestamp': now,
        }

        chat = self.chats[chat_id]
        chat['messages'].append(message)
        chat['updated_at'] = now

        self._save_chats()

        return message

    def get_chat(self, chat_id: str) -> Optional[Dict[str, Any]]:
        """
        Get a chat by ID.

        Args:
            chat_id: ID of the chat.

        Returns:
            Chat data if found, None otherwise.
        """
        return self.chats.get(chat_id)

    def get_user_chats(self, user_id: str) -> List[Dict[str, Any]]:
        """
        Get all chats for a user.

        Private chats are included when the user owns them; team chats are
        included when the user is listed in their ``team_members``.

        Args:
            user_id: ID of the user.

        Returns:
            List of chat data, most recently updated first.
        """
        user_chats = []

        for chat in self.chats.values():
            if chat.get('is_team_chat'):
                visible = user_id in chat.get('team_members', [])
            else:
                visible = chat.get('user_id') == user_id

            if visible:
                user_chats.append(chat)

        # Sort by updated_at (newest first)
        user_chats.sort(key=lambda c: c.get('updated_at', ''), reverse=True)

        return user_chats

    def add_team_member(self, chat_id: str, user_id: str) -> bool:
        """
        Add a user to a team chat.

        Args:
            chat_id: ID of the team chat.
            user_id: ID of the user to add.

        Returns:
            False if the chat does not exist or is not a team chat, True
            otherwise (including when the user was already a member).
        """
        chat = self.chats.get(chat_id)
        if chat is None or not chat['is_team_chat']:
            return False

        if user_id not in chat['team_members']:
            chat['team_members'].append(user_id)
            self._save_chats()

        return True

    def remove_team_member(self, chat_id: str, user_id: str) -> bool:
        """
        Remove a user from a team chat.

        Args:
            chat_id: ID of the team chat.
            user_id: ID of the user to remove.

        Returns:
            False if the chat does not exist or is not a team chat, True
            otherwise (including when the user was not a member).
        """
        chat = self.chats.get(chat_id)
        if chat is None or not chat['is_team_chat']:
            return False

        if user_id in chat['team_members']:
            chat['team_members'].remove(user_id)
            self._save_chats()

        return True

    def delete_chat(self, chat_id: str) -> bool:
        """
        Delete a chat.

        Args:
            chat_id: ID of the chat to delete.

        Returns:
            True if deletion was successful, False otherwise.
        """
        if chat_id not in self.chats:
            return False

        del self.chats[chat_id]
        self._save_chats()

        return True

    def get_chat_response(self, chat_id: str, message: str, user_id: str,
                          use_rag: bool = False, temperature: Optional[float] = None,
                          max_tokens: Optional[int] = None, top_p: Optional[float] = None,
                          frequency_penalty: Optional[float] = None, presence_penalty: Optional[float] = None,
                          stop_sequences: Optional[List[str]] = None, system_prompt: Optional[str] = None,
                          min_p: Optional[float] = None, top_k: Optional[int] = None,
                          repeat_penalty: Optional[float] = None, function_calling: Optional[bool] = None) -> Dict[str, Any]:
        """
        Get a response from the chatbot.

        Stores the user message, asks the model for a reply and stores the
        reply as a bot message.

        Args:
            chat_id: ID of the chat.
            message: User message.
            user_id: ID of the user sending the message.
            use_rag: Whether to use RAG (Retrieval Augmented Generation).
            temperature: Controls randomness in the response.
            max_tokens: Maximum number of tokens to generate.
            top_p: Nucleus sampling parameter.
            frequency_penalty: Penalizes repeated tokens.
            presence_penalty: Penalizes repeated topics.
            stop_sequences: Sequences where the API will stop generating.
            system_prompt: System prompt to guide the model's behavior.
            min_p: Minimum probability threshold for token selection.
            top_k: Only sample from the top k tokens.
            repeat_penalty: Penalty for repeating tokens.
            function_calling: Whether to enable function calling.

        Returns:
            Bot response message.

        Raises:
            ValueError: If no chat with ``chat_id`` exists.
        """
        if chat_id not in self.chats:
            raise ValueError(f"Chat with ID {chat_id} not found")

        chat = self.chats[chat_id]

        # Snapshot the history BEFORE storing the new message: the message is
        # also passed as ``prompt`` below, so including it in the context
        # would hand it to the model twice.
        context = self._build_context(chat['messages'], self.CONTEXT_WINDOW)

        # Add user message to chat
        self.add_message(chat_id, message, user_id, is_user_message=True)

        model_params = ModelParameters(
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            presence_penalty=presence_penalty,
            stop_sequences=stop_sequences,
            system_prompt=system_prompt,
            min_p=min_p,
            top_k=top_k,
            repeat_penalty=repeat_penalty,
            function_calling=function_calling,
        )

        # Get response from the model configured for this chat
        response_text = model_service.generate_response(
            model_id=chat['model_id'],
            prompt=message,
            context=context,
            use_rag=use_rag,
            model_params=model_params,
        )

        # Add bot response to chat
        return self.add_message(
            chat_id=chat_id,
            content=response_text,
            user_id=user_id,
            is_user_message=False,
        )
|
||||
|
||||
|
||||
# Create a singleton instance
|
||||
# Shared module-level instance; constructing it runs ChatService.__init__
# (data directory creation and loading of the chats file) at import time.
chat_service = ChatService()
|
||||
@@ -0,0 +1,170 @@
|
||||
"""
|
||||
Model parameters for AI models.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, Optional, List
|
||||
from pydantic import BaseModel, Field, validator
|
||||
|
||||
|
||||
class ModelParameters(BaseModel):
    """Parameters for AI model generation.

    Every field is optional. ``to_dict`` omits fields that are ``None`` and
    ``for_provider`` renames or filters the remaining ones for a provider.
    """

    # Basic parameters
    temperature: Optional[float] = Field(
        0.7,
        description="Controls randomness: 0 is deterministic, higher values are more random",
        ge=0.0,
        le=2.0
    )

    max_tokens: Optional[int] = Field(
        1000,
        description="Maximum number of tokens to generate",
        gt=0
    )

    # Sampling parameters
    top_p: Optional[float] = Field(
        1.0,
        description="Nucleus sampling: consider tokens with top_p probability mass",
        ge=0.0,
        le=1.0
    )

    top_k: Optional[int] = Field(
        None,
        description="Only sample from the top k tokens",
        gt=0
    )

    # Repetition control
    frequency_penalty: Optional[float] = Field(
        0.0,
        description="Penalizes repeated tokens",
        ge=-2.0,
        le=2.0
    )

    presence_penalty: Optional[float] = Field(
        0.0,
        description="Penalizes repeated topics",
        ge=-2.0,
        le=2.0
    )

    # Advanced parameters
    stop_sequences: Optional[List[str]] = Field(
        None,
        description="Sequences where the API will stop generating"
    )

    min_p: Optional[float] = Field(
        None,
        description="Minimum probability threshold for token selection",
        ge=0.0,
        le=1.0
    )

    repeat_penalty: Optional[float] = Field(
        None,
        description="Penalty for repeating tokens",
        ge=0.0
    )

    presence_penalty_tokens: Optional[int] = Field(
        None,
        description="Number of tokens to consider for presence penalty",
        gt=0
    )

    # System prompt
    system_prompt: Optional[str] = Field(
        None,
        description="System prompt to guide the model's behavior"
    )

    # Function calling
    function_calling: Optional[bool] = Field(
        None,
        description="Whether to enable function calling"
    )

    # Additional parameters that might be model-specific
    extra_params: Optional[Dict[str, Any]] = Field(
        None,
        description="Additional model-specific parameters"
    )

    @validator('temperature', 'top_p', 'frequency_penalty', 'presence_penalty',
               'min_p', 'repeat_penalty', pre=True)
    def validate_float_params(cls, v):
        """Convert float-typed parameters to ``float`` before validation.

        ``None`` and ``bool`` values are passed through unchanged.
        """
        if v is not None and not isinstance(v, bool):  # Avoid converting bool to float
            return float(v)
        return v

    @validator('max_tokens', 'top_k', 'presence_penalty_tokens', pre=True)
    def validate_int_params(cls, v):
        """Convert integer-typed parameters to ``int`` before validation.

        ``None`` and ``bool`` values are passed through unchanged.
        """
        if v is not None and not isinstance(v, bool):  # Avoid converting bool to int
            return int(v)
        return v

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert parameters to a dictionary, excluding None values.

        Entries of ``extra_params`` are merged into the top level of the
        result and take precedence over a declared field of the same name.

        Returns:
            Dictionary of parameters.
        """
        result = {
            key: value
            for key, value in self.dict().items()
            if value is not None and key != 'extra_params'
        }

        # Add any extra parameters
        if self.extra_params:
            result.update(self.extra_params)

        return result

    def for_provider(self, provider: str) -> Dict[str, Any]:
        """
        Get parameters formatted for a specific provider.

        Args:
            provider: Provider name (e.g., 'openai', 'ollama', 'anthropic').

        Returns:
            Dictionary of parameters formatted for the provider. Unknown
            providers receive the plain ``to_dict()`` output.
        """
        params = self.to_dict()

        if provider == 'openai':
            # OpenAI uses 'stop' instead of 'stop_sequences'
            if 'stop_sequences' in params:
                params['stop'] = params.pop('stop_sequences')

        elif provider == 'ollama':
            # Our field name -> Ollama option name. Ollama calls the
            # generation limit 'num_predict' and the stop list 'stop';
            # anything not listed here is dropped.
            ollama_names = {
                'temperature': 'temperature',
                'top_p': 'top_p',
                'top_k': 'top_k',
                'min_p': 'min_p',
                'repeat_penalty': 'repeat_penalty',
                'max_tokens': 'num_predict',
                'stop_sequences': 'stop',
            }
            params = {
                ollama_names[key]: value
                for key, value in params.items()
                if key in ollama_names
            }

        elif provider == 'anthropic':
            # Anthropic uses 'stop_sequences'; its temperature is capped at 1.0
            if 'temperature' in params:
                params['temperature'] = min(params['temperature'], 1.0)

        elif provider == 'cohere':
            # Cohere uses 'stop_sequences'; no conversion is applied yet.
            pass

        # Add more provider-specific conversions as needed

        return params
|
||||
@@ -0,0 +1,243 @@
|
||||
"""
|
||||
Service for model management and interaction.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import requests
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
from ai_service.config import config
|
||||
from ai_service.embeddings.document_service import document_service
|
||||
from ai_service.models.model_parameters import ModelParameters
|
||||
|
||||
class ModelService:
    """Service for model management and interaction.

    Responses are generated through the Ollama chat API; when RAG is
    requested the call is first attempted through OpenWebUI.
    """

    # Available models
    AVAILABLE_MODELS = {
        'gemma3': {
            'name': 'Gemma 3',
            'description': 'Google Gemma 3 model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        },
        'llama3.3': {
            'name': 'Llama 3 (70B)',
            'description': 'Meta Llama 3 70B model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        },
        'llama3.1': {
            'name': 'Llama 3 (8B)',
            'description': 'Meta Llama 3 8B model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        },
        'mistral': {
            'name': 'Mistral',
            'description': 'Mistral AI model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        },
        'deepseek': {
            'name': 'DeepSeek',
            'description': 'DeepSeek model via Ollama',
            'provider': 'ollama',
            'max_tokens': 8192
        }
    }

    # Our parameter name -> name of the matching entry in Ollama's "options".
    _OLLAMA_OPTION_NAMES = {
        'temperature': 'temperature',
        'top_p': 'top_p',
        'top_k': 'top_k',
        'min_p': 'min_p',
        'repeat_penalty': 'repeat_penalty',
        'max_tokens': 'num_predict',
        'stop_sequences': 'stop',
    }

    # Parameters forwarded unchanged to OpenWebUI.
    _OPENWEBUI_PARAM_NAMES = ('temperature', 'max_tokens', 'top_p')

    def __init__(self):
        """Initialize the model service from the application config."""
        self.default_model = config.DEFAULT_MODEL
        self.ollama_api_url = config.OLLAMA_API_URL
        self.openwebui_url = config.OPENWEBUI_URL
        self.openwebui_api_key = config.OPENWEBUI_API_KEY

    def get_available_models(self) -> List[Dict[str, Any]]:
        """
        Get a list of available models.

        Returns:
            List of model information dictionaries.
        """
        return [
            {
                'id': model_id,
                'is_default': model_id == self.default_model,
                **model_info
            }
            for model_id, model_info in self.AVAILABLE_MODELS.items()
        ]

    def get_model_info(self, model_id: str) -> Optional[Dict[str, Any]]:
        """
        Get information about a specific model.

        Args:
            model_id: ID of the model.

        Returns:
            Model information dictionary if found, None otherwise.
        """
        model_info = self.AVAILABLE_MODELS.get(model_id)
        if model_info is None:
            return None

        return {
            'id': model_id,
            'is_default': model_id == self.default_model,
            **model_info
        }

    @staticmethod
    def _build_messages(prompt: str,
                        context: Optional[List[Dict[str, str]]],
                        model_params: Optional["ModelParameters"]) -> List[Dict[str, str]]:
        """Assemble system prompt, prior context and the user prompt, in order."""
        # Use custom system prompt if provided, otherwise use default
        system_content = "You are a helpful assistant."
        if model_params and model_params.system_prompt:
            system_content = model_params.system_prompt

        messages = [{"role": "system", "content": system_content}]
        if context:
            messages.extend(context)
        messages.append({"role": "user", "content": prompt})
        return messages

    @classmethod
    def _ollama_options(cls, params: Dict[str, Any]) -> Dict[str, Any]:
        """Translate our parameter names into Ollama's ``options`` entries."""
        return {
            cls._OLLAMA_OPTION_NAMES[key]: value
            for key, value in params.items()
            if key in cls._OLLAMA_OPTION_NAMES
        }

    @staticmethod
    def _extract_content(result: Any) -> Optional[str]:
        """Pull the generated text out of a decoded API response.

        Recognises the chat shape (``message.content``), the generate shape
        (``response``) and the OpenAI-style shape
        (``choices[0].message.content``). Returns None for anything else.
        """
        if not isinstance(result, dict):
            return None

        message = result.get('message')
        if isinstance(message, dict) and 'content' in message:
            return message['content']

        if 'response' in result:
            return result['response']

        choices = result.get('choices')
        if isinstance(choices, list) and choices and isinstance(choices[0], dict):
            choice_message = choices[0].get('message')
            if isinstance(choice_message, dict) and 'content' in choice_message:
                return choice_message['content']

        return None

    def _generate_with_openwebui(self, model_id: str,
                                 messages: List[Dict[str, str]],
                                 params: Dict[str, Any]) -> Optional[str]:
        """Request a RAG-enabled completion from OpenWebUI.

        Returns the reply text, an error string when the response shape is
        not recognised, or None when the call itself failed (the caller
        then falls back to Ollama).
        """
        openwebui_request = {
            "model": model_id,
            "messages": messages,
            "use_knowledge": True,  # Enable RAG
            "stream": False
        }
        for name in self._OPENWEBUI_PARAM_NAMES:
            if name in params:
                openwebui_request[name] = params[name]

        headers = {"Content-Type": "application/json"}
        if self.openwebui_api_key:
            headers["Authorization"] = f"Bearer {self.openwebui_api_key}"

        try:
            response = requests.post(
                f"{self.openwebui_url}/api/chat/completions",
                headers=headers,
                json=openwebui_request,
                timeout=60  # Longer timeout for RAG
            )
            response.raise_for_status()
            content = self._extract_content(response.json())
        except Exception as e:
            # Deliberately broad: any failure here only triggers the fallback.
            print(f"Error calling OpenWebUI API: {str(e)}")
            return None

        if content is None:
            return "Error: Unexpected response format from OpenWebUI"
        return content

    def _generate_with_ollama(self, model_id: str,
                              messages: List[Dict[str, str]],
                              params: Dict[str, Any]) -> str:
        """Request a completion from Ollama's chat endpoint.

        Never raises: failures are printed and reported in the returned
        string.
        """
        request_json = {
            "model": model_id,
            "messages": messages,
            "stream": False
        }

        # Ollama reads sampling parameters from the nested "options" object.
        options = self._ollama_options(params)
        if options:
            request_json["options"] = options

        try:
            # /api/chat is the endpoint that accepts a "messages" list;
            # /api/generate expects a single "prompt" string instead.
            response = requests.post(
                f"{self.ollama_api_url}/api/chat",
                headers={"Content-Type": "application/json"},
                json=request_json,
                timeout=30
            )
            response.raise_for_status()
            content = self._extract_content(response.json())
        except Exception as e:
            # Deliberately broad: callers expect an error string, not a raise.
            print(f"Error calling Ollama API: {str(e)}")
            return f"Error generating response: {str(e)}"

        if content is None:
            return "Error: Unexpected response format from Ollama"
        return content

    def generate_response(self, model_id: str, prompt: str,
                          context: Optional[List[Dict[str, str]]] = None,
                          use_rag: bool = False,
                          model_params: Optional["ModelParameters"] = None) -> str:
        """
        Generate a response from the model.

        Args:
            model_id: ID of the model to use. Unknown IDs are replaced by
                the configured default model.
            prompt: User prompt. It is appended after ``context``, so
                ``context`` should not already contain it.
            context: Optional conversation context.
            use_rag: Whether to use RAG (Retrieval Augmented Generation).
            model_params: Optional model parameters.

        Returns:
            Generated response, or a string starting with "Error" when the
            request failed or the response could not be understood.
        """
        if model_id not in self.AVAILABLE_MODELS:
            model_id = self.default_model

        params = model_params.to_dict() if model_params else {}
        messages = self._build_messages(prompt, context, model_params)

        # If RAG is enabled, go through OpenWebUI's knowledge database first.
        if use_rag:
            rag_reply = self._generate_with_openwebui(model_id, messages, params)
            if rag_reply is not None:
                return rag_reply
            # Fall back to direct Ollama call without RAG
            print("Falling back to direct Ollama call without RAG")

        return self._generate_with_ollama(model_id, messages, params)
|
||||
|
||||
|
||||
# Create a singleton instance
|
||||
# Shared module-level instance; constructing it reads the default model and
# the Ollama/OpenWebUI endpoint settings from config at import time.
model_service = ModelService()
|
||||
Reference in New Issue
Block a user