From f00941cece56636675db109fa3a6d096fb96aac2 Mon Sep 17 00:00:00 2001 From: Iyeoluwa Akinrinola Date: Mon, 12 May 2025 16:30:35 +0100 Subject: [PATCH] Add configurable API_TIMEOUT, Ollama test endpoints, and explicit timeout/connection error handling --- ai_service/api.py | 127 ++++++++++++++++++++++++++++- ai_service/config.py | 1 + ai_service/models/chat_service.py | 42 ++++++---- ai_service/models/model_service.py | 31 +++++-- 4 files changed, 180 insertions(+), 21 deletions(-) diff --git a/ai_service/api.py b/ai_service/api.py index a05d250..9ec2d4e 100644 --- a/ai_service/api.py +++ b/ai_service/api.py @@ -7,6 +7,8 @@ from fastapi import FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel, Field from typing import List, Optional +import uuid +from datetime import datetime, timezone from ai_service.models.model_service import model_service from ai_service.models.chat_service import chat_service @@ -120,7 +122,7 @@ async def test_ollama_connection(): try: # Try to connect to Ollama API - response = requests.get(f"{config.OLLAMA_API_URL}/api/tags", timeout=5) + response = requests.get(f"{config.OLLAMA_API_URL}/api/tags", timeout=config.API_TIMEOUT) response.raise_for_status() # Return the models from Ollama @@ -130,6 +132,106 @@ async def test_ollama_connection(): "ollama_url": config.OLLAMA_API_URL, "models": response.json() } + except requests.exceptions.Timeout as e: + return { + "status": "error", + "message": f"Timeout connecting to Ollama API: {str(e)}. The request exceeded the {config.API_TIMEOUT} second timeout.", + "ollama_url": config.OLLAMA_API_URL + } + except requests.exceptions.ConnectionError as e: + return { + "status": "error", + "message": f"Connection error to Ollama API: {str(e)}. 
Please check if Ollama is running at {config.OLLAMA_API_URL}.", + "ollama_url": config.OLLAMA_API_URL + } + except Exception as e: + return { + "status": "error", + "message": f"Failed to connect to Ollama API: {str(e)}", + "ollama_url": config.OLLAMA_API_URL + } + +@app.post("/test-chat") +async def test_chat_completion(): + """ + Test the chat completion with a simple prompt. + + Returns: + Model response. + """ + try: + # Use the model service directly + response = model_service.generate_response( + model_id=config.DEFAULT_MODEL, + prompt="Hello, how are you?", + context=[], + use_rag=False + ) + + return { + "status": "success", + "model": config.DEFAULT_MODEL, + "response": response, + "ollama_url": config.OLLAMA_API_URL + } + except Exception as e: + return { + "status": "error", + "message": f"Failed to get chat completion: {str(e)}", + "ollama_url": config.OLLAMA_API_URL + } + +@app.post("/test-ollama-direct") +async def test_ollama_direct(): + """ + Test the Ollama API directly with a simple chat request. + + Returns: + Raw Ollama API response. + """ + import requests + + try: + # Prepare a simple chat request + request_json = { + "model": config.DEFAULT_MODEL, + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello, how are you?"} + ], + "stream": False + } + + # Make the API call to Ollama + print(f"Sending direct request to Ollama API at: {config.OLLAMA_API_URL}/api/chat") + response = requests.post( + f"{config.OLLAMA_API_URL}/api/chat", + headers={"Content-Type": "application/json"}, + json=request_json, + timeout=config.API_TIMEOUT + ) + + response.raise_for_status() + result = response.json() + + return { + "status": "success", + "ollama_url": config.OLLAMA_API_URL, + "request": request_json, + "response": result + } + except requests.exceptions.Timeout as e: + return { + "status": "error", + "message": f"Timeout connecting to Ollama API: {str(e)}. 
The request exceeded the {config.API_TIMEOUT} second timeout.", + "ollama_url": config.OLLAMA_API_URL + } + except requests.exceptions.ConnectionError as e: + return { + "status": "error", + "message": f"Connection error to Ollama API: {str(e)}. Please check if Ollama is running at {config.OLLAMA_API_URL}.", + "ollama_url": config.OLLAMA_API_URL + } except Exception as e: return { "status": "error", @@ -151,6 +253,7 @@ async def get_config(): "openwebui_url": config.OPENWEBUI_URL, "ollama_api_url": config.OLLAMA_API_URL, "default_model": config.DEFAULT_MODEL, + "api_timeout": config.API_TIMEOUT, "available_models": list(model_service.AVAILABLE_MODELS.keys()) } @@ -251,6 +354,11 @@ async def send_message(chat_id: str, request: MessageRequest): Bot response message. """ try: + print(f"Processing message for chat {chat_id} from user {request.user_id}") + print(f"Message: {request.message[:50]}...") # Print first 50 chars of message + print(f"Using RAG: {request.use_rag}") + print(f"Model parameters: temperature={request.temperature}, max_tokens={request.max_tokens}") + # Extract model parameters from the request response = chat_service.get_chat_response( chat_id=chat_id, @@ -270,10 +378,25 @@ async def send_message(chat_id: str, request: MessageRequest): function_calling=request.function_calling ) + print(f"Response received. 
Length: {len(response.get('content', ''))}") return response except ValueError as e: - raise HTTPException(status_code=404, detail=str(e)) + error_msg = f"Chat not found: {str(e)}" + print(f"ERROR: {error_msg}") + raise HTTPException(status_code=404, detail=error_msg) + except Exception as e: + error_msg = f"Error processing message: {str(e)}" + print(f"ERROR: {error_msg}") + # Return an error message instead of raising an exception + # This ensures the client gets a proper response + return { + "id": str(uuid.uuid4()), + "content": f"Error processing message: {str(e)}", + "user_id": None, + "is_user_message": False, + "timestamp": datetime.now(timezone.utc).isoformat() + } @app.post("/chats/{chat_id}/members/{user_id}") async def add_team_member(chat_id: str, user_id: str): diff --git a/ai_service/config.py b/ai_service/config.py index b9f3c8f..3c065e0 100644 --- a/ai_service/config.py +++ b/ai_service/config.py @@ -24,6 +24,7 @@ class Config: # Ollama configuration OLLAMA_API_URL = os.environ.get('OLLAMA_API_URL', 'http://127.0.0.1:11434') DEFAULT_MODEL = os.environ.get('DEFAULT_MODEL', 'llama3.1') + API_TIMEOUT = int(os.environ.get('API_TIMEOUT', 300)) # Default timeout of 5 minutes (300 seconds) # Document processing CHUNK_SIZE = int(os.environ.get('CHUNK_SIZE', 1000)) diff --git a/ai_service/models/chat_service.py b/ai_service/models/chat_service.py index 54fa9a8..e9416a2 100644 --- a/ai_service/models/chat_service.py +++ b/ai_service/models/chat_service.py @@ -286,21 +286,35 @@ class ChatService: # Get response from model model_id = chat['model_id'] - response_text = model_service.generate_response( - model_id=model_id, - prompt=message, - context=context, - use_rag=use_rag, - model_params=model_params - ) + try: + print(f"Requesting response from model {model_id} for chat {chat_id}") + response_text = model_service.generate_response( + model_id=model_id, + prompt=message, + context=context, + use_rag=use_rag, + model_params=model_params + ) - # Add bot 
response to chat - response_message = self.add_message( - chat_id=chat_id, - content=response_text, - user_id=user_id, - is_user_message=False - ) + # Add bot response to chat + response_message = self.add_message( + chat_id=chat_id, + content=response_text, + user_id=user_id, + is_user_message=False + ) + + except Exception as e: + error_message = f"Error generating response: {str(e)}" + print(f"ERROR: {error_message}") + + # Add error message to chat + response_message = self.add_message( + chat_id=chat_id, + content=error_message, + user_id=user_id, + is_user_message=False + ) return response_message diff --git a/ai_service/models/model_service.py b/ai_service/models/model_service.py index 7f91804..a98262b 100644 --- a/ai_service/models/model_service.py +++ b/ai_service/models/model_service.py @@ -52,6 +52,7 @@ class ModelService: self.ollama_api_url = config.OLLAMA_API_URL self.openwebui_url = config.OPENWEBUI_URL self.openwebui_api_key = config.OPENWEBUI_API_KEY + self.api_timeout = config.API_TIMEOUT def get_available_models(self) -> List[Dict[str, Any]]: """ @@ -193,7 +194,7 @@ class ModelService: f"{self.openwebui_url}/api/chat/completions", headers=headers, json=openwebui_request, - timeout=60 # Longer timeout for RAG + timeout=self.api_timeout ) response.raise_for_status() @@ -205,9 +206,19 @@ class ModelService: else: return "Error: Unexpected response format from OpenWebUI" + except requests.exceptions.Timeout as e: + error_msg = f"Timeout error connecting to OpenWebUI API: {str(e)}. The request exceeded the {self.api_timeout} second timeout." + print(f"ERROR: {error_msg}") + print("Falling back to direct Ollama call without RAG") + # Continue to the Ollama API call below + except requests.exceptions.ConnectionError as e: + error_msg = f"Connection error to OpenWebUI API: {str(e)}. Please check if OpenWebUI is running at {self.openwebui_url}." 
+ print(f"ERROR: {error_msg}") + print("Falling back to direct Ollama call without RAG") + # Continue to the Ollama API call below except Exception as e: - print(f"Error calling OpenWebUI API: {str(e)}") - # Fall back to direct Ollama call without RAG + error_msg = f"Error calling OpenWebUI API: {str(e)}" + print(f"ERROR: {error_msg}") print("Falling back to direct Ollama call without RAG") # Continue to the Ollama API call below @@ -250,7 +261,7 @@ class ModelService: f"{self.ollama_api_url}/api/chat", headers={"Content-Type": "application/json"}, json=request_json, - timeout=60 + timeout=self.api_timeout ) response.raise_for_status() @@ -263,8 +274,18 @@ class ModelService: else: return "Error: Unexpected response format from Ollama" + except requests.exceptions.Timeout as e: + error_msg = f"Timeout error connecting to Ollama API: {str(e)}. The request exceeded the {self.api_timeout} second timeout." + print(f"ERROR: {error_msg}") + return error_msg + except requests.exceptions.ConnectionError as e: + error_msg = f"Connection error to Ollama API: {str(e)}. Please check if Ollama is running at {self.ollama_api_url}." + print(f"ERROR: {error_msg}") + return error_msg except Exception as e: - return f"Error generating response: {str(e)}" + error_msg = f"Error generating response: {str(e)}" + print(f"ERROR: {error_msg}") + return error_msg # Create a singleton instance