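fix: make the API timeout configurable (API_TIMEOUT) and improve error handling for Ollama/OpenWebUI calls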

2025-05-12 16:30:35 +01:00
parent 7c61e98340
commit f00941cece
4 changed files with 180 additions and 21 deletions
@@ -7,6 +7,8 @@ from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, Field
 from typing import List, Optional
 import uuid
 from datetime import datetime, timezone
 from ai_service.models.model_service import model_service
 from ai_service.models.chat_service import chat_service
@@ -120,7 +122,7 @@ async def test_ollama_connection():
    try:
        # Try to connect to Ollama API
-        response = requests.get(f"{config.OLLAMA_API_URL}/api/tags", timeout=5)
+        response = requests.get(f"{config.OLLAMA_API_URL}/api/tags", timeout=config.API_TIMEOUT)
        response.raise_for_status()
        # Return the models from Ollama
@@ -130,6 +132,106 @@ async def test_ollama_connection():
            "ollama_url": config.OLLAMA_API_URL,
            "models": response.json()
        }
    except requests.exceptions.Timeout as e:
        return {
            "status": "error",
            "message": f"Timeout connecting to Ollama API: {str(e)}. The request exceeded the {config.API_TIMEOUT} second timeout.",
            "ollama_url": config.OLLAMA_API_URL
        }
    except requests.exceptions.ConnectionError as e:
        return {
            "status": "error",
            "message": f"Connection error to Ollama API: {str(e)}. Please check if Ollama is running at {config.OLLAMA_API_URL}.",
            "ollama_url": config.OLLAMA_API_URL
        }
    except Exception as e:
        return {
            "status": "error",
            "message": f"Failed to connect to Ollama API: {str(e)}",
            "ollama_url": config.OLLAMA_API_URL
        }
@app.post("/test-chat")
async def test_chat_completion():
    """
    Test the chat completion with a simple prompt.

    Sends a fixed greeting to ``config.DEFAULT_MODEL`` through the model
    service, with an empty context and RAG disabled.

    Returns:
        dict: When the call returns, ``status`` ("success"), ``model``,
        ``response`` and ``ollama_url``. When the call raises, ``status``
        ("error"), ``message`` and ``ollama_url``.
    """
    # Local import keeps this handler self-contained.
    from fastapi.concurrency import run_in_threadpool

    try:
        # generate_response is a synchronous call; running it directly inside
        # an ``async def`` handler would stall the event loop (and every other
        # request) until it returns, so it is executed in the threadpool.
        response = await run_in_threadpool(
            model_service.generate_response,
            model_id=config.DEFAULT_MODEL,
            prompt="Hello, how are you?",
            context=[],
            use_rag=False,
        )
        # NOTE(review): the model service appears to return error text instead
        # of raising for some backend failures, so "success" here may still
        # wrap an error message -- confirm against ModelService.
        return {
            "status": "success",
            "model": config.DEFAULT_MODEL,
            "response": response,
            "ollama_url": config.OLLAMA_API_URL
        }
    except Exception as e:
        # Diagnostic endpoint: report the failure in the body rather than
        # raising, so the caller always receives a JSON payload.
        return {
            "status": "error",
            "message": f"Failed to get chat completion: {str(e)}",
            "ollama_url": config.OLLAMA_API_URL
        }
@app.post("/test-ollama-direct")
 async def test_ollama_direct():
    """
    Test the Ollama API directly with a simple chat request.
    Returns:
        Raw Ollama API response.
    """
    import requests
    try:
        # Prepare a simple chat request
        request_json = {
            "model": config.DEFAULT_MODEL,
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Hello, how are you?"}
            ],
            "stream": False
        }
        # Make the API call to Ollama
        print(f"Sending direct request to Ollama API at: {config.OLLAMA_API_URL}/api/chat")
        response = requests.post(
            f"{config.OLLAMA_API_URL}/api/chat",
            headers={"Content-Type": "application/json"},
            json=request_json,
            timeout=config.API_TIMEOUT
        )
        response.raise_for_status()
        result = response.json()
        return {
            "status": "success",
            "ollama_url": config.OLLAMA_API_URL,
            "request": request_json,
            "response": result
        }
    except requests.exceptions.Timeout as e:
        return {
            "status": "error",
            "message": f"Timeout connecting to Ollama API: {str(e)}. The request exceeded the {config.API_TIMEOUT} second timeout.",
            "ollama_url": config.OLLAMA_API_URL
        }
    except requests.exceptions.ConnectionError as e:
        return {
            "status": "error",
            "message": f"Connection error to Ollama API: {str(e)}. Please check if Ollama is running at {config.OLLAMA_API_URL}.",
            "ollama_url": config.OLLAMA_API_URL
        }
    except Exception as e:
        return {
            "status": "error",
@@ -151,6 +253,7 @@ async def get_config():
        "openwebui_url": config.OPENWEBUI_URL,
        "ollama_api_url": config.OLLAMA_API_URL,
        "default_model": config.DEFAULT_MODEL,
        "api_timeout": config.API_TIMEOUT,
        "available_models": list(model_service.AVAILABLE_MODELS.keys())
    }
@@ -251,6 +354,11 @@ async def send_message(chat_id: str, request: MessageRequest):
        Bot response message.
    """
    try:
        print(f"Processing message for chat {chat_id} from user {request.user_id}")
        print(f"Message: {request.message[:50]}...")  # Print first 50 chars of message
        print(f"Using RAG: {request.use_rag}")
        print(f"Model parameters: temperature={request.temperature}, max_tokens={request.max_tokens}")
        # Extract model parameters from the request
        response = chat_service.get_chat_response(
            chat_id=chat_id,
@@ -270,10 +378,25 @@ async def send_message(chat_id: str, request: MessageRequest):
            function_calling=request.function_calling
        )
        print(f"Response received. Length: {len(response.get('content', ''))}")
        return response
    except ValueError as e:
-        raise HTTPException(status_code=404, detail=str(e))
+        error_msg = f"Chat not found: {str(e)}"
        print(f"ERROR: {error_msg}")
        raise HTTPException(status_code=404, detail=error_msg)
    except Exception as e:
        error_msg = f"Error processing message: {str(e)}"
        print(f"ERROR: {error_msg}")
        # Return an error message instead of raising an exception
        # This ensures the client gets a proper response
        return {
            "id": str(uuid.uuid4()),
            "content": f"Error processing message: {str(e)}",
            "user_id": None,
            "is_user_message": False,
            "timestamp": datetime.now(timezone.utc).isoformat()
        }
@app.post("/chats/{chat_id}/members/{user_id}")
 async def add_team_member(chat_id: str, user_id: str):
@@ -24,6 +24,7 @@ class Config:
    # Ollama configuration
    OLLAMA_API_URL = os.environ.get('OLLAMA_API_URL', 'http://127.0.0.1:11434')
    DEFAULT_MODEL = os.environ.get('DEFAULT_MODEL', 'llama3.1')
    API_TIMEOUT = int(os.environ.get('API_TIMEOUT', 300))  # Default timeout of 5 minutes (300 seconds)
    # Document processing
    CHUNK_SIZE = int(os.environ.get('CHUNK_SIZE', 1000))
@@ -286,21 +286,35 @@ class ChatService:
        # Get response from model
        model_id = chat['model_id']
-        response_text = model_service.generate_response(
+        try:
-            model_id=model_id,
+            print(f"Requesting response from model {model_id} for chat {chat_id}")
-            prompt=message,
+            response_text = model_service.generate_response(
-            context=context,
+                model_id=model_id,
-            use_rag=use_rag,
+                prompt=message,
-            model_params=model_params
+                context=context,
-        )
+                use_rag=use_rag,
                model_params=model_params
            )
-        # Add bot response to chat
+            # Add bot response to chat
-        response_message = self.add_message(
+            response_message = self.add_message(
-            chat_id=chat_id,
+                chat_id=chat_id,
-            content=response_text,
+                content=response_text,
-            user_id=user_id,
+                user_id=user_id,
-            is_user_message=False
+                is_user_message=False
-        )
+            )
        except Exception as e:
            error_message = f"Error generating response: {str(e)}"
            print(f"ERROR: {error_message}")
            # Add error message to chat
            response_message = self.add_message(
                chat_id=chat_id,
                content=error_message,
                user_id=user_id,
                is_user_message=False
            )
        return response_message
@@ -52,6 +52,7 @@ class ModelService:
        self.ollama_api_url = config.OLLAMA_API_URL
        self.openwebui_url = config.OPENWEBUI_URL
        self.openwebui_api_key = config.OPENWEBUI_API_KEY
        self.api_timeout = config.API_TIMEOUT
    def get_available_models(self) -> List[Dict[str, Any]]:
        """
@@ -193,7 +194,7 @@ class ModelService:
                    f"{self.openwebui_url}/api/chat/completions",
                    headers=headers,
                    json=openwebui_request,
-                    timeout=60  # Longer timeout for RAG
+                    timeout=self.api_timeout
                )
                response.raise_for_status()
@@ -205,9 +206,19 @@ class ModelService:
                else:
                    return "Error: Unexpected response format from OpenWebUI"
            except requests.exceptions.Timeout as e:
                error_msg = f"Timeout error connecting to OpenWebUI API: {str(e)}. The request exceeded the {self.api_timeout} second timeout."
                print(f"ERROR: {error_msg}")
                print("Falling back to direct Ollama call without RAG")
                # Continue to the Ollama API call below
            except requests.exceptions.ConnectionError as e:
                error_msg = f"Connection error to OpenWebUI API: {str(e)}. Please check if OpenWebUI is running at {self.openwebui_url}."
                print(f"ERROR: {error_msg}")
                print("Falling back to direct Ollama call without RAG")
                # Continue to the Ollama API call below
            except Exception as e:
-                print(f"Error calling OpenWebUI API: {str(e)}")
+                error_msg = f"Error calling OpenWebUI API: {str(e)}"
-                # Fall back to direct Ollama call without RAG
+                print(f"ERROR: {error_msg}")
                print("Falling back to direct Ollama call without RAG")
                # Continue to the Ollama API call below
@@ -250,7 +261,7 @@ class ModelService:
                f"{self.ollama_api_url}/api/chat",
                headers={"Content-Type": "application/json"},
                json=request_json,
-                timeout=60
+                timeout=self.api_timeout
            )
            response.raise_for_status()
@@ -263,8 +274,18 @@ class ModelService:
            else:
                return "Error: Unexpected response format from Ollama"
        except requests.exceptions.Timeout as e:
            error_msg = f"Timeout error connecting to Ollama API: {str(e)}. The request exceeded the {self.api_timeout} second timeout."
            print(f"ERROR: {error_msg}")
            return error_msg
        except requests.exceptions.ConnectionError as e:
            error_msg = f"Connection error to Ollama API: {str(e)}. Please check if Ollama is running at {self.ollama_api_url}."
            print(f"ERROR: {error_msg}")
            return error_msg
        except Exception as e:
-            return f"Error generating response: {str(e)}"
+            error_msg = f"Error generating response: {str(e)}"
            print(f"ERROR: {error_msg}")
            return error_msg
 # Create a singleton instance