This commit is contained in:
teslim
2025-06-13 19:06:18 +01:00
parent 76e0fe1cae
commit dd616913e0
+6 -1
View File
@@ -1,5 +1,6 @@
import os import os
import json import json
import re
from openai import OpenAI from openai import OpenAI
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from typing import List, Dict, Optional from typing import List, Dict, Optional
@@ -79,10 +80,14 @@ class Chatbot:
self.api_key = os.getenv("OPENAI_API_KEY") self.api_key = os.getenv("OPENAI_API_KEY")
self.client = OpenAI(api_key=self.api_key) self.client = OpenAI(api_key=self.api_key)
self.model = "gpt-4o-mini" self.model = "gpt-4o-mini"
def clean_text(self, text):
    """Return ``text`` with every surrogate code point removed.

    Each character in the range U+D800-U+DFFF is deleted; all other
    characters are left untouched and keep their order.
    """
    # NOTE(review): presumably done because lone surrogates cannot be
    # encoded as UTF-8 further downstream - confirm against the callers.
    surrogate_class = r'[\uD800-\uDFFF]'
    stripped = re.sub(surrogate_class, '', text)
    return stripped
def _extract_text_from_docs(self, docs):
    """Extract text content from document objects.

    Returns a list holding, for each item in ``docs`` (in order), that
    item's ``page_content`` after it has been passed through
    ``self.clean_text``.
    """
    return [self.clean_text(doc.page_content) for doc in docs]
# Existing methods... # Existing methods...
def validate_worker(self, question, docs) -> VisionMissionResponse: def validate_worker(self, question, docs) -> VisionMissionResponse: