This commit is contained in:
teslim
2025-06-13 19:06:18 +01:00
parent 76e0fe1cae
commit dd616913e0
+6 -1
View File
@@ -1,5 +1,6 @@
import os
import json
import re
from openai import OpenAI
from pydantic import BaseModel, Field
from typing import List, Dict, Optional
@@ -80,9 +81,13 @@ class Chatbot:
self.client = OpenAI(api_key=self.api_key)
self.model = "gpt-4o-mini"
def clean_text(self, text):
    """Return ``text`` with all surrogate code points stripped out.

    Every character in the range U+D800 through U+DFFF is deleted; all
    other characters are kept in their original order.

    Args:
        text: The string to sanitize.

    Returns:
        A copy of ``text`` containing no surrogate characters.
    """
    # NOTE(review): presumably surrogates are dropped so the text can be
    # encoded/serialized safely downstream -- confirm against callers.
    surrogate_pattern = re.compile(r'[\uD800-\uDFFF]')
    return surrogate_pattern.sub('', text)
def _extract_text_from_docs(self, docs):
"""Extract text content from document objects."""
return [doc.page_content for doc in docs]
return [self.clean_text(doc.page_content) for doc in docs]
# Existing methods...
def validate_worker(self, question, docs) -> VisionMissionResponse: