Fix: strip surrogate characters from extracted document text
This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
|
import re
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from typing import List, Dict, Optional
|
from typing import List, Dict, Optional
|
||||||
@@ -80,9 +81,13 @@ class Chatbot:
|
|||||||
self.client = OpenAI(api_key=self.api_key)
|
self.client = OpenAI(api_key=self.api_key)
|
||||||
self.model = "gpt-4o-mini"
|
self.model = "gpt-4o-mini"
|
||||||
|
|
||||||
|
def clean_text(self, text):
|
||||||
|
# Remove all UTF-16 surrogate code points (U+D800-U+DFFF); lone surrogates cannot be encoded as UTF-8
|
||||||
|
return re.sub(r'[\uD800-\uDFFF]', '', text)
|
||||||
|
|
||||||
def _extract_text_from_docs(self, docs):
|
def _extract_text_from_docs(self, docs):
|
||||||
"""Extract text content from document objects."""
|
"""Extract text content from document objects."""
|
||||||
return [doc.page_content for doc in docs]
|
return [self.clean_text(doc.page_content) for doc in docs]
|
||||||
# Existing methods...
|
# Existing methods...
|
||||||
|
|
||||||
def validate_worker(self, question, docs) -> VisionMissionResponse:
|
def validate_worker(self, question, docs) -> VisionMissionResponse:
|
||||||
|
|||||||
Reference in New Issue
Block a user