fix
This commit is contained in:
@@ -38,13 +38,9 @@ def validate_worker_document():
|
|||||||
upload_folder = current_app.config['UPLOAD_FOLDER']
|
upload_folder = current_app.config['UPLOAD_FOLDER']
|
||||||
file_path = os.path.join(upload_folder, filename)
|
file_path = os.path.join(upload_folder, filename)
|
||||||
file.save(file_path)
|
file.save(file_path)
|
||||||
|
|
||||||
print("loading document")
|
|
||||||
|
|
||||||
# Load the document for processing
|
# Load the document for processing
|
||||||
docs = load_document(file_path)
|
docs = load_document(file_path)
|
||||||
|
|
||||||
print("document loaded")
|
|
||||||
|
|
||||||
# Instantiate the chatbot service
|
# Instantiate the chatbot service
|
||||||
chatbot = Chatbot()
|
chatbot = Chatbot()
|
||||||
|
|||||||
@@ -77,7 +77,7 @@ dummy_data = generate_dummy_data(num_assessments=100, max_users_per_assessment=5
|
|||||||
#SopGeneratorDocument
|
#SopGeneratorDocument
|
||||||
class Chatbot:
|
class Chatbot:
|
||||||
def __init__(self):
    """Configure the OpenAI client from the OPENAI_API_KEY environment variable.

    Raises:
        RuntimeError: If OPENAI_API_KEY is unset or empty.
    """
    # Credentials must come from the environment only. A key literal in
    # source ends up in version-control history and has to be treated as
    # compromised (revoke and rotate any key that was ever committed).
    self.api_key = os.getenv("OPENAI_API_KEY")
    if not self.api_key:
        raise RuntimeError(
            "OPENAI_API_KEY is not set; export it before creating a Chatbot."
        )
    self.client = OpenAI(api_key=self.api_key)
    self.model = "gpt-4o-mini"
|
||||||
|
|
||||||
@@ -87,11 +87,7 @@ class Chatbot:
|
|||||||
|
|
||||||
def _extract_text_from_docs(self, docs):
    """Extract cleaned text content from document objects.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        A list holding ``self.clean_text(doc.page_content)`` for each
        document, in input order.
    """
    # Intentionally no printing of ``docs`` here: the objects carry the full
    # text of uploaded documents, which should not be written to stdout.
    return [self.clean_text(doc.page_content) for doc in docs]
|
||||||
# Existing methods...
|
# Existing methods...
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,16 @@ from reportlab.lib.pagesizes import letter
|
|||||||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
|
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
|
||||||
from reportlab.lib.styles import getSampleStyleSheet
|
from reportlab.lib.styles import getSampleStyleSheet
|
||||||
from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader
|
from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader
|
||||||
|
import pdfplumber
|
||||||
|
from langchain_core.documents import Document
|
||||||
|
|
||||||
|
def load_pdf_with_pdfplumber(file_path: str):
    """Load a PDF with pdfplumber and return one Document per page.

    Args:
        file_path: Path of the PDF file to open.

    Returns:
        A list of ``Document`` objects in page order. Each has the page's
        extracted text as ``page_content`` and its zero-based index under
        the ``"page"`` metadata key.
    """
    with pdfplumber.open(file_path) as pdf:
        return [
            # Pages without a text layer (e.g. scanned images) may produce no
            # text; normalise to "" so page_content is always a string.
            Document(page_content=page.extract_text() or "", metadata={"page": i})
            for i, page in enumerate(pdf.pages)
        ]
|
||||||
|
|
||||||
def convert_word_to_pdf(doc_path: str) -> str:
|
def convert_word_to_pdf(doc_path: str) -> str:
|
||||||
"""
|
"""
|
||||||
@@ -61,8 +71,7 @@ def load_document(file_path: str):
|
|||||||
loader = PyPDFLoader(pdf_path)
|
loader = PyPDFLoader(pdf_path)
|
||||||
return loader.load()
|
return loader.load()
|
||||||
elif extension == '.pdf':
|
elif extension == '.pdf':
|
||||||
loader = PyPDFLoader(file_path)
|
return load_pdf_with_pdfplumber(file_path)
|
||||||
return loader.load()
|
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unsupported file type: {extension}. Only .pdf, .docx, and .doc are supported.")
|
raise ValueError(f"Unsupported file type: {extension}. Only .pdf, .docx, and .doc are supported.")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user