fix
This commit is contained in:
@@ -39,13 +39,9 @@ def validate_worker_document():
|
||||
file_path = os.path.join(upload_folder, filename)
|
||||
file.save(file_path)
|
||||
|
||||
print("loading document")
|
||||
|
||||
# Load the document for processing
|
||||
docs = load_document(file_path)
|
||||
|
||||
print("document loaded")
|
||||
|
||||
# Instantiate the chatbot service
|
||||
chatbot = Chatbot()
|
||||
|
||||
|
||||
@@ -77,7 +77,7 @@ dummy_data = generate_dummy_data(num_assessments=100, max_users_per_assessment=5
|
||||
#SopGeneratorDocument
|
||||
class Chatbot:
|
||||
def __init__(self):
    """Set up the OpenAI client and the model name used by this chatbot.

    The API key is read only from the ``OPENAI_API_KEY`` environment
    variable; no key literal is kept in source code.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is unset or empty.
    """
    # SECURITY: credentials must come from the environment (or a secret
    # manager), never from a literal fallback in the repository. Any key
    # that was ever committed must be treated as leaked and rotated.
    self.api_key = os.getenv("OPENAI_API_KEY")
    if not self.api_key:
        # Fail early with an actionable message instead of silently using
        # an embedded credential.
        raise RuntimeError(
            "OPENAI_API_KEY environment variable is not set; "
            "cannot initialise the OpenAI client."
        )
    self.client = OpenAI(api_key=self.api_key)
    self.model = "gpt-4o-mini"
|
||||
|
||||
@@ -87,11 +87,7 @@ class Chatbot:
|
||||
|
||||
def _extract_text_from_docs(self, docs):
    """Extract text content from document objects.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        A list containing ``self.clean_text(doc.page_content)`` for each
        document, in input order.
    """
    # No debug print of ``docs`` here: it would write the full contents of
    # every uploaded document to stdout.
    return [self.clean_text(doc.page_content) for doc in docs]
|
||||
# Existing methods...
|
||||
|
||||
|
||||
@@ -4,6 +4,16 @@ from reportlab.lib.pagesizes import letter
|
||||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
|
||||
from reportlab.lib.styles import getSampleStyleSheet
|
||||
from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader
|
||||
import pdfplumber
|
||||
from langchain_core.documents import Document
|
||||
|
||||
def load_pdf_with_pdfplumber(file_path):
    """Load a PDF into one ``Document`` per page using pdfplumber.

    Args:
        file_path: Path of the PDF file to open.

    Returns:
        A list of ``Document`` objects in page order. Each one carries the
        text extracted from a single page and ``{"page": i}`` metadata,
        where ``i`` is the zero-based index of the page.
    """
    docs = []
    with pdfplumber.open(file_path) as pdf:
        for i, page in enumerate(pdf.pages):
            # A page without extractable text (e.g. a scanned image) can
            # yield None in some pdfplumber versions; normalise to "" so
            # page_content is always a string for Document and for any
            # later text cleaning.
            text = page.extract_text() or ""
            docs.append(Document(page_content=text, metadata={"page": i}))
    return docs
|
||||
|
||||
def convert_word_to_pdf(doc_path: str) -> str:
|
||||
"""
|
||||
@@ -61,8 +71,7 @@ def load_document(file_path: str):
|
||||
loader = PyPDFLoader(pdf_path)
|
||||
return loader.load()
|
||||
elif extension == '.pdf':
|
||||
loader = PyPDFLoader(file_path)
|
||||
return loader.load()
|
||||
return load_pdf_with_pdfplumber(file_path)
|
||||
else:
|
||||
raise ValueError(f"Unsupported file type: {extension}. Only .pdf, .docx, and .doc are supported.")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user