fix mix up updates
This commit is contained in:
@@ -0,0 +1 @@
|
||||
python=3.11
|
||||
+24
-22
@@ -1,23 +1,25 @@
|
||||
openai
|
||||
pandas
|
||||
python-dotenv
|
||||
fastapi
|
||||
uvicorn
|
||||
langchain-community
|
||||
langchain-openai
|
||||
pydantic
|
||||
pypdf
|
||||
pypandoc
|
||||
Spire.Doc
|
||||
openai==1.72.0
|
||||
pandas==2.2.3
|
||||
python-dotenv==1.1.0
|
||||
fastapi==0.115.9
|
||||
uvicorn==0.34.0
|
||||
langchain-community==0.3.21
|
||||
langchain-openai==0.3.12
|
||||
pydantic==2.11.3
|
||||
pypdf==5.4.0
|
||||
pypandoc==1.15
|
||||
plum-dispatch==1.7.4
|
||||
scikit-learn
|
||||
werkzeug
|
||||
python-multipart
|
||||
langgraph
|
||||
tiktoken
|
||||
langchainhub
|
||||
chromadb
|
||||
langchain
|
||||
langchain-text-splitters
|
||||
beautifulsoup4
|
||||
langchain-core
|
||||
scikit-learn==1.6.1
|
||||
Werkzeug==3.1.3
|
||||
python-multipart==0.0.20
|
||||
langgraph==0.3.27
|
||||
tiktoken==0.9.0
|
||||
langchainhub==0.1.21
|
||||
chromadb==1.0.3
|
||||
langchain==0.3.23
|
||||
langchain-text-splitters==0.3.8
|
||||
beautifulsoup4==4.13.3
|
||||
langchain-core==0.3.51
|
||||
PyPDF2==3.0.1
|
||||
reportlab==4.3.1
|
||||
python-docx==1.1.2
|
||||
|
||||
@@ -149,10 +149,18 @@ def generate_quiz(startpop_pdf, quiz_type=None) -> dict:
|
||||
- Conclude strongly, avoiding phrases like “and so yeah…”.
|
||||
----END------
|
||||
|
||||
NOTE: THE QUIZ FOCUES ON BULIDNG USER CONFIDENCE BY ANANLYZING THE QUESTIONS AND FRAMEWORK FOR EACH QUESTION IN THE STARTPOP FRAMEWORK PDF,SOLELY USE THIS PDF PROVIDED BY THE USER
|
||||
BASED ON THIS FRAMEWORK , CREATE INTERVIEW BASED QUIZ FOR FIRE FIGHTING ROLE BY ANALYZING THIS DOCUMENT
|
||||
NOTE : THE QUIZ SHOULD NOT BE BASED ON STARTPOP FRAMEWORK ITSELF BUT ANALYZE THE STARTPOP FRAMEWORK PRESENTED TO GENERATE INTERVIEW BASED QUIZ
|
||||
e.g "The STARTPOP framework is specifically designed for firefighter interviews", THIS KIND OF QUESTION SHOULD NOT BE ASKED IN THE QUIZ....
|
||||
### Instructions:
|
||||
- Analyze the provided STARTPOP PDF to extract relevant themes and concepts.
|
||||
- Generate a quiz that builds user confidence by focusing on interview-based scenarios.
|
||||
- Avoid questions directly about the STARTPOP framework itself (e.g., "What is STARTPOP?").
|
||||
- Use the specified quiz type (`quiz_type`) to determine the output format.
|
||||
- Generate at least 15 questions and above
|
||||
|
||||
NOTE: The quiz focuses on building user confidence by analyzing the questions and framework presented in the STARTPOP PDF provided by the user.
|
||||
Based on this framework, create an interview-based quiz specifically for firefighting roles by thoroughly analyzing the document.
|
||||
|
||||
IMPORTANT: The quiz should not directly reference the STARTPOP framework itself. Instead, it should generate interview-based questions derived from the insights of the STARTPOP framework.
|
||||
For example, avoid questions like "The STARTPOP framework is specifically designed for firefighter interviews." Such questions should not be included in the quiz.
|
||||
Thank you for your thorough and precise processing!
|
||||
STARTPOP FULL PDF :{startpop_pdf}
|
||||
question type : {quiz_type}
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
import subprocess
|
||||
import re
|
||||
|
||||
# Packages to pin in requirements.txt. An entry that already carries an
# "==" pin is written through unchanged; every other entry is pinned to the
# version reported by "pip freeze" for the current interpreter.
packages = [
    "openai",
    "pandas",
    "python-dotenv",
    "fastapi",
    "uvicorn",
    "langchain-community",
    "langchain-openai",
    "pydantic",
    "pypdf",
    "pypandoc",
    "plum-dispatch==1.7.4",  # exact version specified here; written as-is
    "scikit-learn",
    "werkzeug",
    "python-multipart",
    "langgraph",
    "tiktoken",
    "langchainhub",
    "chromadb",
    "langchain",
    "langchain-text-splitters",
    "beautifulsoup4",
    "langchain-core",
    "PyPDF2",
    "reportlab",
    "python-docx",
]


def normalize_name(name: str) -> str:
    """Return *name* in PEP 503 canonical form.

    Runs of "-", "_" and "." collapse to a single "-" and the result is
    lower-cased, so "python_dotenv" and "Python-Dotenv" compare equal.
    """
    return re.sub(r"[-_.]+", "-", name).lower()


def parse_freeze(freeze_lines) -> dict:
    """Map normalized package names to their full "name==version" line.

    Args:
        freeze_lines: Iterable of lines as printed by "pip freeze".

    Returns:
        dict: normalized name -> original freeze line. Lines without "=="
        (blank lines, editable installs, direct URL references) are skipped.
    """
    installed = {}
    for raw_line in freeze_lines:
        line = raw_line.strip()
        name, separator, _version = line.partition("==")
        if separator:
            installed[normalize_name(name)] = line
    return installed


def build_requirements(wanted, installed) -> tuple:
    """Resolve each wanted package to the line written to requirements.txt.

    Args:
        wanted: Iterable of requested package names (optionally pre-pinned).
        installed: Mapping produced by parse_freeze().

    Returns:
        tuple: (lines, missing) where *lines* holds one requirement per wanted
        package, in order, and *missing* lists the packages that were neither
        pre-pinned nor found in *installed* (they are written unpinned).
    """
    lines = []
    missing = []
    for package in wanted:
        # Handle cases where the version is already specified.
        if "==" in package:
            lines.append(package)
            continue

        pinned = installed.get(normalize_name(package))
        if pinned is None:
            # Not installed: fall back to the bare name and report it.
            lines.append(package)
            missing.append(package)
        else:
            lines.append(pinned)
    return lines, missing


def main() -> None:
    """Generate requirements.txt in the current working directory."""
    import sys  # function-scope import: only the entry point needs it

    # Ask the pip that belongs to the running interpreter, not whichever
    # "pip" executable happens to be first on PATH.
    result = subprocess.run(
        [sys.executable, "-m", "pip", "freeze"],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        # Keep going (packages will be written unpinned) but say why.
        print(
            f"Warning: pip freeze failed: {result.stderr.strip()}",
            file=sys.stderr,
        )

    installed = parse_freeze(result.stdout.splitlines())
    lines, missing = build_requirements(packages, installed)

    # Write only the requested packages to requirements.txt.
    with open("requirements.txt", "w", encoding="utf-8") as f:
        f.writelines(f"{line}\n" for line in lines)

    for package in missing:
        print(f"Warning: {package} not found in installed packages")

    print("requirements.txt has been generated.")


if __name__ == "__main__":
    main()
|
||||
+32
-14
@@ -1,32 +1,45 @@
|
||||
import os
|
||||
from spire.doc import Document, FileFormat
|
||||
from langchain_community.document_loaders import PyPDFLoader
|
||||
from docx import Document as DocxDocument
|
||||
from reportlab.lib.pagesizes import letter
|
||||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
|
||||
from reportlab.lib.styles import getSampleStyleSheet
|
||||
from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader
|
||||
|
||||
def convert_word_to_pdf(doc_path: str) -> str:
    """
    Convert a .docx file to PDF using python-docx and reportlab.

    Only paragraph text is carried over: each non-empty paragraph becomes a
    'Normal'-styled reportlab paragraph followed by a 12-point spacer.

    Args:
        doc_path (str): The path to the .docx file.

    Returns:
        str: The path to the converted PDF file (same location and base name
        as ``doc_path``, with a ``.pdf`` extension).
    """
    # Function-scope import so the block does not depend on a file-level change.
    from xml.sax.saxutils import escape

    pdf_path = os.path.splitext(doc_path)[0] + '.pdf'

    doc = DocxDocument(doc_path)

    # Create a PDF
    pdf = SimpleDocTemplate(pdf_path, pagesize=letter)
    normal_style = getSampleStyleSheet()['Normal']
    flowables = []

    # Extract text from paragraphs and add to PDF
    for para in doc.paragraphs:
        if para.text:
            # reportlab's Paragraph parses its text as XML-like markup, so
            # literal '&', '<' and '>' from the document must be escaped or
            # they are misread as (possibly malformed) tags/entities.
            flowables.append(Paragraph(escape(para.text), normal_style))
            flowables.append(Spacer(1, 12))

    # Build the PDF
    pdf.build(flowables)

    return pdf_path
|
||||
|
||||
def load_document(file_path: str):
|
||||
"""
|
||||
Utility function to load a PDF, DOCX, or DOC file by first converting it to PDF.
|
||||
Utility function to load a PDF, DOCX, or DOC file.
|
||||
|
||||
Args:
|
||||
file_path (str): The path to the file to load.
|
||||
@@ -38,16 +51,21 @@ def load_document(file_path: str):
|
||||
try:
|
||||
extension = os.path.splitext(file_path)[1].lower()
|
||||
|
||||
if extension in ['.doc', '.docx']:
|
||||
# Convert .doc or .docx to PDF first
|
||||
if extension == '.docx':
|
||||
# For .docx files, use UnstructuredWordDocumentLoader directly
|
||||
loader = UnstructuredWordDocumentLoader(file_path)
|
||||
return loader.load()
|
||||
elif extension == '.doc':
|
||||
# Convert .doc to .pdf first
|
||||
pdf_path = convert_word_to_pdf(file_path)
|
||||
loader = PyPDFLoader(pdf_path)
|
||||
return loader.load()
|
||||
elif extension == '.pdf':
|
||||
loader = PyPDFLoader(file_path)
|
||||
return loader.load()
|
||||
else:
|
||||
raise ValueError(f"Unsupported file type: {extension}. Only .pdf, .docx, and .doc are supported.")
|
||||
|
||||
return loader.load()
|
||||
except Exception as e:
|
||||
print(f"Error loading document: {str(e)}")
|
||||
return None
|
||||
Reference in New Issue
Block a user