fix mix up updates
This commit is contained in:
@@ -0,0 +1 @@
|
|||||||
|
python=3.11
|
||||||
+24
-22
@@ -1,23 +1,25 @@
|
|||||||
openai
|
openai==1.72.0
|
||||||
pandas
|
pandas==2.2.3
|
||||||
python-dotenv
|
python-dotenv==1.1.0
|
||||||
fastapi
|
fastapi==0.115.9
|
||||||
uvicorn
|
uvicorn==0.34.0
|
||||||
langchain-community
|
langchain-community==0.3.21
|
||||||
langchain-openai
|
langchain-openai==0.3.12
|
||||||
pydantic
|
pydantic==2.11.3
|
||||||
pypdf
|
pypdf==5.4.0
|
||||||
pypandoc
|
pypandoc==1.15
|
||||||
Spire.Doc
|
|
||||||
plum-dispatch==1.7.4
|
plum-dispatch==1.7.4
|
||||||
scikit-learn
|
scikit-learn==1.6.1
|
||||||
werkzeug
|
Werkzeug==3.1.3
|
||||||
python-multipart
|
python-multipart==0.0.20
|
||||||
langgraph
|
langgraph==0.3.27
|
||||||
tiktoken
|
tiktoken==0.9.0
|
||||||
langchainhub
|
langchainhub==0.1.21
|
||||||
chromadb
|
chromadb==1.0.3
|
||||||
langchain
|
langchain==0.3.23
|
||||||
langchain-text-splitters
|
langchain-text-splitters==0.3.8
|
||||||
beautifulsoup4
|
beautifulsoup4==4.13.3
|
||||||
langchain-core
|
langchain-core==0.3.51
|
||||||
|
PyPDF2==3.0.1
|
||||||
|
reportlab==4.3.1
|
||||||
|
python-docx==1.1.2
|
||||||
|
|||||||
@@ -149,10 +149,18 @@ def generate_quiz(startpop_pdf, quiz_type=None) -> dict:
|
|||||||
- Conclude strongly, avoiding phrases like “and so yeah…”.
|
- Conclude strongly, avoiding phrases like “and so yeah…”.
|
||||||
----END------
|
----END------
|
||||||
|
|
||||||
NOTE: THE QUIZ FOCUES ON BULIDNG USER CONFIDENCE BY ANANLYZING THE QUESTIONS AND FRAMEWORK FOR EACH QUESTION IN THE STARTPOP FRAMEWORK PDF,SOLELY USE THIS PDF PROVIDED BY THE USER
|
### Instructions:
|
||||||
BASED ON THIS FRAMEWORK , CREATE INTERVIEW BASED QUIZ FOR FIRE FIGHTING ROLE BY ANALYZING THIS DOCUMENT
|
- Analyze the provided STARTPOP PDF to extract relevant themes and concepts.
|
||||||
NOTE : THE QUIZ SHOULD NOT BE BASED ON STARTPOP FRAMEWORK ITSELF BUT ANALYZE THE STARTPOP FRAMEWORK PRESENTED TO GENERATE INTERVIEW BASED QUIZ
|
- Generate a quiz that builds user confidence by focusing on interview-based scenarios.
|
||||||
e.g "The STARTPOP framework is specifically designed for firefighter interviews", THIS KIND OF QUESTION SHOULD NOT BE ASKED IN THE QUIZ....
|
- Avoid questions directly about the STARTPOP framework itself (e.g., "What is STARTPOP?").
|
||||||
|
- Use the specified quiz type (`quiz_type`) to determine the output format.
|
||||||
|
- Generate at least 15 questions and above
|
||||||
|
|
||||||
|
NOTE: The quiz focuses on building user confidence by analyzing the questions and framework presented in the STARTPOP PDF provided by the user.
|
||||||
|
Based on this framework, create an interview-based quiz specifically for firefighting roles by thoroughly analyzing the document.
|
||||||
|
|
||||||
|
IMPORTANT: The quiz should not directly reference the STARTPOP framework itself. Instead, it should generate interview-based questions derived from the insights of the STARTPOP framework.
|
||||||
|
For example, avoid questions like "The STARTPOP framework is specifically designed for firefighter interviews." Such questions should not be included in the quiz.
|
||||||
Thank you for your thorough and precise processing!
|
Thank you for your thorough and precise processing!
|
||||||
STARTPOP FULL PDF :{startpop_pdf}
|
STARTPOP FULL PDF :{startpop_pdf}
|
||||||
question type : {quiz_type}
|
question type : {quiz_type}
|
||||||
|
|||||||
@@ -0,0 +1,61 @@
|
|||||||
|
"""Generate a pinned requirements.txt for a fixed list of packages.

The versions are taken from ``pip freeze`` of the interpreter running this
script. Entries in ``packages`` that already carry an ``==`` pin are written
unchanged; entries that are not installed are written unpinned and reported.
"""
import re
import subprocess
import sys

# List of packages you want to include
packages = [
    "openai",
    "pandas",
    "python-dotenv",
    "fastapi",
    "uvicorn",
    "langchain-community",
    "langchain-openai",
    "pydantic",
    "pypdf",
    "pypandoc",
    "plum-dispatch==1.7.4",  # You specified exact version here
    "scikit-learn",
    "werkzeug",
    "python-multipart",
    "langgraph",
    "tiktoken",
    "langchainhub",
    "chromadb",
    "langchain",
    "langchain-text-splitters",
    "beautifulsoup4",
    "langchain-core",
    "PyPDF2",
    "reportlab",
    "python-docx",
]


def normalize_name(name):
    """Return the PEP 503 normalized form of a distribution name.

    Runs of ``-``, ``_`` and ``.`` collapse to a single ``-`` and the result
    is lowercased, so ``python_dotenv`` and ``python-dotenv`` compare equal.
    """
    return re.sub(r"[-_.]+", "-", name).lower()


def parse_freeze(freeze_lines):
    """Map normalized package names to their ``name==version`` freeze line.

    Lines without ``==`` (blank lines, editable/VCS installs) are ignored.
    """
    pinned = {}
    for line in freeze_lines:
        name, separator, _version = line.partition("==")
        if separator:
            pinned[normalize_name(name.strip())] = line.strip()
    return pinned


def resolve_requirements(requested, freeze_lines):
    """Build the requirement lines for ``requested`` packages.

    Args:
        requested: Package names, optionally already pinned with ``==``.
        freeze_lines: Lines of ``pip freeze`` output.

    Returns:
        A ``(lines, missing)`` tuple: the requirement lines in the order of
        ``requested``, and the names that were not found in ``freeze_lines``
        (those are emitted unpinned).
    """
    pinned = parse_freeze(freeze_lines)
    lines = []
    missing = []
    for package in requested:
        # Handle cases where version is already specified
        if "==" in package:
            lines.append(package)
            continue

        frozen = pinned.get(normalize_name(package))
        if frozen is None:
            # If not found, just write the package name
            lines.append(package)
            missing.append(package)
        else:
            lines.append(frozen)
    return lines, missing


def main():
    """Run ``pip freeze`` and write requirements.txt in the current directory."""
    # Use the running interpreter's pip so the versions come from this
    # environment, and fail loudly (check=True) instead of writing an
    # unpinned file when pip itself errors out.
    result = subprocess.run(
        [sys.executable, "-m", "pip", "freeze"],
        capture_output=True,
        text=True,
        check=True,
    )
    lines, missing = resolve_requirements(packages, result.stdout.splitlines())

    # Write only the requested packages to requirements.txt
    with open("requirements.txt", "w", encoding="utf-8") as f:
        f.writelines(f"{line}\n" for line in lines)

    for package in missing:
        print(f"Warning: {package} not found in installed packages")

    print("requirements.txt has been generated.")


if __name__ == "__main__":
    main()
|
||||||
+32
-14
@@ -1,32 +1,45 @@
|
|||||||
import os
|
import os
|
||||||
from spire.doc import Document, FileFormat
|
from docx import Document as DocxDocument
|
||||||
from langchain_community.document_loaders import PyPDFLoader
|
from reportlab.lib.pagesizes import letter
|
||||||
|
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
|
||||||
|
from reportlab.lib.styles import getSampleStyleSheet
|
||||||
|
from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader
|
||||||
|
|
||||||
def convert_word_to_pdf(doc_path: str) -> str:
    """
    Convert a .docx file to PDF using python-docx and reportlab.

    Each non-empty entry of ``doc.paragraphs`` is rendered as plain text in
    the sample 'Normal' style, followed by a 12-point spacer. The PDF is
    written next to the input file, with the extension replaced by '.pdf'.

    Args:
        doc_path (str): The path to the .docx file.

    Returns:
        str: The path to the converted PDF file.
    """
    # Imported locally so this function does not depend on a file-level import.
    from xml.sax.saxutils import escape

    pdf_path = os.path.splitext(doc_path)[0] + '.pdf'

    # Load the Word document
    doc = DocxDocument(doc_path)

    # Create a PDF
    pdf = SimpleDocTemplate(pdf_path, pagesize=letter)
    styles = getSampleStyleSheet()
    flowables = []

    # Extract text from paragraphs and add to PDF
    for para in doc.paragraphs:
        if para.text:
            # Paragraph parses its text as XML-like markup, so literal
            # '&', '<' and '>' from the document must be escaped first.
            flowables.append(Paragraph(escape(para.text), styles['Normal']))
            flowables.append(Spacer(1, 12))

    # Build the PDF
    pdf.build(flowables)

    return pdf_path
|
||||||
|
|
||||||
def load_document(file_path: str):
|
def load_document(file_path: str):
|
||||||
"""
|
"""
|
||||||
Utility function to load a PDF, DOCX, or DOC file by first converting it to PDF.
|
Utility function to load a PDF, DOCX, or DOC file.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file_path (str): The path to the file to load.
|
file_path (str): The path to the file to load.
|
||||||
@@ -38,16 +51,21 @@ def load_document(file_path: str):
|
|||||||
try:
|
try:
|
||||||
extension = os.path.splitext(file_path)[1].lower()
|
extension = os.path.splitext(file_path)[1].lower()
|
||||||
|
|
||||||
if extension in ['.doc', '.docx']:
|
if extension == '.docx':
|
||||||
# Convert .doc or .docx to PDF first
|
# For .docx files, use UnstructuredWordDocumentLoader directly
|
||||||
|
loader = UnstructuredWordDocumentLoader(file_path)
|
||||||
|
return loader.load()
|
||||||
|
elif extension == '.doc':
|
||||||
|
# Convert .doc to .pdf first
|
||||||
pdf_path = convert_word_to_pdf(file_path)
|
pdf_path = convert_word_to_pdf(file_path)
|
||||||
loader = PyPDFLoader(pdf_path)
|
loader = PyPDFLoader(pdf_path)
|
||||||
|
return loader.load()
|
||||||
elif extension == '.pdf':
|
elif extension == '.pdf':
|
||||||
loader = PyPDFLoader(file_path)
|
loader = PyPDFLoader(file_path)
|
||||||
|
return loader.load()
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unsupported file type: {extension}. Only .pdf, .docx, and .doc are supported.")
|
raise ValueError(f"Unsupported file type: {extension}. Only .pdf, .docx, and .doc are supported.")
|
||||||
|
|
||||||
return loader.load()
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error loading document: {str(e)}")
|
print(f"Error loading document: {str(e)}")
|
||||||
return None
|
return None
|
||||||
Reference in New Issue
Block a user