diff --git a/docs/.python-version b/docs/.python-version new file mode 100644 index 0000000..423f188 --- /dev/null +++ b/docs/.python-version @@ -0,0 +1 @@ +python=3.11 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 1381aca..72223b8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,25 +1,25 @@ -openai -pandas -python-dotenv -fastapi -uvicorn -langchain-community -langchain-openai -pydantic -pypdf -pypandoc -Spire.Doc +openai==1.72.0 +pandas==2.2.3 +python-dotenv==1.1.0 +fastapi==0.115.9 +uvicorn==0.34.0 +langchain-community==0.3.21 +langchain-openai==0.3.12 +pydantic==2.11.3 +pypdf==5.4.0 +pypandoc==1.15 plum-dispatch==1.7.4 -scikit-learn -werkzeug -python-multipart -langgraph -tiktoken -langchainhub -chromadb -langchain -langchain-text-splitters -beautifulsoup4 -langchain-core -PyPDF2 -reportlab \ No newline at end of file +scikit-learn==1.6.1 +Werkzeug==3.1.3 +python-multipart==0.0.20 +langgraph==0.3.27 +tiktoken==0.9.0 +langchainhub==0.1.21 +chromadb==1.0.3 +langchain==0.3.23 +langchain-text-splitters==0.3.8 +beautifulsoup4==4.13.3 +langchain-core==0.3.51 +PyPDF2==3.0.1 +reportlab==4.3.1 +python-docx==1.1.2 diff --git a/scripts/generate_quiz.py b/scripts/generate_quiz.py index a2ebdbc..cd0c0fc 100644 --- a/scripts/generate_quiz.py +++ b/scripts/generate_quiz.py @@ -21,104 +21,163 @@ load_dotenv() os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY") -llm_temp = ChatOpenAI(model="gpt-4o-mini", temperature=0.7,max_tokens=10000) +llm_temp = ChatOpenAI(model="gpt-4o-mini", temperature=0.7) + def generate_quiz(startpop_pdf, quiz_type=None) -> dict: try: - # Define the prompt template for generating the quiz + # Define prompt for summarizing and extracting the required fields quiz_prompt = PromptTemplate( - template=""" -<|begin_of_text|><|start_header_id|>system<|end_header_id|> -You are an assistant designed to generate firefighter interview quizzes based on the STARTPOP framework provided in a PDF document. Your task is to analyze the content of the PDF and create a quiz tailored to the specified quiz type. + template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> + You are a Fire Fighter Interview preparation assistant that generates QUIZ for user based on STARTPOP FORMAT PDF BASED on + + IN THE STARTPOP FORMAT PDF, each theme has its own questions with corresponding STARTPOP framework for each question. -### Quiz Types: -1. **Single Line Text Inputs**: - - Output format: `{"question": "Your question", "correct_answer": "Your correct answer"}` + Your responsibility is to carefully analyze the provided PDF data and then generate a quiz for the user. + You will also be provided with the type of quiz. -2. **Multiple Choice Questions**: - - Output format: `{"question": "Your question", "options": ["Option 1", "Option 2"], "correct_answer": "Correct Option"}` + There are three different types of quizzes namely: -3. **True or False Questions**: - - Output format: `{"question": "Your question", "options": ["True", "False"], "correct_answer": "True or False"}` + 1- Single line text inputs + 2- Multiple Choice questions + 3- True or False questions -Each quiz must include a field called `"quiz_type"` with values `1`, `2`, or `3` corresponding to the quiz type. + For each quiz type, return the following JSON format: -### Project Overview: -Firefighter interviews evaluate candidates based on **7 Main Concepts** and **20 Important Themes**. These are used to assess alignment with firefighting principles, communication skills, problem-solving abilities, and overall competence. + 1. For Single Line Text Inputs: + - A list of objects, each with {{"question": "Your question", "correct_answer": "Your correct answer"}} -#### 7 Main Concepts: -- High Performance Teams -- Situational Awareness -- Being a Great Problem Solver -- Customer Service -- Building Construction & Mechanical Aptitude -- Emergency Medicine Experience -- Mental & Physical Health + 2. For Multiple Choice Questions: + - A list of objects, each with {{"question": "Your question", "options": ["Option 1", "Option 2"], "correct_answer": "Correct Option"}} -#### 20 Important Themes: -- Customer Service -- Conflict -- Challenge -- Leadership -- Stress -- Successful Team -- Diversity -- Mistake -- Unsuccessful Team -- Disagreement -- Bent a Rule -- Delivered a Difficult Message -- Displayed Integrity -- Took a Shortcut -- Didn’t Follow the Rules -- Emergency Response -- Dealt with Disabilities -- Solved a Big Problem -- Continuous Improvement -- Handled Sensitive Information + 3. For True or False Questions: + - A list of objects, each with {{"question": "Your question", "options": ["True", "False"], "correct_answer": "True or False"}} + + Each response should also include a field called "quiz_type" which can be either 1, 2, or 3 respectively. -#### Behavioral Question Starters: -Questions often begin with phrases like: -- "Tell me a time when..." -- "Can you tell me about a time when you..." -- "Describe a situation where you had to..." -- "Give me an example of how you..." -- "Have you ever been in a position where you needed to..." -- "Walk me through a time when you..." + Return just the JSON output without any other explanation or comments. -#### STARTPOP Framework: -The STARTPOP framework enhances the STAR method by adding depth and variety to responses. Its components are: -1. **Situation**: Set up the context (dates, places, circumstances). -2. **Task**: Explain what needed to be done and why. -3. **Actions**: Outline both positive and negative approaches. -4. **Results**: Share outcomes in a time-specific manner. -5. **Transitions**: Ensure professional coherence. -6. **Personal Lessons**: Discuss what you learned. -7. **Other People Observations**: Share insights about others involved. -8. **Professional Connection**: Relate the experience to firefighting. + TO KNOW MORE ABOUT THE PROJECT READ BELOW + ----START------ + Throughout most Probationary Firefighter Interviews, they will be evaluating a ton of things. Typically, they want to see how you align with the **7 Main Concepts of Firefighting**. They are also watching how nervous you are, your communication skills, and your overall general competence for the role. At the end of the day, you want them to like you. -### Instructions: -- Analyze the provided STARTPOP PDF to extract relevant themes and concepts. -- Generate a quiz that builds user confidence by focusing on interview-based scenarios. -- Avoid questions directly about the STARTPOP framework itself (e.g., "What is STARTPOP?"). -- Use the specified quiz type (`quiz_type`) to determine the output format. + ### 7 Main Concepts: + - **High Performance Teams** + - **Situational Awareness** + - **Being a Great Problem Solver** + - **Customer Service** + - **Building Construction, Mechanical Aptitude** + - **Emergency Medicine Experience** + - **Mental and Physical Health** -STARTPOP FULL PDF: {startpop_pdf} -QUIZ TYPE: {quiz_type} + Your crew of four firefighters is usually comprised of a Driver, a Captain, and two firefighters in the back. That is a High-Performance Team. -<|eot_id|><|start_header_id|>user<|end_header_id|> -""", - input_variables=["startpop_pdf", "quiz_type"], + We are frequently dispatched to calls that require using our understanding of Building Construction Concepts, Mechanical Aptitude, and Emergency Medical Experience. When you respond to an emergency event that is inherently dangerous (like a vehicle fire, a car accident in a slanted ditch, a person trapped under a machine, a house fire, or a chemical suicide), you need to use your Situational Awareness to keep that crew safe. + + Sometimes the tools, training, and tactics that you have been taught work perfectly. Sometimes they don’t. Can you be a Good Problem Solver to quickly come up with something to make the situation better for the people, places, and environments that we protect? + + Ultimately, your crew will be serving the public, and the chiefs need to know that you can be trained to be above their desired standard so that you give the public great Customer Service. + + ### 20 Important Themes + Consider the 7 concepts to be the soil. All of your stories grow out of that soil. But not every story works for every question. You need to handpick the right one at the right times to give them. Sort of like how you handpick flowers out of the soil. You NEED to have **20 different flowers** so that you are fully prepared for whatever behavioral question they throw at you. These are the **20 Themes** that you would use for behavioral questions: + - Customer Service + - Conflict + - Challenge + - Leadership + - Stress + - Successful Team + - Diversity + - Mistake + - Unsuccessful Team + - Disagreement + - Bent a Rule + - Delivered a Difficult Message + - Displayed Integrity + - Took a Shortcut + - Didn’t Follow the Rules + - Emergency Response + - Dealt with Disabilities + - Solved a Big Problem + - Continuous Improvement + - Handled Sensitive Information + + ### Behavioral Question Starters + Behavioral questions usually start with phrases like: + - “Tell me a time when…” + - “Can you tell me about a time when you…” + - "Describe a situation where you had to…" + - "Give me an example of how you…" + - "Have you ever been in a position where you needed to…" + - "Walk me through a time when you…" + + ### STARTPOP Framework + The STAR Format is what most people tell you to do in order to answer a firefighter interview question. It’s a great framework. I highly recommend it. I just advise that you pump it up even further. I call it **STARTPOP**. + + Try and pull from different parts of your life. My Chief Training Officer told me that he enjoys candidates that are able to use different experiences to answer the questions. Listening to someone drone on and on about a singular time or type of event in their life is a massive turn-off to the interview panel. That’s a bad thing. Just like most things, variety is the spice of life. + + #### Components of STARTPOP: + 1. **Situation**: + - Set up the answer in the mind of the question asker. + - Your storytelling skills matter here. It has to be concise and impactful (no more than 25 seconds long). + - Include dates, ages, places, and circumstances. + + 2. **Task**: + - Explain what you needed to do and why you needed to do it. + - Recap the situation quickly from a different angle. + + 3. **Actions**: + - Outline both the negative and the positive way of doing things. + - Show high moral character in every question. + + 4. **Results**: + - Explain what happened as a result of your actions. + - Share results in a time-specific manner (e.g., “5 months later X happened”). + + 5. **Transitions**: + - Speak in a way that aligns with professional expectations. + - Ensure coherence in your responses. + + 6. **Personal Lessons**: + - Discuss what you learned about yourself. + - Address any concerns the interviewers might have about hiring you. + + 7. **Other People Observations**: + - Share insights about others in the situation. + - Keep it short and to the point. + + 8. **Professional Connection**: + - Relate your experience directly to the fire service. + - Conclude strongly, avoiding phrases like “and so yeah…”. + ----END------ + + ### Instructions: + - Analyze the provided STARTPOP PDF to extract relevant themes and concepts. + - Generate a quiz that builds user confidence by focusing on interview-based scenarios. + - Avoid questions directly about the STARTPOP framework itself (e.g., "What is STARTPOP?"). + - Use the specified quiz type (`quiz_type`) to determine the output format. + - Generate at least 15 questions and above + + NOTE: The quiz focuses on building user confidence by analyzing the questions and framework presented in the STARTPOP PDF provided by the user. + Based on this framework, create an interview-based quiz specifically for firefighting roles by thoroughly analyzing the document. + + IMPORTANT: The quiz should not directly reference the STARTPOP framework itself. Instead, it should generate interview-based questions derived from the insights of the STARTPOP framework. + For example, avoid questions like "The STARTPOP framework is specifically designed for firefighter interviews." Such questions should not be included in the quiz. + Thank you for your thorough and precise processing! + STARTPOP FULL PDF :{startpop_pdf} + question type : {quiz_type} + P + <|eot_id|><|start_header_id|>user<|end_header_id|>""", + input_variables=["startpop_pdf", "quiz_type", "question"], ) - # Pipeline to process the prompt and parse the output + # Pipeline to process the prompt and parse output quiz_router = quiz_prompt | llm_temp | JsonOutputParser() # Call the pipeline and generate the cohesive output - output = quiz_router.invoke({"startpop_pdf": startpop_pdf, "quiz_type": quiz_type}) - return output + output = quiz_router.invoke({"startpop_pdf": startpop_pdf, "quiz_type": quiz_type, "question": "Your question here"}) + return output except Exception as e: - print(f"Error: {e}") + print(f"Error:{e}") return {} diff --git a/test.py b/test.py index a7a6951..04f71c5 100644 --- a/test.py +++ b/test.py @@ -1,28 +1,61 @@ -import os -import requests -import json -from typing import List -from dotenv import load_dotenv -load_dotenv() +import subprocess +import re +# List of packages you want to include +packages = [ + "openai", + "pandas", + "python-dotenv", + "fastapi", + "uvicorn", + "langchain-community", + "langchain-openai", + "pydantic", + "pypdf", + "pypandoc", + "plum-dispatch==1.7.4", # You specified exact version here + "scikit-learn", + "werkzeug", + "python-multipart", + "langgraph", + "tiktoken", + "langchainhub", + "chromadb", + "langchain", + "langchain-text-splitters", + "beautifulsoup4", + "langchain-core", + "PyPDF2", + "reportlab", + "python-docx" +] -from src.llm import ai_chat +# Get all installed packages with versions +result = subprocess.run(["pip", "freeze"], capture_output=True, text=True) +installed_packages = result.stdout.strip().split('\n') +# Create a dictionary of package names to their full name with version +package_dict = {} +for pkg in installed_packages: + if '==' in pkg: + name = pkg.split('==')[0].lower() + package_dict[name] = pkg -#conversation_id = "12345" # Replace with the actual conversation ID -query = "Hello let us continue" -theme_id = 1 -resume = "Emergency Response Specialist" -conversation_id = 1 -response = ai_chat(query, conversation_id, theme_id, resume) -print(response) -""" -with open(file_path, 'rb') as file: - files = {'file': file} - response = requests.post(upload_url, files=files) - response.raise_for_status() # Ensure we raise an error for bad responses - response_data = response.json() # Get the response in JSON format - print(response_data) - -""" +# Write only the requested packages to requirements.txt +with open('requirements.txt', 'w') as f: + for package in packages: + # Handle cases where version is already specifixed + if '==' in package: + f.write(f"{package}\n") + continue + + # Try to find the package in installed packages + pkg_name = package.lower() + if pkg_name in package_dict: + f.write(f"{package_dict[pkg_name]}\n") + else: + # If not found, just write the package name + f.write(f"{package}\n") + print(f"Warning: {package} not found in installed packages") +print("requirements.txt has been generated.") \ No newline at end of file diff --git a/utils/document_loader.py b/utils/document_loader.py index b30d61a..a9743f9 100644 --- a/utils/document_loader.py +++ b/utils/document_loader.py @@ -1,32 +1,45 @@ import os -from spire.doc import Document, FileFormat -from langchain_community.document_loaders import PyPDFLoader +from docx import Document as DocxDocument +from reportlab.lib.pagesizes import letter +from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer +from reportlab.lib.styles import getSampleStyleSheet +from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader def convert_word_to_pdf(doc_path: str) -> str: """ - Convert a .doc or .docx file to PDF using Spire.Doc. + Convert a .docx file to PDF using python-docx and reportlab. Args: - doc_path (str): The path to the .doc or .docx file. + doc_path (str): The path to the .docx file. Returns: str: The path to the converted PDF file. """ pdf_path = os.path.splitext(doc_path)[0] + '.pdf' - # Create a Document object - document = Document() # Load the Word document - document.LoadFromFile(doc_path) - # Save as PDF - document.SaveToFile(pdf_path, FileFormat.PDF) - document.Close() + doc = DocxDocument(doc_path) + + # Create a PDF + pdf = SimpleDocTemplate(pdf_path, pagesize=letter) + styles = getSampleStyleSheet() + flowables = [] + + # Extract text from paragraphs and add to PDF + for para in doc.paragraphs: + if para.text: + p = Paragraph(para.text, styles['Normal']) + flowables.append(p) + flowables.append(Spacer(1, 12)) + + # Build the PDF + pdf.build(flowables) return pdf_path def load_document(file_path: str): """ - Utility function to load a PDF, DOCX, or DOC file by first converting it to PDF. + Utility function to load a PDF, DOCX, or DOC file. Args: file_path (str): The path to the file to load. @@ -38,16 +51,21 @@ def load_document(file_path: str): try: extension = os.path.splitext(file_path)[1].lower() - if extension in ['.doc', '.docx']: - # Convert .doc or .docx to PDF first + if extension == '.docx': + # For .docx files, use UnstructuredWordDocumentLoader directly + loader = UnstructuredWordDocumentLoader(file_path) + return loader.load() + elif extension == '.doc': + # Convert .doc to .pdf first pdf_path = convert_word_to_pdf(file_path) loader = PyPDFLoader(pdf_path) + return loader.load() elif extension == '.pdf': loader = PyPDFLoader(file_path) + return loader.load() else: raise ValueError(f"Unsupported file type: {extension}. Only .pdf, .docx, and .doc are supported.") - return loader.load() except Exception as e: print(f"Error loading document: {str(e)}") return None \ No newline at end of file