fix mix up updates

2025-04-09 00:59:41 +01:00
parent 1b98f5b130 d34e304017
commit 2c19c3265d
5 changed files with 130 additions and 40 deletions
@@ -0,0 +1 @@
 python=3.11
@@ -1,23 +1,25 @@
-openai
+openai==1.72.0
-pandas
+pandas==2.2.3
-python-dotenv
+python-dotenv==1.1.0
-fastapi
+fastapi==0.115.9
-uvicorn
+uvicorn==0.34.0
-langchain-community
+langchain-community==0.3.21
-langchain-openai
+langchain-openai==0.3.12
-pydantic
+pydantic==2.11.3
-pypdf
+pypdf==5.4.0
-pypandoc
+pypandoc==1.15
 Spire.Doc
 plum-dispatch==1.7.4
-scikit-learn
+scikit-learn==1.6.1
-werkzeug
+Werkzeug==3.1.3
-python-multipart
+python-multipart==0.0.20
-langgraph
+langgraph==0.3.27
-tiktoken
+tiktoken==0.9.0
-langchainhub
+langchainhub==0.1.21
-chromadb
+chromadb==1.0.3
-langchain
+langchain==0.3.23
-langchain-text-splitters
+langchain-text-splitters==0.3.8
-beautifulsoup4
+beautifulsoup4==4.13.3
-langchain-core
+langchain-core==0.3.51
 PyPDF2==3.0.1
 reportlab==4.3.1
 python-docx==1.1.2
@@ -149,10 +149,18 @@ def generate_quiz(startpop_pdf, quiz_type=None) -> dict:
                - Conclude strongly, avoiding phrases like “and so yeah…”.
                ----END------
-                NOTE: THE QUIZ FOCUES ON BULIDNG USER CONFIDENCE BY ANANLYZING  THE QUESTIONS AND FRAMEWORK FOR EACH QUESTION IN THE STARTPOP FRAMEWORK PDF,SOLELY USE THIS PDF PROVIDED BY THE USER 
+               ### Instructions:
-                BASED ON THIS FRAMEWORK , CREATE INTERVIEW BASED QUIZ FOR FIRE FIGHTING ROLE BY ANALYZING THIS DOCUMENT
+                - Analyze the provided STARTPOP PDF to extract relevant themes and concepts.
-                NOTE : THE QUIZ SHOULD NOT BE  BASED ON STARTPOP FRAMEWORK ITSELF BUT ANALYZE THE STARTPOP FRAMEWORK PRESENTED TO GENERATE INTERVIEW BASED QUIZ
+                - Generate a quiz that builds user confidence by focusing on interview-based scenarios.
-                e.g  "The STARTPOP framework is specifically designed for firefighter interviews", THIS KIND OF QUESTION SHOULD NOT BE ASKED IN THE QUIZ....
+                - Avoid questions directly about the STARTPOP framework itself (e.g., "What is STARTPOP?").
                - Use the specified quiz type (`quiz_type`) to determine the output format.
                - Generate at least 15 questions and above
                NOTE: The quiz focuses on building user confidence by analyzing the questions and framework presented in the STARTPOP PDF provided by the user. 
                Based on this framework, create an interview-based quiz specifically for firefighting roles by thoroughly analyzing the document.
                IMPORTANT: The quiz should not directly reference the STARTPOP framework itself. Instead, it should generate interview-based questions derived from the insights of the STARTPOP framework.
                For example, avoid questions like "The STARTPOP framework is specifically designed for firefighter interviews." Such questions should not be included in the quiz.
                Thank you for your thorough and precise processing!
                            STARTPOP FULL PDF :{startpop_pdf}
                            question type : {quiz_type}
@@ -0,0 +1,61 @@
 import re
 import subprocess
 import sys

 # Packages to pin in requirements.txt. An entry that already carries an
 # "==" pin (e.g. plum-dispatch) is written through unchanged.
 packages = [
    "openai",
    "pandas",
    "python-dotenv",
    "fastapi",
    "uvicorn",
    "langchain-community",
    "langchain-openai",
    "pydantic",
    "pypdf",
    "pypandoc",
    "plum-dispatch==1.7.4",  # You specified exact version here
    "scikit-learn",
    "werkzeug",
    "python-multipart",
    "langgraph",
    "tiktoken",
    "langchainhub",
    "chromadb",
    "langchain",
    "langchain-text-splitters",
    "beautifulsoup4",
    "langchain-core",
    "PyPDF2",
    "reportlab",
    "python-docx",
 ]


 def _canonical_name(name):
    """Return *name* in normalized form so spelling variants compare equal.

    Runs of "-", "_" and "." collapse to a single "-" and the result is
    lower-cased, so "Typing_Extensions" and "typing-extensions" match.
    """
    return re.sub(r"[-_.]+", "-", name).lower()


 # Run pip through the current interpreter so the listing describes the
 # environment this script runs in, not whichever "pip" is first on PATH.
 result = subprocess.run(
    [sys.executable, "-m", "pip", "freeze"],
    capture_output=True,
    text=True,
 )
 if result.returncode != 0:
    # Stop before requirements.txt is opened for writing: continuing with an
    # empty listing would replace every pin with a bare package name.
    sys.exit(f"pip freeze failed with exit code {result.returncode}:\n{result.stderr}")
 installed_packages = result.stdout.splitlines()

 # Map normalized package name -> full "Name==version" line.
 # Lines without "==" (editable installs, "name @ url" references) are skipped.
 package_dict = {}
 for pkg in installed_packages:
    name, separator, _version = pkg.partition("==")
    if separator:
        package_dict[_canonical_name(name.strip())] = pkg.strip()

 # Write only the requested packages to requirements.txt
 with open("requirements.txt", "w", encoding="utf-8") as f:
    for package in packages:
        # Handle cases where the version is already specified
        if "==" in package:
            f.write(f"{package}\n")
            continue
        pinned = package_dict.get(_canonical_name(package))
        if pinned is None:
            # Not installed: keep the bare name so the entry is not lost
            f.write(f"{package}\n")
            print(f"Warning: {package} not found in installed packages")
        else:
            f.write(f"{pinned}\n")
 print("requirements.txt has been generated.")
@@ -1,32 +1,45 @@
 import os
-from spire.doc import Document, FileFormat
+from docx import Document as DocxDocument
-from langchain_community.document_loaders import PyPDFLoader
+from reportlab.lib.pagesizes import letter
 from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
 from reportlab.lib.styles import getSampleStyleSheet
 from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader
 def convert_word_to_pdf(doc_path: str) -> str:
    """
    Convert a .docx file to PDF using python-docx and reportlab.

    Only paragraph text is carried over: each non-empty paragraph is rendered
    in reportlab's 'Normal' sample style, followed by a 12-point spacer.
    The PDF is written next to the input, with the extension replaced by
    '.pdf'.

    NOTE(review): the input is opened with python-docx, which targets the
    .docx format; confirm behaviour before passing legacy .doc files.

    Args:
        doc_path (str): The path to the .docx file.
    Returns:
        str: The path to the converted PDF file.
    """
    # Local import keeps this block self-contained; stdlib only.
    from xml.sax.saxutils import escape

    pdf_path = os.path.splitext(doc_path)[0] + '.pdf'

    # Load the Word document. The earlier Spire.Doc `Document()` object is
    # gone: its import was removed, so constructing it raised NameError.
    doc = DocxDocument(doc_path)

    # Create a PDF
    pdf = SimpleDocTemplate(pdf_path, pagesize=letter)
    styles = getSampleStyleSheet()
    flowables = []

    # Extract text from paragraphs and add to PDF
    for para in doc.paragraphs:
        if para.text:
            # Paragraph() parses its text as XML-like markup, so literal
            # '<', '>' and '&' from the document must be escaped first.
            flowables.append(Paragraph(escape(para.text), styles['Normal']))
            flowables.append(Spacer(1, 12))

    # Build the PDF
    pdf.build(flowables)
    return pdf_path
 def load_document(file_path: str):
    """
-    Utility function to load a PDF, DOCX, or DOC file by first converting it to PDF.
+    Utility function to load a PDF, DOCX, or DOC file.
    Args:
        file_path (str): The path to the file to load.
@@ -38,16 +51,21 @@ def load_document(file_path: str):
    try:
        # Dispatch on the lower-cased file extension.
        extension = os.path.splitext(file_path)[1].lower()
-        if extension in ['.doc', '.docx']:
+        if extension == '.docx':
-            # Convert .doc or .docx to PDF first
+            # For .docx files, use UnstructuredWordDocumentLoader directly
            loader = UnstructuredWordDocumentLoader(file_path)
            return loader.load()
        elif extension == '.doc':
            # Convert .doc to .pdf first
            # NOTE(review): convert_word_to_pdf is documented as taking a
            # .docx path; confirm it can actually open legacy .doc input.
            pdf_path = convert_word_to_pdf(file_path)
            loader = PyPDFLoader(pdf_path)
            return loader.load()
        elif extension == '.pdf':
            loader = PyPDFLoader(file_path)
            return loader.load()
        else:
            raise ValueError(f"Unsupported file type: {extension}. Only .pdf, .docx, and .doc are supported.")
        # NOTE(review): unreachable - every branch above returns or raises.
        return loader.load()
    except Exception as e:
        # Any failure, including the ValueError raised above for an
        # unsupported extension, is printed and reported to the caller as None.
        print(f"Error loading document: {str(e)}")
        return None