fix mix up updates

2025-04-09 00:59:41 +01:00
parent 1b98f5b130 d34e304017
commit 2c19c3265d
5 changed files with 130 additions and 40 deletions
@@ -0,0 +1 @@
+python=3.11
@@ -1,23 +1,25 @@
-openai
-pandas
-python-dotenv
-fastapi
-uvicorn
-langchain-community
-langchain-openai
-pydantic
-pypdf
-pypandoc
-Spire.Doc
+openai==1.72.0
+pandas==2.2.3
+python-dotenv==1.1.0
+fastapi==0.115.9
+uvicorn==0.34.0
+langchain-community==0.3.21
+langchain-openai==0.3.12
+pydantic==2.11.3
+pypdf==5.4.0
+pypandoc==1.15
 plum-dispatch==1.7.4
-scikit-learn
-werkzeug
-python-multipart
-langgraph
-tiktoken
-langchainhub
-chromadb
-langchain
-langchain-text-splitters
-beautifulsoup4
-langchain-core
+scikit-learn==1.6.1
+Werkzeug==3.1.3
+python-multipart==0.0.20
+langgraph==0.3.27
+tiktoken==0.9.0
+langchainhub==0.1.21
+chromadb==1.0.3
+langchain==0.3.23
+langchain-text-splitters==0.3.8
+beautifulsoup4==4.13.3
+langchain-core==0.3.51
+PyPDF2==3.0.1
+reportlab==4.3.1
+python-docx==1.1.2
@@ -149,10 +149,18 @@ def generate_quiz(startpop_pdf, quiz_type=None) -> dict:
                - Conclude strongly, avoiding phrases like “and so yeah…”.
                ----END------
                               
-                NOTE: THE QUIZ FOCUES ON BULIDNG USER CONFIDENCE BY ANANLYZING  THE QUESTIONS AND FRAMEWORK FOR EACH QUESTION IN THE STARTPOP FRAMEWORK PDF,SOLELY USE THIS PDF PROVIDED BY THE USER 
-                BASED ON THIS FRAMEWORK , CREATE INTERVIEW BASED QUIZ FOR FIRE FIGHTING ROLE BY ANALYZING THIS DOCUMENT
-                NOTE : THE QUIZ SHOULD NOT BE  BASED ON STARTPOP FRAMEWORK ITSELF BUT ANALYZE THE STARTPOP FRAMEWORK PRESENTED TO GENERATE INTERVIEW BASED QUIZ
-                e.g  "The STARTPOP framework is specifically designed for firefighter interviews", THIS KIND OF QUESTION SHOULD NOT BE ASKED IN THE QUIZ....
+               ### Instructions:
+                - Analyze the provided STARTPOP PDF to extract relevant themes and concepts.
+                - Generate a quiz that builds user confidence by focusing on interview-based scenarios.
+                - Avoid questions directly about the STARTPOP framework itself (e.g., "What is STARTPOP?").
+                - Use the specified quiz type (`quiz_type`) to determine the output format.
+                - Generate at least 15 questions and above
+                
+                NOTE: The quiz focuses on building user confidence by analyzing the questions and framework presented in the STARTPOP PDF provided by the user. 
+                Based on this framework, create an interview-based quiz specifically for firefighting roles by thoroughly analyzing the document.
+                
+                IMPORTANT: The quiz should not directly reference the STARTPOP framework itself. Instead, it should generate interview-based questions derived from the insights of the STARTPOP framework.
+                For example, avoid questions like "The STARTPOP framework is specifically designed for firefighter interviews." Such questions should not be included in the quiz.
                Thank you for your thorough and precise processing!
                            STARTPOP FULL PDF :{startpop_pdf}
                            question type : {quiz_type}
@@ -0,0 +1,61 @@
+# Regenerate requirements.txt for a fixed list of top-level packages, pinning
+# each one to the version that `pip freeze` reports for the current environment.
+import subprocess
+# NOTE(review): `re` is not referenced anywhere in this script.
+import re
+
+# List of packages you want to include
+packages = [
+    "openai",
+    "pandas",
+    "python-dotenv",
+    "fastapi",
+    "uvicorn",
+    "langchain-community",
+    "langchain-openai",
+    "pydantic",
+    "pypdf",
+    "pypandoc",
+    "plum-dispatch==1.7.4",  # You specified exact version here
+    "scikit-learn",
+    "werkzeug",
+    "python-multipart",
+    "langgraph",
+    "tiktoken",
+    "langchainhub",
+    "chromadb",
+    "langchain",
+    "langchain-text-splitters",
+    "beautifulsoup4",
+    "langchain-core",
+    "PyPDF2",
+    "reportlab",
+    "python-docx"
+]
+
+# Get all installed packages with versions
+# NOTE(review): the return code of `pip freeze` is not checked, and `pip` is
+# resolved from PATH - presumably the active environment's pip; confirm.
+result = subprocess.run(["pip", "freeze"], capture_output=True, text=True)
+installed_packages = result.stdout.strip().split('\n')
+
+# Create a dictionary of package names to their full name with version
+# Keys are lower-cased so the lookup further down is case-insensitive. Lines
+# without '==' are skipped (presumably editable/URL installs - confirm).
+package_dict = {}
+for pkg in installed_packages:
+    if '==' in pkg:
+        name = pkg.split('==')[0].lower()
+        package_dict[name] = pkg
+
+# Write only the requested packages to requirements.txt
+# The relative path means the file is created in (or overwrites one in) the
+# current working directory.
+with open('requirements.txt', 'w') as f:
+    for package in packages:
+        # Handle cases where version is already specified
+        if '==' in package:
+            f.write(f"{package}\n")
+            continue
+            
+        # Try to find the package in installed packages
+        # The matched line is written with pip's own spelling of the name.
+        # NOTE(review): matching is by lower-cased name only; a name pip reports
+        # with different separators ('_' vs '-') would presumably miss - confirm.
+        pkg_name = package.lower()
+        if pkg_name in package_dict:
+            f.write(f"{package_dict[pkg_name]}\n")
+        else:
+            # If not found, just write the package name
+            f.write(f"{package}\n")
+            print(f"Warning: {package} not found in installed packages")
+
+print("requirements.txt has been generated.")
@@ -1,32 +1,45 @@
 import os
-from spire.doc import Document, FileFormat
-from langchain_community.document_loaders import PyPDFLoader
+from docx import Document as DocxDocument
+from reportlab.lib.pagesizes import letter
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
+from reportlab.lib.styles import getSampleStyleSheet
+from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader

 def convert_word_to_pdf(doc_path: str) -> str:
    """
-    Convert a .doc or .docx file to PDF using Spire.Doc.
+    Convert a .docx file to PDF using python-docx and reportlab.
    
    Args:
-        doc_path (str): The path to the .doc or .docx file.
+        doc_path (str): The path to the .docx file.

    Returns:
        str: The path to the converted PDF file.
    """
+    # The PDF is written next to the input, with the same base name and a .pdf extension.
    pdf_path = os.path.splitext(doc_path)[0] + '.pdf'
    
-    # Create a Document object
-    document = Document()
    # Load the Word document
-    document.LoadFromFile(doc_path)
-    # Save as PDF
-    document.SaveToFile(pdf_path, FileFormat.PDF)
-    document.Close()
+    # NOTE(review): load_document() passes '.doc' paths to this function, while
+    # the docstring above covers .docx only - presumably python-docx cannot
+    # open legacy .doc files; confirm.
+    doc = DocxDocument(doc_path)
+    
+    # Create a PDF
+    pdf = SimpleDocTemplate(pdf_path, pagesize=letter)
+    styles = getSampleStyleSheet()
+    flowables = []
+    
+    # Extract text from paragraphs and add to PDF
+    # Only the text of doc.paragraphs is copied, in the 'Normal' style; empty
+    # paragraphs are skipped and each kept paragraph is followed by a 12-unit spacer.
+    # NOTE(review): para.text is passed to Paragraph unescaped; reportlab
+    # presumably parses it as inline markup, so '<' or '&' in the document may
+    # fail or render wrongly - confirm (xml.sax.saxutils.escape would guard).
+    for para in doc.paragraphs:
+        if para.text:
+            p = Paragraph(para.text, styles['Normal'])
+            flowables.append(p)
+            flowables.append(Spacer(1, 12))
+    
+    # Build the PDF
+    pdf.build(flowables)
    
    return pdf_path

 def load_document(file_path: str):
    """
-    Utility function to load a PDF, DOCX, or DOC file by first converting it to PDF.
+    Utility function to load a PDF, DOCX, or DOC file.

    Args:
        file_path (str): The path to the file to load.
@@ -38,16 +51,21 @@ def load_document(file_path: str):
    try:
        extension = os.path.splitext(file_path)[1].lower()
        
-        if extension in ['.doc', '.docx']:
-            # Convert .doc or .docx to PDF first
+        if extension == '.docx':
+            # For .docx files, use UnstructuredWordDocumentLoader directly
+            loader = UnstructuredWordDocumentLoader(file_path)
+            return loader.load()
+        elif extension == '.doc':
+            # Convert .doc to .pdf first
            pdf_path = convert_word_to_pdf(file_path)
            loader = PyPDFLoader(pdf_path)
+            return loader.load()
        elif extension == '.pdf':
            loader = PyPDFLoader(file_path)
+            return loader.load()
        else:
            raise ValueError(f"Unsupported file type: {extension}. Only .pdf, .docx, and .doc are supported.")
        
-        return loader.load()
    except Exception as e:
        print(f"Error loading document: {str(e)}")
        return None