DS APIs implemented

Stop tracking .env file
Stop tracking virtual environment
2025-02-06 20:12:43 +00:00 · 2024-08-30 01:49:42 +00:00 · 2024-08-30 01:48:22 +00:00 · 2024-08-30 01:45:18 +00:00 · 2024-08-30 01:42:26 +00:00 · 2024-08-16 23:09:27 +01:00
53 changed files with 6437 additions and 16 deletions
@@ -0,0 +1,2 @@
+GROQ_API_KEY = "<your-groq-api-key>"
+OPENAI_API_KEY = "<your-openai-api-key>"
@@ -1,2 +1 @@
-data
-.env
+data
@@ -0,0 +1,83 @@
+import sys, os
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+from fastapi import FastAPI, File, UploadFile, BackgroundTasks, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pathlib import Path
+from utils import load_documents_from_directory, create_vector_store, save_embedded_data, process_directory
+from pydantic import BaseModel
+from search import search_and_summarize
+from typing import List
+
+
+# FastAPI application object that the routes below are registered on
+app = FastAPI()
+
+
+# Origins allowed to make cross-origin (CORS) requests to this API
+origins = [
+    "http://localhost",
+    "http://localhost:8000",
+    "http://localhost:3000",
+    # Add other allowed origins here
+]
+
+# Register the CORS middleware, restricted to the origins listed above
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,  # Allows requests from listed origins
+    allow_credentials=True,
+    allow_methods=["*"],  # Allows all HTTP methods
+    allow_headers=["*"],  # Allows all headers
+)
+
+# Directory where uploaded files are saved (relative to the working directory)
+UPLOAD_DIR = Path("./uploads")
+
+# Create the directory at import time so the upload handler can write into it
+UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
+
+class SearchRequest(BaseModel):
+    """Request body carrying a single search query string."""
+    # Query text; the /search/ handler passes it to search_and_summarize
+    query: str
+
+def load_documents(directory: str):
+    
+    # loading the documents from the directory
+    documents, docs_id, num_pages = load_documents_from_directory(directory)
+    # embedding the documents
+    embed_db = create_vector_store(documents, docs_id, num_pages)
+    # saving the embedded data
+    status = save_embedded_data(embed_db)
+    # creating the thumbnails
+    status = process_directory(directory)
+    
+    return {"status": "Documents loaded successfully"}
+
+# NOTE(review): this repeats the SearchRequest model declared earlier in this
+# file; being the later definition, it is the one bound when /search/ is declared.
+class SearchRequest(BaseModel):
+    # Query text supplied by the client
+    query: str
+
+@app.post("/search/")
+def search(request: SearchRequest):
+    
+    # Perform search using the utility function
+    results = search_and_summarize(request.query)
+    
+    return {"results": results}
+
+@app.post("/upload/")
+async def upload_file(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
+    
+    file_location = UPLOAD_DIR/file.filename
+    
+    # Save the uploaded file to the specified location
+    with open(file_location, "wb") as buffer:
+        buffer.write(await file.read())
+    
+    # Add the load_documents function to the background tasks
+    background_tasks.add_task(load_documents, str(UPLOAD_DIR))
+    
+    # Return the location of the saved file and inform about the successful upload
+    return {"message": "Upload successful. Document loading will begin shortly.", "file_location": str(UPLOAD_DIR)}
+
+
+# Serve the app with uvicorn when this module is executed directly
+if __name__ == "__main__":
+    import uvicorn
+    # host "0.0.0.0" listens on all network interfaces, port 8000
+    uvicorn.run(app, host="0.0.0.0", port=8000)
@@ -0,0 +1,127 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+Pipfile.lock
+
+# poetry
+poetry.lock
+.poetry/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+erp/
+env.bak/
+venv.bak/
+.spyproject
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pyright type checker
+.pyright/
+
+# VS Code
+.vscode/
@@ -0,0 +1,184 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_community.document_loaders import PyPDFLoader\n",
+    "loader = PyPDFLoader(\"/content/Example SOP (1) (1).pdf\")\n",
+    "docs = loader.load()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'SOPsResponse' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[1], line 22\u001b[0m\n\u001b[1;32m     19\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mRoles_response\u001b[39;00m(BaseModel):\n\u001b[1;32m     20\u001b[0m     roles: \u001b[38;5;28mlist\u001b[39m[\u001b[38;5;28mstr\u001b[39m]\n\u001b[0;32m---> 22\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mSopGenerator\u001b[39;00m:\n\u001b[1;32m     23\u001b[0m     \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m     24\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapi_key \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mgetenv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOPENAI_API_KEY\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+      "Cell \u001b[0;32mIn[1], line 89\u001b[0m, in \u001b[0;36mSopGenerator\u001b[0;34m()\u001b[0m\n\u001b[1;32m     60\u001b[0m   response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclient\u001b[38;5;241m.\u001b[39mbeta\u001b[38;5;241m.\u001b[39mchat\u001b[38;5;241m.\u001b[39mcompletions\u001b[38;5;241m.\u001b[39mparse(\n\u001b[1;32m     61\u001b[0m             model\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel,\n\u001b[1;32m     62\u001b[0m             messages\u001b[38;5;241m=\u001b[39m[\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     83\u001b[0m     temperature\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.1\u001b[39m\n\u001b[1;32m     84\u001b[0m     )\n\u001b[1;32m     86\u001b[0m   \u001b[38;5;28;01mreturn\u001b[39;00m json\u001b[38;5;241m.\u001b[39mloads(response\u001b[38;5;241m.\u001b[39mchoices[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mmessage\u001b[38;5;241m.\u001b[39mcontent)\n\u001b[0;32m---> 89\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mgenerate_sops\u001b[39m(\u001b[38;5;28mself\u001b[39m, roles, docs_text) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[43mSOPsResponse\u001b[49m:\n\u001b[1;32m     90\u001b[0m     roles_sops_all \u001b[38;5;241m=\u001b[39m {}\n\u001b[1;32m     92\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m role \u001b[38;5;129;01min\u001b[39;00m roles:\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'SOPsResponse' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "import json\n",
+    "from openai import OpenAI\n",
+    "from pydantic import BaseModel, Field\n",
+    "from typing import List, Dict, Optional\n",
+    "\n",
+    "class SOPs(BaseModel):\n",
+    "    must: Optional[List[str]] = Field(default_factory=list)\n",
+    "    shall: Optional[List[str]] = Field(default_factory=list)\n",
+    "    will: Optional[List[str]] = Field(default_factory=list)\n",
+    "\n",
+    "class RoleSOPs(BaseModel):\n",
+    "    sops: SOPs\n",
+    "\n",
+    "class SOPsFound(BaseModel):\n",
+    "    message:str\n",
+    "    status:bool\n",
+    "\n",
+    "class Roles_response(BaseModel):\n",
+    "    roles: list[str]\n",
+    "    \n",
+    "class SOPsResponse(BaseModel):\n",
+    "    roles_sops: Dict[str, SOPs] = Field(default_factory=dict)\n",
+    "class SopGenerator:\n",
+    "    def __init__(self):\n",
+    "        self.api_key = os.getenv(\"OPENAI_API_KEY\")\n",
+    "        self.client = OpenAI(api_key=self.api_key)\n",
+    "        self.model = \"gpt-4o-mini\"\n",
+    "\n",
+    "    def get_roles(self, docs):\n",
+    "        # Extract the text content from the Document objects\n",
+    "        docs_text = [doc.page_content for doc in docs] \n",
+    "        response = self.client.beta.chat.completions.parse(\n",
+    "        model=self.model,\n",
+    "        messages=[\n",
+    "            {\n",
+    "                \"role\": \"system\",\n",
+    "                \"content\": '''Suppose you are a role/postion extractor from a company document ,\n",
+    "                 you extract the roles as a list e.g[\"finacial analyist,\"data scientist]... etc\n",
+    "                 if no roles are found return and empty list''',\n",
+    "            },\n",
+    "            {\n",
+    "                \"role\": \"user\",\n",
+    "                \"content\": [\n",
+    "                    {\n",
+    "                        \"type\": \"text\", # Changed from \"document chunk\" to \"text\"\n",
+    "                        \"text\": text\n",
+    "                    } for text in docs_text\n",
+    "                ]\n",
+    "            }\n",
+    "        ],\n",
+    "        response_format=Roles_response,\n",
+    "        max_tokens=1024,\n",
+    "        temperature=0.1\n",
+    "        )\n",
+    "    \n",
+    "        return json.loads(response.choices[0].message.content)\n",
+    "    \n",
+    "    def check_role_sop(self, role, docs):\n",
+    "\n",
+    "      docs_text = [doc.page_content for doc in docs] \n",
+    "      response = self.client.beta.chat.completions.parse(\n",
+    "                model=self.model,\n",
+    "                messages=[\n",
+    "                    {\n",
+    "                        \"role\": \"system\",\n",
+    "                        \"content\": f'''Tour roles is to check if the sops for the provided roles: {roles} are found in the document\n",
+    "                        i.e You are validating the document can provide the sops \n",
+    "                        you retur status = True if yes and a proper message and status=False and a proper message if no\n",
+    "                        keep the message short e.g \"sops found for the roles: roles..\n",
+    "                        '''\n",
+    "                    },\n",
+    "                    {\n",
+    "                \"role\": \"user\",\n",
+    "                \"content\": [\n",
+    "                    {\n",
+    "                        \"type\": \"text\", # Changed from \"document chunk\" to \"text\"\n",
+    "                        \"text\": text\n",
+    "                    } for text in docs_text\n",
+    "                ]\n",
+    "            }\n",
+    "        ],\n",
+    "        response_format=SOPsFound,\n",
+    "        max_tokens=1024,\n",
+    "        temperature=0.1\n",
+    "        )\n",
+    "      \n",
+    "      return json.loads(response.choices[0].message.content)\n",
+    "\n",
+    "\n",
+    "    def generate_sops(self, roles, docs_text) -> SOPsResponse:\n",
+    "        roles_sops_all = {}\n",
+    "\n",
+    "        for role in roles:\n",
+    "            docs_text = [doc.page_content for doc in docs] \n",
+    "            response = self.client.beta.chat.completions.parse(\n",
+    "                model=self.model,\n",
+    "                messages=[\n",
+    "                    {\n",
+    "                        \"role\": \"system\",\n",
+    "                        \"content\": f'''You are a Standard Operating Procedure (SOP) extractor. Your task is to find SOPs for the role \"{role}\" in the provided text. '\n",
+    "                            'SOPs should be categorized under \"must\", \"shall\", and \"will\". '\n",
+    "                            'If no SOPs are found for the role, return an empty list for each category'''\n",
+    "                    },\n",
+    "                    {\n",
+    "                \"role\": \"user\",\n",
+    "                \"content\": [\n",
+    "                    {\n",
+    "                        \"type\": \"text\", # Changed from \"document chunk\" to \"text\"\n",
+    "                        \"text\": text\n",
+    "                    } for text in docs_text\n",
+    "                ]\n",
+    "            }\n",
+    "        ],\n",
+    "        response_format=RoleSOPs,\n",
+    "        max_tokens=1024,\n",
+    "        temperature=0.1\n",
+    "        )\n",
+    "            role_sop = json.loads(response.choices[0].message.content)\n",
+    "            roles_sops_all[role] = role_sop\n",
+    "            \n",
+    "        return roles_sops_all\n",
+    "           \n",
+    "# Example usage:\n",
+    "service = SopGenerator()\n",
+    "roles = [\"Devops engineers\"]\n",
+    "sops_response = service.check_role_sop(roles,docs)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "erp",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
@@ -0,0 +1,6 @@
+openai
+langchain-community
+langchain-openai
+pydantic
+flask
+python-dotenv
@@ -0,0 +1,58 @@
+import os
+
+def create_folders(base_dir):
+    folders = [
+        "data/raw",
+        "data/processed",
+        "data/external",
+        "data/interim",
+        "notebooks",
+        "src/data",
+        "src/features",
+        "src/models",
+        "src/api",
+        "src/services",
+        "src/services/background_tasks",
+        "src/utils",
+        "tests/test_services",
+        "scripts",
+        "models",
+        "docs",
+        "config"
+    ]
+    
+    # Create all the folders
+    for folder in folders:
+        folder_path = os.path.join(base_dir, folder)
+        os.makedirs(folder_path, exist_ok=True)
+        print(f"Created folder: {folder_path}")
+
+    # Create some essential files
+    essential_files = [
+        "requirements.txt",
+        "environment.yml",
+        ".gitignore",
+        "README.md",
+        "setup.py",
+        "src/__init__.py",
+        "src/data/__init__.py",
+        "src/features/__init__.py",
+        "src/models/__init__.py",
+        "src/api/__init__.py",
+        "src/services/__init__.py",
+        "src/services/background_tasks/__init__.py",
+        "src/utils/__init__.py",
+        "tests/__init__.py",
+        "tests/test_services/__init__.py"
+    ]
+    
+    for file in essential_files:
+        file_path = os.path.join(base_dir, file)
+        if not os.path.exists(file_path):
+            with open(file_path, 'w') as f:
+                f.write("")  # Create an empty file
+            print(f"Created file: {file_path}")
+
+# Build the skeleton in place when the script is run directly
+if __name__ == "__main__":
+    project_root = "."  # Use the current directory as the project root
+    create_folders(project_root)
@@ -0,0 +1,57 @@
+# Python bytecode
+__pycache__/
+*.py[cod]
+
+# Distribution / packaging
+.Python
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+*.egg-info/
+dist/
+build/
+
+# IDEs and editors
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# Jupyter Notebook checkpoints
+.ipynb_checkpoints
+
+# Pytest cache
+.cache
+
+# Coverage reports
+htmlcov/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+test-results/
+
+# MyPy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# C extensions
+*.so
+
+# Data files
+*.csv
+*.dat
+*.db
+*.sqlite3
+
+# Logs
+*.log
+
+# Environment variables
+.env
@@ -0,0 +1,353 @@
+import os
+from typing import Optional
+from fastapi import FastAPI, HTTPException, Security, Depends
+from fastapi.security import APIKeyHeader
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+from dotenv import load_dotenv
+from utils.document_loader import load_document
+import json
+from pydantic import BaseModel
+from src.llm import ai_chat
+from langchain_openai import ChatOpenAI
+import requests
+import tempfile
+from scripts.generate_pdf  import create_pdf
+from scripts.generate_theme import generate_theme
+from scripts.generate_quiz import generate_quiz
+from typing import Dict, Any
+from fastapi.responses import Response
+from datetime import datetime
+from fastapi import HTTPException
+from pydantic import BaseModel
+from typing import Optional, Union, Dict, Any
+import os
+import requests
+import os
+from PyPDF2 import PdfReader
+from config import QUIZ_TYPES
+# Load environment variables
+load_dotenv()
+# Expected API key, read from the API_KEY_ACCESS environment variable
+# (None when the variable is not set)
+API_KEY = os.getenv("API_KEY_ACCESS")
+
+# Paths of the JSON configuration files under data/config_files
+base_path = os.path.join("data", "config_files")
+QUESTIONS_PATH = os.path.join(base_path, "questions.json")
+THEME_CONTEXT_PATH = os.path.join(base_path, "theme_context.json")
+
+# Load themes at module level (importing this module fails if the file is missing)
+with open(THEME_CONTEXT_PATH, "r") as f:
+    themes = json.load(f)
+
+# Initialize FastAPI app
+app = FastAPI(
+    title="Fire Fighter Interview API",
+    description="API For fire fighter",
+    version="1.0.0"
+)
+
+# Add CORS middleware
+# NOTE(review): a wildcard origin list is combined with allow_credentials=True
+# here — confirm this is intended before exposing the API publicly.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Setup API key authentication: the key is read from the "Authorization" header.
+# auto_error=False — presumably so a missing header is rejected by get_api_key's
+# own 401 response rather than by FastAPI; confirm.
+api_key_header = APIKeyHeader(name="Authorization", auto_error=False)
+
+
+
+async def get_api_key(api_key_header: str = Security(api_key_header)) -> str:
+    """Validate API key from header"""
+    if not api_key_header or not api_key_header.startswith('Bearer '):
+        raise HTTPException(
+            status_code=401,
+            detail={"error": "Unauthorized", "message": "API key is missing or invalid."}
+        )
+    
+    token = api_key_header.split(' ')[1]
+    if token != API_KEY:
+        raise HTTPException(
+            status_code=401,
+            detail={"error": "Unauthorized", "message": "API key does not match."}
+        )
+    
+    return token
+
+class ChatRequest(BaseModel):
+    resume_url: Optional[str] = None
+    query: str=None
+    conversation_id: str
+    theme_id: Optional[int] = 1
+
+class ChatResponse(BaseModel):
+    """Response body returned by the chat endpoint."""
+    # Reply text
+    message: str
+    # True when the parsed model reply has "end" equal to "yes"
+    end: bool
+    # Optional error description; the chat endpoint always sets it to None
+    error: Optional[str] = None
+
+
+
+class GeneratePDFRequest(BaseModel):
+    """Request body for the theme PDF generation endpoint."""
+    # Key used to look up the stored conversation data
+    conversation_id: str
+    # Optional feedback forwarded to generate_theme
+    feedback: Optional[str] = None
+    # Optional earlier result forwarded to generate_theme as previous_result
+    previous_results: Optional[Dict[str, Any]] = None
+    # Optional URL of a resume document to load
+    resume_url: Optional[str] = None
+    # Optional URL of a full-history document to load
+    full_history_url: Optional[str] = None
+
+class QuizRequest(BaseModel):
+    """Request body for the quiz generation endpoint."""
+    # URL the source PDF is downloaded from
+    pdf_url: str
+    quiz_type: int  # 1, 2, or 3 corresponding to QUIZ_TYPES
+
+class QuizResponse(BaseModel):
+    """Response body returned by the quiz generation endpoint."""
+    # Whether a quiz was produced
+    success: bool
+    # Human-readable outcome description
+    message: str
+    # Generated quiz; left as None on failure
+    quiz_data: Optional[Dict[str, Any]] = None
+    # Short failure reason; left as None on success
+    error: Optional[str] = None
+    
+
+async def extract_pdf_text(pdf_url: str) -> Union[str, None]:
+    """Extract text from PDF and handle potential errors."""
+    try:
+        response = requests.get(pdf_url)
+        response.raise_for_status()
+        
+        # Create a temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_pdf:
+            temp_pdf.write(response.content)
+            temp_path = temp_pdf.name
+
+        # Extract text from PDF
+        reader = PdfReader(temp_path)
+        text = "\n\n".join(
+            page.extract_text() for page in reader.pages if page.extract_text()
+        )
+
+        # Clean up temporary file
+        os.unlink(temp_path)
+
+        if not text.strip():
+            return None
+        return text
+
+    except requests.RequestException as e:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Error downloading PDF: {str(e)}"
+        )
+    except Exception as e:
+        raise HTTPException(
+            status_code=500,
+            detail=f"Error processing PDF: {str(e)}"
+        )
+
+@app.post("/rescue-career/chat", response_model=ChatResponse)
+async def chat_endpoint(
+    request: ChatRequest,
+    api_key: str = Depends(get_api_key)
+):
+    try:
+        # Validate theme
+        matching_themes = [t for t in themes if t["id"] == request.theme_id]
+        if not matching_themes:
+            raise HTTPException(
+                status_code=400,
+                detail=f"No theme found with ID {request.theme_id}"
+            )
+
+        # Only try to load document if resume_url is provided
+        resume_docs = ""
+        if request.resume_url:
+            docs = load_document(request.resume_url)
+            if not docs:
+                raise HTTPException(
+                    status_code=400, 
+                    detail="Invalid resume URL: Unable to fetch document"
+                )
+            resume_docs = "\n".join(f"- {doc.page_content}" for doc in docs)
+
+        # Get AI chat response
+        response = ai_chat(
+            query=request.query,
+            conversation_id=request.conversation_id,
+            theme_id=request.theme_id,
+            resume=resume_docs
+        )
+
+        # Parse response
+        try:
+            parsed_response = json.loads(response)
+            return ChatResponse(
+                message=parsed_response.get("message", ""),
+                end=parsed_response.get("end", "no") == "yes",
+                error=None
+            )
+        except json.JSONDecodeError:
+            return ChatResponse(
+                message=response,
+                end=False,
+                error=None
+            )
+
+    except HTTPException as e:
+        # Re-raise HTTP exceptions
+        raise
+    except Exception as e:
+        raise HTTPException(
+            status_code=500,
+            detail=f"Error processing chat request: {str(e)}"
+        )
+     
+
+    
+@app.post("/rescue-career/generate-theme")
+async def generate_pdf_endpoint(
+    request: GeneratePDFRequest,
+    api_key: str = Depends(get_api_key)
+):
+    
+    try:
+        # Here you would fetch the conversation data using the conversation_id
+        # This is a placeholder - replace with your actual conversation data fetching logic
+        conversation_data = await get_conversation_data(request.conversation_id)
+        
+        if not conversation_data:
+            raise HTTPException(
+                status_code=404,
+                detail=f"No conversation found with ID {request.conversation_id}"
+            )
+        
+        resume_docs = ""
+        if request.resume_url:
+            docs = load_document(request.resume_url)
+            if not docs:
+                raise HTTPException(
+                    status_code=400, 
+                    detail="Invalid resume URL: Unable to fetch document"
+                )
+            resume_docs = "\n".join(f"- {doc.page_content}" for doc in docs)
+
+
+        full_history_docs = ""
+        if request.full_history_url:
+            docs = load_document(request.full_history_url)
+            if not docs:
+                raise HTTPException(
+                    status_code=400, 
+                    detail="Invalid full_history URL: Unable to fetch document"
+                )
+            full_history_docs = "\n".join(f"- {doc.page_content}" for doc in docs)
+        # Generate theme data using the generate_theme function
+        theme_data = generate_theme(
+            conversation_data=conversation_data,
+            feedback=request.feedback,
+            previous_result=request.previous_results,
+            resume = resume_docs,
+            full_history = full_history_docs
+        )  
+        
+        if not theme_data:
+            raise HTTPException(
+                status_code=500,
+                detail="Failed to generate theme data"
+            )
+        
+        # Generate the PDF using the create_pdf function
+        pdf_content = create_pdf(theme_data)
+        
+        # Create filename with timestamp
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        filename = f"theme_{timestamp}.pdf"
+        
+        # Return the PDF as a response
+        return Response(
+            content=pdf_content,
+            media_type="application/pdf",
+            headers={
+                "Content-Disposition": f'attachment; filename="{filename}"'
+            }
+        )
+    
+    except Exception as e:
+        raise HTTPException(
+            status_code=500,
+            detail=f"Error generating PDF: {str(e)}"
+        )
+        
+@app.post("/rescue-career/generate-quiz", response_model=QuizResponse)
+async def generate_quiz_endpoint(
+    request: QuizRequest,
+    api_key: str = Depends(get_api_key)
+):
+    """Generate quiz based on PDF content and quiz type."""
+    # Validate quiz type
+    if request.quiz_type not in QUIZ_TYPES:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid quiz type. Must be one of: {list(QUIZ_TYPES)}"
+        )
+
+    try:
+        # Extract text from PDF
+        pdf_text = await extract_pdf_text(request.pdf_url)
+        if not pdf_text:
+            return QuizResponse(
+                success=False,
+                message="PDF extraction completed but no text content found",
+                error="Empty PDF content"
+            )
+            
+        # Generate quiz using the existing function
+        quiz_data = generate_quiz(
+            startpop_pdf=pdf_text,
+            quiz_type=request.quiz_type
+        )
+
+        if not quiz_data:
+            return QuizResponse(
+                success=False,
+                message="Quiz generation failed",
+                error="Unable to generate quiz from the provided content"
+            )
+
+        return QuizResponse(
+            success=True,
+            message="Quiz generated successfully",
+            quiz_data=quiz_data
+        )
+
+    except HTTPException as he:
+        raise he
+    except Exception as e:
+        raise HTTPException(
+            status_code=500,
+            detail=f"Unexpected error during quiz generation: {str(e)}"
+        )
+        
+        
+        
+async def get_conversation_data(conversation_id: str) -> Optional[dict]:
+    """
+    Fetch conversation data using the conversation ID.
+
+    Reads ``conversations.json`` from the working directory and returns the
+    entry stored under ``conversation_id``. Any failure (missing file, invalid
+    JSON, unknown ID, ...) is printed and reported as ``None``.
+    """
+    try:
+        storage_path = "conversations.json"
+        with open(storage_path, 'r') as f:
+                convs =  json.load(f)
+        # Raises KeyError for an unknown ID, which the handler below turns into None
+        convs_id = convs[conversation_id]
+        return convs_id
+    except Exception as e:
+        print(f"Error fetching conversation data: {e}")
+        return None
+    
+    
+# NOTE(review): newer FastAPI releases favour lifespan handlers over on_event —
+# confirm against the installed version before adding logic here.
+@app.on_event("startup")
+async def startup_event():
+    """Startup hook; currently a placeholder that does nothing."""
+    pass
+
+# Serve the app with uvicorn when this module is executed directly
+if __name__ == "__main__":
+    import uvicorn
+    # "app:app" import string with reload=True; listens on all interfaces, port 5048
+    uvicorn.run("app:app", host="0.0.0.0", port=5048, reload=True)
@@ -0,0 +1,20 @@
+# Supported quiz types, keyed by integer ID. Each entry has a display "name"
+# and a "format" string showing the JSON shape of one question of that type.
+QUIZ_TYPES = {
+    1: {
+        "name": "Single Line Text Inputs",
+        "format": """
+        {"question": "Your question here", "correct_answer": "Your correct answer here"}
+        """
+    },
+    2: {
+        "name": "Multiple Choice Questions",
+        "format": """
+        {"question": "Your question here", "options": ["Option 1", "Option 2", "Option 3"], "correct_answer": "Correct Option"}
+        """
+    },
+    3: {
+        "name": "True or False Questions",
+        "format": """
+        {"question": "Your question here", "options": ["True", "False"], "correct_answer": "True or False"}
+        """
+    }
+}
@@ -0,0 +1,112 @@
+%PDF-1.4
+%“Œ‹ž ReportLab Generated PDF document http://www.reportlab.com
+1 0 obj
+<<
+/F1 2 0 R /F2 3 0 R
+>>
+endobj
+2 0 obj
+<<
+/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
+>>
+endobj
+3 0 obj
+<<
+/BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font
+>>
+endobj
+4 0 obj
+<<
+/Contents 10 0 R /MediaBox [ 0 0 612 792 ] /Parent 9 0 R /Resources <<
+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
+>> /Rotate 0 /Trans <<
+
+>> 
+  /Type /Page
+>>
+endobj
+5 0 obj
+<<
+/Contents 11 0 R /MediaBox [ 0 0 612 792 ] /Parent 9 0 R /Resources <<
+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
+>> /Rotate 0 /Trans <<
+
+>> 
+  /Type /Page
+>>
+endobj
+6 0 obj
+<<
+/Contents 12 0 R /MediaBox [ 0 0 612 792 ] /Parent 9 0 R /Resources <<
+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
+>> /Rotate 0 /Trans <<
+
+>> 
+  /Type /Page
+>>
+endobj
+7 0 obj
+<<
+/PageMode /UseNone /Pages 9 0 R /Type /Catalog
+>>
+endobj
+8 0 obj
+<<
+/Author (\(anonymous\)) /CreationDate (D:20250120220756+00'00') /Creator (\(unspecified\)) /Keywords () /ModDate (D:20250120220756+00'00') /Producer (ReportLab PDF Library - www.reportlab.com) 
+  /Subject (\(unspecified\)) /Title (\(anonymous\)) /Trapped /False
+>>
+endobj
+9 0 obj
+<<
+/Count 3 /Kids [ 4 0 R 5 0 R 6 0 R ] /Type /Pages
+>>
+endobj
+10 0 obj
+<<
+/Filter [ /ASCII85Decode /FlateDecode ] /Length 1471
+>>
+stream
+Gat=+gMRrh&:O:SbZil>G-rjRP4K==Nf+fpi[!5#.*rcf40)r3,o)Rn7K*&m\JN5e9I1&I!jp.oR5=$MN"QuT-OJ\C!k?M5\).ii;TK29-5R?:*oT:Eq0J42/#P1jNr3@42@@dSnq=FtrCOB*U/TM>'RrS-:+rcRl"ua(C%fUG]DL4Lp0iDkqsaBBr<XQ1hp8WaVt]di+T!+`?fCq[%0YT6I@c#"RU1:Os*ZpLa.SD^LTm?+0a0XEK0q)]6=:1,4(-tLrRdFg]Id7r:4q);6VAO<73Qn-*NY6,8??L$OOs_U]%(C=e.S.F`>^CEjU_k8_I+OQV*jhY^6HjXQ<(L._$X7"-aYo:VK+*XQL;imebtm^]oWN:^$-RYUh4`I"8PN[F"m'#IiUWIE]SXEk`ifa%NbUN6MoWj`q*'p)I?e5jUAR'#!%PMk-Tq#8kYKaK3%'3S7&Y#`U<uP*$+<0q*Qd/5tm"<nTh5O0Or%Uag7);E4OR)Wb!c?JCoaU&u6#780-E/>!Ws5H)YS;9nqoq5-qge]iA_s"]#Bc_E&%SdW#ZUN;!&._Eb8jNFE<#YN`/cSr\-/amaP]%W[Jc95RPo@*SHRo.u9L#*FNMG1FPK.'[Q?/GCfgm_`!pfidM]"L?8jM&WgAoV:L1eXSA+M@+VaM\M'@V.0TRpWD%kpOSK!eLT7klRTo(+=C7\'Lgs8O@X1l6(Fpb,B0h;=B;*LKn>"X0oEt3]Z-Tc8t-Pp[%PbmD!g:OnqmO_*0?.!I^H=Q@YHm=^n'&2F@PuM@4PS#.L`W*a3H+]^+9CJaJ$n@FL=s8!\l]ER"Jel7i+acVDC!V[OW+O<!2.3<(qnBf[=PeZO.05n>,1fSkjn2260,_*W6\""h)i0%!<c7"hC[(4Rs:.KH5,+SAY);,F3mH<uP?tbNF3B=G]UU#!B#J>1?Cra0Ws]Qcgb`e5gFr.l3+kS<o:rl=W675kCFcE_oV+Wb_BhSA6'fg8'CXCF/mGi>ROVo2E6]/foJc/TFb!9JXRk*%g3bP_:+6JA?cl90@KjCQc,lI8Pls)@oB]#AQt:Obt6;X/">b@\>(97q>)s@>i8e:JZW3/Cq(3@ini>X`><uf"OT(8&@u5_qX&rA5`?S_V'/+"q\>_\o+;u9W6;E8M7;cHoZ4UBo!p<j^IrKq]IhnBaOtmlMJ`CRmT4W:ii6E(<n2)`Hnaa^'_icQB!22j0:Uq$Eg"G6i83'UlnM`1=oMpf&<`Gr<2)7RP;I$h4jA7SuT5)dI*SNg\<O!9ffe$HkB+2"'0Ae4)_NMQNE*D`7Bu]9bn[b.N[a!34r*VH7q>(k^nnPi3k_L'HV1baY[Jk*uOP;)jM1i?J$P50/<sqLJ181bXM_T;ejpcJOqGP1UA08>FLZ-glfrF2Z+haiS`nk2i4jnMejQ4BVli:ZaK,Q'b0P=1*\5sV)kUfF%[AZZ(?/.A1tU`YL*;W_o][hBH=G~>endstream
+endobj
+11 0 obj
+<<
+/Filter [ /ASCII85Decode /FlateDecode ] /Length 1371
+>>
+stream
+Gat=+968iG&AJ$ClnR`QY"6aAge/tgdn+179q(S($OA@V<$4Hd+\);#?%=,9,n:W)1uj^3$Jb*Thl\_:dOU6*GmEhc!PeW%6'al86ajR26#d.uMMq@ilu;.4#1f8$h+ZFdnX=/Ek\Jk!;RV=SULWljQuL-)[nOVEH`5;3,>&(C[RVrb_81s<@;'YnULb2=ZP&Q]#-gPW?kBK;CCXMA&rQ^^p]XuGfTg-Cnj]Q)I*k22%+_.e?+<dX-d]S^pNiLMWd(m:X5si-[EJ['Wg87Kf@,iboH/B"K3dXhYka)443%,"37",AlF5j%=3D[-SNd\]c/J\72`L6HLmf_H9W7Fr^S'Q&idrf6-mbRYkocF^Wo1(Ke-`Q,&kI\i2k'7PS=YSFU@XPYTFgY44bDdoQk:6iU5OR]U$)p>1QIenLs,B5^.7CdIP_80&SNHlVbGB90?>t:+p"Ib]s`$X[t+sA"5pj/I<88ebm^aC([/Z'Hm_b\IBrk<c0G,i(X-c\_I'J[%@$nVfLX/_ft_jf+\i9NH=ej=6ghl&jZYqjpO6b=2!jP8Cl&5N.I55\+)a^_ojG$jHW^BLQ77j!QbfsGh*.:VDM==>%TKlsZDB(a3^]8nr+&T00,*`%_Q]Y\e"Cho6;iPb)k@h%+T+-,Us?6<CVW3U""thQK^(]\e`\W5A[ZAb8Kt3uCohO"YPKL:33c1X)e;u>P>&,>C81s>PU1@-=5b]pW$l';NDSN08%=6_Pck:.8G+DtC`:@ZHlR)"<p;nZeR0YkUEQpGWJT2Yc&Am];$a@?ed3e(,e>$#38g?Ibdc+:kK-m%Zk'-m]l[a1Kji-D>dI"3?JQq:ItF+b_/D%r\aIO\*VFmf%/Nq$O@-#'K,r>l^cH)%Jho3DdeD,hG-.nk$DFsf]f)YRhcV<uiRRdPA-G,18i.AO%baYk)S'4n\mjotKc1^S<c8gd!2gE\?[=P;!8n9+#54#0=.ABrrt16MA*a]-4fS=8,p<l>GK<GX*Cp_^WT.IW;Z>mOEgbGG]HT+_XH>(MVZo[X+Vo)@%6[A-Q5j@=2bJH\e@&l8a0KI)a72msfTq6%he4?RnERBk9hUt(4Q_f\k-nu:oV&X*G*/5\fH7%gP?'0jS?:<9Zi_<Nbs<O>OZC:BUOC0g2\NqVf"3>s%/QipT4k>p8UkN"W$`!*BDgtQRWc`7na/H<i9)FdITE3\>n[V+cM(?iKr50#Su$Y3Vpj_WnoB#f>d#SRL8/FM6NXm6ATmdK$9N>HP?^aP6cM7RU*(!tBtDZ)&`]<#2Y!d3AUHrY>b'H<Zd]Mf;a1htMn6/b7OtG!B^)'CJ/h!d3%`;`H)nlsa)NRl]fVDk.,V`32WT_B4+&M#iSo>2VY.t~>endstream
+endobj
+12 0 obj
+<<
+/Filter [ /ASCII85Decode /FlateDecode ] /Length 1226
+>>
+stream
+GatU29lo&I&A@sBm&atL9gr3j4M>91:HL_LFdm=sQWtYP8YQ7e('(K+htI-ceRWEI'6Q-p,m]7^^5p5K[)/S/YHfDLWMCVb.B$?I=];SNZ_?r@IP.i'^+)l>GIM\#>@TJ?<b]f9=898aI+%\FapuIncc<sk?JPuVUE.Fb_D)?(Wae)u4KTYBMnO(\ClL+H+=2AA&"3QXN^>85[oE>qj`g>h885\B/)PB$]s8AVZY>Y<-jY<I9-:C-SCL;uWm^FN(?90CVSY"G];+jTP0YN0+lGn?4"m8d5OnNSlHO1KlLe+unR^,:<>\,#f)1mu]"!%]kVZ[aSj!!]Z.mY2lOKT()DIk=C\W60"Y)_d?.B:X.O.s4#">T>@ZqJ=Wq8irG:]ab>2!!d9YjW-ReBFu;45SAaWb)oZH/YN,lrK8gn\d$6[5dQK/s"+G;cZ6d01PT,r[fW$2Sar;.2jY;7hMX)\:8J2@FMO]D6d/9)'ebflik,^k-+keiF?q!`,7%#3gL#P(CH*O"@LG@NR`C*\+>GGdZ.%eYJHrJt*iTFk5hld"\*IH]TnCQYh)?2dAh\47<d*Ud/>N"Or74WYhA>Ls,K_2?ZD.,^7X\89BNu#HYfEY'%`Z0>Y5`MZMSfB!T%I<\g8">ZE9.#D`&u(fd#<J;GKUHJZGGkbgUf`(l)*f(XJQ9BAYc\B>MiagKd65S&$iC08"E3$YY[6UOR%jk5+5jM,D%&E\L-0k5Kk2VX19?@qm2rLh%2TQC6QNhZ?^C=da]1*jLsh]M'(d[C0.T%$=<_As3FpZui,`Tj-t>W?qq49Oo-ebMq-P`A@XU>aW!U_s$Zq'(D-hUggWc-7A9lH`WsSYSVg"QUkNWseRqK:TIU^`GbqOtbJqEQZf2Ng6Rq]NERQ[GlLMkIj#R1.\IR5W;Bgk%%R'/'lYb[%8T%qL=/'^GHluFhMogL#`LIm<FS!`6lIl^!M--(mh2qKB#(e9+pj+p["JL[6=Y:"=CUNbN4<Nh$,0$Ki_NMBMS2p63X2\4JZlHEOQ1?I*n:9jA$bT4eeK*F%8Y$Kbeb,63\c][Bn\=YKo>4MpgR#%@pKkaLsHcQg^I;/i.]&gc6Zqk"i6D-/+R5]X7)pLNMaOR.@*lpE[lnK4#*$mT`G:7fIbl5enEfb'S6D\K(%@&4#89hiM:X_h@RO@;]taMW=&RdA:Q6lT4V3`,Sg3-L)sBRqVR1bMg"9e($[sf5F@~>endstream
+endobj
+xref
+0 13
+0000000000 65535 f 
+0000000073 00000 n 
+0000000114 00000 n 
+0000000221 00000 n 
+0000000333 00000 n 
+0000000527 00000 n 
+0000000721 00000 n 
+0000000915 00000 n 
+0000000983 00000 n 
+0000001266 00000 n 
+0000001337 00000 n 
+0000002900 00000 n 
+0000004363 00000 n 
+trailer
+<<
+/ID 
+[<4b0a13e77a14658d2c9c7ba2786bbc6e><4b0a13e77a14658d2c9c7ba2786bbc6e>]
+% ReportLab generated PDF document -- digest (http://www.reportlab.com)
+
+/Info 8 0 R
+/Root 7 0 R
+/Size 13
+>>
+startxref
+5681
+%%EOF
@@ -0,0 +1,23 @@
+openai
+pandas
+python-dotenv
+fastapi
+uvicorn
+langchain-community
+langchain-openai
+pydantic
+pypdf
+pypandoc
+Spire.Doc
+plum-dispatch==1.7.4
+scikit-learn
+werkzeug
+python-multipart
+langgraph
+tiktoken
+langchainhub
+chromadb
+langchain
+langchain-text-splitters
+beautifulsoup4
+langchain-core
@@ -0,0 +1,101 @@
+from reportlab.lib import colors
+from reportlab.lib.pagesizes import letter
+from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
+from reportlab.lib.enums import TA_CENTER, TA_LEFT
+from io import BytesIO
+
+
+def create_pdf(data, output_filename=None):
+    """Render one or more STARTPOP answers into a PDF document.
+
+    Args:
+        data: A single question dict or a list of such dicts. Each dict must
+            provide a 'question' entry and may provide 'theme_title' plus one
+            list of bullet strings per section name used below.
+        output_filename: Optional path; when given, the PDF bytes are also
+            written to that file.
+
+    Returns:
+        The PDF content as bytes. If anything fails, the error is printed and
+        an empty dict is returned instead, so callers must check the type.
+    """
+    try:
+        # Normalise to a list once so the title lookup, the loop and the
+        # page-break test all look at the same sequence. A list input used to
+        # fail on data.get(), and a dict input compared the loop index with
+        # the number of dict keys, which appended a stray trailing page.
+        items = data if isinstance(data, list) else [data]
+
+        # Create a BytesIO buffer to store the PDF
+        buffer = BytesIO()
+
+        # Create the PDF document using the buffer
+        doc = SimpleDocTemplate(buffer, pagesize=letter)
+        styles = getSampleStyleSheet()
+
+        # All content will use the same font size
+        STANDARD_FONT_SIZE = 12
+
+        # Custom styles differ only in the attributes listed here; font size
+        # and leading are shared and applied in the loop below.
+        style_specs = (
+            dict(name='ThemeTitle', alignment=TA_CENTER, spaceAfter=15,
+                 fontName='Helvetica-Bold'),
+            dict(name='QuestionTitle', alignment=TA_LEFT, spaceAfter=10,
+                 fontName='Helvetica-Bold', textColor=colors.black),
+            dict(name='SectionTitle', alignment=TA_LEFT, spaceAfter=4,
+                 fontName='Helvetica-Bold'),
+            dict(name='NormalText', alignment=TA_LEFT, spaceAfter=2,
+                 leftIndent=20, fontName='Helvetica'),
+        )
+        for spec in style_specs:
+            styles.add(ParagraphStyle(fontSize=STANDARD_FONT_SIZE, leading=14, **spec))
+
+        # Build the document content
+        story = []
+
+        # Add theme title on first page, taken from the first question entry
+        if items and isinstance(items[0], dict):
+            theme_title = items[0].get('theme_title', 'No Title Provided')
+            story.append(Paragraph(f"THEME: {str(theme_title).upper()}", styles['ThemeTitle']))
+            story.append(Spacer(1, 10))
+
+        sections = ['Situation', 'Task', 'Action', 'Results and Transitions', 'Personal Lessons',
+                    'Observations of Others', 'Professional Connection']
+        last_index = len(items) - 1
+
+        # Process each question data
+        for i, item in enumerate(items):
+            story.append(Paragraph(f"<b>{item['question']}</b>", styles['QuestionTitle']))
+
+            for section in sections:
+                story.append(Paragraph(f"{section}:", styles['SectionTitle']))
+                points = item.get(section, [])
+                # A bare string would otherwise be iterated character by
+                # character, producing one bullet per letter.
+                if isinstance(points, str):
+                    points = [points]
+                for point in points:
+                    story.append(Paragraph(f"• {point}", styles['NormalText']))
+
+            # Add a page break after each question except the last one
+            if i < last_index:
+                story.append(PageBreak())
+
+        # Build the PDF into the buffer
+        doc.build(story)
+
+        # Get the PDF content from the buffer
+        pdf_content = buffer.getvalue()
+        buffer.close()
+
+        # If output_filename is provided, also save to file
+        if output_filename:
+            with open(output_filename, 'wb') as f:
+                f.write(pdf_content)
+
+        return pdf_content
+
+    except Exception as e:
+        # Kept as a best-effort boundary: callers receive {} rather than an exception.
+        print(f"Error: {e}")
+        return {}
@@ -0,0 +1,175 @@
+import os
+import requests
+from dotenv import load_dotenv
+from langchain_openai import ChatOpenAI
+from langchain_core.prompts.prompt import PromptTemplate
+from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
+from langchain_openai import OpenAIEmbeddings
+from langchain_core.documents import Document
+from uuid import uuid4
+import json
+import getpass
+import numpy as np
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from sklearn.metrics.pairwise import cosine_similarity
+from typing import List
+import time
+from datetime import datetime
+import pytz
+import logging
+# Load variables from a local .env file (if present) into the environment.
+load_dotenv()
+
+# Re-assigning os.environ["OPENAI_API_KEY"] from os.getenv() changed nothing
+# when the key was set and raised an opaque "str expected, not NoneType"
+# TypeError when it was missing, so report the missing key explicitly.
+if os.getenv("OPENAI_API_KEY") is None:
+    raise RuntimeError("OPENAI_API_KEY is not set; export it or add it to the .env file")
+
+# Shared chat model used by generate_quiz.
+llm_temp = ChatOpenAI(model="gpt-4o-mini", temperature=0.7)
+
+def generate_quiz(startpop_pdf, quiz_type=None) -> dict:
+    """Ask the chat model for an interview quiz derived from a STARTPOP PDF.
+
+    Builds a prompt around ``startpop_pdf`` and ``quiz_type``, pipes it through
+    ``llm_temp`` and ``JsonOutputParser``, and returns the parsed result.
+
+    Args:
+        startpop_pdf: Value substituted for ``{startpop_pdf}`` in the prompt.
+        quiz_type: Value substituted for ``{quiz_type}``; the prompt describes
+            types 1 (single line text), 2 (multiple choice), 3 (true/false).
+
+    Returns:
+        Whatever ``JsonOutputParser`` produces from the model reply (the prompt
+        requests a list of question objects even though the annotation says
+        ``dict``). Any exception is printed and an empty dict is returned.
+    """
+    try:
+        # Define prompt for summarizing and extracting the required fields
+        # NOTE(review): the prompt text below is runtime behaviour and is left
+        # untouched; it contains a dangling "BASED on" sentence, a stray "P"
+        # line near the end and several typos that may be worth cleaning up.
+        quiz_prompt = PromptTemplate(
+        template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
+            You are a Fire Fighter Interview preparation assistant that generates QUIZ for user based on STARTPOP FORMAT PDF BASED on
+            
+            IN THE STARTPOP FORMAT PDF, each theme has its own questions with corresponding STARTPOP framework for each question.
+
+                Your responsibility is to carefully analyze the provided PDF data and then generate a quiz for the user.
+                You will also be provided with the type of quiz.
+
+                There are three different types of quizzes namely:
+
+                1- Single line text inputs
+                2- Multiple Choice questions
+                3- True or False questions
+
+                For each quiz type, return the following JSON format:
+
+                1. For Single Line Text Inputs:
+                - A list of objects, each with {{"question": "Your question", "correct_answer": "Your correct answer"}}
+
+                2. For Multiple Choice Questions:
+                - A list of objects, each with {{"question": "Your question", "options": ["Option 1", "Option 2"], "correct_answer": "Correct Option"}}
+
+                3. For True or False Questions:
+                - A list of objects, each with {{"question": "Your question", "options": ["True", "False"], "correct_answer": "True or False"}}
+                
+                Each response should also include a field called "quiz_type" which can be either 1, 2, or 3 respectively.
+
+                Return just the JSON output without any other explanation or comments.
+
+                TO KNOW MORE ABOUT THE PROJECT READ BELOW
+                ----START------
+                Throughout most Probationary Firefighter Interviews, they will be evaluating a ton of things. Typically, they want to see how you align with the **7 Main Concepts of Firefighting**. They are also watching how nervous you are, your communication skills, and your overall general competence for the role. At the end of the day, you want them to like you.
+
+                ### 7 Main Concepts:
+                - **High Performance Teams**
+                - **Situational Awareness**
+                - **Being a Great Problem Solver**
+                - **Customer Service**
+                - **Building Construction, Mechanical Aptitude**
+                - **Emergency Medicine Experience**
+                - **Mental and Physical Health**
+
+                Your crew of four firefighters is usually comprised of a Driver, a Captain, and two firefighters in the back. That is a High-Performance Team. 
+
+                We are frequently dispatched to calls that require using our understanding of Building Construction Concepts, Mechanical Aptitude, and Emergency Medical Experience. When you respond to an emergency event that is inherently dangerous (like a vehicle fire, a car accident in a slanted ditch, a person trapped under a machine, a house fire, or a chemical suicide), you need to use your Situational Awareness to keep that crew safe. 
+
+                Sometimes the tools, training, and tactics that you have been taught work perfectly. Sometimes they don’t. Can you be a Good Problem Solver to quickly come up with something to make the situation better for the people, places, and environments that we protect?
+
+                Ultimately, your crew will be serving the public, and the chiefs need to know that you can be trained to be above their desired standard so that you give the public great Customer Service.
+
+                ### 20 Important Themes
+                Consider the 7 concepts to be the soil. All of your stories grow out of that soil. But not every story works for every question. You need to handpick the right one at the right times to give them. Sort of like how you handpick flowers out of the soil. You NEED to have **20 different flowers** so that you are fully prepared for whatever behavioral question they throw at you. These are the **20 Themes** that you would use for behavioral questions:
+                - Customer Service
+                - Conflict
+                - Challenge
+                - Leadership
+                - Stress
+                - Successful Team
+                - Diversity
+                - Mistake
+                - Unsuccessful Team     
+                - Disagreement
+                - Bent a Rule
+                - Delivered a Difficult Message
+                - Displayed Integrity
+                - Took a Shortcut
+                - Didn’t Follow the Rules
+                - Emergency Response
+                - Dealt with Disabilities
+                - Solved a Big Problem
+                - Continuous Improvement
+                - Handled Sensitive Information
+
+                ### Behavioral Question Starters
+                Behavioral questions usually start with phrases like:
+                - “Tell me a time when…”
+                - “Can you tell me about a time when you…”
+                - "Describe a situation where you had to…"
+                - "Give me an example of how you…"
+                - "Have you ever been in a position where you needed to…"
+                - "Walk me through a time when you…"
+
+                ### STARTPOP Framework
+                The STAR Format is what most people tell you to do in order to answer a firefighter interview question. It’s a great framework. I highly recommend it. I just advise that you pump it up even further. I call it **STARTPOP**. 
+
+                Try and pull from different parts of your life. My Chief Training Officer told me that he enjoys candidates that are able to use different experiences to answer the questions. Listening to someone drone on and on about a singular time or type of event in their life is a massive turn-off to the interview panel. That’s a bad thing. Just like most things, variety is the spice of life.
+
+                #### Components of STARTPOP:
+                1. **Situation**: 
+                - Set up the answer in the mind of the question asker. 
+                - Your storytelling skills matter here. It has to be concise and impactful (no more than 25 seconds long).
+                - Include dates, ages, places, and circumstances.
+
+                2. **Task**: 
+                - Explain what you needed to do and why you needed to do it.
+                - Recap the situation quickly from a different angle.
+
+                3. **Actions**: 
+                - Outline both the negative and the positive way of doing things.
+                - Show high moral character in every question.
+
+                4. **Results**: 
+                - Explain what happened as a result of your actions.
+                - Share results in a time-specific manner (e.g., “5 months later X happened”).
+
+                5. **Transitions**: 
+                - Speak in a way that aligns with professional expectations.
+                - Ensure coherence in your responses.
+
+                6. **Personal Lessons**: 
+                - Discuss what you learned about yourself.
+                - Address any concerns the interviewers might have about hiring you.
+
+                7. **Other People Observations**: 
+                - Share insights about others in the situation.
+                - Keep it short and to the point.
+
+                8. **Professional Connection**: 
+                - Relate your experience directly to the fire service.
+                - Conclude strongly, avoiding phrases like “and so yeah…”.
+                ----END------
+                               
+                NOTE: THE QUIZ FOCUES ON BULIDNG USER CONFIDENCE BY ANANLYZING  THE QUESTIONS AND FRAMEWORK FOR EACH QUESTION IN THE STARTPOP FRAMEWORK PDF,SOLELY USE THIS PDF PROVIDED BY THE USER 
+                BASED ON THIS FRAMEWORK , CREATE INTERVIEW BASED QUIZ FOR FIRE FIGHTING ROLE BY ANALYZING THIS DOCUMENT
+                NOTE : THE QUIZ SHOULD NOT BE  BASED ON STARTPOP FRAMEWORK ITSELF BUT ANALYZE THE STARTPOP FRAMEWORK PRESENTED TO GENERATE INTERVIEW BASED QUIZ
+                e.g  "The STARTPOP framework is specifically designed for firefighter interviews", THIS KIND OF QUESTION SHOULD NOT BE ASKED IN THE QUIZ....
+                Thank you for your thorough and precise processing!
+                            STARTPOP FULL PDF :{startpop_pdf}
+                            question type : {quiz_type}
+                            P
+                            <|eot_id|><|start_header_id|>user<|end_header_id|>""",
+            # NOTE(review): "question" is declared here and passed to invoke()
+            # below, but the template has no {question} placeholder (the
+            # "question" occurrences above are inside escaped {{ }} braces).
+            input_variables=["startpop_pdf", "quiz_type", "question"],
+        )
+
+        # Pipeline to process the prompt and parse output
+        quiz_router = quiz_prompt | llm_temp | JsonOutputParser()
+
+        # Call the pipeline and generate the cohesive output
+        output = quiz_router.invoke({"startpop_pdf": startpop_pdf, "quiz_type": quiz_type, "question": "Your question here"})
+
+        return output
+    except Exception as e:
+        # Best-effort boundary: every failure (network, parsing, ...) is
+        # printed and collapsed into an empty dict for the caller.
+        print(f"Error:{e}")
+        return {}
+    
+    
@@ -0,0 +1,239 @@
+import os
+import requests
+from dotenv import load_dotenv
+from langchain_openai import ChatOpenAI
+from langchain_core.prompts.prompt import PromptTemplate
+from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
+from langchain_openai import OpenAIEmbeddings
+from langchain_core.documents import Document
+from uuid import uuid4
+import json
+import getpass
+import numpy as np
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from sklearn.metrics.pairwise import cosine_similarity
+from typing import List
+import time
+from datetime import datetime
+import pytz
+import logging
+# Load variables from a local .env file (if present) into the environment.
+load_dotenv()
+
+# Re-assigning os.environ["OPENAI_API_KEY"] from os.getenv() changed nothing
+# when the key was set and raised an opaque "str expected, not NoneType"
+# TypeError when it was missing, so report the missing key explicitly.
+if os.getenv("OPENAI_API_KEY") is None:
+    raise RuntimeError("OPENAI_API_KEY is not set; export it or add it to the .env file")
+
+# Shared chat model used by generate_theme.
+llm_temp = ChatOpenAI(model="gpt-4o-mini", temperature=0.7)
+
+def generate_theme(conversation_data,resume,full_history,feedback=None, previous_result=None) -> dict:
+    """Generate a STARTPOP answer for a theme from a user's chat and resume.
+
+    Fills the prompt below with the five arguments, pipes it through
+    ``llm_temp`` and ``JsonOutputParser``, prints the parsed output and
+    returns it.
+
+    Args:
+        conversation_data: Substituted for ``{conversation_data}``.
+        resume: Substituted for ``{resume}``.
+        full_history: Substituted for ``{full_history}``.
+        feedback: Optional; substituted for ``{feedback}``.
+        previous_result: Optional; substituted for ``{previous_result}``.
+
+    Returns:
+        The parsed model output (the prompt asks for a list of STARTPOP
+        objects even though the annotation says ``dict``). Any exception is
+        printed and an empty dict is returned.
+    """
+    try:
+        # Define prompt for summarizing and extracting the required fields
+        # NOTE(review): the prompt text is runtime behaviour and is left
+        # untouched. Things worth confirming with the prompt author: the
+        # "Review Process" bullet talks about "news items", the results rule
+        # reads "25 - 5o words", and the detailed RULES section sits after the
+        # <|eot_id|>/user header rather than inside the system part.
+        theme_prompt = PromptTemplate(
+            template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
+            You are a Fire Fighter Interview preparation assistant that generates STARTPOP FORMAT BASED on user interaction with AI.
+            You will be provided with the current theme, user interaction with AI (alongside user resume), and data.
+
+Your responsibility is to carefully analyze user interaction with AI, the theme, and the user RESUME to generate a STARTPOP format for the theme.
+NOTE: A SINGLE QUESTION IS GENERATED WITH DETAILED STARTPOP FORMAT
+NOTE: For more Context, user full work history may also be provided
+TO KNOW MORE ABOUT THE PROJECT READ BELOW
+---START------
+Throughout most Probationary Firefighter Interviews, they will be evaluating a ton of things. Typically, they want to see how you align with the **7 Main Concepts of Firefighting**. They are also watching how nervous you are, your communication skills, and your overall general competence for the role. At the end of the day, you want them to like you.
+
+### 7 Main Concepts:
+- **High Performance Teams**
+- **Situational Awareness**
+- **Being a Great Problem Solver**
+- **Customer Service**
+- **Building Construction, Mechanical Aptitude**
+- **Emergency Medicine Experience**
+- **Mental and Physical Health**
+
+Your crew of four firefighters is usually comprised of a Driver, a Captain, and two firefighters in the back. That is a High-Performance Team. 
+
+We are frequently dispatched to calls that require using our understanding of Building Construction Concepts, Mechanical Aptitude, and Emergency Medical Experience. When you respond to an emergency event that is inherently dangerous (like a vehicle fire, a car accident in a slanted ditch, a person trapped under a machine, a house fire, or a chemical suicide), you need to use your Situational Awareness to keep that crew safe. 
+
+Sometimes the tools, training, and tactics that you have been taught work perfectly. Sometimes they don’t. Can you be a Good Problem Solver to quickly come up with something to make the situation better for the people, places, and environments that we protect?
+
+Ultimately, your crew will be serving the public, and the chiefs need to know that you can be trained to be above their desired standard so that you give the public great Customer Service.
+
+### 20 Important Themes
+Consider the 7 concepts to be the soil. All of your stories grow out of that soil. But not every story works for every question. You need to handpick the right one at the right times to give them. Sort of like how you handpick flowers out of the soil. You NEED to have **20 different flowers** so that you are fully prepared for whatever behavioral question they throw at you. These are the **20 Themes** that you would use for behavioral questions:
+- Customer Service
+- Conflict
+- Challenge
+- Leadership
+- Stress
+- Successful Team
+- Diversity
+- Mistake
+- Unsuccessful Team     
+- Disagreement
+- Bent a Rule
+- Delivered a Difficult Message
+- Displayed Integrity
+- Took a Shortcut
+- Didn’t Follow the Rules
+- Emergency Response
+- Dealt with Disabilities
+- Solved a Big Problem
+- Continuous Improvement
+- Handled Sensitive Information
+
+### Behavioral Question Starters
+Behavioral questions usually start with phrases like:
+- “Tell me a time when…”
+- “Can you tell me about a time when you…”
+- "Describe a situation where you had to…"
+- "Give me an example of how you…"
+- "Have you ever been in a position where you needed to…"
+- "Walk me through a time when you…"
+
+### STARTPOP Framework
+The STAR Format is what most people tell you to do in order to answer a firefighter interview question. It’s a great framework. I highly recommend it. I just advise that you pump it up even further. I call it **STARTPOP**. 
+
+Try and pull from different parts of your life. My Chief Training Officer told me that he enjoys candidates that are able to use different experiences to answer the questions. Listening to someone drone on and on about a singular time or type of event in their life is a massive turn-off to the interview panel. That’s a bad thing. Just like most things, variety is the spice of life.
+
+#### Components of STARTPOP:
+1. **Situation**: 
+   - Set up the answer in the mind of the question asker. 
+   - Your storytelling skills matter here. It has to be concise and impactful (no more than 25 seconds long).
+   - Include dates, ages, places, and circumstances.
+
+2. **Task**: 
+   - Explain what you needed to do and why you needed to do it.
+   - Recap the situation quickly from a different angle.
+
+3. **Actions**: 
+   - Outline both the negative and the positive way of doing things.
+   - Show high moral character in every question.
+
+4. **Results**: 
+   - Explain what happened as a result of your actions.
+   - Share results in a time-specific manner (e.g., “5 months later X happened”).
+
+5. **Transitions**: 
+   - Speak in a way that aligns with professional expectations.
+   - Ensure coherence in your responses.
+
+6. **Personal Lessons**: 
+   - Discuss what you learned about yourself.
+   - Address any concerns the interviewers might have about hiring you.
+
+7. **Other People Observations**: 
+   - Share insights about others in the situation.
+   - Keep it short and to the point.
+
+8. **Professional Connection**: 
+   - Relate your experience directly to the fire service.
+   - Conclude strongly, avoiding phrases like “and so yeah…”.
+
+EXAMPLE STARTPOP
+
+question: Tell me a time when you made a MISTAKE how did you fix it? (Eaves Cleaning Mistake)
+Situation:
+• In the Fall my business, Tiger Building Services, does a lot of eavestrough cleaning.
+• Back in 2019 I was working with an employee in my truck. We were working nicely to hit my daily revenue target.
+• We got to the last job of the day; we were tired and running out of sunlight. But I really wanted to squeeze it in.
+• We have procedures to follow in order to work safely and effectively. My goal is to be as low impact as possible.
+• I made a mistake when we used the handheld blowers on their eaves to blow out the debris without checking how
+wet the debris was or the ground around the back of the house. It made a muddy mess all over their white deck.
+• They were livid. Swearing and completely unhappy with how we were doing the work. I take ownership of my
+mistakes and realized I screwed up by using blowers instead of hand bombing it.
+Task:
+• My task was to defuse the situation and clean up the mess as quickly as possible.
+• I had to do it because as the owner of the company it was my reputation on the line. We got the job through one
+of the apps that we use to fill out our schedule and it is imperative that I make sure their customers have good
+experiences with us so that we keep our top position on the app.
+• I am also a man of integrity and try to be always empathetic, so I felt obligated to correct the mistake.
+Action:
+• The wrong approach would have been to match the customers energy and just as belligerent and abrasive. It
+would have escalated the situation to a point where things could have gotten ugly and pretty physical.
+• It would have also been wrong to just ignore or make fun of the customer and the problem we created, or to just
+pack our ladders and tools and run away as quickly as possible.
+• The correct approach was to get off the roof safely and speak with the customer on the ground eye to eye.
+• I made sure to do that and then apologized for the mess that we made. I empathized with them and the way they
+were feeling. I told them that it was our mistake, and we will work to correct it immediately.
+• I switched our strategy. Told the employee to clean use their hand for the gutters while I cleaned the deck.
+Results and Transitions:
+• It was a losing situation for me in the short run. The job ended up taking a bit longer than expected and I actually
+told them that we would waive the fees due to the inconveniences we created.
+• After we finished up, I gave her a plan of action. She would get the eaves cleaning for free, and we would return
+the following day with our soft wash system to make sure that she had a sparkling clean deck also free of charge.
+• The next morning when we finished the free soft wash, she was happy with the resolution plan and Jiffy was
+impressed with our ability to correct the mistake and alleviate the situation.
+Personal Lessons:
+• What I learned about myself was that I do make mistakes, but I am the type of person that owns up to it.
+• I am also honest and empathetic, and I can perform in stressful situations and that I could de-escalate tense
+situations, to be adaptable and think quickly on the fly.
+• I used the LAST tactic for good customer service: Listened, Apologized, Solved the problem, then thanked them.
+• I took the full brunt of their anger, made an action plan that instantly calmed the situation and then acted on it to
+make them happy with the service.
+Observations of Others:
+• What I learned about other people is that people are entitled to their reactions, emotions, and feelings.
+• I respect those emotions and have learned that following actionable game plans will help avoid or resolve issues.
+• I know the term proper planning prevents poor performance is applicable here.
+• There is a reason organization’s have SOPs and SOGs. They are there to be followed in order to avoid mistakes.
+Professional Connection:
+• My biggest takeaway was it is okay to make mistakes, but it is not okay to not learn from them.
+• I know that the team on Markham Fire sometimes makes mistakes on the firegrounds, but they are also the type
+of people that own up to their mistake and learn from them.
+• I also know that Chief Grant promotes having an open and transparent organization that is not afraid from
+admitting an error or correcting it.
+
+---END------
+
+
+JSON Output Requirements: Generate a list of well-structured JSON output STARTPOP with question and correcpoding STARTPPOP with the following fields:
+                - theme_title: The title the theme provided
+                - question: The question 
+                - Situation: A bulleted list of texts as seen in examples
+                - Task: A bulleted list of texts as seen in examples
+                - Action: A bulleted list of texts as seen in examples
+                - Personal Lessons: A bulleted list of texts as seen in examples
+                - Results and Transitions: A bulleted list of texts as seen in examples
+                - Observations of Others: A bulleted list of texts as seen in examples
+                - Professional Connection: A bulleted list of texts as seen in examples
+
+Review Process: 
+                - Carefully review all news items to confirm they align with the specified theme and meet relevance criteria.
+                - Ensure the JSON format is flawless, comprehensive, and well-structured, with all fields included and correctly formatted.
+
+NOTE: 1. you MAY BE PROVIDED WITH FEEDBACK AND PREVIOUS RESULT, MEANING AI HAS GENERATED STARTPOP BEFORE AND MAYBE USER IS NOT SATISFIED WITH THE RESULT THEN YOU GENERATE A NEW ONE BASED ON THE FEEDBACK
+NOTE: Each question will have a correpoding STARTPOP feilds
+Return just the JSON output without any other explanation or comments.
+
+Thank you for your thorough and precise processing!
+            CONVERSATION DATA :{conversation_data}
+            FEEDBACK: {feedback}
+            PREVIOUS RESULT: {previous_result}
+            USER RESUME : {resume}
+            FULL WORK HISTORY : {full_history}
+            <|eot_id|><|start_header_id|>user<|end_header_id|>
+            
+            RULES FOR GENERATING EACH COMPONENT - FOLLOW THESE RULES THOROUGHLY MAKE SURE YOUR OUTPUT IS WELL  DETAILED
+            
+            THE FRAME WORK MUST BE DETAILED WITH THE FOLLWWING RULES
+            1. Situation : 75 - 100 words  
+            2. Task: 50 words
+            3. Actions: 2 Negative actions and 2 positive actions
+            4. Results: 25 - 5o words
+            5. Personal Lessons : 25 - 50 words
+            6. Observation of others: 25 words
+            7. Professional connections: 25 - 50 words and in addition to the 25-50 words:
+                - Connect to the theme of questions (Be creative here)
+                - Ask to be part of their team(be creattive here)  
+            """,
+            input_variables=["resume","conversation_data", "feedback", "previous_result","full_history"],
+        )
+
+        # Pipeline to process the prompt and parse output
+        theme_router = theme_prompt | llm_temp | JsonOutputParser()
+
+        # Call the pipeline and generate the cohesive output
+        output = theme_router.invoke({"conversation_data": conversation_data, "feedback": feedback, "previous_result": previous_result,"resume":resume,"full_history":full_history})
+        
+        # NOTE(review): this prints the full generated answer (derived from
+        # the user's resume and chat) to stdout on every call.
+        print(f"Output : {output}")
+        return output
+     
+    except Exception as e:
+        # Best-effort boundary: every failure is printed and collapsed into
+        # an empty dict for the caller.
+        print(f"Error:{e}")
+        return {} 
@@ -0,0 +1,197 @@
+import json
+from typing import List, Dict, Optional, TypedDict, Sequence, Annotated
+from dataclasses import dataclass
+from pathlib import Path
+from datetime import datetime
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_core.messages import HumanMessage, AIMessage, BaseMessage
+from langgraph.checkpoint.memory import MemorySaver
+from langgraph.graph import START, MessagesState, StateGraph
+from utils.utils import format_questions_text
+from src.prompts import chat_prompt
+from langchain_openai import ChatOpenAI
+@dataclass
+class Message:
+    """One chat turn as stored in, and rebuilt from, the conversation file."""
+    role: str  # 'human' or 'ai'
+    content: str  # message text
+    timestamp: str  # add_message stores datetime.now().isoformat() here
+
+# Question bank, read once at import time. The path is relative, so it is
+# resolved against the process working directory; importing this module
+# fails if the file is missing.
+QUESTIONS_PATH = "./data/config_files/questions.json"
+with open(QUESTIONS_PATH, "r") as f:
+    questions = json.load(f)
+
+# Global template read by initialize_workflow's call_model. Nothing in this
+# module assigns it a real template.
+prompt_template = None
+# Chat model name passed to ChatOpenAI in ai_chat.
+MODEL = "gpt-4o-mini"
+def initialize_workflow(model) -> StateGraph:
+    """Build and compile a one-node LangGraph workflow around ``model``.
+
+    The single "model" node renders the module-level ``prompt_template`` with
+    the current messages and sends the result to ``model``.
+
+    Args:
+        model: Object exposing ``invoke(prompt)``; its reply becomes the new
+            message appended to the state.
+
+    Returns:
+        The result of ``workflow.compile(...)`` with an in-memory checkpointer.
+
+    Raises:
+        RuntimeError: when the node runs while ``prompt_template`` is still None.
+    """
+    workflow = StateGraph(state_schema=MessagesState)
+    memory = MemorySaver()
+
+    def call_model(state: MessagesState):
+        # prompt_template starts out as None at module level; fail with a
+        # clear message instead of "'NoneType' object has no attribute 'invoke'".
+        if prompt_template is None:
+            raise RuntimeError(
+                "prompt_template is not set; assign a template "
+                "(see setup_prompt_template) before invoking the workflow"
+            )
+        # The graph schema is MessagesState, so a "language" entry may be
+        # missing from the state; default to the value ai_chat uses.
+        language = state.get("language", "English")
+        prompt = prompt_template.invoke({"messages": state["messages"], "language": language})
+        response = model.invoke(prompt)
+        return {"messages": [response]}
+
+    workflow.add_edge(START, "model")
+    workflow.add_node("model", call_model)
+    return workflow.compile(checkpointer=memory)
+
+
+def setup_prompt_template(theme: int, resume: str) -> ChatPromptTemplate:
+    """Compose the chat prompt: a system message followed by the message list.
+
+    The system text comes from ``chat_prompt(theme, resume)``; the running
+    conversation is injected through the "messages" placeholder.
+    """
+    system_text = chat_prompt(theme, resume)
+    conversation_slot = MessagesPlaceholder(variable_name="messages")
+    return ChatPromptTemplate.from_messages([("system", system_text), conversation_slot])
+
+def parse_ai_response(content: str) -> Dict:
+    """Parse AI response content into ``{"message": ..., "end": bool}``.
+
+    A JSON object reply contributes its "message" (default "") and sets
+    "end" to True only when its "end" field equals "yes". Anything else,
+    whether plain text or valid JSON that is not an object (list, number,
+    string), is returned verbatim as the message with "end" False.
+    """
+    try:
+        response = json.loads(content)
+    except json.JSONDecodeError:
+        response = None
+
+    # json.loads also succeeds on "42" or "[1, 2]"; those have no .get(), so
+    # treat every non-object payload like unparseable text.
+    if not isinstance(response, dict):
+        return {
+            "message": content,
+            "end": False
+        }
+
+    return {
+        "message": response.get("message", ""),
+        "end": response.get("end", "no") == "yes"
+    }
+
+def add_message(storage_path: Path, conversation_id: str, role: str, content: str) -> None:
+    """Append one timestamped message to a conversation and persist the store."""
+    entry = {"role": role, "content": content, "timestamp": datetime.now().isoformat()}
+
+    store = load_conversations(storage_path)
+    # setdefault creates the conversation record the first time an id is seen.
+    thread = store.setdefault(conversation_id, {"messages": []})
+    thread["messages"].append(entry)
+    save_conversations(storage_path, store)
+
+
+def get_conversation_history(conversation_id: str, storage_path: Path) -> List[Message]:
+    """Return the stored messages of one conversation as Message objects.
+
+    Returns None (not an empty list) when the conversation id is unknown.
+    """
+    store = load_conversations(storage_path)
+    if conversation_id in store:
+        history = []
+        for raw in store[conversation_id]["messages"]:
+            history.append(
+                Message(role=raw["role"], content=raw["content"], timestamp=raw["timestamp"])
+            )
+        return history
+    return None
+
+def load_conversations(storage_path: Path) -> Dict:
+    """Read the whole conversation store from disk; a missing file yields {}."""
+    try:
+        handle = open(storage_path, 'r')
+    except FileNotFoundError:
+        # No store yet: behave as if it were empty.
+        return {}
+    with handle:
+        return json.load(handle)
+
+def save_conversations(storage_path: Path, conversations: Dict) -> None:
+    """Save conversations to storage file as indented JSON.
+
+    The payload is serialized before the file is opened, so a value that
+    cannot be encoded raises without truncating the existing store.
+    """
+    payload = json.dumps(conversations, indent=2)
+    with open(storage_path, 'w') as f:
+        f.write(payload)
+
+def convert_to_langchain_messages(messages: List[Message]) -> List[HumanMessage | AIMessage]:
+    """Map stored Message records onto LangChain message objects.
+
+    Role "human" becomes a HumanMessage; every other role becomes an AIMessage.
+    """
+    return [
+        HumanMessage(content=record.content)
+        if record.role == "human"
+        else AIMessage(content=record.content)
+        for record in messages
+    ]
+
+
+def ai_chat(query: str, conversation_id: str, theme_id: int, resume: str) -> str:
+    """Answer ``query`` within a persisted conversation and return the reply.
+
+    A fresh one-node graph is built on every call; earlier turns are replayed
+    from ``conversations.json`` rather than from the in-memory checkpointer.
+    When there is no stored history and no query, the conversation is opened
+    with "Let's get started". The user query (if any) and the AI reply are
+    appended to the stored history afterwards.
+    """
+    storage_path = Path("conversations.json")
+
+    class State(TypedDict):
+        messages: Annotated[Sequence[BaseMessage], "The messages in the conversation"]
+        language: str
+
+    # Model and graph are created per call
+    model = ChatOpenAI(model=MODEL)
+    graph = StateGraph(state_schema=State)
+
+    def call_model(state: State):
+        rendered = setup_prompt_template(theme_id, resume).invoke({
+            "messages": state["messages"],
+            "language": state["language"]
+        })
+        return {"messages": [model.invoke(rendered)]}
+
+    graph.add_edge(START, "model")
+    graph.add_node("model", call_model)
+    app = graph.compile(checkpointer=MemorySaver())
+
+    # Decide what to send: stored turns plus the new query, or an opener
+    stored = get_conversation_history(conversation_id, storage_path)
+    if stored:
+        input_messages = convert_to_langchain_messages(stored)
+        if query:
+            input_messages = input_messages + [HumanMessage(content=query)]
+    else:
+        opener = query if query else "Let's get started"
+        input_messages = [HumanMessage(content=opener)]
+
+    output = app.invoke(
+        {"messages": input_messages, "language": "English"},
+        {"configurable": {"thread_id": conversation_id}}
+    )
+    reply = output["messages"][-1].content
+
+    # Persist the exchange; the synthetic opener is never stored
+    if query:
+        add_message(storage_path, conversation_id, "human", query)
+    add_message(storage_path, conversation_id, "ai", reply)
+
+    return reply
+
+
+# Example usage:
+if __name__ == "__main__":
+    # Throwaway resume used only for this manual run
+    sample_resume = """
+    John Doe
+    EMT-B Certified
+    5 years experience as volunteer firefighter
+    Bachelor's in Fire Science
+    """
+
+    demo_call = {
+        "query": "What was my last questions?",
+        "conversation_id": "12345",
+        "theme_id": 1,  # Customer Service theme
+        "resume": sample_resume,
+    }
+
+    # ai_chat persists turns in conversations.json, so re-running this
+    # continues conversation "12345" instead of starting over.
+    print("AI:", ai_chat(**demo_call))
@@ -0,0 +1,134 @@
+import json
+from typing import List, Dict, Optional
+from dataclasses import dataclass
+from utils.utils import format_questions_text, format_theme_text
+@dataclass
+class Message:
+    """One chat message: who sent it, its text, and when it was recorded."""
+    role: str  # 'human' or 'ai'
+    content: str  # message text
+    timestamp: str  # stored as a string; exact format is not fixed by this class
+
+
+
+# Relative path, so it is resolved against the process working directory.
+QUESTIONS_PATH = "./data/config_files/questions.json"
+# Parsed once at import time; importing this module fails if the file is missing.
+with open(QUESTIONS_PATH, "r") as f:
+            questions = json.load(f)
+
+def chat_prompt(theme,resume):
+    """Build the system prompt text for the interview-preparation chat.
+
+    The returned string embeds the user's resume, the theme description
+    produced by ``format_theme_text(theme)`` (inserted twice), and the sample
+    questions found in the module-level ``questions`` dict under the keys
+    'General Competency Questions' and 'Situational Questions'. It asks the
+    model to reply with a JSON object carrying ``message`` and ``end``.
+
+    Args:
+        theme: Theme identifier handed to ``format_theme_text``.
+        resume: Resume text inserted verbatim between the START/END markers.
+
+    Returns:
+        The fully rendered prompt as a single string.
+
+    NOTE(review): ``resume`` and the theme/question text are inserted
+    unescaped. If the result is later parsed as a prompt template, any
+    braces in that text may be read as placeholders - confirm with callers.
+    """
+    return f"""  
+                You are a Fire Fighter Interview preparation assistant.
+
+Throughout most Probationary Firefighter Interviews, they will be evaluating a ton of things. Typically, they want to see how you align with the **7 Main Concepts of Firefighting**. They are also watching how nervous you are, your communication skills, and your overall general competence for the role. At the end of the day, you want them to like you.
+
+### 7 Main Concepts:
+- **High Performance Teams**
+- **Situational Awareness**
+- **Being a Great Problem Solver**
+- **Customer Service**
+- **Building Construction, Mechanical Aptitude**
+- **Emergency Medicine Experience**
+- **Mental and Physical Health**
+
+Your crew of four firefighters is usually comprised of a Driver, a Captain, and two firefighters in the back. That is a High-Performance Team. 
+
+We are frequently dispatched to calls that require using our understanding of Building Construction Concepts, Mechanical Aptitude, and Emergency Medical Experience. When you respond to an emergency event that is inherently dangerous (like a vehicle fire, a car accident in a slanted ditch, a person trapped under a machine, a house fire, or a chemical suicide), you need to use your Situational Awareness to keep that crew safe. 
+
+Sometimes the tools, training, and tactics that you have been taught work perfectly. Sometimes they don’t. Can you be a Good Problem Solver to quickly come up with something to make the situation better for the people, places, and environments that we protect?
+
+Ultimately, your crew will be serving the public, and the chiefs need to know that you can be trained to be above their desired standard so that you give the public great Customer Service.
+
+### 20 Important Themes
+Consider the 7 concepts to be the soil. All of your stories grow out of that soil. But not every story works for every question. You need to handpick the right one at the right times to give them. Sort of like how you handpick flowers out of the soil. You NEED to have **20 different flowers** so that you are fully prepared for whatever behavioral question they throw at you. These are the **20 Themes** that you would use for behavioral questions:
+- Customer Service
+- Conflict
+- Challenge
+- Leadership
+- Stress
+- Successful Team
+- Diversity
+- Mistake
+- Unsuccessful Team     
+- Disagreement
+- Bent a Rule
+- Delivered a Difficult Message
+- Displayed Integrity
+- Took a Shortcut
+- Didn’t Follow the Rules
+- Emergency Response
+- Dealt with Disabilities
+- Solved a Big Problem
+- Continuous Improvement
+- Handled Sensitive Information
+
+### Behavioral Question Starters
+Behavioral questions usually start with phrases like:
+- “Tell me a time when…”
+- “Can you tell me about a time when you…”
+- "Describe a situation where you had to…"
+- "Give me an example of how you…"
+- "Have you ever been in a position where you needed to…"
+- "Walk me through a time when you…"
+
+Your goal is to engage in conversation with the user. You will be provided with the current theme, the resume of the user, and example general competency questions and behavioral questions.
+USER_RESUME FROM START TO END : 
+--- START ---
+{resume}
+--- END --- 
+
+### STARTPOP Framework
+The STAR Format is what most people tell you to do in order to answer a firefighter interview question. It’s a great framework. I highly recommend it. I just advise that you pump it up even further. I call it **STARTPOP**. 
+
+Try and pull from different parts of your life. My Chief Training Officer told me that he enjoys candidates that are able to use different experiences to answer the questions. Listening to someone drone on and on about a singular time or type of event in their life is a massive turn-off to the interview panel. That’s a bad thing. Just like most things, variety is the spice of life.
+
+#### Components of STARTPOP:
+1. **Situation**: 
+   - Set up the answer in the mind of the question asker. 
+   - Your storytelling skills matter here. It has to be concise and impactful (no more than 25 seconds long).
+   - Include dates, ages, places, and circumstances.
+
+2. **Task**: 
+   - Explain what you needed to do and why you needed to do it.
+   - Recap the situation quickly from a different angle.
+
+3. **Actions**: 
+   - Outline both the negative and the positive way of doing things.
+   - Show high moral character in every question.
+
+4. **Results**: 
+   - Explain what happened as a result of your actions.
+   - Share results in a time-specific manner (e.g., “5 months later X happened”).
+
+5. **Transitions**: 
+   - Speak in a way that aligns with professional expectations.
+   - Ensure coherence in your responses.
+
+6. **Personal Lessons**: 
+   - Discuss what you learned about yourself.
+   - Address any concerns the interviewers might have about hiring you.
+
+7. **Other People Observations**: 
+   - Share insights about others in the situation.
+   - Keep it short and to the point.
+
+8. **Professional Connection**: 
+   - Relate your experience directly to the fire service.
+   - Conclude strongly, avoiding phrases like “and so yeah…”.
+                Current theme with  More context about the theme for Creating The Professional Connection (Lessons Learned): {format_theme_text(theme)}
+                
+Sample General Competency QUESTIONS and Situational Questions: {format_questions_text(questions,'General Competency Questions')}
+Sample Situational Questions: {format_questions_text(questions,'Situational Questions')}
+
+Your task is to engage the user in conversation, ask relevant questions, that will ultimately help them prepare a strong STARTPOP response based on their experiences and the current theme.
+YOU WILL BE PROVIDED WITH THE USER RESUME, ASK 1 QUESTION AT A TIME AND MAKE IT CONVERSATIONAL AND INTERESTING.
+These responses will be saved and later used to generate a STARTPOP framework by US (DO NOT WORRY ABOUT THAT, WE WILL BE THE ONE TO GENERATE, JUST ENGAGE USER WITH QUESTION AND ANSWER).
+Output format
+CUURENT TEHEME USER IS INTERESTED IN {format_theme_text(theme)}
+NOTE: !!! EXPLICITLY FOCUS ON THE CURRENT THEME SPECIFIED
+WILL BE IN JSON, avoid puttting ```json, before or after , return the excat json with nothing else
+   message:
+   end: "yes" or "no" if you are done with asking questions and confident the responses are okay enough to prepare STARTPOP by us
+NOTE: DO NOT KEEP THE CONVERSATION , CAREFULL ANALYZE USER RESUME AND THE PROVIDED EXAMPLES QUESTIONS AND ALL CONTEXT , ASK RELEVANT QUESTION BASED ON THE THEME AND THAT IS ALL 
+"""
+
+
+
@@ -0,0 +1,195 @@
+import json
+from typing import List, Dict, Optional, TypedDict, Sequence, Annotated
+from dataclasses import dataclass
+from pathlib import Path
+from datetime import datetime
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_core.messages import HumanMessage, AIMessage, BaseMessage
+from langgraph.checkpoint.memory import MemorySaver
+from langgraph.graph import START, MessagesState, StateGraph
+from utils.utils import format_questions_text
+from src.prompts import chat_prompt
+from langchain_openai import ChatOpenAI
+@dataclass
+class Message:
+    """One stored chat message, as rebuilt by get_conversation_history."""
+    role: str  # 'human' or 'ai'
+    content: str  # message text
+    timestamp: str  # add_message writes datetime.now().isoformat() here
+
+# Relative path, so it is resolved against the process working directory.
+QUESTIONS_PATH = "./data/config_files/questions.json"
+# Parsed once at import time; importing this module fails if the file is missing.
+with open(QUESTIONS_PATH, "r") as f:
+    questions = json.load(f)
+
+# Read by call_model inside initialize_workflow. It starts as None and no
+# visible code in this module assigns it.
+prompt_template = None
+# Model name passed to ChatOpenAI in ai_chat.
+MODEL = "gpt-4o"
+def initialize_workflow(model) -> StateGraph:
+    """Initialize LangGraph workflow"""
+    workflow = StateGraph(state_schema=MessagesState)
+    memory = MemorySaver()
+
+    def call_model(state: MessagesState):
+        prompt = prompt_template.invoke({"messages": state["messages"], "language": state["language"]})
+        response = model.invoke(prompt)
+        return {"messages": [response]}
+
+    workflow.add_edge(START, "model")
+    workflow.add_node("model", call_model)
+    return workflow.compile(checkpointer=memory)
+
+
+def setup_prompt_template(theme: int, resume: str) -> ChatPromptTemplate:
+    """Set up the prompt template"""
+    return ChatPromptTemplate.from_messages([
+        ("system", chat_prompt(theme, resume)),
+        MessagesPlaceholder(variable_name="messages")
+    ])
+
+def parse_ai_response(content: str) -> Dict:
+    """Parse AI response content into expected format"""
+    try:
+        response = json.loads(content)
+        return {
+            "message": response.get("message", ""),
+            "end": response.get("end", "no") == "yes"
+        }
+    except json.JSONDecodeError:
+        return {
+            "message": content,
+            "end": False
+        }
+
+def add_message(storage_path: Path, conversation_id: str, role: str, content: str) -> None:
+    """Add a message to the conversation history"""
+    message_data = {
+        "role": role,
+        "content": content,
+        "timestamp": datetime.now().isoformat()
+    }
+
+    conversations = load_conversations(storage_path)
+    if conversation_id not in conversations:
+        conversations[conversation_id] = {"messages": []}
+    conversations[conversation_id]["messages"].append(message_data)
+    save_conversations(storage_path, conversations)
+
+
+def get_conversation_history(conversation_id: str, storage_path: Path) -> List[Message]:
+    """Get the conversation history"""
+    conversations = load_conversations(storage_path)
+    if conversation_id not in conversations:
+        return None
+
+    return [
+        Message(
+            role=msg["role"],
+            content=msg["content"],
+            timestamp=msg["timestamp"]
+        )
+        for msg in conversations[conversation_id]["messages"]
+    ]
+
+def load_conversations(storage_path: Path) -> Dict:
+    """Load conversations from storage file"""
+    try:
+        with open(storage_path, 'r') as f:
+            return json.load(f)
+    except FileNotFoundError:
+        return {}
+
+def save_conversations(storage_path: Path, conversations: Dict) -> None:
+    """Save conversations to storage file"""
+    with open(storage_path, 'w') as f:
+        json.dump(conversations, f, indent=2)
+
+def convert_to_langchain_messages(messages: List[Message]) -> List[HumanMessage | AIMessage]:
+    """Convert our Message objects to LangChain message objects"""
+    converted_messages = []
+    for msg in messages:
+        if msg.role == "human":
+            converted_messages.append(HumanMessage(content=msg.content))
+        else:
+            converted_messages.append(AIMessage(content=msg.content))
+    return converted_messages
+def ai_chat(query: str, conversation_id: str, theme_id: int, resume: str) -> str:
+    """Main chat function that processes queries and manages conversation"""
+    storage_path = Path("conversations.json")
+    
+    class State(TypedDict):
+        messages: Annotated[Sequence[BaseMessage], "The messages in the conversation"]
+        language: str
+    # Initialize model and workflow
+    model = ChatOpenAI(model=MODEL)
+    workflow = StateGraph(state_schema=State)
+    
+    
+    
+    def call_model(state: State):
+        prompt_template = setup_prompt_template(theme_id, resume)
+        prompt = prompt_template.invoke({
+            "messages": state["messages"], 
+            "language": state["language"]
+        })
+        response = model.invoke(prompt)
+        return {"messages": [response]}
+
+    workflow.add_edge(START, "model")
+    workflow.add_node("model", call_model)
+    
+    memory = MemorySaver()
+    app = workflow.compile(checkpointer=memory)
+
+    # Get conversation history
+    history = get_conversation_history(conversation_id, storage_path)
+    
+    config = {"configurable": {"thread_id": conversation_id}}
+    language = "English"
+
+    if not history:
+        # New conversation
+        input_messages = [HumanMessage(content=query)] if query else [HumanMessage(content="Let's get started")]
+        output = app.invoke(
+            {"messages": input_messages, "language": language},
+            config
+        )
+    else:
+        # Existing conversation
+        history = convert_to_langchain_messages(history)
+        input_messages = history + [HumanMessage(content=query)] if query else history
+        output = app.invoke(
+            {"messages": input_messages, "language": language},
+            config
+        )
+
+    # Store messages
+    if query:
+        add_message(storage_path, conversation_id, "human", query)
+    add_message(storage_path, conversation_id, "ai", output["messages"][-1].content)
+
+    return output["messages"][-1].content
+
+
+# Example usage: runs a single ai_chat call when this file is executed as a script.
+if __name__ == "__main__":
+    # Sample resume text passed to ai_chat as the `resume` argument
+    sample_resume = """
+    John Doe
+    EMT-B Certified
+    5 years experience as volunteer firefighter
+    Bachelor's in Fire Science
+    """
+    
+    # Sample conversation identifiers
+    conversation_id = "12345"
+    theme_id = 1  # Customer Service theme
+    
+    # Note: no separate "start conversation" call is made here.
+    
+    
+    # Send one query; ai_chat reads any earlier turns for this id from conversations.json
+    follow_up = ai_chat(
+        query="What was my last questions?",
+        conversation_id=conversation_id,
+        theme_id=theme_id,
+        resume=sample_resume
+    )
+    print("AI:", follow_up)
@@ -0,0 +1,53 @@
+import os
+from spire.doc import Document, FileFormat
+from langchain_community.document_loaders import PyPDFLoader
+
+def convert_word_to_pdf(doc_path: str) -> str:
+    """
+    Convert a .doc or .docx file to PDF using Spire.Doc.
+    
+    Args:
+        doc_path (str): The path to the .doc or .docx file.
+
+    Returns:
+        str: The path to the converted PDF file.
+    """
+    pdf_path = os.path.splitext(doc_path)[0] + '.pdf'
+    
+    # Create a Document object
+    document = Document()
+    # Load the Word document
+    document.LoadFromFile(doc_path)
+    # Save as PDF
+    document.SaveToFile(pdf_path, FileFormat.PDF)
+    document.Close()
+    
+    return pdf_path
+
+def load_document(file_path: str):
+    """
+    Utility function to load a PDF, DOCX, or DOC file by first converting it to PDF.
+
+    Args:
+        file_path (str): The path to the file to load.
+
+    Returns:
+        List[Document]: A list of Document objects representing the contents of the file.
+    """
+    
+    try:
+        extension = os.path.splitext(file_path)[1].lower()
+        
+        if extension in ['.doc', '.docx']:
+            # Convert .doc or .docx to PDF first
+            pdf_path = convert_word_to_pdf(file_path)
+            loader = PyPDFLoader(pdf_path)
+        elif extension == '.pdf':
+            loader = PyPDFLoader(file_path)
+        else:
+            raise ValueError(f"Unsupported file type: {extension}. Only .pdf, .docx, and .doc are supported.")
+        
+        return loader.load()
+    except Exception as e:
+        print(f"Error loading document: {str(e)}")
+        return None
@@ -0,0 +1,73 @@
+import os
+import requests
+import json
+from PyPDF2 import PdfReader
+
+# Config directory, relative to the process working directory.
+base_path = os.path.join("data", "config_files")
+THEME_CONTEXT_PATH = os.path.join(base_path, "theme_context.json")
+
+# Parsed once at import time; format_theme_text searches this for a matching "id".
+with open(THEME_CONTEXT_PATH, "r") as f:
+    themes = json.load(f)
+
+def delete_file(file_path):
+    try:
+        os.remove(file_path)
+        print(f"Deleted file: {file_path}")
+    except OSError as e:
+        print(f"Error deleting file {file_path}: {e}")
+
+
+
+def delete_all_files_in_directory(directory_path):
+    try:
+        for filename in os.listdir(directory_path):
+            file_path = os.path.join(directory_path, filename)
+            if os.path.isfile(file_path):
+                os.remove(file_path)
+                print(f"Deleted file: {file_path}")
+    except OSError as e:
+        print(f"Error deleting files in {directory_path}: {e}")
+
+
+def format_questions_text(questions_dict,key):
+    """Format questions as text with dashes."""
+    formatted_text = ""
+    for question in questions_dict[key]:
+        formatted_text += f"- {question['question']}\n"
+    return formatted_text.strip()
+
+
+
+def format_theme_text(theme_id):
+    """Format questions as text with dashes."""
+    formatted_text = "" 
+    matching_themes = [t for t in themes if t["id"] == theme_id]
+    current_theme = matching_themes[0]
+    formatted_text += f"- {current_theme['id']}\n"
+    formatted_text += f"- {current_theme['theme']}\n"
+    formatted_text += f"- {current_theme['context']}\n"
+    
+    return formatted_text.strip()
+
+
+
+def download_pdf_and_extract_text(url: str) -> str:
+    # Create a temporary file path
+    temp_file_path = 'temp.pdf'
+    
+    # Download the PDF from the URL
+    response = requests.get(url)
+    response.raise_for_status()  # Raise an error for bad responses
+    with open(temp_file_path, 'wb') as f:
+        f.write(response.content)
+
+    # Load the PDF
+    reader = PdfReader(temp_file_path)
+
+    # Extract text from all pages and combine into one text
+    combined_text = "\n\n".join(page.extract_text() for page in reader.pages if page.extract_text())
+
+    # Delete the temporary file
+    os.remove(temp_file_path)
+
+    return combined_text
@@ -2808,3 +2808,350 @@ To avoid injury, hold up the wheel when unscrewing the bolts.
 2024-08-16 17:37:05,637 - INFO - PyTorch version 2.4.0+cu124 available.
 2024-08-16 17:37:06,226 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
 2024-08-16 17:37:13,187 - INFO - Embedding model loaded
+2024-08-16 17:38:12,435 - INFO - Searching for the query
+2024-08-16 17:38:12,436 - INFO - Searching for Wirebrush
+2024-08-16 17:38:12,436 - INFO - Loading embedded data
+2024-08-16 17:38:14,222 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
+  attn_output = torch.nn.functional.scaled_dot_product_attention(
+
+2024-08-16 17:38:14,372 - INFO - Search completed
+2024-08-16 17:38:14,373 - INFO - Search completed
+2024-08-16 17:38:14,373 - INFO - Summarizing search results
+2024-08-16 17:38:14,375 - INFO - Search results summarized
+2024-08-16 20:07:32,804 - INFO - Loading the embedding model
+2024-08-16 20:07:41,433 - INFO - PyTorch version 2.4.0+cu124 available.
+2024-08-16 20:07:41,857 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
+2024-08-16 20:07:49,114 - INFO - Embedding model loaded
+2024-08-16 20:10:52,736 - INFO - Loading the embedding model
+2024-08-16 20:10:58,470 - INFO - PyTorch version 2.4.0+cu124 available.
+2024-08-16 20:10:58,831 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
+2024-08-16 20:11:04,768 - INFO - Embedding model loaded
+2024-08-16 20:11:44,178 - INFO - Loading the embedding model
+2024-08-16 20:11:51,103 - INFO - PyTorch version 2.4.0+cu124 available.
+2024-08-16 20:11:51,549 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
+2024-08-16 20:11:56,825 - INFO - Embedding model loaded
+2024-08-16 20:15:04,657 - INFO - Searching for the query
+2024-08-16 20:15:04,657 - INFO - Searching for wirebrush
+2024-08-16 20:15:04,657 - INFO - Loading embedded data
+2024-08-16 20:16:13,332 - INFO - Loading the embedding model
+2024-08-16 20:16:19,225 - INFO - PyTorch version 2.4.0+cu124 available.
+2024-08-16 20:16:19,602 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
+2024-08-16 20:16:24,953 - INFO - Embedding model loaded
+2024-08-16 20:16:51,193 - INFO - Searching for the query
+2024-08-16 20:16:51,193 - INFO - Searching for wirebrush
+2024-08-16 20:16:51,193 - INFO - Loading embedded data
+2024-08-16 20:16:53,308 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
+  attn_output = torch.nn.functional.scaled_dot_product_attention(
+
+2024-08-16 20:16:53,475 - INFO - Search completed
+2024-08-16 20:16:53,475 - INFO - Search completed
+2024-08-16 20:16:53,475 - INFO - Summarizing search results
+2024-08-16 20:16:53,475 - INFO - Search results summarized
+2024-08-16 20:17:37,427 - INFO - Searching for the query
+2024-08-16 20:17:37,427 - INFO - Searching for 
+    query IntrospectionQuery {
+      __schema {
+        
+        queryType { name }
+        mutationType { name }
+        subscriptionType { name }
+        types {
+          ...FullType
+        }
+        directives {
+          name
+          description
+          
+          locations
+          args {
+            ...InputValue
+          }
+        }
+      }
+    }
+
+    fragment FullType on __Type {
+      kind
+      name
+      description
+      
+      fields(includeDeprecated: true) {
+        name
+        description
+        args {
+          ...InputValue
+        }
+        type {
+          ...TypeRef
+        }
+        isDeprecated
+        deprecationReason
+      }
+      inputFields {
+        ...InputValue
+      }
+      interfaces {
+        ...TypeRef
+      }
+      enumValues(includeDeprecated: true) {
+        name
+        description
+        isDeprecated
+        deprecationReason
+      }
+      possibleTypes {
+        ...TypeRef
+      }
+    }
+
+    fragment InputValue on __InputValue {
+      name
+      description
+      type { ...TypeRef }
+      defaultValue
+      
+      
+    }
+
+    fragment TypeRef on __Type {
+      kind
+      name
+      ofType {
+        kind
+        name
+        ofType {
+          kind
+          name
+          ofType {
+            kind
+            name
+            ofType {
+              kind
+              name
+              ofType {
+                kind
+                name
+                ofType {
+                  kind
+                  name
+                  ofType {
+                    kind
+                    name
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  
+2024-08-16 20:17:37,443 - INFO - Loading embedded data
+2024-08-16 20:17:37,511 - INFO - Search completed
+2024-08-16 20:17:37,511 - INFO - Search completed
+2024-08-16 20:17:37,511 - INFO - Summarizing search results
+2024-08-16 20:17:37,527 - INFO - Search results summarized
+2024-08-16 20:18:42,832 - INFO - Searching for the query
+2024-08-16 20:18:42,832 - INFO - Searching for wirebrush
+2024-08-16 20:18:42,832 - INFO - Loading embedded data
+2024-08-16 20:18:42,882 - INFO - Search completed
+2024-08-16 20:18:42,899 - INFO - Search completed
+2024-08-16 20:18:42,899 - INFO - Summarizing search results
+2024-08-16 20:18:42,901 - INFO - Search results summarized
+2024-08-16 20:22:46,791 - INFO - Loading the embedding model
+2024-08-16 20:22:53,018 - INFO - PyTorch version 2.4.0+cu124 available.
+2024-08-16 20:22:53,427 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
+2024-08-16 20:22:59,566 - INFO - Embedding model loaded
+2024-08-16 20:24:21,213 - INFO - Loading the embedding model
+2024-08-16 20:24:26,903 - INFO - PyTorch version 2.4.0+cu124 available.
+2024-08-16 20:24:27,281 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
+2024-08-16 20:24:33,819 - INFO - Embedding model loaded
+2024-08-16 20:24:42,819 - INFO - Loading pdf document from uploads\Employee_Attrition_Action_Plan.pdf
+2024-08-16 20:24:43,532 - INFO - Summarizing document
+2024-08-16 20:24:45,028 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+2024-08-16 20:24:45,559 - INFO - Creating vector store
+2024-08-16 20:24:46,277 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
+  attn_output = torch.nn.functional.scaled_dot_product_attention(
+
+2024-08-16 20:24:46,583 - INFO - Vector store created
+2024-08-16 20:24:46,583 - INFO - Saving the vector store
+2024-08-16 20:26:45,690 - INFO - Loading the embedding model
+2024-08-16 20:26:51,361 - INFO - PyTorch version 2.4.0+cu124 available.
+2024-08-16 20:26:51,689 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
+2024-08-16 20:26:57,758 - INFO - Embedding model loaded
+2024-08-16 20:27:35,992 - INFO - Loading pdf document from uploads\Employee_Attrition_Action_Plan.pdf
+2024-08-16 20:27:36,682 - INFO - Summarizing document
+2024-08-16 20:27:37,718 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+2024-08-16 20:27:38,243 - INFO - Creating vector store
+2024-08-16 20:27:39,588 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
+  attn_output = torch.nn.functional.scaled_dot_product_attention(
+
+2024-08-16 20:27:39,850 - INFO - Vector store created
+2024-08-16 20:27:39,851 - INFO - Saving the vector store
+2024-08-16 20:28:37,928 - INFO - Loading the embedding model
+2024-08-16 20:28:43,166 - INFO - PyTorch version 2.4.0+cu124 available.
+2024-08-16 20:28:43,498 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
+2024-08-16 20:28:49,596 - INFO - Embedding model loaded
+2024-08-16 20:30:50,050 - INFO - Loading the embedding model
+2024-08-16 20:30:55,432 - INFO - PyTorch version 2.4.0+cu124 available.
+2024-08-16 20:30:55,755 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
+2024-08-16 20:31:01,549 - INFO - Embedding model loaded
+2024-08-16 20:32:24,048 - INFO - Loading the embedding model
+2024-08-16 20:32:29,504 - INFO - PyTorch version 2.4.0+cu124 available.
+2024-08-16 20:32:29,834 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
+2024-08-16 20:32:35,855 - INFO - Embedding model loaded
+2024-08-16 20:32:49,136 - INFO - Loading pdf document from uploads\Employee_Attrition_Action_Plan.pdf
+2024-08-16 20:32:49,964 - INFO - Summarizing document
+2024-08-16 20:32:50,973 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+2024-08-16 20:32:51,487 - INFO - Creating vector store
+2024-08-16 20:32:52,933 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
+  attn_output = torch.nn.functional.scaled_dot_product_attention(
+
+2024-08-16 20:32:53,278 - INFO - Vector store created
+2024-08-16 20:32:53,278 - INFO - Saving the vector store
+2024-08-16 20:32:53,278 - INFO - Saving embeddings
+2024-08-16 20:32:53,278 - INFO - Vector store saved
+2024-08-16 20:32:53,278 - INFO - Saving embeddings
+2024-08-16 20:32:53,278 - INFO - Creating thumbnail for uploads\Employee_Attrition_Action_Plan.pdf
+2024-08-16 20:36:58,132 - INFO - Searching for the query
+2024-08-16 20:36:58,132 - INFO - Searching for Implement Career Development Programs
+2024-08-16 20:36:58,133 - INFO - Loading embedded data
+2024-08-16 20:36:59,422 - INFO - Search completed
+2024-08-16 20:36:59,422 - INFO - Search completed
+2024-08-16 20:36:59,422 - INFO - Summarizing search results
+2024-08-16 20:36:59,422 - INFO - Search results summarized
+2024-08-16 20:40:29,336 - INFO - Loading the embedding model
+2024-08-16 20:40:37,718 - INFO - PyTorch version 2.4.0+cu124 available.
+2024-08-16 20:40:38,253 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
+2024-08-16 20:40:44,320 - INFO - Embedding model loaded
+2024-08-16 20:40:59,981 - INFO - Searching for the query
+2024-08-16 20:40:59,984 - INFO - Searching for Implement Career Development Programs
+2024-08-16 20:40:59,985 - INFO - Loading embedded data
+2024-08-16 20:41:01,671 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
+  attn_output = torch.nn.functional.scaled_dot_product_attention(
+
+2024-08-16 20:41:01,754 - INFO - Search completed
+2024-08-16 20:41:01,754 - INFO - Search completed
+2024-08-16 20:41:01,754 - INFO - Summarizing search results
+2024-08-16 20:41:01,754 - INFO - Search results summarized
+2024-08-16 20:42:39,142 - INFO - Loading pdf document from uploads\Employee_Attrition_Action_Plan.pdf
+2024-08-16 20:42:39,142 - INFO - Loading docx document from uploads\How to change rear windshield wipers on TOYOTA Corolla.docx
+2024-08-16 20:42:39,504 - INFO - Creating documents from text
+2024-08-16 20:42:39,998 - INFO - Summarizing document
+2024-08-16 20:42:41,011 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+2024-08-16 20:42:41,534 - INFO - Summarizing document
+2024-08-16 20:42:42,011 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+2024-08-16 20:42:42,536 - INFO - Creating vector store
+2024-08-16 20:42:44,194 - INFO - Vector store created
+2024-08-16 20:42:44,194 - INFO - Saving the vector store
+2024-08-16 20:42:44,194 - INFO - Saving embeddings
+2024-08-16 20:42:44,194 - INFO - Vector store saved
+2024-08-16 20:42:44,194 - INFO - Saving embeddings
+2024-08-16 20:42:44,194 - INFO - Creating thumbnail for uploads\Employee_Attrition_Action_Plan.pdf
+2024-08-16 20:42:44,212 - INFO - Creating thumbnail for uploads\How to change rear windshield wipers on TOYOTA Corolla.docx
+2024-08-16 20:42:55,518 - INFO - Searching for the query
+2024-08-16 20:42:55,518 - INFO - Searching for wirebrush
+2024-08-16 20:42:55,518 - INFO - Loading embedded data
+2024-08-16 20:42:55,581 - INFO - Search completed
+2024-08-16 20:42:55,581 - INFO - Search completed
+2024-08-16 20:42:55,581 - INFO - Summarizing search results
+2024-08-16 20:42:55,581 - INFO - Search results summarized
+2024-08-16 20:43:13,345 - INFO - Searching for the query
+2024-08-16 20:43:13,345 - INFO - Searching for Implement Career Development Programs
+2024-08-16 20:43:13,345 - INFO - Loading embedded data
+2024-08-16 20:43:13,362 - INFO - Search completed
+2024-08-16 20:43:13,362 - INFO - Search completed
+2024-08-16 20:43:13,362 - INFO - Summarizing search results
+2024-08-16 20:43:13,362 - INFO - Search results summarized
+2024-08-16 20:43:37,481 - INFO - Searching for the query
+2024-08-16 20:43:37,481 - INFO - Searching for toyota corolla
+2024-08-16 20:43:37,481 - INFO - Loading embedded data
+2024-08-16 20:43:37,497 - INFO - Search completed
+2024-08-16 20:43:37,497 - INFO - Search completed
+2024-08-16 20:43:37,497 - INFO - Summarizing search results
+2024-08-16 20:43:37,497 - INFO - Search results summarized
+2024-08-16 20:45:57,119 - INFO - Loading pdf document from uploads\corolla-2020-toyota-owners-manual.pdf
+2024-08-16 20:45:57,119 - INFO - Loading pdf document from uploads\Employee_Attrition_Action_Plan.pdf
+2024-08-16 20:45:57,136 - INFO - Loading docx document from uploads\How to change rear windshield wipers on TOYOTA Corolla.docx
+2024-08-16 20:45:57,495 - INFO - Creating documents from text
+2024-08-16 20:46:44,699 - INFO - Summarizing document
+2024-08-16 20:46:45,723 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+2024-08-16 20:46:46,242 - INFO - Summarizing document
+2024-08-16 20:46:46,879 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+2024-08-16 20:46:47,382 - INFO - Summarizing document
+2024-08-16 20:46:47,888 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+2024-08-16 20:46:48,402 - INFO - Creating vector store
+2024-08-16 20:46:58,675 - INFO - Searching for the query
+2024-08-16 20:46:58,675 - INFO - Searching for Manual for toyota corolla
+2024-08-16 20:46:58,675 - INFO - Loading embedded data
+2024-08-16 20:46:59,733 - INFO - Search completed
+2024-08-16 20:46:59,749 - INFO - Search completed
+2024-08-16 20:46:59,749 - INFO - Summarizing search results
+2024-08-16 20:46:59,749 - INFO - Search results summarized
+2024-08-16 20:47:07,643 - INFO - Vector store created
+2024-08-16 20:47:07,643 - INFO - Saving the vector store
+2024-08-16 20:47:07,643 - INFO - Saving embeddings
+2024-08-16 20:47:07,643 - INFO - Vector store saved
+2024-08-16 20:47:07,643 - INFO - Saving embeddings
+2024-08-16 20:47:07,659 - INFO - Creating thumbnail for uploads\corolla-2020-toyota-owners-manual.pdf
+2024-08-16 20:47:07,674 - INFO - Creating thumbnail for uploads\Employee_Attrition_Action_Plan.pdf
+2024-08-16 20:47:07,674 - INFO - Creating thumbnail for uploads\How to change rear windshield wipers on TOYOTA Corolla.docx
+2024-08-16 20:48:03,817 - INFO - Searching for the query
+2024-08-16 20:48:03,818 - INFO - Searching for Manual for toyota corolla
+2024-08-16 20:48:03,818 - INFO - Loading embedded data
+2024-08-16 20:48:04,276 - INFO - Search completed
+2024-08-16 20:48:04,276 - INFO - Search completed
+2024-08-16 20:48:04,276 - INFO - Summarizing search results
+2024-08-16 20:48:04,276 - INFO - Search results summarized
+2024-08-16 20:48:14,034 - INFO - Searching for the query
+2024-08-16 20:48:14,044 - INFO - Searching for Manual for toyota corolla
+2024-08-16 20:48:14,044 - INFO - Loading embedded data
+2024-08-16 20:48:14,067 - INFO - Search completed
+2024-08-16 20:48:14,067 - INFO - Search completed
+2024-08-16 20:48:14,074 - INFO - Summarizing search results
+2024-08-16 20:48:14,074 - INFO - Search results summarized
+2024-08-16 21:32:02,645 - INFO - Searching for the query
+2024-08-16 21:32:02,645 - INFO - Searching for Manual Of toyota corolla
+2024-08-16 21:32:02,645 - INFO - Loading embedded data
+2024-08-16 21:32:02,795 - INFO - Search completed
+2024-08-16 21:32:02,799 - INFO - Search completed
+2024-08-16 21:32:02,800 - INFO - Summarizing search results
+2024-08-16 21:32:02,804 - INFO - Search results summarized
+2024-08-16 21:33:39,365 - INFO - Transcribing audio chunks from uploads\audio-2.mp3
+2024-08-16 21:33:39,373 - INFO - Splitting audio file uploads\audio-2.mp3 by duration
+2024-08-16 21:33:39,378 - INFO - Loading pdf document from uploads\corolla-2020-toyota-owners-manual.pdf
+2024-08-16 21:33:39,404 - INFO - Loading pdf document from uploads\Employee_Attrition_Action_Plan.pdf
+2024-08-16 21:33:39,411 - INFO - Loading docx document from uploads\How to change rear windshield wipers on TOYOTA Corolla.docx
+2024-08-16 21:33:39,938 - INFO - Creating documents from text
+2024-08-16 21:34:58,915 - INFO - Transcribing audio file audio-2_chunks/audio-2_chunk1.mp3
+2024-08-16 21:35:15,275 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
+2024-08-16 21:35:18,193 - INFO - Transcribing audio file audio-2_chunks/audio-2_chunk2.mp3
+2024-08-16 21:35:28,536 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
+2024-08-16 21:35:31,402 - INFO - Transcribing audio file audio-2_chunks/audio-2_chunk3.mp3
+2024-08-16 21:35:38,932 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
+2024-08-16 21:35:43,996 - INFO - Transcribing audio file audio-2_chunks/audio-2_chunk4.mp3
+2024-08-16 21:36:00,061 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
+2024-08-16 21:36:02,748 - INFO - Transcribing audio file audio-2_chunks/audio-2_chunk5.mp3
+2024-08-16 21:36:09,913 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
+2024-08-16 21:36:12,705 - INFO - Transcribing audio file audio-2_chunks/audio-2_chunk6.mp3
+2024-08-16 21:36:21,767 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
+2024-08-16 21:36:25,045 - INFO - Transcribing audio file audio-2_chunks/audio-2_chunk7.mp3
+2024-08-16 21:36:32,036 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
+2024-08-16 21:36:37,503 - INFO - Summarizing document
+2024-08-16 21:36:39,003 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+2024-08-16 21:36:39,512 - INFO - Summarizing document
+2024-08-16 21:36:40,128 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+2024-08-16 21:36:40,631 - INFO - Summarizing document
+2024-08-16 21:36:42,227 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+2024-08-16 21:36:42,730 - INFO - Summarizing document
+2024-08-16 21:36:43,327 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
+2024-08-16 21:36:43,852 - INFO - Creating vector store
+2024-08-16 21:37:03,431 - INFO - Vector store created
+2024-08-16 21:37:03,431 - INFO - Saving the vector store
+2024-08-16 21:37:03,431 - INFO - Saving embeddings
+2024-08-16 21:37:03,441 - INFO - Vector store saved
+2024-08-16 21:37:03,441 - INFO - Saving embeddings
+2024-08-16 21:37:03,447 - INFO - Creating thumbnail for uploads\audio-2.mp3
+2024-08-16 21:37:03,458 - INFO - Creating thumbnail for uploads\corolla-2020-toyota-owners-manual.pdf
+2024-08-16 21:37:03,471 - INFO - Creating thumbnail for uploads\Employee_Attrition_Action_Plan.pdf
+2024-08-16 21:37:03,483 - INFO - Creating thumbnail for uploads\How to change rear windshield wipers on TOYOTA Corolla.docx
@@ -4,7 +4,7 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
-from utils import  load_embedded_data, load_documents_from_directory, create_vector_store, save_embedded_data
+from utils import  load_embedded_data, load_documents_from_directory, create_vector_store, save_embedded_data, process_directory
 from search import search_and_summarize
 from data_ingest import load_data

@@ -31,7 +31,7 @@ app.add_middleware(
 class SearchRequest(BaseModel):
    query: str

-@app.get("/load_documents")
+@app.post("/load_documents")
 def load_documents(directory: str):
    
    # loading the documents from the directory
@@ -40,6 +40,8 @@ def load_documents(directory: str):
    embed_db = create_vector_store(documents, docs_id, num_pages)
    # saving the embedded data
    status = save_embedded_data(embed_db)
+    # creating the thumbnails
+    status = process_directory(directory)
    
    return {"status": "Documents loaded successfully"}

@@ -12,7 +12,7 @@ def get_data_description(data_path):
    data_name = data_path.split('/')[-1].split('\\')[-1].split('.')[0]
    # print(data_name)
    # open the data.json file
-    with open('data/data.json') as f:
+    with open('uploads/data.json') as f:
        data = json.load(f)
        existing_data = data.keys()
        if data_name in existing_data:
@@ -25,10 +25,10 @@ def get_data_thumbnail(data_path, timestamp = None):
    # ensuring no // or / or extension is present
    file_name = data_path.split('/')[-1].split('\\')[-1].split('.')[0]
    # first check is to see if the file_name has a .png image in the thumbnail folder
-    if os.path.exists(f'data/thumbnails/{file_name}.png'):
-        return f'data/thumbnails/{file_name}.png'
+    if os.path.exists(f'uploads/thumbnails/{file_name}.png'):
+        return f'uploads/thumbnails/{file_name}.png'
    # the second check is to see if we have a folder with this file_name
-    elif os.path.exists(f'data/{file_name}'):
+    elif os.path.exists(f'uploads/{file_name}'):
        # so now we want to access the first timestamp
        if timestamp:
            first = timestamp[0]
@@ -40,8 +40,8 @@ def get_data_thumbnail(data_path, timestamp = None):
            # bringing them together
            image_file = f"{start}-{end}s.png"
            # now checking if the file exists
-            if os.path.exists(f'data/{file_name}/{image_file}'):
-                return f'data/{file_name}/{image_file}'
+            if os.path.exists(f'uploads/{file_name}/{image_file}'):
+                return f'uploads/{file_name}/{image_file}'

 def summarize_doc_search(data):
    summary = {}
@@ -0,0 +1 @@
+{"Employee_Attrition_Action_Plan": {"doc_id": "18ab2b5127504921b6f316475dbf7c69", "num_pages": 3, "doc_summary": "This document is about an Action Plan to Prevent Employee Attrition, which outlines five strategies to reduce employee turnover and improve job satisfaction. The plan includes:\n\n* Conducting comprehensive exit interviews to identify patterns and common issues\n* Implementing career development programs to provide employees with a clear career path\n* Enhancing compensation and benefits to ensure they are competitive\n* Fostering a positive work environment through diversity, equity, and inclusion initiatives\n* Improving work-life balance by promoting flexibility and reducing burnout"}, "How to change rear windshield wipers on TOYOTA Corolla": {"doc_id": "ee5616e1611d43f3bc1896a2da892c64", "num_pages": 6, "doc_summary": "This document is about a replacement guide for the rear windshield wipers on a TOYOTA Corolla VI Liftback (E90) model. The guide provides instructions on how to change the rear windshield wipers on various trim levels of the vehicle, including 1.6, 1.8 D, 1.3 i, 1.6 GTI, 1.6 4WD, 1.3, 1.4, and 1.6 EFi"}, "corolla-2020-toyota-owners-manual": {"doc_id": "b3d96d6eb1ab452e9b2c8228b15cda13", "num_pages": 588, "doc_summary": "This document is about the user manual for a Toyota Corolla, providing information and instructions on various aspects of the vehicle, including safety and security, vehicle status, driving operations, interior features, maintenance, and troubleshooting. The manual covers topics such as child seat installation, theft deterrent systems, reading driving-related information, operating the Entune audio system, and caring for the vehicle's interior and exterior. 
It also includes information on reporting safety defects and provides instructions for Canadian owners on seat belt and SRS air"}, "audio-2": {"doc_id": "5216621677ff4c7ca7c9029907d41e81", "num_pages": 7, "doc_summary": "This document is about a tutorial on how to change the oil in a car or truck, specifically geared towards beginners who have never done an oil change before. The video covers the necessary tools and materials needed, including a socket set, gloves, paper towels, oil and filter, oil filter wrench, oil catch pan, funnel, and breaker bar. The tutorial also explains how to find the correct oil and filter for the vehicle, and how to jack up the vehicle to access the oil drain plug. The"}}
@@ -49,7 +49,7 @@ GROQ_LLM = ChatGroq(temperature=0, model_name="llama3-8b-8192", max_tokens=100)
 # loading the embedding model
 def load_embedding_model():
    model_name = "BAAI/bge-small-en"
-    model_kwargs = {"device": "cuda"} #can also be cpu
+    model_kwargs = {"device": "cpu"} #can also be cuda
    encode_kwargs = {"normalize_embeddings": True}
    embeddings = HuggingFaceBgeEmbeddings(
                model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
@@ -405,15 +405,15 @@ def doc_summarizer(document_page: list) -> str:

 #-----------------------------------------------------OTHERS--------------------------------------------------------------

-def save_embedded_data(embeddings, key="data"):
+def save_embedded_data(embeddings, path = "index/faiss_index",):
    logger.info(f"Saving embeddings")
-    embeddings.save_local(f"index/faiss_index_{key}")
+    embeddings.save_local(f"index/faiss_index")
    print("Embeddings saved")
    return 'saved'

-def load_embedded_data(embeddings=embeddings, key="data"):
+def load_embedded_data(embeddings=embeddings, path = "index/faiss_index"):
    logger.info(f"Loading embedded data")
-    embed_db = FAISS.load_local(f"index/faiss_index_{key}", embeddings, allow_dangerous_deserialization=True)
+    embed_db = FAISS.load_local(f"index/faiss_index", embeddings, allow_dangerous_deserialization=True)
    return embed_db

 #-----------------------------------------------------Data Loading Process----------------------------------------------------
@@ -522,7 +522,7 @@ def create_vector_store(documents: list, docs_id: list, num_pages: list):
    logger.info(f"Vector store created")
    logger.info(f"Saving the vector store")
    # saving the vector store automatically
-    save_embedded_data(vector_store, key="data")
+    save_embedded_data(vector_store)
    logger.info(f"Vector store saved")
    return vector_store
Author	SHA1	Message	Date
kowshik	4cd9aeac51	ds apis implemneted	2025-02-06 20:12:43 +00:00
kowshik	58e0cbfa3c	Stop tracking .env file	2024-08-30 01:49:42 +00:00
kowshik	c6e8820352	Stop tracking virtual environment	2024-08-30 01:48:22 +00:00
kowshik	fe00d27258	setup folders and run tests on sop generation' exit() clear	2024-08-30 01:45:18 +00:00
kowshik	5fde58fe82	created foolders and subfolders and test sops generation	2024-08-30 01:42:26 +00:00
timothyafolami	126c9e96cd	embedding loading on cpu	2024-08-16 23:09:27 +01:00
timothyafolami	65a5bcfe43	added env file	2024-08-16 22:34:54 +01:00
timothyafolami	dd99ec2bbf	logs	2024-08-16 21:39:49 +01:00
timothyafolami	ddd0dda276	last commi	2024-08-16 21:39:28 +01:00
timothyafolami	83ecbded4b	thumbnails generation added	2024-08-16 17:50:51 +01:00
				`@@ -0,0 +1 @@`
				{"Employee_Attrition_Action_Plan": {"doc_id": "18ab2b5127504921b6f316475dbf7c69", "num_pages": 3, "doc_summary": "This document is about an Action Plan to Prevent Employee Attrition, which outlines five strategies to reduce employee turnover and improve job satisfaction. The plan includes:\n\n* Conducting comprehensive exit interviews to identify patterns and common issues\n* Implementing career development programs to provide employees with a clear career path\n* Enhancing compensation and benefits to ensure they are competitive\n* Fostering a positive work environment through diversity, equity, and inclusion initiatives\n* Improving work-life balance by promoting flexibility and reducing burnout"}, "How to change rear windshield wipers on TOYOTA Corolla": {"doc_id": "ee5616e1611d43f3bc1896a2da892c64", "num_pages": 6, "doc_summary": "This document is about a replacement guide for the rear windshield wipers on a TOYOTA Corolla VI Liftback (E90) model. The guide provides instructions on how to change the rear windshield wipers on various trim levels of the vehicle, including 1.6, 1.8 D, 1.3 i, 1.6 GTI, 1.6 4WD, 1.3, 1.4, and 1.6 EFi"}, "corolla-2020-toyota-owners-manual": {"doc_id": "b3d96d6eb1ab452e9b2c8228b15cda13", "num_pages": 588, "doc_summary": "This document is about the user manual for a Toyota Corolla, providing information and instructions on various aspects of the vehicle, including safety and security, vehicle status, driving operations, interior features, maintenance, and troubleshooting. The manual covers topics such as child seat installation, theft deterrent systems, reading driving-related information, operating the Entune audio system, and caring for the vehicle's interior and exterior. 
It also includes information on reporting safety defects and provides instructions for Canadian owners on seat belt and SRS air"}, "audio-2": {"doc_id": "5216621677ff4c7ca7c9029907d41e81", "num_pages": 7, "doc_summary": "This document is about a tutorial on how to change the oil in a car or truck, specifically geared towards beginners who have never done an oil change before. The video covers the necessary tools and materials needed, including a socket set, gloves, paper towels, oil and filter, oil filter wrench, oil catch pan, funnel, and breaker bar. The tutorial also explains how to find the correct oil and filter for the vehicle, and how to jack up the vehicle to access the oil drain plug. The"}}