"""Load a PDF document, embed it into a vector store, and persist the result."""

import os
import sys

from langchain_community.document_loaders import PyPDFLoader
from utils import create_vector_store, save_embedded_data

# Add the root directory to sys.path so the project-level ``loggings``
# package can be imported when this file is run from its own directory.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from loggings.logging_config import logger


def load_pdf_document(document_path: str) -> list:
    """Load the PDF at *document_path* and split it into documents.

    The extension check is case-insensitive, so ``manual.PDF`` is accepted
    as well as ``manual.pdf``.

    Args:
        document_path: Path to the document; it must end in ``.pdf``.

    Returns:
        The documents produced by ``PyPDFLoader.load_and_split()``.

    Raises:
        ValueError: If *document_path* does not end in ``.pdf``.
    """
    logger.info(f"Loading document from {document_path}")
    logger.info("Checking if the document is a pdf")

    # Guard clause: reject anything that is not a PDF before touching the file.
    if not document_path.lower().endswith(".pdf"):
        logger.error(f"Unsupported document type for {document_path}")
        raise ValueError(f"Unsupported document type for {document_path}")

    logger.info("Document is a pdf")
    logger.info("Loading and splitting the document")
    pdf_doc = PyPDFLoader(document_path)
    # NOTE(review): load_and_split() presumably returns text chunks rather
    # than exactly one entry per PDF page, although the log message below
    # says "pages" -- confirm against the loader's documentation.
    pages = pdf_doc.load_and_split()
    logger.info(f"Document loaded and split into {len(pages)} pages")
    return pages


def load_and_create_vector_store(document_path: str) -> str:
    """Load a PDF, build a vector store from it, and save the store.

    Args:
        document_path: Path to the PDF document to ingest.

    Returns:
        The fixed status message ``"Vector store created and saved"``.

    Raises:
        ValueError: Propagated from ``load_pdf_document`` when
            *document_path* is not a PDF.
    """
    logger.info(f"Loading and creating vector store for {document_path}")
    pages = load_pdf_document(document_path)

    logger.info("Creating vector store")
    # create_vector_store comes from the project's utils module; presumably
    # it embeds the documents -- verify against its definition.
    embed_db = create_vector_store(pages)
    logger.info("Vector store created")

    logger.info("Saving the vector store")
    # Persist the embedded data via the project helper.
    save_embedded_data(embed_db)
    logger.info("Vector store saved")
    return "Vector store created and saved"


if __name__ == "__main__":
    # An optional first command-line argument overrides the default document,
    # so the script can ingest other PDFs without editing the source.
    document_path = (
        sys.argv[1]
        if len(sys.argv) > 1
        else "./data/corolla-2020-toyota-owners-manual.pdf"
    )
    load_and_create_vector_store(document_path)