Files
ds_erp_ai/data_ingestion/data_ingest.py
T
timothyafolami 9a2a4c5fdd bug fixed
2024-08-07 18:55:56 +01:00

32 lines
1.1 KiB
Python

import sys, os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from data_ingestion.utils import create_vector_store, save_embedded_data, load_documents_from_directory, load_embedding_model
from loggings.logging_config import logger
# Ingestion entry module: point ``data_path`` at the directory that holds the
# data to ingest, then run this file.
data_path = './data'

# The embedding model is loaded once, at import time; load_data() reads the
# resulting module-level ``embeddings`` object.
logger.info("Loading the embeddings")
embeddings = load_embedding_model()
logger.info("Embeddings loaded")
def load_data(data_path: str):
    """Build a vector store from the documents under *data_path* and save it.

    The steps are, in order: read the documents with
    ``load_documents_from_directory``, pass them together with the
    module-level ``embeddings`` to ``create_vector_store``, and hand the
    result to ``save_embedded_data``. Progress is reported through ``logger``
    and a final confirmation line is printed to stdout.

    Args:
        data_path: Location passed unchanged to
            ``load_documents_from_directory``.

    Returns:
        The object returned by ``create_vector_store``.
    """
    logger.info(f"Loading data from {data_path}")
    docs, doc_ids, page_counts = load_documents_from_directory(data_path)
    logger.info("Data loaded")

    logger.info("Creating vector store")
    vector_store = create_vector_store(embeddings, docs, doc_ids, page_counts)
    logger.info("Vector store created")

    # Persistence details (location, format) are owned by save_embedded_data.
    logger.info("Saving the vector store")
    save_embedded_data(vector_store)
    logger.info("Vector store saved")

    print("Vector store created and saved")
    return vector_store
# Script entry point: when executed directly, ingest the default data_path.
if __name__ == "__main__":
    load_data(data_path)