created a data ingestion module.
This commit is contained in:
@@ -0,0 +1,33 @@
|
||||
from utils import create_vector_store, save_embedded_data, load_documents_from_directory, load_embedding_model
|
||||
import sys, os
|
||||
|
||||
# Add the root directory to sys.path
|
||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||
from loggings.logging_config import logger
|
||||
|
||||
|
||||
# This module ingests the data; the only thing you need to set is the data path below.
|
||||
data_path = "./data"

# The embedding model is loaded once, at import time, and kept in the
# module-level `embeddings` name that load_data() reads when it builds
# the vector store.
logger.info("Loading the embeddings")
embeddings = load_embedding_model()
logger.info("Embeddings loaded")
|
||||
|
||||
def load_data(data_path: str):
    """Ingest documents from ``data_path`` into a saved vector store.

    The documents are read with ``load_documents_from_directory``, turned
    into a vector store with ``create_vector_store`` (using the
    module-level ``embeddings`` object), and then handed to
    ``save_embedded_data``. Each stage is logged before and after it runs.

    Args:
        data_path: Path passed straight to ``load_documents_from_directory``.

    Returns:
        The fixed status string ``"Vector store created and saved"``.
    """
    # Stage 1: read the raw documents and their bookkeeping data.
    logger.info(f"Loading data from {data_path}")
    docs, doc_ids, page_counts = load_documents_from_directory(data_path)
    logger.info("Data loaded")

    # Stage 2: embed the documents into a vector store.
    logger.info("Creating vector store")
    vector_store = create_vector_store(embeddings, docs, doc_ids, page_counts)
    logger.info("Vector store created")

    # Stage 3: save the embedded data.
    logger.info("Saving the vector store")
    save_embedded_data(vector_store)
    logger.info("Vector store saved")

    return "Vector store created and saved"
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run as a script: ingest the default data directory configured above.
    load_data(data_path)
|
||||
Reference in New Issue
Block a user