AI indexing completed

This commit is contained in:
timothyafolami
2024-08-16 17:37:28 +01:00
parent 713354371e
commit cff9511d86
13 changed files with 2843 additions and 257 deletions
+14 -6
View File
@@ -1,19 +1,18 @@
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from utils import create_vector_store, save_embedded_data, load_documents_from_directory, load_embedding_model
from utils import create_vector_store, save_embedded_data, load_documents_from_directory, process_directory
from loggings.logging_config import logger
import time
# This module loads the data; you only need to set the data path below.
data_path = './data'
# # loading the embeddings
# logger.info(f"Loading the embeddings")
# embeddings = load_embedding_model()
# logger.info(f"Embeddings loaded")
def load_data(data_path: str):
logger.info(f"Loading data from {data_path}")
start_time = time.time()
# logging the start time
logger.info(f"Start time: {start_time}")
documents, docs_id, num_pages = load_documents_from_directory(data_path)
logger.info(f"Data loaded")
logger.info(f"Creating vector store")
@@ -23,8 +22,17 @@ def load_data(data_path: str):
# saving the embedded data
save_embedded_data(embed_db)
logger.info(f"Vector store saved")
end_time = time.time()
logger.info(f"End time: {end_time}")
time_taken = end_time - start_time
logger.info(f"Time taken: {time_taken}")
print("Vector store created and saved")
# creating the thumbnails
logger.info(f"Creating thumbnails")
status = process_directory(data_path)
print(f"{status}: Thumbnails created.")
logger.info(f"Thumbnails created")
return embed_db