2024-08-07 18:03:15 +01:00
|
|
|
import sys, os
|
|
|
|
|
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
2024-08-16 17:37:28 +01:00
|
|
|
from utils import create_vector_store, save_embedded_data, load_documents_from_directory, process_directory
|
2024-08-07 18:03:15 +01:00
|
|
|
from loggings.logging_config import logger
|
2024-08-16 17:37:28 +01:00
|
|
|
import time
|
2024-08-07 18:03:15 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# This module loads the data; you only need to set the data path below.
|
|
|
|
|
# Directory passed to load_data() when this module is executed as a script.
data_path: str = "./data"
|
|
|
|
|
|
|
|
|
|
def load_data(data_path: str):
    """Load documents from *data_path*, embed, persist, and return the store.

    The steps run in this order:

    1. ``load_documents_from_directory(data_path)`` supplies three values
       that are forwarded positionally to ``create_vector_store``.
    2. The resulting store is handed to ``save_embedded_data``.
    3. ``process_directory(data_path)`` is run afterwards (the thumbnail
       step, going by the accompanying log messages) and its return value
       is printed.

    Progress is reported through ``logger``; two status lines are also
    written to stdout with ``print``.

    Args:
        data_path: Directory passed to both the document loader and
            ``process_directory``.

    Returns:
        Whatever ``create_vector_store`` returned.
    """
    logger.info(f"Loading data from {data_path}")

    # Wall-clock timestamps bracket the load/embed/save phase only; the
    # thumbnail step below runs after the end timestamp is taken.
    started_at = time.time()
    logger.info(f"Start time: {started_at}")

    docs, doc_ids, page_counts = load_documents_from_directory(data_path)
    logger.info("Data loaded")

    logger.info("Creating vector store")
    vector_store = create_vector_store(docs, doc_ids, page_counts)
    logger.info("Vector store created")

    logger.info("Saving the vector store")
    save_embedded_data(vector_store)
    logger.info("Vector store saved")

    finished_at = time.time()
    logger.info(f"End time: {finished_at}")
    logger.info(f"Time taken: {finished_at - started_at}")

    print("Vector store created and saved")

    logger.info("Creating thumbnails")
    thumbnail_status = process_directory(data_path)
    print(f"{thumbnail_status}: Thumbnails created.")
    logger.info("Thumbnails created")

    return vector_store
|
2024-08-07 18:03:15 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: build and save the vector store for the configured path.
if __name__ == "__main__":
    load_data(data_path=data_path)
|