PDF ingestion pipeline completed

This commit is contained in:
timothyafolami
2024-08-05 22:14:19 +01:00
parent b0c3eb8032
commit c34de21971
15 changed files with 318 additions and 90 deletions
View File
Binary file not shown.
Binary file not shown.
+28
View File
@@ -0,0 +1,28 @@
2024-08-05 22:09:11,365 - INFO - Loading and creating vector store for ./data/corolla-2020-toyota-owners-manual.pdf
2024-08-05 22:09:11,365 - INFO - Loading document from ./data/corolla-2020-toyota-owners-manual.pdf
2024-08-05 22:09:11,365 - INFO - Checking if the document is a pdf
2024-08-05 22:09:11,365 - INFO - Document is a pdf
2024-08-05 22:09:11,365 - INFO - Loading and splitting the document
2024-08-05 22:09:56,949 - INFO - Document loaded and split into 588 pages
2024-08-05 22:09:56,949 - INFO - Creating vector store
2024-08-05 22:10:06,736 - INFO - Loading faiss with AVX2 support.
2024-08-05 22:10:06,774 - INFO - Successfully loaded faiss with AVX2 support.
2024-08-05 22:10:06,800 - INFO - Vector store created
2024-08-05 22:10:06,802 - INFO - Saving the vector store
2024-08-05 22:11:24,966 - INFO - Loading and creating vector store for ./data/corolla-2020-toyota-owners-manual.pdf
2024-08-05 22:11:24,966 - INFO - Loading document from ./data/corolla-2020-toyota-owners-manual.pdf
2024-08-05 22:11:24,966 - INFO - Checking if the document is a pdf
2024-08-05 22:11:24,966 - INFO - Document is a pdf
2024-08-05 22:11:24,966 - INFO - Loading and splitting the document
2024-08-05 22:12:09,202 - INFO - Document loaded and split into 588 pages
2024-08-05 22:12:09,202 - INFO - Creating vector store
2024-08-05 22:12:19,066 - INFO - Loading faiss with AVX2 support.
2024-08-05 22:12:19,089 - INFO - Successfully loaded faiss with AVX2 support.
2024-08-05 22:12:19,123 - INFO - Vector store created
2024-08-05 22:12:19,123 - INFO - Saving the vector store
2024-08-05 22:12:19,131 - INFO - Vector store saved
2024-08-05 22:12:55,111 - INFO - Loading faiss with AVX2 support.
2024-08-05 22:12:55,144 - INFO - Successfully loaded faiss with AVX2 support.
2024-08-05 22:12:55,205 - INFO - Receiving the search query
2024-08-05 22:13:04,060 - INFO - Searching for what is LDA?
2024-08-05 22:13:04,241 - INFO - Search completed
+19
View File
@@ -0,0 +1,19 @@
import logging
import logging.handlers
import os
# Application logging setup: records at INFO level and above are written both
# to a log file under ``loggings/`` and to the console.

# Path of the log file; its parent directory is created just below.
LOG_FILE = 'loggings/app.log'

# Create the log directory before configuring handlers, because FileHandler
# opens its file as soon as it is constructed. exist_ok=True makes this safe
# to run repeatedly and avoids the check-then-create race of a separate
# os.path.exists() test. The directory is derived from LOG_FILE so the two
# cannot drift apart.
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)

# Configure the root logger. Note that basicConfig() does nothing if the root
# logger already has handlers attached.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # Explicit encoding so non-ASCII messages are written the same way
        # regardless of the platform's default locale encoding.
        logging.FileHandler(LOG_FILE, encoding='utf-8'),
        # Console output (StreamHandler defaults to sys.stderr).
        logging.StreamHandler(),
    ],
)

# Logger named after this module, for use by code that imports it.
logger = logging.getLogger(__name__)