Project restructuring..

This commit is contained in:
timothyafolami
2024-08-14 23:09:10 +01:00
parent dbcb8da015
commit ae151a2eff
17 changed files with 281 additions and 26 deletions
-2
View File
@@ -1,4 +1,2 @@
Ai indexing
data data
images
.env .env
+1 -1
View File
@@ -1 +1 @@
{"doc_names": ["1", "2", "3", "audio-1", "audio-2", "Car-Repair-Receipt-repair", "Car-Repair-Receipt-service", "Car-Repair-Receipt-tire", "Car-Repair-Receipt-tuning", "Car-Repair-Receipt-wash", "corolla-2020-toyota-owners-manual", "data\\dodge-challenger-auto-body-repair-after", "data\\dodge-challenger-auto-body-repair-before", "How to change engine oil and filter on TOYOTA Corolla", "How to change front brake pads on TOYOTA Corolla", "How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]", "How to change rear wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]", "How to change rear windshield wipers on TOYOTA Corolla", "How to change spark plugs on TOYOTA COROLLA", "data\\hyundai-sonata-auto-body-repair-after", "data\\hyundai-sonata-auto-body-repair-before", "data\\IMG_1436", "data\\IMG_1437", "data\\IMG_1438", "data\\IMG_1440", "data\\IMG_1441", "data\\IMG_1442", "data\\IMG_1443", "data\\IMG_1444", "data\\pontiac-vibe-auto-body-repair-after", "data\\pontiac-vibe-auto-body-repair-before", "test_rec", "data\\toyota-tacoma-auto-body-repair-after", "data\\toyota-tacoma-auto-body-repair-before"], "docs_id": ["8f1285940d9046b8a55dd9f00d41236e", "5ded0177f2494fcab28162f2841bd0d5", "2bb18beeb51d48158479734397b1f083", "0b79c6e0caad44ac8522edc1171e1dc5", "29b8ea4fd99e44a3b8fbbf8ef8ac6880", "4cb7fb1d4eb84ec2b719658fbe92f274", "fb9ee5da26a54e838617165893275681", "f77f280337a6485dac6dc15edec7b677", "8369ffb34d914a50b45cf0ad7c74b7f1", "ee8416cff59d42078fbe287c2ad268b7", "226fd18f73bb4cdab20ad614c0d8a569", "97d55511b936467e9573ae114314022d", "3d66c21fec1e4e1bae2b53cca97742c9", "0769d12c411348c8878552b504401168", "7dc6da9cdfce4c5992e4005f10300453", "c930ad016dd5447a8daeb7c453c6bf56", "afb0b0ef3d8b42feb278aa9d208a0f11", "72f6a7c3e5f74a7f9b3da83a49c65f59", "a6a0e43d096e439c91bbac4a6e806b4b", "d7dde21b648b45d182aa4bdf047fa39f", "f3c26f26b90e4b40ba8b96afc6875558", "4a1c69f51f4f4198b81b01d65ad5a63b", "23e8b0f4d54e491fb67e0bf2119cda45", "78e38b7d8ff34b12aaadfa9e51fef335", "9cbbfdc5c9b744a8baa74e4b8a30fc6c", "aaba2d8becb643319a000a4c55b2cf4a", "f53630b793bb404f89fdcec8ef4ba9c3", "d14dea4b06124374b8bc2081837ea965", "2d133c8456b44b3f86a2769429f74dc4", "f99af89c2ea34836b2281a50b12d1366", "b7bd085714554ab1bb7c9bd281231640", "bc0089a01ea440848cd5ca54e08295a7", "fab25d545fb6407c93f828896a22af4a", "7cc4793d3d8144cebbc9d101d7643597"], "num_pages": [2, 2, 3, 7, 7, 1, 2, 2, 2, 1, 588, 1, 1, 6, 7, 4, 2, 6, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]} {"doc_names": ["1", "2", "3", "audio-1", "audio-2", "Car-Repair-Receipt-repair", "Car-Repair-Receipt-service", "Car-Repair-Receipt-tire", "Car-Repair-Receipt-tuning", "Car-Repair-Receipt-wash", "corolla-2020-toyota-owners-manual", "data\\dodge-challenger-auto-body-repair-after", "data\\dodge-challenger-auto-body-repair-before", "How to change engine oil and filter on TOYOTA Corolla", "How to change front brake pads on TOYOTA Corolla", "How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]", "How to change rear wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]", "How to change rear windshield wipers on TOYOTA Corolla", "How to change spark plugs on TOYOTA COROLLA", "data\\hyundai-sonata-auto-body-repair-after", "data\\hyundai-sonata-auto-body-repair-before", "data\\IMG_1436", "data\\IMG_1437", "data\\IMG_1438", "data\\IMG_1440", "data\\IMG_1441", "data\\IMG_1442", "data\\IMG_1443", "data\\IMG_1444", "data\\pontiac-vibe-auto-body-repair-after", "data\\pontiac-vibe-auto-body-repair-before", "test_rec", "data\\toyota-tacoma-auto-body-repair-after", "data\\toyota-tacoma-auto-body-repair-before"], "docs_id": ["bf82a6b7ca514278ba8932028a25b042", "cfb1612d88634e87951abb081739732c", "0723acf3b939428ead3cceb48100e24b", "0560ebf944a34167a1f229c1044dcfc3", "6ba0680250244342b89c381f5e4e6328", "3c2d5843c3064acbbed980bb582d40f4", "ffeb04e7984544ff836088bb2d02cdae", "0b27e4d6792c4cb9aa7b7dfbcbf49f91", "35bd436246cb43b49d9fc54c258f8ed4", "5396d068e7c34a9b90c5d855d5aca866", "195a6c87111945f2b69b4d7e103d71ce", "064f8868a05c4d64abe7a1a0eef71ba2", "24e52042078c4fb5b958eda35cf46434", "4e95cb740bca4168b35e25a5c836b455", "9314f57385b441a4a292d3ce00ce723a", "28447944861f4fbc92346d48a97aaddc", "2dd4fd849f524c55bdb6dff3d3851e11", "c7d937e4456b40939b616f819b10e722", "059f19a2626e46b4a0807e60086b0f2f", "0fc5a9f521b0403b95c38cbe3c94fda5", "3cd0784c36904989be09aa4eb729bb77", "1a519aac07a142b28bde145d7420529d", "829b10ce5b32482caefc63474193c51f", "c7ca7c530dd34f6d96033717427aeebb", "598c3071c42346ca8897e6abee47e20f", "964b57f2e95a4d34a50107dc552c8891", "55841526690a4c8db8306c5c76a961cb", "d8b2cfc7062747b1be0e63cd54b2c495", "dd09b8faa8cd47d3a73b9c5bb21a5948", "eb818ea381de4ff98bb530e4a80c0fef", "7d8858bca658467dacdabce3fd26f5a6", "db24261507654f7e99b1627f8ea7e3b4", "b6365f8d8e744bb180e93e0d30e8d6d5", "96652ec08a874b4db58e7ede6e17feb3"], "num_pages": [2, 2, 3, 7, 7, 1, 2, 2, 2, 1, 588, 1, 1, 6, 7, 4, 2, 6, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
@@ -1,23 +1,23 @@
import sys, os import sys, os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from data_ingestion.utils import create_vector_store, save_embedded_data, load_documents_from_directory, load_embedding_model from utils import create_vector_store, save_embedded_data, load_documents_from_directory, load_embedding_model
from loggings.logging_config import logger from loggings.logging_config import logger
# This module will load in the data, you only need to add the data path to it. # This module will load in the data, you only need to add the data path to it.
data_path = './data' data_path = './data'
# loading the embeddings # # loading the embeddings
logger.info(f"Loading the embeddings") # logger.info(f"Loading the embeddings")
embeddings = load_embedding_model() # embeddings = load_embedding_model()
logger.info(f"Embeddings loaded") # logger.info(f"Embeddings loaded")
def load_data(data_path: str): def load_data(data_path: str):
logger.info(f"Loading data from {data_path}") logger.info(f"Loading data from {data_path}")
documents, docs_id, num_pages = load_documents_from_directory(data_path) documents, docs_id, num_pages = load_documents_from_directory(data_path)
logger.info(f"Data loaded") logger.info(f"Data loaded")
logger.info(f"Creating vector store") logger.info(f"Creating vector store")
embed_db = create_vector_store(embeddings,documents, docs_id, num_pages) embed_db = create_vector_store(documents, docs_id, num_pages)
logger.info(f"Vector store created") logger.info(f"Vector store created")
logger.info(f"Saving the vector store") logger.info(f"Saving the vector store")
# saving the embedded data # saving the embedded data
View File
Binary file not shown.
+139
View File
@@ -215,3 +215,142 @@
2024-08-13 22:07:11,760 - INFO - Search completed 2024-08-13 22:07:11,760 - INFO - Search completed
2024-08-13 22:07:11,762 - INFO - Page content: The image shows a black Toyota truck, likely a Toyota Tacoma, parked in what appears to be a dealership or repair lot. The caption indicates that it is an "accidented car: after repair," suggesting that the vehicle had previously been involved in an accident but has since been repaired. In the background, theres a gray Jeep vehicle. 2024-08-13 22:07:11,762 - INFO - Page content: The image shows a black Toyota truck, likely a Toyota Tacoma, parked in what appears to be a dealership or repair lot. The caption indicates that it is an "accidented car: after repair," suggesting that the vehicle had previously been involved in an accident but has since been repaired. In the background, theres a gray Jeep vehicle.
2024-08-13 22:41:00,464 - INFO - Loading data from ./data
2024-08-13 22:41:11,993 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:41:14,815 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:41:27,644 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:41:34,133 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:41:52,811 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:41:59,166 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:42:04,204 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:42:55,776 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:43:10,660 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:43:24,516 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:43:38,693 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:43:52,185 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:44:04,990 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:44:15,706 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:45:21,645 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:45:36,103 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:45:51,008 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:46:04,708 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:46:14,074 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:46:25,725 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:46:34,718 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:48:57,742 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:49:05,954 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:49:12,044 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:49:15,968 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:49:57,797 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:50:04,165 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:50:42,751 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-13 22:50:54,836 - INFO - Data loaded
2024-08-13 22:50:54,836 - INFO - Creating vector store
2024-08-13 22:50:55,834 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
attn_output = torch.nn.functional.scaled_dot_product_attention(
2024-08-13 22:51:07,264 - INFO - Vector store created
2024-08-13 22:51:07,272 - INFO - Saving the vector store
2024-08-13 22:51:07,287 - INFO - Vector store saved
2024-08-13 22:53:54,905 - INFO - Receiving the search query
2024-08-13 22:54:16,883 - INFO - Searching for wheel impact socket
2024-08-13 22:54:17,600 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
attn_output = torch.nn.functional.scaled_dot_product_attention(
2024-08-13 22:54:17,747 - INFO - Search completed
2024-08-13 22:54:17,748 - INFO - Page content: Wheel impact socket #21
Brake caliper wind back tool
Torque wrench
Crow bar
Wheel chock
REPLACEMENT: BRAKE PADS – TOYOTA COROLLA IX HATCHBACK
(E120). USE THE FOLLOWING PROCEDURE:
1
Open the bonnet. Unscrew the brake fluid reservoir cap.
2
Secure the wheels with chocks.
3
Loosen the wheel mounting bolts. Use wheel impact socket #21.
4
Raise the front of the car and secure on supports.
CLUB.AUTODOC.CO.UK 3–11
Perform the replacement of brake pads in complete set for each axis. This
provides effective braking.
The replacement procedure is identical for all brake pads on the same axle.
All work should be done with the engine stopped.
Replacement: brake pads – TOYOTA Corolla IX Hatchback (E120). AUTODOC
recommends:
5
Unscrew the wheel bolts.
6
Remove the wheel.
7
Spread the brake pads. Use a crowbar.
8
Clean the brake caliper fasteners. Use a wire brush. Use WD-40 spray.
CLUB.AUTODOC.CO.UK 4–11
To avoid injury, hold up the wheel when unscrewing the bolts.
2024-08-13 22:55:23,880 - INFO - Loading data from ./data
2024-08-14 16:09:40,712 - INFO - Loading data from ./data
2024-08-14 16:10:29,337 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:10:34,891 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:10:58,387 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:11:05,277 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:11:38,763 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:11:53,979 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:12:06,400 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:13:11,827 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:13:22,387 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:13:32,389 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:13:42,675 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:13:53,025 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:14:03,157 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:14:13,677 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:15:26,159 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:15:38,007 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:15:58,459 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:16:10,230 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:16:31,849 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:16:52,117 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:17:09,952 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:21:00,090 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:21:07,227 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:21:13,632 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:21:19,028 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:22:03,178 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:22:07,252 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:22:57,035 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
2024-08-14 16:23:08,267 - INFO - Data loaded
2024-08-14 16:23:08,267 - INFO - Creating vector store
2024-08-14 16:23:12,507 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
attn_output = torch.nn.functional.scaled_dot_product_attention(
2024-08-14 16:23:59,231 - INFO - Vector store created
2024-08-14 16:23:59,231 - INFO - Saving the vector store
2024-08-14 16:23:59,332 - INFO - Vector store saved
2024-08-14 16:32:26,857 - INFO - Receiving the search query
2024-08-14 16:34:27,630 - INFO - Searching for How to make a You can buy spare parts from us on our website or in the Autodoc app
2024-08-14 16:34:30,808 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
attn_output = torch.nn.functional.scaled_dot_product_attention(
2024-08-14 16:34:31,257 - INFO - Search completed
2024-08-14 16:34:31,257 - INFO - Page content: How to make a You can buy spare parts from us on our website or in the Autodoc app. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. 15. Remove the rear window cover. 16. Remove the rear window cover. 17. Remove the rear window cover. 18. Remove the rear window cover. 19. Remove the rear window cover. I'll see you next time.
2024-08-14 16:38:20,487 - INFO - Receiving the search query
2024-08-14 16:38:34,760 - INFO - Searching for How to make a You can buy spare parts from us on our website or in the Autodoc app
2024-08-14 16:38:35,979 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
attn_output = torch.nn.functional.scaled_dot_product_attention(
2024-08-14 16:38:36,197 - INFO - Search completed
2024-08-14 16:38:36,197 - INFO - Page content: How to make a You can buy spare parts from us on our website or in the Autodoc app. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. 15. Remove the rear window cover. 16. Remove the rear window cover. 17. Remove the rear window cover. 18. Remove the rear window cover. 19. Remove the rear window cover. I'll see you next time.
2024-08-14 16:41:52,357 - INFO - Receiving the search query
2024-08-14 16:42:17,598 - INFO - Searching for how do i fix my car?
2024-08-14 16:42:18,947 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
attn_output = torch.nn.functional.scaled_dot_product_attention(
2024-08-14 16:42:19,137 - INFO - Search completed
2024-08-14 16:45:51,829 - INFO - Receiving the search query
2024-08-14 16:45:55,638 - INFO - Searching for how do i fix my car?
2024-08-14 16:45:56,129 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
attn_output = torch.nn.functional.scaled_dot_product_attention(
2024-08-14 16:45:56,307 - INFO - Search completed
+2 -2
View File
@@ -4,8 +4,8 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from fastapi import FastAPI, HTTPException from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel from pydantic import BaseModel
from data_ingestion.utils import search, load_embedded_data from utils import search, load_embedded_data
from data_ingestion.data_ingest import load_data from data_ingest import load_data
app = FastAPI() app = FastAPI()
+2 -2
View File
@@ -1,4 +1,4 @@
from data_ingestion.utils import search, load_embedded_data from utils import search
import sys, os import sys, os
# Add the root directory to sys.path # Add the root directory to sys.path
@@ -16,6 +16,6 @@ if __name__ == "__main__":
page_content, all, pages = search(query) page_content, all, pages = search(query)
logger.info("Search completed") logger.info("Search completed")
logger.info(f"Page content: {page_content}") logger.info(f"Page content: {page_content}")
print(f"Page content: {page_content}") print(f"Page content: {all}")
print(f"Pages: {pages}") print(f"Pages: {pages}")
print("Search completed") print("Search completed")
+63
View File
@@ -0,0 +1,63 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from utils import search\n",
"import sys, os"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "smog_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
+55
View File
@@ -0,0 +1,55 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"{'filename': 'How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]', 'duration': '3-6 minutes', 'file_type': 'video'},\n",
"{'filename': 'How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]', 'duration': '0-3 minutes', 'file_type': 'video'}, \n",
"{'source': './data\\\\How to change rear windshield wipers on TOYOTA Corolla.docx', 'page': 4, 'file_type': 'text'}, \n",
"{'source': './data\\\\How to change front brake pads on TOYOTA Corolla.txt', 'page': 6, 'file_type': 'text'}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"[{\" How to make a You can buy spare parts from us on our website or in the Autodoc app. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. 15. Remove the rear window cover. 16. Remove the rear window cover. 17. Remove the rear window cover. 18. Remove the rear window cover. 19. Remove the rear window cover. I'll see you next time.\"}, \n",
"\n",
"{\" Hi everybody! Here's the latest installment of AutoDoc's video tutorials on replacing car parts. The channel so you never miss a video. We post new ones every week!\"}, \n",
"\n",
"{'G\\n\\nAUTODOC MOBILE APP: GREAT DEALS AND CONVENIEN\\n\\nT\\n\\nSHOPPIN\\n\\nG\\n\\n\\n\\nA GREAT SELECTION OF SPARE PARTS FOR YOUR CA\\n\\nR\\n\\nWINDSHIELD WIPERS: A WIDE SELECTIO\\n\\nN\\n\\nA GREAT SELECTION OF SPARE PARTS FOR YOUR CA\\n\\nR\\n\\nWINDSHIELD WIPERS: A WIDE SELECTIO\\n\\nN\\n\\n DISCLAIMER:\\n\\nThe document contains only general recommendations that may be useful for you when you perform repair or replacement work. AUTODOC shall not be liable for any loss, injury, damage of property occurring in the repair or replacement process due to incorrect use or misinterpretation of the provided information.\\n\\nAUTODOC shall not be liable for any possible mistakes and uncertainties in this guide. The information provided is for information purposes only and cannot replace advice from specialists.'}, \n",
"\n",
"{'replace advice from specialists.\\nAUTODOC shall not be liable for incorrect or hazardous usage of equipment, tools and car parts. AUTODOC strongly recommends to be careful and observe\\nthe safety rules when performing repair or replacement works. Remember: usage of low quality auto parts does not guarantee you the appropriate level of\\nroad safety.\\n© Copyright 2022 – All the contents of this website, in particular texts, photographs and graphics, are protected by copyright. All rights, including\\nreproduction, publication, editing and translation rights, are reserved by AUTODOC GmbH.\\nCLUB.AUTODOC.CO.UK 11–11\\nDISCLAIMER:\\nAUTODOC MOBILE APP: GREAT DEALS AND CONVENIENT\\nSHOPPING'}]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "smog_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
+7 -10
View File
@@ -302,15 +302,12 @@ def preprocess_video_data(video_path: str, time_interval: int):
snapshot_dir = os.path.join(os.path.dirname(video_path), f"{video_name}_snapshots") snapshot_dir = os.path.join(os.path.dirname(video_path), f"{video_name}_snapshots")
os.makedirs(snapshot_dir, exist_ok=True) os.makedirs(snapshot_dir, exist_ok=True)
# Set the interval to 3 minutes (180 seconds)
interval = 180
# Get the duration of the video using ffmpeg # Get the duration of the video using ffmpeg
probe = ffmpeg.probe(video_path) probe = ffmpeg.probe(video_path)
duration = float(probe['format']['duration']) duration = float(probe['format']['duration'])
# Loop through the video and take snapshots at 0s, 3min, 6min, etc. # Loop through the video and take snapshots at 0s, 3min, 6min, etc.
for i in range(0, int(duration), interval): for i in range(0, int(duration), time_interval):
# Calculate the time for the current frame # Calculate the time for the current frame
frame_time = i frame_time = i
# Save the snapshot as an image file in the created folder # Save the snapshot as an image file in the created folder
@@ -328,7 +325,7 @@ def preprocess_video_data(video_path: str, time_interval: int):
# now creating document from the audio file # now creating document from the audio file
documents = create_audio_document(audio_path, file_type='video') documents = create_audio_document(audio_path, chunk_duration_minutes=0.5, file_type='video')
return documents return documents
@@ -398,7 +395,7 @@ def load_documents_from_directory(directory_path: str):
print(f"Document {doc[0].metadata['filename']} loaded") print(f"Document {doc[0].metadata['filename']} loaded")
elif extension in video_doc: elif extension in video_doc:
# creating a video document # creating a video document
doc = preprocess_video_data(path, time_interval=180) doc = preprocess_video_data(path, time_interval=30)
# appending the document to the documents list # appending the document to the documents list
documents.append(doc) documents.append(doc)
# appending the number of pages in the document # appending the number of pages in the document
@@ -429,7 +426,7 @@ def load_documents_from_directory(directory_path: str):
# A function to create vector store # A function to create vector store
def create_vector_store(embeddings, documents: list, docs_id: list, num_pages: list): def create_vector_store(documents: list, docs_id: list, num_pages: list):
# index set up with the embedding dimension # index set up with the embedding dimension
index = faiss.IndexFlatL2(384) index = faiss.IndexFlatL2(384)
# Initialize the FAISS vector store # Initialize the FAISS vector store
@@ -462,9 +459,9 @@ def add_documents_to_vector_store(embeddings, documents: list, docs_id: list, nu
# A document search function # A document search function
# loading the embedded data def search(query, k=20):
embed_db = load_embedded_data() # loading the embedded data
def search(query, k=4): embed_db = load_embedded_data()
db = embed_db db = embed_db
docs = db.similarity_search(query, k) docs = db.similarity_search(query, k)
all = [] all = []
Binary file not shown.
Binary file not shown.
+6 -3
View File
@@ -20,7 +20,7 @@
"import os\n", "import os\n",
"import ffmpeg\n", "import ffmpeg\n",
"# importing module that prerocess the audio file \n", "# importing module that prerocess the audio file \n",
"from data_ingestion.utils import create_audio_document\n" "from utils import create_audio_document\n"
] ]
}, },
{ {
@@ -62,7 +62,7 @@
" os.makedirs(snapshot_dir, exist_ok=True)\n", " os.makedirs(snapshot_dir, exist_ok=True)\n",
"\n", "\n",
" # Set the interval to 3 minutes (180 seconds)\n", " # Set the interval to 3 minutes (180 seconds)\n",
" interval = 180\n", " interval = 30\n",
"\n", "\n",
" # Get the duration of the video using ffmpeg\n", " # Get the duration of the video using ffmpeg\n",
" probe = ffmpeg.probe(video_path)\n", " probe = ffmpeg.probe(video_path)\n",
@@ -164,7 +164,10 @@
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": [
"# [{filename: \"car repair.pdf\", pages: [12, 55, 356], description: \"lorem ipsum\", filetype:\"pdf\", thumbnail:\"carrepair.jpg\"}\n",
"# ,{filename: \"how to repair car.mp4\", pages: [12, 55, 356], description: \"lorem ipsum\", filetype:\"video\", thumbnail:\"how to repair car.jpg\"}]"
]
} }
], ],
"metadata": { "metadata": {