diff --git a/.gitignore b/.gitignore index 45ea6e7a..3f8be5fb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,2 @@ -Ai indexing data -images .env \ No newline at end of file diff --git a/data_ingestion/__pycache__/utils.cpython-311.pyc b/__pycache__/utils.cpython-311.pyc similarity index 81% rename from data_ingestion/__pycache__/utils.cpython-311.pyc rename to __pycache__/utils.cpython-311.pyc index db9f721a..27ee61c9 100644 Binary files a/data_ingestion/__pycache__/utils.cpython-311.pyc and b/__pycache__/utils.cpython-311.pyc differ diff --git a/data/documents.json b/data/documents.json index 62baf4f7..ccc91f32 100644 --- a/data/documents.json +++ b/data/documents.json @@ -1 +1 @@ -{"doc_names": ["1", "2", "3", "audio-1", "audio-2", "Car-Repair-Receipt-repair", "Car-Repair-Receipt-service", "Car-Repair-Receipt-tire", "Car-Repair-Receipt-tuning", "Car-Repair-Receipt-wash", "corolla-2020-toyota-owners-manual", "data\\dodge-challenger-auto-body-repair-after", "data\\dodge-challenger-auto-body-repair-before", "How to change engine oil and filter on TOYOTA Corolla", "How to change front brake pads on TOYOTA Corolla", "How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]", "How to change rear wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]", "How to change rear windshield wipers on TOYOTA Corolla", "How to change spark plugs on TOYOTA COROLLA", "data\\hyundai-sonata-auto-body-repair-after", "data\\hyundai-sonata-auto-body-repair-before", "data\\IMG_1436", "data\\IMG_1437", "data\\IMG_1438", "data\\IMG_1440", "data\\IMG_1441", "data\\IMG_1442", "data\\IMG_1443", "data\\IMG_1444", "data\\pontiac-vibe-auto-body-repair-after", "data\\pontiac-vibe-auto-body-repair-before", "test_rec", "data\\toyota-tacoma-auto-body-repair-after", "data\\toyota-tacoma-auto-body-repair-before"], "docs_id": ["8f1285940d9046b8a55dd9f00d41236e", "5ded0177f2494fcab28162f2841bd0d5", "2bb18beeb51d48158479734397b1f083", "0b79c6e0caad44ac8522edc1171e1dc5", "29b8ea4fd99e44a3b8fbbf8ef8ac6880", "4cb7fb1d4eb84ec2b719658fbe92f274", "fb9ee5da26a54e838617165893275681", "f77f280337a6485dac6dc15edec7b677", "8369ffb34d914a50b45cf0ad7c74b7f1", "ee8416cff59d42078fbe287c2ad268b7", "226fd18f73bb4cdab20ad614c0d8a569", "97d55511b936467e9573ae114314022d", "3d66c21fec1e4e1bae2b53cca97742c9", "0769d12c411348c8878552b504401168", "7dc6da9cdfce4c5992e4005f10300453", "c930ad016dd5447a8daeb7c453c6bf56", "afb0b0ef3d8b42feb278aa9d208a0f11", "72f6a7c3e5f74a7f9b3da83a49c65f59", "a6a0e43d096e439c91bbac4a6e806b4b", "d7dde21b648b45d182aa4bdf047fa39f", "f3c26f26b90e4b40ba8b96afc6875558", "4a1c69f51f4f4198b81b01d65ad5a63b", "23e8b0f4d54e491fb67e0bf2119cda45", "78e38b7d8ff34b12aaadfa9e51fef335", "9cbbfdc5c9b744a8baa74e4b8a30fc6c", "aaba2d8becb643319a000a4c55b2cf4a", "f53630b793bb404f89fdcec8ef4ba9c3", "d14dea4b06124374b8bc2081837ea965", "2d133c8456b44b3f86a2769429f74dc4", "f99af89c2ea34836b2281a50b12d1366", "b7bd085714554ab1bb7c9bd281231640", "bc0089a01ea440848cd5ca54e08295a7", "fab25d545fb6407c93f828896a22af4a", "7cc4793d3d8144cebbc9d101d7643597"], "num_pages": [2, 2, 3, 7, 7, 1, 2, 2, 2, 1, 588, 1, 1, 6, 7, 4, 2, 6, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]} \ No newline at end of file +{"doc_names": ["1", "2", "3", "audio-1", "audio-2", "Car-Repair-Receipt-repair", "Car-Repair-Receipt-service", "Car-Repair-Receipt-tire", "Car-Repair-Receipt-tuning", "Car-Repair-Receipt-wash", "corolla-2020-toyota-owners-manual", "data\\dodge-challenger-auto-body-repair-after", "data\\dodge-challenger-auto-body-repair-before", "How to change engine oil and filter on TOYOTA Corolla", "How to change front brake pads on TOYOTA Corolla", "How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]", "How to change rear wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]", "How to change rear windshield wipers on TOYOTA Corolla", "How to change spark plugs on TOYOTA COROLLA", "data\\hyundai-sonata-auto-body-repair-after", "data\\hyundai-sonata-auto-body-repair-before", "data\\IMG_1436", "data\\IMG_1437", "data\\IMG_1438", "data\\IMG_1440", "data\\IMG_1441", "data\\IMG_1442", "data\\IMG_1443", "data\\IMG_1444", "data\\pontiac-vibe-auto-body-repair-after", "data\\pontiac-vibe-auto-body-repair-before", "test_rec", "data\\toyota-tacoma-auto-body-repair-after", "data\\toyota-tacoma-auto-body-repair-before"], "docs_id": ["bf82a6b7ca514278ba8932028a25b042", "cfb1612d88634e87951abb081739732c", "0723acf3b939428ead3cceb48100e24b", "0560ebf944a34167a1f229c1044dcfc3", "6ba0680250244342b89c381f5e4e6328", "3c2d5843c3064acbbed980bb582d40f4", "ffeb04e7984544ff836088bb2d02cdae", "0b27e4d6792c4cb9aa7b7dfbcbf49f91", "35bd436246cb43b49d9fc54c258f8ed4", "5396d068e7c34a9b90c5d855d5aca866", "195a6c87111945f2b69b4d7e103d71ce", "064f8868a05c4d64abe7a1a0eef71ba2", "24e52042078c4fb5b958eda35cf46434", "4e95cb740bca4168b35e25a5c836b455", "9314f57385b441a4a292d3ce00ce723a", "28447944861f4fbc92346d48a97aaddc", "2dd4fd849f524c55bdb6dff3d3851e11", "c7d937e4456b40939b616f819b10e722", "059f19a2626e46b4a0807e60086b0f2f", "0fc5a9f521b0403b95c38cbe3c94fda5", "3cd0784c36904989be09aa4eb729bb77", "1a519aac07a142b28bde145d7420529d", "829b10ce5b32482caefc63474193c51f", "c7ca7c530dd34f6d96033717427aeebb", "598c3071c42346ca8897e6abee47e20f", "964b57f2e95a4d34a50107dc552c8891", "55841526690a4c8db8306c5c76a961cb", "d8b2cfc7062747b1be0e63cd54b2c495", "dd09b8faa8cd47d3a73b9c5bb21a5948", "eb818ea381de4ff98bb530e4a80c0fef", "7d8858bca658467dacdabce3fd26f5a6", "db24261507654f7e99b1627f8ea7e3b4", "b6365f8d8e744bb180e93e0d30e8d6d5", "96652ec08a874b4db58e7ede6e17feb3"], "num_pages": [2, 2, 3, 7, 7, 1, 2, 2, 2, 1, 588, 1, 1, 6, 7, 4, 2, 6, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]} \ No newline at end of file diff --git a/data_ingestion/data_ingest.py b/data_ingest.py similarity index 67% rename from data_ingestion/data_ingest.py rename to data_ingest.py index 27c8718b..79529011 100644 --- a/data_ingestion/data_ingest.py +++ b/data_ingest.py @@ -1,23 +1,23 @@ import sys, os sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) -from data_ingestion.utils import create_vector_store, save_embedded_data, load_documents_from_directory, load_embedding_model +from utils import create_vector_store, save_embedded_data, load_documents_from_directory, load_embedding_model from loggings.logging_config import logger # This module will load in the data, you only need to add the data path to it. data_path = './data' -# loading the embeddings -logger.info(f"Loading the embeddings") -embeddings = load_embedding_model() -logger.info(f"Embeddings loaded") +# # loading the embeddings +# logger.info(f"Loading the embeddings") +# embeddings = load_embedding_model() +# logger.info(f"Embeddings loaded") def load_data(data_path: str): logger.info(f"Loading data from {data_path}") documents, docs_id, num_pages = load_documents_from_directory(data_path) logger.info(f"Data loaded") logger.info(f"Creating vector store") - embed_db = create_vector_store(embeddings,documents, docs_id, num_pages) + embed_db = create_vector_store(documents, docs_id, num_pages) logger.info(f"Vector store created") logger.info(f"Saving the vector store") # saving the embedded data diff --git a/data_ingestion/__init__.py b/data_ingestion/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/data_ingestion/__pycache__/__init__.cpython-311.pyc b/data_ingestion/__pycache__/__init__.cpython-311.pyc deleted file mode 100644 index 09e0fb0e..00000000 Binary files a/data_ingestion/__pycache__/__init__.cpython-311.pyc and /dev/null differ diff --git a/data_ingestion/__pycache__/data_ingest.cpython-311.pyc b/data_ingestion/__pycache__/data_ingest.cpython-311.pyc deleted file mode 100644 index de57984a..00000000 Binary files a/data_ingestion/__pycache__/data_ingest.cpython-311.pyc and /dev/null differ diff --git a/loggings/app.log b/loggings/app.log index f1043db7..5a766d3e 100644 --- a/loggings/app.log +++ b/loggings/app.log @@ -215,3 +215,142 @@ 2024-08-13 22:07:11,760 - INFO - Search completed 2024-08-13 22:07:11,762 - INFO - Page content: The image shows a black Toyota truck, likely a Toyota Tacoma, parked in what appears to be a dealership or repair lot. The caption indicates that it is an "accidented car: after repair," suggesting that the vehicle had previously been involved in an accident but has since been repaired. In the background, there’s a gray Jeep vehicle. +2024-08-13 22:41:00,464 - INFO - Loading data from ./data +2024-08-13 22:41:11,993 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:41:14,815 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:41:27,644 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:41:34,133 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:41:52,811 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:41:59,166 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:42:04,204 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:42:55,776 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:43:10,660 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:43:24,516 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:43:38,693 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:43:52,185 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:44:04,990 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:44:15,706 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:45:21,645 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:45:36,103 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:45:51,008 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:46:04,708 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:46:14,074 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:46:25,725 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:46:34,718 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:48:57,742 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:49:05,954 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:49:12,044 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:49:15,968 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:49:57,797 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:50:04,165 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:50:42,751 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-13 22:50:54,836 - INFO - Data loaded +2024-08-13 22:50:54,836 - INFO - Creating vector store +2024-08-13 22:50:55,834 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.) + attn_output = torch.nn.functional.scaled_dot_product_attention( + +2024-08-13 22:51:07,264 - INFO - Vector store created +2024-08-13 22:51:07,272 - INFO - Saving the vector store +2024-08-13 22:51:07,287 - INFO - Vector store saved +2024-08-13 22:53:54,905 - INFO - Receiving the search query +2024-08-13 22:54:16,883 - INFO - Searching for wheel impact socket +2024-08-13 22:54:17,600 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.) + attn_output = torch.nn.functional.scaled_dot_product_attention( + +2024-08-13 22:54:17,747 - INFO - Search completed +2024-08-13 22:54:17,748 - INFO - Page content: Wheel impact socket #21 +Brake caliper wind back tool +Torque wrench +Crow bar +Wheel chock +REPLACEMENT: BRAKE PADS – TOYOTA COROLLA IX HATCHBACK +(E120). USE THE FOLLOWING PROCEDURE: +1 +Open the bonnet. Unscrew the brake fluid reservoir cap. +2 +Secure the wheels with chocks. +3 +Loosen the wheel mounting bolts. Use wheel impact socket #21. +4 +Raise the front of the car and secure on supports. +CLUB.AUTODOC.CO.UK 3–11 +Perform the replacement of brake pads in complete set for each axis. This +provides effective braking. +The replacement procedure is identical for all brake pads on the same axle. +All work should be done with the engine stopped. +Replacement: brake pads – TOYOTA Corolla IX Hatchback (E120). AUTODOC +recommends: +5 +Unscrew the wheel bolts. +6 +Remove the wheel. +7 +Spread the brake pads. Use a crowbar. +8 +Clean the brake caliper fasteners. Use a wire brush. Use WD-40 spray. +CLUB.AUTODOC.CO.UK 4–11 +To avoid injury, hold up the wheel when unscrewing the bolts. +2024-08-13 22:55:23,880 - INFO - Loading data from ./data +2024-08-14 16:09:40,712 - INFO - Loading data from ./data +2024-08-14 16:10:29,337 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:10:34,891 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:10:58,387 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:11:05,277 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:11:38,763 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:11:53,979 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:12:06,400 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:13:11,827 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:13:22,387 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:13:32,389 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:13:42,675 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:13:53,025 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:14:03,157 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:14:13,677 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:15:26,159 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:15:38,007 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:15:58,459 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:16:10,230 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:16:31,849 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:16:52,117 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:17:09,952 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:21:00,090 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:21:07,227 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:21:13,632 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:21:19,028 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:22:03,178 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:22:07,252 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:22:57,035 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK" +2024-08-14 16:23:08,267 - INFO - Data loaded +2024-08-14 16:23:08,267 - INFO - Creating vector store +2024-08-14 16:23:12,507 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.) + attn_output = torch.nn.functional.scaled_dot_product_attention( + +2024-08-14 16:23:59,231 - INFO - Vector store created +2024-08-14 16:23:59,231 - INFO - Saving the vector store +2024-08-14 16:23:59,332 - INFO - Vector store saved +2024-08-14 16:32:26,857 - INFO - Receiving the search query +2024-08-14 16:34:27,630 - INFO - Searching for How to make a You can buy spare parts from us on our website or in the Autodoc app +2024-08-14 16:34:30,808 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.) + attn_output = torch.nn.functional.scaled_dot_product_attention( + +2024-08-14 16:34:31,257 - INFO - Search completed +2024-08-14 16:34:31,257 - INFO - Page content: How to make a You can buy spare parts from us on our website or in the Autodoc app. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. 15. Remove the rear window cover. 16. Remove the rear window cover. 17. Remove the rear window cover. 18. Remove the rear window cover. 19. Remove the rear window cover. I'll see you next time. +2024-08-14 16:38:20,487 - INFO - Receiving the search query +2024-08-14 16:38:34,760 - INFO - Searching for How to make a You can buy spare parts from us on our website or in the Autodoc app +2024-08-14 16:38:35,979 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.) + attn_output = torch.nn.functional.scaled_dot_product_attention( + +2024-08-14 16:38:36,197 - INFO - Search completed +2024-08-14 16:38:36,197 - INFO - Page content: How to make a You can buy spare parts from us on our website or in the Autodoc app. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. 15. Remove the rear window cover. 16. Remove the rear window cover. 17. Remove the rear window cover. 18. Remove the rear window cover. 19. Remove the rear window cover. I'll see you next time. +2024-08-14 16:41:52,357 - INFO - Receiving the search query +2024-08-14 16:42:17,598 - INFO - Searching for how do i fix my car? +2024-08-14 16:42:18,947 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.) + attn_output = torch.nn.functional.scaled_dot_product_attention( + +2024-08-14 16:42:19,137 - INFO - Search completed +2024-08-14 16:45:51,829 - INFO - Receiving the search query +2024-08-14 16:45:55,638 - INFO - Searching for how do i fix my car? +2024-08-14 16:45:56,129 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.) + attn_output = torch.nn.functional.scaled_dot_product_attention( + +2024-08-14 16:45:56,307 - INFO - Search completed diff --git a/main.py b/main.py index 1df6dd6d..3ff18c58 100644 --- a/main.py +++ b/main.py @@ -4,8 +4,8 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) from fastapi import FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel -from data_ingestion.utils import search, load_embedded_data -from data_ingestion.data_ingest import load_data +from utils import search, load_embedded_data +from data_ingest import load_data app = FastAPI() diff --git a/data_ingestion/pdf_ingest.py b/pdf_ingest.py similarity index 100% rename from data_ingestion/pdf_ingest.py rename to pdf_ingest.py diff --git a/search.py b/search.py index 72e1b293..c518de15 100644 --- a/search.py +++ b/search.py @@ -1,4 +1,4 @@ -from data_ingestion.utils import search, load_embedded_data +from utils import search import sys, os # Add the root directory to sys.path @@ -16,6 +16,6 @@ if __name__ == "__main__": page_content, all, pages = search(query) logger.info("Search completed") logger.info(f"Page content: {page_content}") - print(f"Page content: {page_content}") + print(f"Page content: {all}") print(f"Pages: {pages}") print("Search completed") \ No newline at end of file diff --git a/search_note.ipynb b/search_note.ipynb new file mode 100644 index 00000000..ad86dd88 --- /dev/null +++ b/search_note.ipynb @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import search\n", + "import sys, os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "smog_env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/test.ipynb b/test.ipynb new file mode 100644 index 00000000..84dc7549 --- /dev/null +++ b/test.ipynb @@ -0,0 +1,55 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "{'filename': 'How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]', 'duration': '3-6 minutes', 'file_type': 'video'},\n", + "{'filename': 'How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]', 'duration': '0-3 minutes', 'file_type': 'video'}, \n", + "{'source': './data\\\\How to change rear windshield wipers on TOYOTA Corolla.docx', 'page': 4, 'file_type': 'text'}, \n", + "{'source': './data\\\\How to change front brake pads on TOYOTA Corolla.txt', 'page': 6, 'file_type': 'text'}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[{\" How to make a You can buy spare parts from us on our website or in the Autodoc app. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. I'm going to make a hole in the bottom of the box. 15. Remove the rear window cover. 16. Remove the rear window cover. 17. Remove the rear window cover. 18. Remove the rear window cover. 19. Remove the rear window cover. I'll see you next time.\"}, \n", + "\n", + "{\" Hi everybody! Here's the latest installment of AutoDoc's video tutorials on replacing car parts. The channel so you never miss a video. We post new ones every week!\"}, \n", + "\n", + "{'G\\n\\nAUTODOC MOBILE APP: GREAT DEALS AND CONVENIEN\\n\\nT\\n\\nSHOPPIN\\n\\nG\\n\\n\\n\\nA GREAT SELECTION OF SPARE PARTS FOR YOUR CA\\n\\nR\\n\\nWINDSHIELD WIPERS: A WIDE SELECTIO\\n\\nN\\n\\nA GREAT SELECTION OF SPARE PARTS FOR YOUR CA\\n\\nR\\n\\nWINDSHIELD WIPERS: A WIDE SELECTIO\\n\\nN\\n\\n DISCLAIMER:\\n\\nThe document contains only general recommendations that may be useful for you when you perform repair or replacement work. AUTODOC shall not be liable for any loss, injury, damage of property occurring in the repair or replacement process due to incorrect use or misinterpretation of the provided information.\\n\\nAUTODOC shall not be liable for any possible mistakes and uncertainties in this guide. The information provided is for information purposes only and cannot replace advice from specialists.'}, \n", + "\n", + "{'replace advice from specialists.\\nAUTODOC shall not be liable for incorrect or hazardous usage of equipment, tools and car parts. AUTODOC strongly recommends to be careful and observe\\nthe safety rules when performing repair or replacement works. Remember: usage of low quality auto parts does not guarantee you the appropriate level of\\nroad safety.\\n© Copyright 2022 – All the contents of this website, in particular texts, photographs and graphics, are protected by copyright. All rights, including\\nreproduction, publication, editing and translation rights, are reserved by AUTODOC GmbH.\\nCLUB.AUTODOC.CO.UK 11–11\\nDISCLAIMER:\\nAUTODOC MOBILE APP: GREAT DEALS AND CONVENIENT\\nSHOPPING'}]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "smog_env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/data_ingestion/utils.py b/utils.py similarity index 97% rename from data_ingestion/utils.py rename to utils.py index bb60facb..44223aed 100644 --- a/data_ingestion/utils.py +++ b/utils.py @@ -302,15 +302,12 @@ def preprocess_video_data(video_path: str, time_interval: int): snapshot_dir = os.path.join(os.path.dirname(video_path), f"{video_name}_snapshots") os.makedirs(snapshot_dir, exist_ok=True) - # Set the interval to 3 minutes (180 seconds) - interval = 180 - # Get the duration of the video using ffmpeg probe = ffmpeg.probe(video_path) duration = float(probe['format']['duration']) # Loop through the video and take snapshots at 0s, 3min, 6min, etc. - for i in range(0, int(duration), interval): + for i in range(0, int(duration), time_interval): # Calculate the time for the current frame frame_time = i # Save the snapshot as an image file in the created folder @@ -328,7 +325,7 @@ def preprocess_video_data(video_path: str, time_interval: int): # now creating document from the audio file - documents = create_audio_document(audio_path, file_type='video') + documents = create_audio_document(audio_path, chunk_duration_minutes=0.5, file_type='video') return documents @@ -398,7 +395,7 @@ def load_documents_from_directory(directory_path: str): print(f"Document {doc[0].metadata['filename']} loaded") elif extension in video_doc: # creating a video document - doc = preprocess_video_data(path, time_interval=180) + doc = preprocess_video_data(path, time_interval=30) # appending the document to the documents list documents.append(doc) # appending the number of pages in the document @@ -429,7 +426,7 @@ def load_documents_from_directory(directory_path: str): # A function to create vector store -def create_vector_store(embeddings, documents: list, docs_id: list, num_pages: list): +def create_vector_store(documents: list, docs_id: list, num_pages: list): # index set up with the embedding dimension index = faiss.IndexFlatL2(384) # Initialize the FAISS vector store @@ -462,9 +459,9 @@ def add_documents_to_vector_store(embeddings, documents: list, docs_id: list, nu # A document search function -# loading the embedded data -embed_db = load_embedded_data() -def search(query, k=4): +def search(query, k=20): + # loading the embedded data + embed_db = load_embedded_data() db = embed_db docs = db.similarity_search(query, k) all = [] diff --git a/vec-db/index/faiss_index_data/index.faiss b/vec-db/index/faiss_index_data/index.faiss index 2b5a5560..023cd697 100644 Binary files a/vec-db/index/faiss_index_data/index.faiss and b/vec-db/index/faiss_index_data/index.faiss differ diff --git a/vec-db/index/faiss_index_data/index.pkl b/vec-db/index/faiss_index_data/index.pkl index 8a16f21e..e7ddad08 100644 Binary files a/vec-db/index/faiss_index_data/index.pkl and b/vec-db/index/faiss_index_data/index.pkl differ diff --git a/video_experiment.ipynb b/video_experiment.ipynb index 75d2d2a2..1c0454bf 100644 --- a/video_experiment.ipynb +++ b/video_experiment.ipynb @@ -20,7 +20,7 @@ "import os\n", "import ffmpeg\n", "# importing module that prerocess the audio file \n", - "from data_ingestion.utils import create_audio_document\n" + "from utils import create_audio_document\n" ] }, { @@ -62,7 +62,7 @@ " os.makedirs(snapshot_dir, exist_ok=True)\n", "\n", " # Set the interval to 3 minutes (180 seconds)\n", - " interval = 180\n", + " interval = 30\n", "\n", " # Get the duration of the video using ffmpeg\n", " probe = ffmpeg.probe(video_path)\n", @@ -164,7 +164,10 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# [{filename: \"car repair.pdf\", pages: [12, 55, 356], description: \"lorem ipsum\", filetype:\"pdf\", thumbnail:\"carrepair.jpg\"}\n", + "# ,{filename: \"how to repair car.mp4\", pages: [12, 55, 356], description: \"lorem ipsum\", filetype:\"video\", thumbnail:\"how to repair car.jpg\"}]" + ] } ], "metadata": {