parallel processing added
This commit is contained in:
Binary file not shown.
@@ -1 +0,0 @@
|
||||
{"doc_names": ["audio-2", "data\\Car-Repair-Receipt-repair", "data\\Car-Repair-Receipt-service", "data\\Car-Repair-Receipt-tire", "data\\Car-Repair-Receipt-tuning", "data\\Car-Repair-Receipt-wash", "data\\corolla-2020-toyota-owners-manual", "data\\dodge-challenger-auto-body-repair-after", "data\\dodge-challenger-auto-body-repair-before", "data\\How to change engine oil and filter on TOYOTA Corolla", "data\\How to change front brake pads on TOYOTA Corolla", "How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]", "How to change rear wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]", "data\\How to change rear windshield wipers on TOYOTA Corolla", "data\\How to change spark plugs on TOYOTA COROLLA", "data\\hyundai-sonata-auto-body-repair-after", "data\\hyundai-sonata-auto-body-repair-before", "data\\IMG_1436", "data\\IMG_1437", "data\\IMG_1438", "data\\IMG_1440", "data\\IMG_1441", "data\\IMG_1442", "data\\IMG_1443", "data\\IMG_1444", "data\\pontiac-vibe-auto-body-repair-after", "data\\pontiac-vibe-auto-body-repair-before", "test_rec", "data\\toyota-tacoma-auto-body-repair-after", "data\\toyota-tacoma-auto-body-repair-before"], "docs_id": ["e7280b3ec313491f8ce5c5d59b52788e", "880e0ad1d2ce43c39716e8d45d584000", "3b1c312fe9d0490ba3b7a841ce3fb136", "a6f5d1b0ba8d4fa7a828fdad8ae17bb7", "41f450a6b2c24af6a45065b2d3eba6e5", "d7af8ae82db341bf818e8f16420a9570", "eb95b3ced46548ba9d18eacba3c3e00c", "806b676fecfd47339c506ece2af3122d", "fcdc8661e11541c0a56c825626f2467e", "5ab12ad4f0ce4ce98e27e56fe5663ebd", "adf95aa758254b069cac03f52f0993b8", "1c3ef8dd242f40418dd34bfd3ce95b8a", "47d4abbea3da4121930fc33b3c472fdf", "10963a5e3985497888ce30cc66ae6314", "111c4a4e07534f47b1ed671ee2b684df", "2aa09a5f3fb54728b8370875ddc3d1fa", "7f902694851e41218ef8498b6c5c1553", "e1b0b06f6dad4c56bf976f6c2135df35", "00564531a33c4896b0d329170ce1f04f", "3aa1149c564c47c4b7b1f6a641e37770", "34a3e18b638b454ba283ba5741bad3a7", "0c7b7bf52d684f198dee19e87a5000bc", 
"9f1fc0cdcf104e4d948321f0ee5875b9", "113fab4522f94b7b956236b769e391c7", "8a75069c830640b5bddd661adcd612a9", "428d5aa38b9148ef95cf0662d50e0e18", "4cbb1c9f8d8a44feb44b554f9a39bd59", "04fe5073479c462aa819b0131fc9bc9b", "037fd77b66f045a0ac0682a07d57885b", "299625168b8e492baafdbb2a871cd061"], "num_pages": [7, 1, 2, 2, 2, 1, 588, 1, 1, 6, 7, 21, 12, 6, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
|
||||
Binary file not shown.
Binary file not shown.
@@ -533,3 +533,131 @@ To avoid injury, hold up the wheel when unscrewing the bolts.
|
||||
2024-08-15 12:50:08,732 - INFO - Vector store created
|
||||
2024-08-15 12:50:08,732 - INFO - Saving the vector store
|
||||
2024-08-15 12:50:08,745 - INFO - Vector store saved
|
||||
2024-08-15 22:27:53,640 - INFO - Loading data from ./data
|
||||
2024-08-15 22:28:26,454 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:15,773 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:18,312 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:21,194 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:23,563 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:25,707 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:28,079 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:30,046 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:32,244 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:33,863 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:34,996 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:37,244 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:39,593 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:41,416 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:42,381 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:43,283 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:45,605 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:46,734 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:50,144 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:53,144 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:54,149 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:29:56,683 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:30:01,059 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:30:04,416 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:30:07,578 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:30:08,304 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:30:12,904 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:30:16,304 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:30:19,593 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:30:20,690 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:30:22,950 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:30:27,134 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:30:29,830 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 503 Service Unavailable"
|
||||
2024-08-15 22:30:31,234 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:30:44,310 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:30:53,513 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:30:57,355 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:30:57,849 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:30:58,324 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:30:58,763 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:30:59,189 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:30:59,624 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:31:00,064 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:31:00,434 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:31:00,843 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:31:01,323 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:31:01,834 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:38:18,873 - INFO - Loading data from ./data
|
||||
2024-08-15 22:38:45,628 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:39:49,550 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:39:54,348 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:39:57,845 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:01,631 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:04,163 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:06,901 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:07,582 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:08,282 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:09,997 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:12,182 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:12,765 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:15,845 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:17,389 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:18,451 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:21,247 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:21,334 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:23,360 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:24,218 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:24,552 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:28,755 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:33,043 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:35,045 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:37,390 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:41,155 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:45,315 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:49,518 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:50,481 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:53,073 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:40:56,877 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:01,323 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:02,802 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:05,165 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:09,849 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:13,516 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:13,872 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:16,590 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:20,432 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:23,509 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:25,284 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:25,999 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/translations "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:32,085 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:32,567 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:32,987 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:33,440 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:33,916 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:34,331 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:34,824 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:35,191 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:35,573 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:36,235 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:36,731 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:37,163 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:37,556 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:38,165 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:38,601 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:38,987 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:39,409 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:39,901 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:40,290 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:40,640 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:41,031 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:41,393 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:41,835 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:42,190 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:42,604 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:42,974 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:43,339 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:43,773 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:44,140 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:44,574 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
|
||||
2024-08-15 22:41:44,574 - INFO - Data loaded
|
||||
2024-08-15 22:41:44,574 - INFO - Creating vector store
|
||||
2024-08-15 22:41:46,104 - WARNING - C:\Users\timmy_3aupohg\anaconda3\envs\smog_env\Lib\site-packages\transformers\models\bert\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:555.)
|
||||
attn_output = torch.nn.functional.scaled_dot_product_attention(
|
||||
|
||||
2024-08-15 22:41:56,183 - INFO - Vector store created
|
||||
2024-08-15 22:41:56,198 - INFO - Saving the vector store
|
||||
2024-08-15 22:41:56,198 - INFO - Vector store saved
|
||||
|
||||
+2
-1
@@ -18,4 +18,5 @@ groq
|
||||
python-dotenv
|
||||
pydub
|
||||
moviepy
|
||||
ffmpeg-python
|
||||
ffmpeg-python
|
||||
langchain-groq
|
||||
+278
-9
@@ -6,30 +6,299 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from utils import search\n",
|
||||
"import sys, os"
|
||||
"# !pip install langchain-groq"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from utils import search\n",
|
||||
"import sys, os\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from langchain_groq import ChatGroq\n",
|
||||
"from langchain_core.prompts.prompt import PromptTemplate\n",
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"load_dotenv()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"# setting up groq api key\n",
|
||||
"os.environ[\"GROQ_API_KEY\"] = os.getenv('GROQ_API_KEY')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"\n",
|
||||
"# chat set up\n",
|
||||
"GROQ_LLM = ChatGroq(temperature=0, model_name=\"llama3-8b-8192\", max_tokens=100)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### Chains #####\n",
|
||||
"# Initiator\n",
|
||||
"def doc_summarizer(document_page: list) -> str:\n",
|
||||
" initiator_prompt = PromptTemplate(\n",
|
||||
" template=\"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n",
|
||||
" Create a short summary of the document based on the provided text. \n",
|
||||
" \n",
|
||||
" Start with: This document is about...\n",
|
||||
" \n",
|
||||
" <|eot_id|><|start_header_id|>user<|end_header_id|>\n",
|
||||
" DOCUMENT: {document_page} \\n\n",
|
||||
" \n",
|
||||
" <|eot_id|><|start_header_id|>assistant<|end_header_id|>\"\"\",\n",
|
||||
" input_variables=[\"document_page\"],\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" initiator_router = initiator_prompt | GROQ_LLM | StrOutputParser()\n",
|
||||
" output = initiator_router.invoke({\"document_page\":document_page})\n",
|
||||
" return output\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"document_page = 'How to change the engine oil of a toyota corrolla.'\n",
|
||||
"# testing the function\n",
|
||||
"summary = doc_summarizer(document_page)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'This document is about providing a step-by-step guide on how to change the engine oil of a Toyota Corolla.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"summary"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = search(document_page)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'source': './data\\\\How to change engine oil and filter on TOYOTA Corolla.txt',\n",
|
||||
" 'page': 1,\n",
|
||||
" 'file_type': 'text'},\n",
|
||||
" {'source': './data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 438},\n",
|
||||
" {'source': './data\\\\How to change engine oil and filter on TOYOTA Corolla.txt',\n",
|
||||
" 'page': 3,\n",
|
||||
" 'file_type': 'text'},\n",
|
||||
" {'source': './data\\\\How to change engine oil and filter on TOYOTA Corolla.txt',\n",
|
||||
" 'page': 2,\n",
|
||||
" 'file_type': 'text'},\n",
|
||||
" {'source': './data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 525},\n",
|
||||
" {'source': './data\\\\How to change spark plugs on TOYOTA COROLLA.docx',\n",
|
||||
" 'page': 2,\n",
|
||||
" 'file_type': 'text'},\n",
|
||||
" {'source': './data\\\\How to change spark plugs on TOYOTA COROLLA.docx',\n",
|
||||
" 'page': 3,\n",
|
||||
" 'file_type': 'text'},\n",
|
||||
" {'source': './data\\\\How to change engine oil and filter on TOYOTA Corolla.txt',\n",
|
||||
" 'page': 0,\n",
|
||||
" 'file_type': 'text'},\n",
|
||||
" {'source': './data\\\\How to change spark plugs on TOYOTA COROLLA.docx',\n",
|
||||
" 'page': 5,\n",
|
||||
" 'file_type': 'text'},\n",
|
||||
" {'source': './data\\\\How to change spark plugs on TOYOTA COROLLA.docx',\n",
|
||||
" 'page': 6,\n",
|
||||
" 'file_type': 'text'},\n",
|
||||
" {'source': './data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 526},\n",
|
||||
" {'source': './data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 422},\n",
|
||||
" {'source': './data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 514},\n",
|
||||
" {'source': './data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 153},\n",
|
||||
" {'filename': 'audio-2', 'duration': '0-3 minutes', 'file_type': 'audio'},\n",
|
||||
" {'filename': 'audio-2', 'duration': '3-6 minutes', 'file_type': 'audio'},\n",
|
||||
" {'source': './data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 149},\n",
|
||||
" {'source': './data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 513},\n",
|
||||
" {'source': './data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 436},\n",
|
||||
" {'source': './data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 148}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from collections import defaultdict\n",
|
||||
"\n",
|
||||
"def transform_file_data(input_data):\n",
|
||||
" # Create a dictionary to aggregate data by filename\n",
|
||||
" aggregated_data = defaultdict(lambda: {\n",
|
||||
" 'filename': '',\n",
|
||||
" 'pages': [],\n",
|
||||
" 'timestamps': [],\n",
|
||||
" 'description': 'lorem ipsum',\n",
|
||||
" 'filetype': '',\n",
|
||||
" 'thumbnail': '',\n",
|
||||
" 'track_id': 123\n",
|
||||
" })\n",
|
||||
"\n",
|
||||
" for item in input_data:\n",
|
||||
" if 'source' in item:\n",
|
||||
" file_path = item['source']\n",
|
||||
" filename = file_path.split('\\\\')[-1]\n",
|
||||
" extension = filename.split('.')[-1]\n",
|
||||
"\n",
|
||||
" aggregated_data[filename]['filename'] = filename\n",
|
||||
" aggregated_data[filename]['filetype'] = extension\n",
|
||||
" aggregated_data[filename]['thumbnail'] = f\"{filename.split('.')[0]}.jpg\"\n",
|
||||
"\n",
|
||||
" if extension in ['pdf', 'txt', 'docx']:\n",
|
||||
" aggregated_data[filename]['pages'].append(item['page'])\n",
|
||||
" elif extension in ['mp4', 'mkv', 'flv']:\n",
|
||||
" aggregated_data[filename]['timestamps'].append(item['page'])\n",
|
||||
" elif extension in ['mp3', 'wav', 'flac']:\n",
|
||||
" aggregated_data[filename]['timestamps'].append(item['page'])\n",
|
||||
" elif extension in ['jpg', 'jpeg', 'png', 'gif', 'bmp']:\n",
|
||||
" aggregated_data[filename].pop('pages', None) # Remove pages if it's an image\n",
|
||||
" aggregated_data[filename].pop('timestamps', None) # Remove timestamps if it's an image\n",
|
||||
"\n",
|
||||
" elif 'filename' in item:\n",
|
||||
" filename = item['filename']\n",
|
||||
" extension = item['file_type']\n",
|
||||
" aggregated_data[filename]['filename'] = f\"{filename}.{extension}\"\n",
|
||||
" aggregated_data[filename]['filetype'] = extension\n",
|
||||
" aggregated_data[filename]['thumbnail'] = f\"{filename}.jpg\"\n",
|
||||
" if 'duration' in item:\n",
|
||||
" start_time, end_time = item['duration'].split(' minutes')[0].split('-')\n",
|
||||
" aggregated_data[filename]['timestamps'].append((int(start_time), int(end_time)))\n",
|
||||
"\n",
|
||||
" # Convert aggregated data to the desired output format\n",
|
||||
" output_data = []\n",
|
||||
" for filename, data in aggregated_data.items():\n",
|
||||
" # Remove empty lists for pages and timestamps\n",
|
||||
" if not data['pages']:\n",
|
||||
" data.pop('pages', None)\n",
|
||||
" if not data['timestamps']:\n",
|
||||
" data.pop('timestamps', None)\n",
|
||||
" output_data.append(data)\n",
|
||||
"\n",
|
||||
" return output_data\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'filename': 'How to change engine oil and filter on TOYOTA Corolla.txt', 'pages': [1, 3, 2, 0], 'description': 'lorem ipsum', 'filetype': 'txt', 'thumbnail': 'How to change engine oil and filter on TOYOTA Corolla.jpg', 'track_id': 123}\n",
|
||||
"{'filename': 'corolla-2020-toyota-owners-manual.pdf', 'pages': [438, 525, 526, 422, 514, 153, 149, 513, 436, 148], 'description': 'lorem ipsum', 'filetype': 'pdf', 'thumbnail': 'corolla-2020-toyota-owners-manual.jpg', 'track_id': 123}\n",
|
||||
"{'filename': 'How to change spark plugs on TOYOTA COROLLA.docx', 'pages': [2, 3, 5, 6], 'description': 'lorem ipsum', 'filetype': 'docx', 'thumbnail': 'How to change spark plugs on TOYOTA COROLLA.jpg', 'track_id': 123}\n",
|
||||
"{'filename': 'audio-2.audio', 'timestamps': [(0, 3), (3, 6)], 'description': 'lorem ipsum', 'filetype': 'audio', 'thumbnail': 'audio-2.jpg', 'track_id': 123}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"output = transform_file_data(docs)\n",
|
||||
"for item in output:\n",
|
||||
" print(item)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'filename': 'How to change engine oil and filter on TOYOTA Corolla.txt',\n",
|
||||
" 'pages': [1, 3, 2, 0],\n",
|
||||
" 'description': 'lorem ipsum',\n",
|
||||
" 'filetype': 'txt',\n",
|
||||
" 'thumbnail': 'How to change engine oil and filter on TOYOTA Corolla.jpg',\n",
|
||||
" 'track_id': 123},\n",
|
||||
" {'filename': 'corolla-2020-toyota-owners-manual.pdf',\n",
|
||||
" 'pages': [438, 525, 526, 422, 514, 153, 149, 513, 436, 148],\n",
|
||||
" 'description': 'lorem ipsum',\n",
|
||||
" 'filetype': 'pdf',\n",
|
||||
" 'thumbnail': 'corolla-2020-toyota-owners-manual.jpg',\n",
|
||||
" 'track_id': 123},\n",
|
||||
" {'filename': 'How to change spark plugs on TOYOTA COROLLA.docx',\n",
|
||||
" 'pages': [2, 3, 5, 6],\n",
|
||||
" 'description': 'lorem ipsum',\n",
|
||||
" 'filetype': 'docx',\n",
|
||||
" 'thumbnail': 'How to change spark plugs on TOYOTA COROLLA.jpg',\n",
|
||||
" 'track_id': 123},\n",
|
||||
" {'filename': 'audio-2.audio',\n",
|
||||
" 'timestamps': [(0, 3), (3, 6)],\n",
|
||||
" 'description': 'lorem ipsum',\n",
|
||||
" 'filetype': 'audio',\n",
|
||||
" 'thumbnail': 'audio-2.jpg',\n",
|
||||
" 'track_id': 123}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"output"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
|
||||
@@ -6,14 +6,19 @@ from langchain_community.vectorstores import FAISS
|
||||
from langchain_community.document_loaders import PyPDFLoader
|
||||
from langchain_community.document_loaders import TextLoader
|
||||
from langchain_community.document_loaders import Docx2txtLoader
|
||||
from langchain_groq import ChatGroq
|
||||
from langchain_core.prompts.prompt import PromptTemplate
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
from uuid import uuid4
|
||||
from langchain_core.documents import Document
|
||||
from text_extractor import TextExtractor
|
||||
import os
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
import math
|
||||
import json
|
||||
from groq import Groq
|
||||
import re
|
||||
import time
|
||||
import shutil
|
||||
import numpy as np
|
||||
from pydub import AudioSegment
|
||||
@@ -26,10 +31,15 @@ load_dotenv()
|
||||
|
||||
# OpenAI API Key
api_key = os.getenv('OPENAI_API_KEY')

# Groq API key.
# Read it once and fail with an explicit message when it is missing: assigning
# the result of os.getenv() straight into os.environ raises an opaque
# "TypeError: str expected, not NoneType" if the variable is unset.
_groq_api_key = os.getenv('GROQ_API_KEY')
if _groq_api_key is None:
    raise RuntimeError(
        "GROQ_API_KEY is not set; define it in the environment or in the .env file."
    )
os.environ["GROQ_API_KEY"] = _groq_api_key
client = Groq(api_key=_groq_api_key)
# NOTE(review): presumably the speech model used for the Groq audio requests
# made through `client` - confirm at the call sites.
model = 'whisper-large-v3'


# chat set up
# temperature=0 and max_tokens=100 are passed to the chat model used by the
# summarizer chain.
GROQ_LLM = ChatGroq(temperature=0, model_name="llama3-8b-8192", max_tokens=100)
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
# loading the embedding model
|
||||
def load_embedding_model():
|
||||
@@ -337,6 +347,25 @@ def preprocess_video_data(video_path: str, time_interval: int):
|
||||
return documents
|
||||
|
||||
|
||||
#----------------------------------------------------DOC SUMMARIZER --------------------------------------------------
|
||||
def doc_summarizer(document_page: list) -> str:
    """Ask the Groq chat model for a short summary of a document page.

    Args:
        document_page: Content to summarise. It is substituted verbatim into
            the ``{document_page}`` slot of the prompt.

    Returns:
        The model's reply as parsed by ``StrOutputParser``. The prompt asks
        for it to start with "This document is about...".
    """
    # Llama-3 style chat template: system instruction, user turn holding the
    # document text, then an open assistant turn for the model to complete.
    summary_prompt = PromptTemplate(
        template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    Create a short summary of the document based on the provided text.

    Start with: This document is about...

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    DOCUMENT: {document_page} \n

    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
        input_variables=["document_page"],
    )

    # prompt -> LLM -> plain string
    summary_chain = summary_prompt | GROQ_LLM | StrOutputParser()
    return summary_chain.invoke({"document_page": document_page})
|
||||
|
||||
|
||||
#-----------------------------------------------------OTHERS--------------------------------------------------------------
|
||||
|
||||
@@ -348,88 +377,86 @@ def load_embedded_data(embeddings=embeddings, key="data"):
|
||||
embed_db = FAISS.load_local(f"index/faiss_index_{key}", embeddings, allow_dangerous_deserialization=True)
|
||||
return embed_db
|
||||
|
||||
#-----------------------------------------------------Data Loading Process----------------------------------------------------
|
||||
|
||||
# Helpers for loading every supported document found in a directory.
|
||||
def process_document(path, extension, text_doc, image_doc, audio_doc, video_doc):
    """Load a single file with the loader that matches its extension.

    Args:
        path: Path of the file to load.
        extension: File extension without the leading dot. It is compared
            as-is against the extension collections below.
        text_doc: Extensions handled by ``load_document``.
        image_doc: Extensions handled by ``create_image_document``.
        audio_doc: Extensions handled by ``create_audio_document``.
        video_doc: Extensions handled by ``preprocess_video_data`` (called
            with ``time_interval=30``).

    Returns:
        A ``(doc, doc_name, num_pages)`` tuple, or ``(None, None, None)`` when
        the extension is not handled or the loader produced nothing.
    """
    # Dispatch lazily: only the loader that is actually needed is looked up
    # and called, and unsupported files return before any loader is touched.
    if extension in text_doc:
        kind = "text"
        doc = load_document(path)
    elif extension in image_doc:
        kind = "image"
        doc = create_image_document(path)
    elif extension in audio_doc:
        kind = "audio"
        doc = create_audio_document(path)
    elif extension in video_doc:
        kind = "video"
        doc = preprocess_video_data(path, time_interval=30)
    else:
        return None, None, None  # Unhandled extension

    # An empty (or missing) result has no first element to read metadata
    # from; report it like an unhandled file instead of raising IndexError.
    if not doc:
        print(f"Document {path} produced no content, skipping")
        return None, None, None

    if kind == "text":
        # Text documents are named after the file (text before the first dot).
        doc_name = os.path.basename(path).split('.')[0]
    else:
        # Media loaders record the name in the first chunk's metadata.
        doc_name = doc[0].metadata['filename']

    # An image always counts as a single page; everything else is one page
    # per returned element.
    num_pages = 1 if kind == "image" else len(doc)

    print(f"Document {doc_name} loaded")
    return doc, doc_name, num_pages
|
||||
|
||||
def load_documents_from_directory(directory_path: str):
    """Load every supported file in a directory and record an index of them.

    Files are loaded concurrently through ``process_document``. The first
    element of each loaded document is summarised with ``doc_summarizer``.
    An index is written to ``data.json`` inside ``directory_path``; keys
    already present in that file that this function does not set are kept.

    Args:
        directory_path: Directory whose files should be loaded.

    Returns:
        A ``(documents, docs_id, num_pages)`` tuple of parallel lists: the
        loaded documents, a fresh hex UUID per document, and the page count
        reported for each document.

    NOTE(review): the summaries are stored under the key ``'doc_summaary'``
    (sic). The spelling is kept because readers elsewhere may depend on it.
    """
    text_doc = ['pdf', 'txt', 'docx', 'doc', 'md']
    image_doc = ['jpg', 'jpeg', 'png', 'gif', 'bmp']
    audio_doc = ['mp3', 'wav', 'flac', 'ogg', 'm4a']
    video_doc = ['mp4', 'avi', 'mkv', 'flv', 'mov']
    supported = set(text_doc) | set(image_doc) | set(audio_doc) | set(video_doc)

    # Only dispatch files that a loader exists for. This also keeps the
    # data.json index written below from being handed to a worker.
    files = [
        file for file in os.listdir(directory_path)
        if file.split('.')[-1] in supported
    ]

    def process_with_delay(file):
        result = process_document(
            os.path.join(directory_path, file),
            file.split('.')[-1],
            text_doc, image_doc, audio_doc, video_doc,
        )
        time.sleep(0.1)  # Introduce a 0.1s delay after processing each document
        return result

    # executor.map yields results in the same order as `files`.
    with ThreadPoolExecutor() as executor:
        results = list(executor.map(process_with_delay, files))

    documents = []
    doc_names = []
    num_pages = []
    doc_summary = []
    for doc, doc_name, pages in results:
        # Skip unhandled files and empty documents (no first page to summarise).
        if not doc:
            continue
        documents.append(doc)
        doc_names.append(doc_name)
        num_pages.append(pages)
        # The summary is built from the first page/chunk only.
        doc_summary.append(doc_summarizer(doc[0].page_content))

    # One id per loaded document.
    docs_id = [uuid4().hex for _ in documents]

    json_file = os.path.join(directory_path, 'data.json')
    data = {'doc_names': doc_names, 'docs_id': docs_id, 'num_pages': num_pages, 'doc_summaary': doc_summary}

    # Merge with any existing index, then rewrite the file from scratch.
    # Rewriting in 'w' mode (instead of 'r+' with seek(0)) guarantees that no
    # stale bytes from a longer previous payload are left after the new JSON.
    existing_data = {}
    if os.path.exists(json_file):
        try:
            with open(json_file, 'r') as f:
                loaded = json.load(f)
            if isinstance(loaded, dict):
                existing_data = loaded
        except json.JSONDecodeError:
            print(f"Existing index {json_file} is not valid JSON, recreating it")
    existing_data.update(data)
    with open(json_file, 'w') as f:
        json.dump(existing_data, f)

    return documents, docs_id, num_pages
|
||||
|
||||
|
||||
@@ -475,6 +502,6 @@ def search(query, k=20):
|
||||
all = []
|
||||
info = []
|
||||
for doc in docs:
|
||||
all.append({doc.page_content})
|
||||
# all.append({doc.page_content})
|
||||
info.append(dict(doc.metadata))
|
||||
return docs[0].page_content, all, info
|
||||
return info
|
||||
|
||||
Reference in New Issue
Block a user