commit b0c3eb803288f90e89ab402595f8efc51ae1c8ba Author: timothyafolami Date: Mon Aug 5 21:08:29 2024 +0100 initial commit diff --git a/FAISS_Embedding.ipynb b/FAISS_Embedding.ipynb new file mode 100644 index 00000000..fbaca0d9 --- /dev/null +++ b/FAISS_Embedding.ipynb @@ -0,0 +1,395 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ESxp1aQc0X53", + "outputId": "2844a981-b5ab-4ae4-8f8a-408d0369a8f2" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting langchain\n", + " Downloading langchain-0.2.9-py3-none-any.whl (987 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m987.7/987.7 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (6.0.1)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.0.31)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (3.9.5)\n", + "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (4.0.3)\n", + "Collecting langchain-core<0.3.0,>=0.2.20 (from langchain)\n", + " Downloading langchain_core-0.2.21-py3-none-any.whl (372 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m372.0/372.0 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)\n", + " Downloading langchain_text_splitters-0.2.2-py3-none-any.whl (25 kB)\n", + "Collecting langsmith<0.2.0,>=0.1.17 (from langchain)\n", + " Downloading langsmith-0.1.90-py3-none-any.whl (134 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.7/134.7 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (1.25.2)\n", + "Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.8.2)\n", + "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.31.0)\n", + "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (8.5.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.4.1)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.5)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.9.4)\n", + "Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.3.0,>=0.2.20->langchain)\n", + " Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n", + "Requirement already satisfied: packaging<25,>=23.2 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.20->langchain) (24.1)\n", + "Collecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.17->langchain)\n", + " Downloading orjson-3.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (141 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m141.1/141.1 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.20.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (2.20.1)\n", + "Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (4.12.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (2024.7.4)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain) (3.0.3)\n", + "Collecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain-core<0.3.0,>=0.2.20->langchain)\n", + " Downloading jsonpointer-3.0.0-py2.py3-none-any.whl (7.6 kB)\n", + "Installing collected packages: orjson, jsonpointer, jsonpatch, langsmith, langchain-core, langchain-text-splitters, langchain\n", + "Successfully installed jsonpatch-1.33 jsonpointer-3.0.0 langchain-0.2.9 langchain-core-0.2.21 langchain-text-splitters-0.2.2 langsmith-0.1.90 orjson-3.10.6\n", + "Collecting langchain-huggingface\n", + " Downloading langchain_huggingface-0.0.3-py3-none-any.whl (17 kB)\n", + "Requirement already satisfied: huggingface-hub>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from langchain-huggingface) (0.23.4)\n", + "Requirement already satisfied: langchain-core<0.3,>=0.1.52 in /usr/local/lib/python3.10/dist-packages (from langchain-huggingface) (0.2.21)\n", + "Collecting sentence-transformers>=2.6.0 (from langchain-huggingface)\n", + " Downloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m227.1/227.1 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: tokenizers>=0.19.1 in /usr/local/lib/python3.10/dist-packages (from langchain-huggingface) (0.19.1)\n", + "Requirement already satisfied: transformers>=4.39.0 in /usr/local/lib/python3.10/dist-packages (from langchain-huggingface) (4.42.4)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.23.0->langchain-huggingface) (3.15.4)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.23.0->langchain-huggingface) (2023.6.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.23.0->langchain-huggingface) (24.1)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.23.0->langchain-huggingface) (6.0.1)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.23.0->langchain-huggingface) (2.31.0)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.23.0->langchain-huggingface) (4.66.4)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.23.0->langchain-huggingface) (4.12.2)\n", + "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3,>=0.1.52->langchain-huggingface) (1.33)\n", + "Requirement already satisfied: langsmith<0.2.0,>=0.1.75 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3,>=0.1.52->langchain-huggingface) (0.1.90)\n", + "Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3,>=0.1.52->langchain-huggingface) (2.8.2)\n", + "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3,>=0.1.52->langchain-huggingface) (8.5.0)\n", + "Requirement already satisfied: torch>=1.11.0 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers>=2.6.0->langchain-huggingface) (2.3.1+cu121)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers>=2.6.0->langchain-huggingface) (1.25.2)\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from sentence-transformers>=2.6.0->langchain-huggingface) (1.2.2)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers>=2.6.0->langchain-huggingface) (1.11.4)\n", + "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from sentence-transformers>=2.6.0->langchain-huggingface) (9.4.0)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.39.0->langchain-huggingface) (2024.5.15)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.39.0->langchain-huggingface) (0.4.3)\n", + "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.10/dist-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.3,>=0.1.52->langchain-huggingface) (3.0.0)\n", + "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.2.0,>=0.1.75->langchain-core<0.3,>=0.1.52->langchain-huggingface) (3.10.6)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain-core<0.3,>=0.1.52->langchain-huggingface) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.20.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain-core<0.3,>=0.1.52->langchain-huggingface) (2.20.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.23.0->langchain-huggingface) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.23.0->langchain-huggingface) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.23.0->langchain-huggingface) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.23.0->langchain-huggingface) (2024.7.4)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface) (1.13.0)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface) (3.3)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface) (3.1.4)\n", + "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n", + " Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n", + "Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n", + " Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n", + "Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n", + " Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n", + "Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n", + " Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n", + "Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n", + " Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n", + "Collecting nvidia-cufft-cu12==11.0.2.54 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n", + " Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n", + "Collecting nvidia-curand-cu12==10.3.2.106 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n", + " Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n", + "Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n", + " Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n", + "Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n", + " Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n", + "Collecting nvidia-nccl-cu12==2.20.5 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n", + " Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n", + "Collecting nvidia-nvtx-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n", + " Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n", + "Requirement already satisfied: triton==2.3.1 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface) (2.3.1)\n", + "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n", + " Downloading nvidia_nvjitlink_cu12-12.5.82-py3-none-manylinux2014_x86_64.whl (21.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m36.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence-transformers>=2.6.0->langchain-huggingface) (1.4.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence-transformers>=2.6.0->langchain-huggingface) (3.5.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface) (2.1.5)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface) (1.3.0)\n", + "Installing collected packages: nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12, sentence-transformers, langchain-huggingface\n", + "Successfully installed langchain-huggingface-0.0.3 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.5.82 nvidia-nvtx-cu12-12.1.105 sentence-transformers-3.0.1\n", + "Collecting langchain-community\n", + " Downloading langchain_community-0.2.7-py3-none-any.whl (2.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m12.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (6.0.1)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (2.0.31)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (3.9.5)\n", + "Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)\n", + " Downloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)\n", + "Requirement already satisfied: langchain<0.3.0,>=0.2.7 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (0.2.9)\n", + "Requirement already satisfied: langchain-core<0.3.0,>=0.2.12 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (0.2.21)\n", + "Requirement already satisfied: langsmith<0.2.0,>=0.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (0.1.90)\n", + "Requirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (1.25.2)\n", + "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (2.31.0)\n", + "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (8.5.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (23.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.4.1)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (6.0.5)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.9.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (4.0.3)\n", + "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)\n", + " Downloading marshmallow-3.21.3-py3-none-any.whl (49 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.2/49.2 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)\n", + " Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", + "Requirement already satisfied: langchain-text-splitters<0.3.0,>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from langchain<0.3.0,>=0.2.7->langchain-community) (0.2.2)\n", + "Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain<0.3.0,>=0.2.7->langchain-community) (2.8.2)\n", + "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.12->langchain-community) (1.33)\n", + "Requirement already satisfied: packaging<25,>=23.2 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.12->langchain-community) (24.1)\n", + "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.2.0,>=0.1.0->langchain-community) (3.10.6)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain-community) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain-community) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain-community) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain-community) (2024.7.4)\n", + "Requirement already satisfied: typing-extensions>=4.6.0 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain-community) (4.12.2)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain-community) (3.0.3)\n", + "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.10/dist-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.3.0,>=0.2.12->langchain-community) (3.0.0)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain<0.3.0,>=0.2.7->langchain-community) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.20.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain<0.3.0,>=0.2.7->langchain-community) (2.20.1)\n", + "Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community)\n", + " Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", + "Installing collected packages: mypy-extensions, marshmallow, typing-inspect, dataclasses-json, langchain-community\n", + "Successfully installed dataclasses-json-0.6.7 langchain-community-0.2.7 marshmallow-3.21.3 mypy-extensions-1.0.0 typing-inspect-0.9.0\n", + "Requirement already satisfied: langchain-text-splitters in /usr/local/lib/python3.10/dist-packages (0.2.2)\n", + "Requirement already satisfied: langchain-core<0.3.0,>=0.2.10 in /usr/local/lib/python3.10/dist-packages (from langchain-text-splitters) (0.2.21)\n", + "Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (6.0.1)\n", + "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (1.33)\n", + "Requirement already satisfied: langsmith<0.2.0,>=0.1.75 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (0.1.90)\n", + "Requirement already satisfied: packaging<25,>=23.2 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (24.1)\n", + "Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (2.8.2)\n", + "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (8.5.0)\n", + "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.10/dist-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (3.0.0)\n", + "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.2.0,>=0.1.75->langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (3.10.6)\n", + "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.2.0,>=0.1.75->langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (2.31.0)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.20.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (2.20.1)\n", + "Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (4.12.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langsmith<0.2.0,>=0.1.75->langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langsmith<0.2.0,>=0.1.75->langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langsmith<0.2.0,>=0.1.75->langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langsmith<0.2.0,>=0.1.75->langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (2024.7.4)\n", + "Collecting faiss-gpu\n", + " Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.5/85.5 MB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: faiss-gpu\n", + "Successfully installed faiss-gpu-1.7.2\n" + ] + } + ], + "source": [ + "!pip install langchain\n", + "!pip install langchain-huggingface\n", + "!pip install langchain-community\n", + "!pip install langchain-text-splitters\n", + "!pip install faiss-gpu #[pip install faiss-cpu]" + ] + }, + { + "cell_type": "code", + "source": [ + "from langchain_community.embeddings import HuggingFaceBgeEmbeddings\n", + "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", + "from langchain_community.vectorstores import FAISS\n", + "\n", + "\n", + "# Initialize embedding\n", + "model_name = \"BAAI/bge-small-en\"\n", + "model_kwargs = {\"device\": \"cuda\"} #can also be cpu\n", + "encode_kwargs = {\"normalize_embeddings\": True}\n", + "embeddings = HuggingFaceBgeEmbeddings(\n", + " model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs\n", + " )\n", + "\n" + ], + "metadata": { + "id": "YG-ahOix0dW9" + }, + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Helper functions\n" + ], + "metadata": { + "id": "_ECS2otw4QxV" + } + }, + { + "cell_type": "code", + "source": [ + "def create_documents(text):\n", + " text_splitter = RecursiveCharacterTextSplitter(\n", + " chunk_size=1000,\n", + " chunk_overlap=200,\n", + " length_function=len,\n", + " is_separator_regex=False,\n", + " )\n", + " docs = text_splitter.create_documents([text])\n", + " return docs\n", + "\n", + "def save_embedded_data(embeddings, key=\"rand\"):\n", + " embeddings.save_local(f\"content/index/faiss_index_{key}\")\n", + "\n", + "def load_embedded_data(embeddings, key):\n", + " embeddings = FAISS.load_local(f\"content/index/faiss_index_{key}\", embeddings, allow_dangerous_deserialization=True)\n", + " return embeddings\n", + "\n", + "def embed_text(text, embeddings):\n", + " docs = create_documents(text)\n", + " db = FAISS.from_documents(docs, embeddings)\n", + " return db\n", + "\n", + "def search(db, query, k=4):\n", + " docs = db.similarity_search(query, k)\n", + " all = \"\"\n", + " for doc in docs:\n", + " all += f\"{doc.page_content}\\n\"\n", + " return docs[0].page_content, all\n" + ], + "metadata": { + "id": "eDX_lK0-4TBu" + }, + "execution_count": 17, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Text to embed" + ], + "metadata": { + "id": "b7x7VtND52ty" + } + }, + { + "cell_type": "code", + "source": [ + "text = ''' The Power of Education: Unlocking Potential and Shaping the Future\n", + "Introduction\n", + "Education is the backbone of human progress, a fundamental right that unlocks the doors to knowledge, opportunity, and growth. It is the key to unlocking individual potential, fostering personal development, and shaping the future of societies and nations. In this article, we will delve into the importance of education, its history, benefits, challenges, and the role it plays in shaping our world.\n", + "History of Education\n", + "Education has a rich history that dates back to ancient civilizations, where knowledge was passed down through oral traditions, apprenticeships, and written records. The ancient Greeks, Romans, and Egyptians recognized the value of education, establishing institutions and systems that laid the foundation for modern education. The Industrial Revolution further emphasized the need for mass education, leading to the development of public education systems. Today, education is a global priority, with international efforts focused on increasing access, quality, and equity.\n", + "Benefits of Education\n", + "Education offers numerous benefits that extend beyond the individual to society as a whole:\n", + "Personal Benefits\n", + "Knowledge and skills: Education provides individuals with the knowledge, skills, and competencies necessary for personal and professional growth.\n", + "Critical thinking and problem-solving: Education fosters critical thinking, creativity, and problem-solving abilities.\n", + "Empowerment and confidence: Education empowers individuals, boosts self-esteem, and enhances their sense of purpose.\n", + "Social Benefits\n", + "Social mobility: Education helps individuals overcome socio-economic barriers, promoting social mobility and equality.\n", + "Civic engagement: Education fosters responsible citizenship, encouraging individuals to participate in the democratic process and community development.\n", + "Social cohesion: Education promotes understanding, tolerance, and social cohesion, breaking down cultural and religious barriers.\n", + "Economic Benefits\n", + "Employment and productivity: Education enhances employability, productivity, and economic growth.\n", + "Innovation and entrepreneurship: Education drives innovation, creativity, and entrepreneurship, leading to new industries and job creation.\n", + "Economic development: Education is essential for economic development, poverty reduction, and improved standards of living.\n", + "Challenges in Education\n", + "Despite the importance of education, numerous challenges hinder its accessibility, quality, and equity:\n", + "Access and Equity\n", + "Disparities in access: Many individuals, particularly in developing countries, lack access to quality education due to geographical, financial, or social barriers.\n", + "Disparities in quality: Educational quality varies significantly, with some institutions providing substandard education.\n", + "Financing and Resources\n", + "Underfunding: Education is often underfunded, leading to inadequate infrastructure, outdated resources, and insufficient teacher training.\n", + "Teacher shortages: Many regions face teacher shortages, compromising the quality of education.\n", + "Technology and Innovation\n", + "Digital divide: The digital divide exacerbates existing inequalities, as some individuals lack access to technology and digital literacy.\n", + "Adapting to change: Education systems struggle to keep pace with technological advancements, requiring innovative approaches to learning.\n", + "Conclusion\n", + "Education is a powerful tool that transforms individuals, societies, and nations. While challenges persist, the benefits of education far outweigh the obstacles. As we move forward, it is essential to prioritize education, address the challenges, and harness the potential of technology and innovation. Together, we can create a more inclusive, equitable, and quality education system that empowers individuals and shapes a brighter future for all.\n", + "Please let me know if you'd like me to add or modify anything in this article. '''" + ], + "metadata": { + "id": "gW1OBm8h5wdo" + }, + "execution_count": 9, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "\n", + "db = embed_text(text, embeddings)\n", + "# Save DB (Optional)\n", + "# save_embedded_data(db, \"key\")\n", + "\n", + "\n", + "# Load existing indexed DB\n", + "# initialize embedding\n", + "# db = load_embedded_data(embeddings, \"key\")\n", + "\n", + "search_result, all = search(db, \"What is social mobility\")\n", + "print( search_result )\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "y1a8P6-N2b8g", + "outputId": "2a9104a9-d637-488a-f519-bc0c8c2e5d1f" + }, + "execution_count": 19, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Critical thinking and problem-solving: Education fosters critical thinking, creativity, and problem-solving abilities.\n", + "Empowerment and confidence: Education empowers individuals, boosts self-esteem, and enhances their sense of purpose.\n", + "Social Benefits\n", + "Social mobility: Education helps individuals overcome socio-economic barriers, promoting social mobility and equality.\n", + "Civic engagement: Education fosters responsible citizenship, encouraging individuals to participate in the democratic process and community development.\n", + "Social cohesion: Education promotes understanding, tolerance, and social cohesion, breaking down cultural and religious barriers.\n", + "Economic Benefits\n", + "Employment and productivity: Education enhances employability, productivity, and economic growth.\n", + "Innovation and entrepreneurship: Education drives innovation, creativity, and entrepreneurship, leading to new industries and job creation.\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "DvGgcsx-5vUI" + } + } + ] +} \ No newline at end of file diff --git a/data/corolla-2020-toyota-owners-manual.pdf b/data/corolla-2020-toyota-owners-manual.pdf new file mode 100644 index 00000000..ec5d8568 Binary files /dev/null and b/data/corolla-2020-toyota-owners-manual.pdf differ diff --git a/doc-experiment.ipynb b/doc-experiment.ipynb new file mode 100644 index 00000000..66761e5e --- /dev/null +++ b/doc-experiment.ipynb @@ -0,0 +1,340 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.embeddings import HuggingFaceBgeEmbeddings\n", + "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", + "from langchain_community.vectorstores import FAISS" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\timmy_3aupohg\\anaconda3\\envs\\smog_env\\Lib\\site-packages\\sentence_transformers\\cross_encoder\\CrossEncoder.py:11: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n", + " from tqdm.autonotebook import tqdm, trange\n" + ] + } + ], + "source": [ + "# Initialize embedding\n", + "model_name = \"BAAI/bge-small-en\"\n", + "model_kwargs = {\"device\": \"cuda\"} #can also be cpu\n", + "encode_kwargs = {\"normalize_embeddings\": True}\n", + "embeddings = HuggingFaceBgeEmbeddings(\n", + " model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs\n", + " )\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.document_loaders import PyPDFLoader" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the document \n", + "loader = PyPDFLoader(\"data/corolla-2020-toyota-owners-manual.pdf\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "pages = loader.load_and_split()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "588" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(pages)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Document(metadata={'source': 'data/corolla-2020-toyota-owners-manual.pdf', 'page': 0}, page_content='1\\n2\\n3\\n456789\\n9\\n10\\nCOROLLA_UPictorial index Search by illustration\\nFor safety \\nand securityMake sure to read through them\\n(Main topics: Child seat, theft deterrent system)\\nVehicle status information and \\nindicatorsReading driving-related information\\n(Main topics: Meters, multi-information display)\\nBefore drivingOpening and closing the doors and windows, \\nadjustment before driving\\n(Main topics: Keys, doors, seats)\\nDrivingOperations and advice which are necessary for driving\\n(Main topics: Starting engine, refueling)\\nEntune audioOperating the Entune Audio\\n(Main topics: Audio/visual, phone, Toyota Entune)\\nInterior featuresUsage of the interior features\\n(Main topics: Air conditioner, storage features)\\nMaintenance \\nand careCaring for your vehicle and maintenance \\nprocedures\\n(Main topics: Interior and exterior, light bulbs)\\nWhen trouble \\narisesWhat to do in case of malfunction and emergency\\n(Main topics: Battery discharge, flat tire)\\nVehicle specifications Vehicle specifications, customizable features\\n(Main topics: Fuel, oil, tire inflation pressure)\\nFor ownersReporting safety defects for U.S. owners, and seat \\nbelt and SRS airbag instructions for Canadian owners\\nIndexSearch by symptom\\nSearch alphabetically\\nhttps://www.MyCarManual.com')" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pages[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\timmy_3aupohg\\anaconda3\\envs\\smog_env\\Lib\\site-packages\\transformers\\models\\bert\\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\\cb\\pytorch_1000000000000\\work\\aten\\src\\ATen\\native\\transformers\\cuda\\sdp_utils.cpp:555.)\n", + " attn_output = torch.nn.functional.scaled_dot_product_attention(\n" + ] + } + ], + "source": [ + "db = FAISS.from_documents(pages, embeddings)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "def save_embedded_data(embeddings, key=\"rand\"):\n", + " embeddings.save_local(f\"vec-db/index/faiss_index_{key}\")\n", + " print(\"Embeddings saved\")\n", + "\n", + "def load_embedded_data(embeddings, key):\n", + " embed_db = FAISS.load_local(f\"vec-db/index/faiss_index_{key}\", embeddings, allow_dangerous_deserialization=True)\n", + " return embed_db" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Embeddings saved\n" + ] + } + ], + "source": [ + "save_embedded_data(db, key=\"rand\")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "load_db = load_embedded_data(embeddings, key=\"rand\")" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "query = \"Steering assist function/lane centering function\"\n", + "docs = load_db.similarity_search(query)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "202 4-5. Using the driving support systems\n", + "COROLLA_UInside of displayed lines is \n", + "black\n", + "Indicates that the system is not able to recognize white (yellow) \n", + "lines or a course\n", + "* or is temporar-\n", + "ily canceled.\n", + "*: Boundary between asphalt and \n", + "the side of the road, such as \n", + "grass, soil, or a curb\n", + "Follow-up cruising display\n", + "Displayed when the multi-informa-tion display is switched to the driv-ing support system information screen.\n", + "Indicates that steering assist of the \n", + "lane centering function is operating by monitoring the position of a pre-ceding vehicle.\n", + "When the follow-up cruising display \n", + "is displayed, if the preceding vehi-cle moves, your vehicle may move in the same way. A lways pay care-\n", + "ful attention to your surroundings and operate the steering wheel as necessary to correct the path of the vehicle and ensure safety.\n", + "■Operation conditions of each \n", + "function\n", + "●Lane departure alert function\n", + "This function oper ates when all of \n", + "the following cond itions are met.\n", + "• LTA is turned on.• Vehicle speed is approximately 32 \n", + "mph (50 km/h) or more.*1\n", + "• System recognizes white (yellow) \n", + "lane lines or a course*2. (When a \n", + "white [yellow] line or course*2 is \n", + "recognized on only one side, the system will operate only for the \n", + "recognized side.)\n", + "• Width of traffic lane is approxi-\n", + "mately 9.8 ft. (3 m) or more.\n", + "• Turn signal lever is not operated.\n", + "(Vehicles with a Blind Spot Moni-\n", + "tor: Except when another vehicle \n", + "is in the lane on the side where the turn signal was operated)\n", + "• Vehicle is not being driven around \n", + "a sharp curve.\n", + "• No system malfunctions are \n", + "detected. ( P.204)\n", + "*1:The function oper ates even if the \n", + "vehicle speed is less than \n", + "approximately 32 mph (50 km/h) when the lane centering function is operating.\n", + "*2:Boundary between asphalt and \n", + "the side of the road, such as grass, soil, or a curb\n", + "●Steering assist function\n", + "This function operates when all of the following conditions are met in addition to the operation conditions for the lane departure alert function.\n", + "• Setting for “Steering Assist” in \n", + "of the multi-information display is \n", + "set to “ON”. ( P.548)\n", + "• Vehicle is not accelerated or \n", + "decelerated by a fixed amount or more.\n", + "• Steering wheel is not operated \n", + "with a steering force level suitable \n", + "for changing lanes.\n", + "• ABS, VSC, TRAC and PCS are \n", + "not operating.\n", + "• TRAC or VSC is not turned off.\n", + "• Hands off steering wheel warning \n", + "is not displayed. ( P.204)\n", + "●Vehicle sway warning function\n", + "This function operates when all of \n", + "https://www.MyCarManual.com\n" + ] + } + ], + "source": [ + "print(docs[0].page_content)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "201\n" + ] + } + ], + "source": [ + "print(docs[0].metadata['page'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def search(db, query, k=4):\n", + " docs = db.similarity_search(query, k)\n", + " all = \"\"\n", + " for doc in docs:\n", + " all += f\"{doc.page_content}\\n\"\n", + " return docs[0].page_content, all" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "'FAISS' object is not callable", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[16], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m search_result, \u001b[38;5;28mall\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[43msearch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdb\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mWhat is LDA\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m( search_result )\n", + "Cell \u001b[1;32mIn[15], line 2\u001b[0m, in \u001b[0;36msearch\u001b[1;34m(db, query, k)\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msearch\u001b[39m(db, query, k\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m4\u001b[39m):\n\u001b[1;32m----> 2\u001b[0m docs \u001b[38;5;241m=\u001b[39m \u001b[43mdb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msimilarity_search\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28mall\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m doc \u001b[38;5;129;01min\u001b[39;00m docs:\n", + "File \u001b[1;32mc:\\Users\\timmy_3aupohg\\anaconda3\\envs\\smog_env\\Lib\\site-packages\\langchain_community\\vectorstores\\faiss.py:530\u001b[0m, in \u001b[0;36mFAISS.similarity_search\u001b[1;34m(self, query, k, filter, fetch_k, **kwargs)\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msimilarity_search\u001b[39m(\n\u001b[0;32m 511\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m 512\u001b[0m query: \u001b[38;5;28mstr\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 516\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any,\n\u001b[0;32m 517\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m List[Document]:\n\u001b[0;32m 518\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Return docs most similar to query.\u001b[39;00m\n\u001b[0;32m 519\u001b[0m \n\u001b[0;32m 520\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 528\u001b[0m \u001b[38;5;124;03m List of Documents most similar to the query.\u001b[39;00m\n\u001b[0;32m 529\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 530\u001b[0m docs_and_scores \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msimilarity_search_with_score\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 531\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mfilter\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mfilter\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfetch_k\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfetch_k\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[0;32m 532\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 533\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m [doc \u001b[38;5;28;01mfor\u001b[39;00m doc, _ \u001b[38;5;129;01min\u001b[39;00m docs_and_scores]\n", + "File \u001b[1;32mc:\\Users\\timmy_3aupohg\\anaconda3\\envs\\smog_env\\Lib\\site-packages\\langchain_community\\vectorstores\\faiss.py:402\u001b[0m, in \u001b[0;36mFAISS.similarity_search_with_score\u001b[1;34m(self, query, k, filter, fetch_k, **kwargs)\u001b[0m\n\u001b[0;32m 378\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msimilarity_search_with_score\u001b[39m(\n\u001b[0;32m 379\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m 380\u001b[0m query: \u001b[38;5;28mstr\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 384\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any,\n\u001b[0;32m 385\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m List[Tuple[Document, \u001b[38;5;28mfloat\u001b[39m]]:\n\u001b[0;32m 386\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Return docs most similar to query.\u001b[39;00m\n\u001b[0;32m 387\u001b[0m \n\u001b[0;32m 388\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 400\u001b[0m \u001b[38;5;124;03m L2 distance in float. Lower score represents more similarity.\u001b[39;00m\n\u001b[0;32m 401\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 402\u001b[0m embedding \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_embed_query\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 403\u001b[0m docs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msimilarity_search_with_score_by_vector(\n\u001b[0;32m 404\u001b[0m embedding,\n\u001b[0;32m 405\u001b[0m k,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 408\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[0;32m 409\u001b[0m )\n\u001b[0;32m 410\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m docs\n", + "File \u001b[1;32mc:\\Users\\timmy_3aupohg\\anaconda3\\envs\\smog_env\\Lib\\site-packages\\langchain_community\\vectorstores\\faiss.py:156\u001b[0m, in \u001b[0;36mFAISS._embed_query\u001b[1;34m(self, text)\u001b[0m\n\u001b[0;32m 154\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39membedding_function\u001b[38;5;241m.\u001b[39membed_query(text)\n\u001b[0;32m 155\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 156\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43membedding_function\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[1;31mTypeError\u001b[0m: 'FAISS' object is not callable" + ] + } + ], + "source": [ + "search_result, all = search(db, \"What is LDA\")\n", + "print( search_result )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ai_index", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..159ad675 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +langchain +faiss-cpu +notebook +pandas +numpy +scikit-learn +langchain-community +langchain-openai +langchain-huggingface +langchain-text-splitters \ No newline at end of file diff --git a/vec-db/index/faiss_index_rand/index.faiss b/vec-db/index/faiss_index_rand/index.faiss new file mode 100644 index 00000000..8626e5d9 Binary files /dev/null and b/vec-db/index/faiss_index_rand/index.faiss differ diff --git a/vec-db/index/faiss_index_rand/index.pkl b/vec-db/index/faiss_index_rand/index.pkl new file mode 100644 index 00000000..046f332a Binary files /dev/null and b/vec-db/index/faiss_index_rand/index.pkl differ