Files
ds_fire_fighter/FAISS_Embedding.ipynb
T

395 lines
37 KiB
Plaintext
Raw Normal View History

2024-08-05 21:08:29 +01:00
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ESxp1aQc0X53",
"outputId": "2844a981-b5ab-4ae4-8f8a-408d0369a8f2"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting langchain\n",
" Downloading langchain-0.2.9-py3-none-any.whl (987 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m987.7/987.7 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (6.0.1)\n",
"Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.0.31)\n",
"Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (3.9.5)\n",
"Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (4.0.3)\n",
"Collecting langchain-core<0.3.0,>=0.2.20 (from langchain)\n",
" Downloading langchain_core-0.2.21-py3-none-any.whl (372 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m372.0/372.0 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)\n",
" Downloading langchain_text_splitters-0.2.2-py3-none-any.whl (25 kB)\n",
"Collecting langsmith<0.2.0,>=0.1.17 (from langchain)\n",
" Downloading langsmith-0.1.90-py3-none-any.whl (134 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.7/134.7 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (1.25.2)\n",
"Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.8.2)\n",
"Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.31.0)\n",
"Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (8.5.0)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n",
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.2.0)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.4.1)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.5)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.9.4)\n",
"Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.3.0,>=0.2.20->langchain)\n",
" Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n",
"Requirement already satisfied: packaging<25,>=23.2 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.20->langchain) (24.1)\n",
"Collecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.17->langchain)\n",
" Downloading orjson-3.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (141 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m141.1/141.1 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (0.7.0)\n",
"Requirement already satisfied: pydantic-core==2.20.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (2.20.1)\n",
"Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (4.12.2)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (3.7)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (2024.7.4)\n",
"Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain) (3.0.3)\n",
"Collecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain-core<0.3.0,>=0.2.20->langchain)\n",
" Downloading jsonpointer-3.0.0-py2.py3-none-any.whl (7.6 kB)\n",
"Installing collected packages: orjson, jsonpointer, jsonpatch, langsmith, langchain-core, langchain-text-splitters, langchain\n",
"Successfully installed jsonpatch-1.33 jsonpointer-3.0.0 langchain-0.2.9 langchain-core-0.2.21 langchain-text-splitters-0.2.2 langsmith-0.1.90 orjson-3.10.6\n",
"Collecting langchain-huggingface\n",
" Downloading langchain_huggingface-0.0.3-py3-none-any.whl (17 kB)\n",
"Requirement already satisfied: huggingface-hub>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from langchain-huggingface) (0.23.4)\n",
"Requirement already satisfied: langchain-core<0.3,>=0.1.52 in /usr/local/lib/python3.10/dist-packages (from langchain-huggingface) (0.2.21)\n",
"Collecting sentence-transformers>=2.6.0 (from langchain-huggingface)\n",
" Downloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m227.1/227.1 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: tokenizers>=0.19.1 in /usr/local/lib/python3.10/dist-packages (from langchain-huggingface) (0.19.1)\n",
"Requirement already satisfied: transformers>=4.39.0 in /usr/local/lib/python3.10/dist-packages (from langchain-huggingface) (4.42.4)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.23.0->langchain-huggingface) (3.15.4)\n",
"Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.23.0->langchain-huggingface) (2023.6.0)\n",
"Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.23.0->langchain-huggingface) (24.1)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.23.0->langchain-huggingface) (6.0.1)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.23.0->langchain-huggingface) (2.31.0)\n",
"Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.23.0->langchain-huggingface) (4.66.4)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.23.0->langchain-huggingface) (4.12.2)\n",
"Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3,>=0.1.52->langchain-huggingface) (1.33)\n",
"Requirement already satisfied: langsmith<0.2.0,>=0.1.75 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3,>=0.1.52->langchain-huggingface) (0.1.90)\n",
"Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3,>=0.1.52->langchain-huggingface) (2.8.2)\n",
"Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3,>=0.1.52->langchain-huggingface) (8.5.0)\n",
"Requirement already satisfied: torch>=1.11.0 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers>=2.6.0->langchain-huggingface) (2.3.1+cu121)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers>=2.6.0->langchain-huggingface) (1.25.2)\n",
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from sentence-transformers>=2.6.0->langchain-huggingface) (1.2.2)\n",
"Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers>=2.6.0->langchain-huggingface) (1.11.4)\n",
"Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from sentence-transformers>=2.6.0->langchain-huggingface) (9.4.0)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.39.0->langchain-huggingface) (2024.5.15)\n",
"Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.39.0->langchain-huggingface) (0.4.3)\n",
"Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.10/dist-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.3,>=0.1.52->langchain-huggingface) (3.0.0)\n",
"Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.2.0,>=0.1.75->langchain-core<0.3,>=0.1.52->langchain-huggingface) (3.10.6)\n",
"Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain-core<0.3,>=0.1.52->langchain-huggingface) (0.7.0)\n",
"Requirement already satisfied: pydantic-core==2.20.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain-core<0.3,>=0.1.52->langchain-huggingface) (2.20.1)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.23.0->langchain-huggingface) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.23.0->langchain-huggingface) (3.7)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.23.0->langchain-huggingface) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.23.0->langchain-huggingface) (2024.7.4)\n",
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface) (1.13.0)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface) (3.3)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface) (3.1.4)\n",
"Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n",
" Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n",
"Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n",
" Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n",
"Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n",
" Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n",
"Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n",
" Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n",
"Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n",
" Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n",
"Collecting nvidia-cufft-cu12==11.0.2.54 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n",
" Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n",
"Collecting nvidia-curand-cu12==10.3.2.106 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n",
" Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n",
"Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n",
" Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n",
"Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n",
" Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n",
"Collecting nvidia-nccl-cu12==2.20.5 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n",
" Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n",
"Collecting nvidia-nvtx-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n",
" Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n",
"Requirement already satisfied: triton==2.3.1 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface) (2.3.1)\n",
"Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface)\n",
" Downloading nvidia_nvjitlink_cu12-12.5.82-py3-none-manylinux2014_x86_64.whl (21.3 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m36.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence-transformers>=2.6.0->langchain-huggingface) (1.4.2)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence-transformers>=2.6.0->langchain-huggingface) (3.5.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface) (2.1.5)\n",
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.11.0->sentence-transformers>=2.6.0->langchain-huggingface) (1.3.0)\n",
"Installing collected packages: nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12, sentence-transformers, langchain-huggingface\n",
"Successfully installed langchain-huggingface-0.0.3 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.5.82 nvidia-nvtx-cu12-12.1.105 sentence-transformers-3.0.1\n",
"Collecting langchain-community\n",
" Downloading langchain_community-0.2.7-py3-none-any.whl (2.2 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m12.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (6.0.1)\n",
"Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (2.0.31)\n",
"Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (3.9.5)\n",
"Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)\n",
" Downloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)\n",
"Requirement already satisfied: langchain<0.3.0,>=0.2.7 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (0.2.9)\n",
"Requirement already satisfied: langchain-core<0.3.0,>=0.2.12 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (0.2.21)\n",
"Requirement already satisfied: langsmith<0.2.0,>=0.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (0.1.90)\n",
"Requirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (1.25.2)\n",
"Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (2.31.0)\n",
"Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain-community) (8.5.0)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.3.1)\n",
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (23.2.0)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.4.1)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (6.0.5)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.9.4)\n",
"Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (4.0.3)\n",
"Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)\n",
" Downloading marshmallow-3.21.3-py3-none-any.whl (49 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.2/49.2 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)\n",
" Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n",
"Requirement already satisfied: langchain-text-splitters<0.3.0,>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from langchain<0.3.0,>=0.2.7->langchain-community) (0.2.2)\n",
"Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain<0.3.0,>=0.2.7->langchain-community) (2.8.2)\n",
"Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.12->langchain-community) (1.33)\n",
"Requirement already satisfied: packaging<25,>=23.2 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.12->langchain-community) (24.1)\n",
"Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.2.0,>=0.1.0->langchain-community) (3.10.6)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain-community) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain-community) (3.7)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain-community) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain-community) (2024.7.4)\n",
"Requirement already satisfied: typing-extensions>=4.6.0 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain-community) (4.12.2)\n",
"Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain-community) (3.0.3)\n",
"Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.10/dist-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.3.0,>=0.2.12->langchain-community) (3.0.0)\n",
"Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain<0.3.0,>=0.2.7->langchain-community) (0.7.0)\n",
"Requirement already satisfied: pydantic-core==2.20.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain<0.3.0,>=0.2.7->langchain-community) (2.20.1)\n",
"Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community)\n",
" Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n",
"Installing collected packages: mypy-extensions, marshmallow, typing-inspect, dataclasses-json, langchain-community\n",
"Successfully installed dataclasses-json-0.6.7 langchain-community-0.2.7 marshmallow-3.21.3 mypy-extensions-1.0.0 typing-inspect-0.9.0\n",
"Requirement already satisfied: langchain-text-splitters in /usr/local/lib/python3.10/dist-packages (0.2.2)\n",
"Requirement already satisfied: langchain-core<0.3.0,>=0.2.10 in /usr/local/lib/python3.10/dist-packages (from langchain-text-splitters) (0.2.21)\n",
"Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (6.0.1)\n",
"Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (1.33)\n",
"Requirement already satisfied: langsmith<0.2.0,>=0.1.75 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (0.1.90)\n",
"Requirement already satisfied: packaging<25,>=23.2 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (24.1)\n",
"Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (2.8.2)\n",
"Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (8.5.0)\n",
"Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.10/dist-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (3.0.0)\n",
"Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.2.0,>=0.1.75->langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (3.10.6)\n",
"Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.2.0,>=0.1.75->langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (2.31.0)\n",
"Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (0.7.0)\n",
"Requirement already satisfied: pydantic-core==2.20.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (2.20.1)\n",
"Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (4.12.2)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langsmith<0.2.0,>=0.1.75->langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langsmith<0.2.0,>=0.1.75->langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (3.7)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langsmith<0.2.0,>=0.1.75->langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langsmith<0.2.0,>=0.1.75->langchain-core<0.3.0,>=0.2.10->langchain-text-splitters) (2024.7.4)\n",
"Collecting faiss-gpu\n",
" Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.5/85.5 MB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hInstalling collected packages: faiss-gpu\n",
"Successfully installed faiss-gpu-1.7.2\n"
]
}
],
"source": [
"!pip install langchain\n",
"!pip install langchain-huggingface\n",
"!pip install langchain-community\n",
"!pip install langchain-text-splitters\n",
"!pip install faiss-gpu #[pip install faiss-cpu]"
]
},
{
"cell_type": "code",
"source": [
"from langchain_community.embeddings import HuggingFaceBgeEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"from langchain_community.vectorstores import FAISS\n",
"\n",
"\n",
"# Initialize embedding\n",
"model_name = \"BAAI/bge-small-en\"\n",
"model_kwargs = {\"device\": \"cuda\"} #can also be cpu\n",
"encode_kwargs = {\"normalize_embeddings\": True}\n",
"embeddings = HuggingFaceBgeEmbeddings(\n",
" model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs\n",
" )\n",
"\n"
],
"metadata": {
"id": "YG-ahOix0dW9"
},
"execution_count": 5,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"Helper functions\n"
],
"metadata": {
"id": "_ECS2otw4QxV"
}
},
{
"cell_type": "code",
"source": [
"def create_documents(text):\n",
" text_splitter = RecursiveCharacterTextSplitter(\n",
" chunk_size=1000,\n",
" chunk_overlap=200,\n",
" length_function=len,\n",
" is_separator_regex=False,\n",
" )\n",
" docs = text_splitter.create_documents([text])\n",
" return docs\n",
"\n",
"def save_embedded_data(embeddings, key=\"rand\"):\n",
" embeddings.save_local(f\"content/index/faiss_index_{key}\")\n",
"\n",
"def load_embedded_data(embeddings, key):\n",
" embeddings = FAISS.load_local(f\"content/index/faiss_index_{key}\", embeddings, allow_dangerous_deserialization=True)\n",
" return embeddings\n",
"\n",
"def embed_text(text, embeddings):\n",
" docs = create_documents(text)\n",
" db = FAISS.from_documents(docs, embeddings)\n",
" return db\n",
"\n",
"def search(db, query, k=4):\n",
" docs = db.similarity_search(query, k)\n",
" all = \"\"\n",
" for doc in docs:\n",
" all += f\"{doc.page_content}\\n\"\n",
" return docs[0].page_content, all\n"
],
"metadata": {
"id": "eDX_lK0-4TBu"
},
"execution_count": 17,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"Text to embed"
],
"metadata": {
"id": "b7x7VtND52ty"
}
},
{
"cell_type": "code",
"source": [
"text = ''' The Power of Education: Unlocking Potential and Shaping the Future\n",
"Introduction\n",
"Education is the backbone of human progress, a fundamental right that unlocks the doors to knowledge, opportunity, and growth. It is the key to unlocking individual potential, fostering personal development, and shaping the future of societies and nations. In this article, we will delve into the importance of education, its history, benefits, challenges, and the role it plays in shaping our world.\n",
"History of Education\n",
"Education has a rich history that dates back to ancient civilizations, where knowledge was passed down through oral traditions, apprenticeships, and written records. The ancient Greeks, Romans, and Egyptians recognized the value of education, establishing institutions and systems that laid the foundation for modern education. The Industrial Revolution further emphasized the need for mass education, leading to the development of public education systems. Today, education is a global priority, with international efforts focused on increasing access, quality, and equity.\n",
"Benefits of Education\n",
"Education offers numerous benefits that extend beyond the individual to society as a whole:\n",
"Personal Benefits\n",
"Knowledge and skills: Education provides individuals with the knowledge, skills, and competencies necessary for personal and professional growth.\n",
"Critical thinking and problem-solving: Education fosters critical thinking, creativity, and problem-solving abilities.\n",
"Empowerment and confidence: Education empowers individuals, boosts self-esteem, and enhances their sense of purpose.\n",
"Social Benefits\n",
"Social mobility: Education helps individuals overcome socio-economic barriers, promoting social mobility and equality.\n",
"Civic engagement: Education fosters responsible citizenship, encouraging individuals to participate in the democratic process and community development.\n",
"Social cohesion: Education promotes understanding, tolerance, and social cohesion, breaking down cultural and religious barriers.\n",
"Economic Benefits\n",
"Employment and productivity: Education enhances employability, productivity, and economic growth.\n",
"Innovation and entrepreneurship: Education drives innovation, creativity, and entrepreneurship, leading to new industries and job creation.\n",
"Economic development: Education is essential for economic development, poverty reduction, and improved standards of living.\n",
"Challenges in Education\n",
"Despite the importance of education, numerous challenges hinder its accessibility, quality, and equity:\n",
"Access and Equity\n",
"Disparities in access: Many individuals, particularly in developing countries, lack access to quality education due to geographical, financial, or social barriers.\n",
"Disparities in quality: Educational quality varies significantly, with some institutions providing substandard education.\n",
"Financing and Resources\n",
"Underfunding: Education is often underfunded, leading to inadequate infrastructure, outdated resources, and insufficient teacher training.\n",
"Teacher shortages: Many regions face teacher shortages, compromising the quality of education.\n",
"Technology and Innovation\n",
"Digital divide: The digital divide exacerbates existing inequalities, as some individuals lack access to technology and digital literacy.\n",
"Adapting to change: Education systems struggle to keep pace with technological advancements, requiring innovative approaches to learning.\n",
"Conclusion\n",
"Education is a powerful tool that transforms individuals, societies, and nations. While challenges persist, the benefits of education far outweigh the obstacles. As we move forward, it is essential to prioritize education, address the challenges, and harness the potential of technology and innovation. Together, we can create a more inclusive, equitable, and quality education system that empowers individuals and shapes a brighter future for all.\n",
"Please let me know if you'd like me to add or modify anything in this article. '''"
],
"metadata": {
"id": "gW1OBm8h5wdo"
},
"execution_count": 9,
"outputs": []
},
{
"cell_type": "code",
"source": [
"\n",
"db = embed_text(text, embeddings)\n",
"# Save DB (Optional)\n",
"# save_embedded_data(db, \"key\")\n",
"\n",
"\n",
"# Load existing indexed DB\n",
"# initialize embedding\n",
"# db = load_embedded_data(embeddings, \"key\")\n",
"\n",
"search_result, all = search(db, \"What is social mobility\")\n",
"print( search_result )\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "y1a8P6-N2b8g",
"outputId": "2a9104a9-d637-488a-f519-bc0c8c2e5d1f"
},
"execution_count": 19,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Critical thinking and problem-solving: Education fosters critical thinking, creativity, and problem-solving abilities.\n",
"Empowerment and confidence: Education empowers individuals, boosts self-esteem, and enhances their sense of purpose.\n",
"Social Benefits\n",
"Social mobility: Education helps individuals overcome socio-economic barriers, promoting social mobility and equality.\n",
"Civic engagement: Education fosters responsible citizenship, encouraging individuals to participate in the democratic process and community development.\n",
"Social cohesion: Education promotes understanding, tolerance, and social cohesion, breaking down cultural and religious barriers.\n",
"Economic Benefits\n",
"Employment and productivity: Education enhances employability, productivity, and economic growth.\n",
"Innovation and entrepreneurship: Education drives innovation, creativity, and entrepreneurship, leading to new industries and job creation.\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [],
"metadata": {
"id": "DvGgcsx-5vUI"
}
}
]
}