2024-08-05 21:08:29 +01:00
{
"cells": [
2024-08-05 22:14:19 +01:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Libs import"
]
},
2024-08-05 21:08:29 +01:00
{
"cell_type": "code",
2024-08-07 17:50:40 +01:00
"execution_count": 39,
2024-08-05 21:08:29 +01:00
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.embeddings import HuggingFaceBgeEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
2024-08-06 22:37:11 +01:00
"import faiss\n",
"from langchain_community.docstore.in_memory import InMemoryDocstore\n",
2024-08-05 22:14:19 +01:00
"from langchain_community.vectorstores import FAISS\n",
2024-08-06 22:37:11 +01:00
"from langchain_community.document_loaders import PyPDFLoader\n",
"from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.document_loaders import Docx2txtLoader\n",
"from langchain_community.document_loaders import UnstructuredWordDocumentLoader\n",
"from uuid import uuid4\n",
"import numpy as np\n",
2024-08-07 17:50:40 +01:00
"from langchain_core.documents import Document\n",
"import os\n",
"import json"
2024-08-05 22:14:19 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Loading the embeddings model"
2024-08-05 21:08:29 +01:00
]
},
{
"cell_type": "code",
2024-08-06 22:37:11 +01:00
"execution_count": 2,
2024-08-05 21:08:29 +01:00
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\timmy_3aupohg\\anaconda3\\envs\\smog_env\\Lib\\site-packages\\sentence_transformers\\cross_encoder\\CrossEncoder.py:11: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n",
" from tqdm.autonotebook import tqdm, trange\n"
]
}
],
"source": [
"# Embedding model configuration\n",
"model_name = \"BAAI/bge-small-en\"\n",
"model_kwargs = dict(device=\"cuda\")  # can also be cpu\n",
"encode_kwargs = dict(normalize_embeddings=True)\n",
"\n",
"# Build the embedding model shared by every later cell\n",
"embeddings = HuggingFaceBgeEmbeddings(\n",
"    model_name=model_name,\n",
"    model_kwargs=model_kwargs,\n",
"    encode_kwargs=encode_kwargs,\n",
")"
2024-08-05 21:08:29 +01:00
]
},
{
2024-08-05 22:14:19 +01:00
"cell_type": "markdown",
2024-08-05 21:08:29 +01:00
"metadata": {},
"source": [
2024-08-06 22:37:11 +01:00
"## Document loading (PDF, TXT, DOCX)"
2024-08-05 21:08:29 +01:00
]
},
{
"cell_type": "code",
2024-08-07 17:50:40 +01:00
"execution_count": 38,
2024-08-05 21:08:29 +01:00
"metadata": {},
"outputs": [],
"source": [
2024-08-07 17:50:40 +01:00
"# Builds chunk documents for text sources that have no real pages\n",
"def create_documents(doc):\n",
"    \"\"\"Split a non-paginated document into chunks with synthetic page numbers.\n",
"\n",
"    Only the first element of ``doc`` is read. Its text goes through a\n",
"    ``RecursiveCharacterTextSplitter`` configured with ``chunk_size=1000`` and\n",
"    ``chunk_overlap=10``; every chunk becomes a ``Document`` carrying a copy of\n",
"    the source metadata plus a ``page`` key set to the chunk's position.\n",
"    \"\"\"\n",
"    source = doc[0]\n",
"    splitter = RecursiveCharacterTextSplitter(\n",
"        chunk_size=1000,\n",
"        chunk_overlap=10,\n",
"        length_function=len,\n",
"        is_separator_regex=False,\n",
"    )\n",
"    chunks = splitter.create_documents([source.page_content])\n",
"    # the chunk index stands in for the page number\n",
"    return [\n",
"        Document(page_content=chunk.page_content, metadata={**source.metadata, 'page': position})\n",
"        for position, chunk in enumerate(chunks)\n",
"    ]\n",
2024-08-06 22:37:11 +01:00
"\n",
"\n",
"def load_txt_document(document_path):\n",
"    \"\"\"Load a text file and split it into chunk documents.\n",
"\n",
"    Args:\n",
"        document_path: Path handed to ``TextLoader``.\n",
"\n",
"    Returns:\n",
"        The list returned by ``create_documents`` for the loaded file.\n",
"\n",
"    Raises:\n",
"        ValueError: If loading or splitting fails; the original error is\n",
"            chained as ``__cause__``.\n",
"    \"\"\"\n",
"    try:\n",
"        loaded = TextLoader(document_path).load()\n",
"        # implementing document splitting\n",
"        return create_documents(loaded)\n",
"    except Exception as exc:\n",
"        # Exception rather than a bare except, so KeyboardInterrupt and\n",
"        # SystemExit are no longer rewritten into ValueError\n",
"        raise ValueError(f\"Error loading -- {document_path}\") from exc\n",
2024-08-06 22:37:11 +01:00
" \n",
" \n",
"def load_docx_document(document_path):\n",
"    \"\"\"Load a .docx file and split it into chunk documents.\n",
"\n",
"    Args:\n",
"        document_path: Path handed to ``Docx2txtLoader``.\n",
"\n",
"    Returns:\n",
"        The list returned by ``create_documents`` for the loaded file.\n",
"\n",
"    Raises:\n",
"        ValueError: If loading or splitting fails; the original error is\n",
"            chained as ``__cause__``.\n",
"    \"\"\"\n",
"    try:\n",
"        loaded = Docx2txtLoader(document_path).load()\n",
"        # implementing document splitting\n",
"        return create_documents(loaded)\n",
"    except Exception as exc:\n",
"        # Exception rather than a bare except, so KeyboardInterrupt and\n",
"        # SystemExit are no longer rewritten into ValueError\n",
"        raise ValueError(f\"Error loading -- {document_path}\") from exc\n",
"\n",
" \n",
"# Loads a PDF through PyPDFLoader\n",
"def load_pdf_document(document_path):\n",
"    \"\"\"Load a PDF and return the documents from ``load_and_split``.\n",
"\n",
"    Args:\n",
"        document_path: Path handed to ``PyPDFLoader``.\n",
"\n",
"    Returns:\n",
"        The list produced by ``PyPDFLoader.load_and_split()``.\n",
"\n",
"    Raises:\n",
"        ValueError: If the PDF cannot be loaded; the original error is\n",
"            chained as ``__cause__``.\n",
"    \"\"\"\n",
"    try:\n",
"        return PyPDFLoader(document_path).load_and_split()\n",
"    except Exception as exc:\n",
"        # Exception rather than a bare except, so KeyboardInterrupt and\n",
"        # SystemExit are no longer rewritten into ValueError\n",
"        raise ValueError(f\"Error loading -- {document_path}\") from exc\n",
" \n",
"\n",
"\n",
"\n",
"# A general function that loads textual documents\n",
"def load_document(document_path):\n",
"    \"\"\"Load a document with the loader that matches its file extension.\n",
"\n",
"    Args:\n",
"        document_path: Path ending in ``.pdf``, ``.txt`` or ``.docx``\n",
"            (matched case-insensitively).\n",
"\n",
"    Returns:\n",
"        Whatever the matching ``load_*_document`` function returns.\n",
"\n",
"    Raises:\n",
"        ValueError: If the extension is not one of the three supported ones.\n",
"    \"\"\"\n",
"    # compare on a lowered copy so '.PDF' or '.Txt' are recognised too;\n",
"    # the loaders still receive the path exactly as given\n",
"    lowered_path = document_path.lower()\n",
"    if lowered_path.endswith(\".pdf\"):\n",
"        return load_pdf_document(document_path)\n",
"    if lowered_path.endswith(\".txt\"):\n",
"        return load_txt_document(document_path)\n",
"    if lowered_path.endswith(\".docx\"):\n",
"        return load_docx_document(document_path)\n",
"    raise ValueError(f\"Unsupported document type for {document_path}\")\n",
"\n",
"\n",
2024-08-07 17:50:40 +01:00
"\n",
2024-08-06 22:37:11 +01:00
"def save_embedded_data(embeddings, key=\"data\"):\n",
"    \"\"\"Write a vector store to ``vec-db/index/faiss_index_<key>`` and report it.\n",
"\n",
"    Despite its name, ``embeddings`` is the object whose ``save_local`` is\n",
"    called (this notebook passes a FAISS vector store here).\n",
"    \"\"\"\n",
"    index_dir = f\"vec-db/index/faiss_index_{key}\"\n",
"    embeddings.save_local(index_dir)\n",
"    print(\"Embeddings saved\")\n",
"\n",
"def load_embedded_data(embeddings, key=\"data\"):\n",
"    \"\"\"Load the FAISS index stored at ``vec-db/index/faiss_index_<key>``.\n",
"\n",
"    ``embeddings`` is handed to ``FAISS.load_local`` as the embedding model.\n",
"    \"\"\"\n",
"    index_dir = f\"vec-db/index/faiss_index_{key}\"\n",
"    # NOTE(review): this flag opts in to deserialization that LangChain marks\n",
"    # as dangerous (pickle-based, per its docs); only load index folders you\n",
"    # created yourself.\n",
"    return FAISS.load_local(index_dir, embeddings, allow_dangerous_deserialization=True)"
2024-08-05 21:08:29 +01:00
]
},
{
"cell_type": "code",
2024-08-07 17:50:40 +01:00
"execution_count": 66,
2024-08-05 21:08:29 +01:00
"metadata": {},
2024-08-05 22:14:19 +01:00
"outputs": [],
2024-08-05 21:08:29 +01:00
"source": [
2024-08-07 17:50:40 +01:00
"# creating a function to load all documents from a directory.\n",
"def load_documents_from_directory(directory_path: str):\n",
"    \"\"\"Load every supported text document found directly inside a directory.\n",
"\n",
"    Each supported file is loaded with ``load_document`` and given a fresh\n",
"    ``uuid4`` hex id. The names, ids and page counts are also written to\n",
"    ``documents.json`` in the same directory (other keys already present in\n",
"    that file are kept).\n",
"\n",
"    Args:\n",
"        directory_path: Directory to scan; sub-directories are not visited.\n",
"\n",
"    Returns:\n",
"        ``(documents, docs_id, num_pages)`` -- three parallel lists holding, per\n",
"        file, the loaded pages/chunks, the generated id, and the page/chunk count.\n",
"    \"\"\"\n",
"    # Only extensions load_document can handle. 'doc' and 'md' used to be listed\n",
"    # too, which made load_document raise and abort the whole directory load.\n",
"    text_doc = ('pdf', 'txt', 'docx')\n",
"\n",
"    documents = []\n",
"    doc_names = []\n",
"    num_pages = []\n",
"    # sorted() makes the document order independent of the file system\n",
"    for file in sorted(os.listdir(directory_path)):\n",
"        path = os.path.join(directory_path, file)\n",
"        # splitext keeps dotted names whole ('v1.2-notes.pdf' -> 'v1.2-notes') and\n",
"        # reports no extension for a file that is merely called 'pdf'\n",
"        doc_name, extension = os.path.splitext(file)\n",
"        if extension[1:] not in text_doc or not os.path.isfile(path):\n",
"            continue\n",
"        doc = load_document(path)\n",
"        documents.append(doc)\n",
"        num_pages.append(len(doc))\n",
"        doc_names.append(doc_name)\n",
"        print(f\"Document {doc_name} loaded\")\n",
"\n",
"    # one id per loaded document\n",
"    docs_id = [uuid4().hex for _ in documents]\n",
"\n",
"    # refresh the index file, keeping any unrelated keys it already holds\n",
"    json_file = f\"{directory_path}/documents.json\"\n",
"    data = {}\n",
"    if os.path.exists(json_file):\n",
"        with open(json_file, 'r') as f:\n",
"            data = json.load(f)\n",
"    data.update(doc_names=doc_names, docs_id=docs_id, num_pages=num_pages)\n",
"    with open(json_file, 'w') as f:\n",
"        json.dump(data, f)\n",
"\n",
"    # returning the documents, doc ids and page counts\n",
"    return documents, docs_id, num_pages"
2024-08-05 21:08:29 +01:00
]
},
{
"cell_type": "code",
2024-08-07 17:50:40 +01:00
"execution_count": 67,
2024-08-05 21:08:29 +01:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-08-07 17:50:40 +01:00
"Document Car-Repair-Receipt-repair loaded\n",
"Document Car-Repair-Receipt-service loaded\n",
"Document Car-Repair-Receipt-tire loaded\n",
"Document Car-Repair-Receipt-tuning loaded\n",
"Document Car-Repair-Receipt-wash loaded\n",
"Document corolla-2020-toyota-owners-manual loaded\n",
"Document How to change engine oil and filter on TOYOTA Corolla loaded\n",
"Document How to change front brake pads on TOYOTA Corolla loaded\n",
"Document How to change rear windshield wipers on TOYOTA Corolla loaded\n",
"Document How to change spark plugs on TOYOTA COROLLA loaded\n"
2024-08-05 21:08:29 +01:00
]
}
],
"source": [
2024-08-07 17:50:40 +01:00
"documents, docs_id, num_pages = load_documents_from_directory('data') "
2024-08-05 21:08:29 +01:00
]
},
2024-08-06 22:37:11 +01:00
{
"cell_type": "code",
2024-08-07 17:50:40 +01:00
"execution_count": 68,
2024-08-06 22:37:11 +01:00
"metadata": {},
"outputs": [
{
2024-08-07 17:50:40 +01:00
"data": {
"text/plain": [
"['320bc9710952499baa9553d3f0d4e727',\n",
" '6ba07e1cf09a4ae6b54863040f901328',\n",
" 'dd067c452bd146e4becd61bde8602a3c',\n",
" '640493ad16b546d38851216917d3e82b',\n",
" '08cf1c3c8eab4efe9f81efcf8ce770be',\n",
" 'd8d6a3ca9a0a44e08cd4423ee3fb979d',\n",
" '2b6e45cd99ff46b08242282a423642d4',\n",
" '05524682d2e9425c83c9b57693182c50',\n",
" '4eb170648fbe47c3b87b2831a97f0dd8',\n",
" 'cec3e82f0432402e940a0299bfa086fe']"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
2024-08-06 22:37:11 +01:00
}
],
"source": [
2024-08-07 17:50:40 +01:00
"docs_id"
2024-08-06 22:37:11 +01:00
]
},
{
"cell_type": "code",
2024-08-07 17:50:40 +01:00
"execution_count": 81,
2024-08-06 22:37:11 +01:00
"metadata": {},
"outputs": [],
"source": [
"# A function to create vector store\n",
"def create_vector_store(embeddings, documents: list, docs_id: list, num_pages: list):\n",
"    \"\"\"Build a FAISS vector store from the loaded documents and save it.\n",
"\n",
"    Args:\n",
"        embeddings: Embedding model; used both to size the index and as the\n",
"            store's embedding function.\n",
"        documents: One list of pages/chunks per source file.\n",
"        docs_id: One id string per source file, parallel to ``documents``.\n",
"        num_pages: Page/chunk count per source file, parallel to ``documents``.\n",
"\n",
"    Returns:\n",
"        The populated FAISS store, which is also saved through\n",
"        ``save_embedded_data`` under the key ``data``.\n",
"    \"\"\"\n",
"    # size the index from the model itself instead of hard-coding 384, so a\n",
"    # different embedding model does not silently mismatch the index\n",
"    embedding_dim = len(embeddings.embed_query(\"hello world\"))\n",
"    index = faiss.IndexFlatL2(embedding_dim)\n",
"    # Initialize the FAISS vector store\n",
"    vector_store = FAISS(\n",
"        embedding_function=embeddings,\n",
"        index=index,\n",
"        docstore=InMemoryDocstore(),\n",
"        index_to_docstore_id={},\n",
"    )\n",
"    # Now adding the documents to the store, one source file at a time\n",
"    for doc_index, pages in enumerate(documents):\n",
"        if not pages:\n",
"            # nothing to index for this file; never send an empty batch to the index\n",
"            continue\n",
"        doc_id = docs_id[doc_index]\n",
"        # page ids are the document id followed by the page number\n",
"        page_ids = [doc_id + str(page_number) for page_number in range(num_pages[doc_index])]\n",
"        vector_store.add_documents(documents=pages, ids=page_ids)\n",
"\n",
"    # saving the vector store automatically\n",
"    save_embedded_data(vector_store, key=\"data\")\n",
"\n",
"    return vector_store"
]
},
{
"cell_type": "code",
2024-08-07 17:50:40 +01:00
"execution_count": 82,
2024-08-05 21:08:29 +01:00
"metadata": {},
2024-08-06 22:37:11 +01:00
"outputs": [
{
2024-08-07 17:50:40 +01:00
"name": "stdout",
2024-08-06 22:37:11 +01:00
"output_type": "stream",
"text": [
2024-08-07 17:50:40 +01:00
"['320bc9710952499baa9553d3f0d4e7270']\n",
"['6ba07e1cf09a4ae6b54863040f9013280', '6ba07e1cf09a4ae6b54863040f9013281']\n",
"['dd067c452bd146e4becd61bde8602a3c0', 'dd067c452bd146e4becd61bde8602a3c1']\n",
"['640493ad16b546d38851216917d3e82b0', '640493ad16b546d38851216917d3e82b1']\n",
"['08cf1c3c8eab4efe9f81efcf8ce770be0']\n",
"['d8d6a3ca9a0a44e08cd4423ee3fb979d0', 'd8d6a3ca9a0a44e08cd4423ee3fb979d1', 'd8d6a3ca9a0a44e08cd4423ee3fb979d2', 'd8d6a3ca9a0a44e08cd4423ee3fb979d3', 'd8d6a3ca9a0a44e08cd4423ee3fb979d4', 'd8d6a3ca9a0a44e08cd4423ee3fb979d5', 'd8d6a3ca9a0a44e08cd4423ee3fb979d6', 'd8d6a3ca9a0a44e08cd4423ee3fb979d7', 'd8d6a3ca9a0a44e08cd4423ee3fb979d8', 'd8d6a3ca9a0a44e08cd4423ee3fb979d9', 'd8d6a3ca9a0a44e08cd4423ee3fb979d10', 'd8d6a3ca9a0a44e08cd4423ee3fb979d11', 'd8d6a3ca9a0a44e08cd4423ee3fb979d12', 'd8d6a3ca9a0a44e08cd4423ee3fb979d13', 'd8d6a3ca9a0a44e08cd4423ee3fb979d14', 'd8d6a3ca9a0a44e08cd4423ee3fb979d15', 'd8d6a3ca9a0a44e08cd4423ee3fb979d16', 'd8d6a3ca9a0a44e08cd4423ee3fb979d17', 'd8d6a3ca9a0a44e08cd4423ee3fb979d18', 'd8d6a3ca9a0a44e08cd4423ee3fb979d19', 'd8d6a3ca9a0a44e08cd4423ee3fb979d20', 'd8d6a3ca9a0a44e08cd4423ee3fb979d21', 'd8d6a3ca9a0a44e08cd4423ee3fb979d22', 'd8d6a3ca9a0a44e08cd4423ee3fb979d23', 'd8d6a3ca9a0a44e08cd4423ee3fb979d24', 'd8d6a3ca9a0a44e08cd4423ee3fb979d25', 'd8d6a3ca9a0a44e08cd4423ee3fb979d26', 'd8d6a3ca9a0a44e08cd4423ee3fb979d27', 'd8d6a3ca9a0a44e08cd4423ee3fb979d28', 'd8d6a3ca9a0a44e08cd4423ee3fb979d29', 'd8d6a3ca9a0a44e08cd4423ee3fb979d30', 'd8d6a3ca9a0a44e08cd4423ee3fb979d31', 'd8d6a3ca9a0a44e08cd4423ee3fb979d32', 'd8d6a3ca9a0a44e08cd4423ee3fb979d33', 'd8d6a3ca9a0a44e08cd4423ee3fb979d34', 'd8d6a3ca9a0a44e08cd4423ee3fb979d35', 'd8d6a3ca9a0a44e08cd4423ee3fb979d36', 'd8d6a3ca9a0a44e08cd4423ee3fb979d37', 'd8d6a3ca9a0a44e08cd4423ee3fb979d38', 'd8d6a3ca9a0a44e08cd4423ee3fb979d39', 'd8d6a3ca9a0a44e08cd4423ee3fb979d40', 'd8d6a3ca9a0a44e08cd4423ee3fb979d41', 'd8d6a3ca9a0a44e08cd4423ee3fb979d42', 'd8d6a3ca9a0a44e08cd4423ee3fb979d43', 'd8d6a3ca9a0a44e08cd4423ee3fb979d44', 'd8d6a3ca9a0a44e08cd4423ee3fb979d45', 'd8d6a3ca9a0a44e08cd4423ee3fb979d46', 'd8d6a3ca9a0a44e08cd4423ee3fb979d47', 'd8d6a3ca9a0a44e08cd4423ee3fb979d48', 'd8d6a3ca9a0a44e08cd4423ee3fb979d49', 'd8d6a3ca9a0a44e08cd4423ee3fb979d50', 'd8d6a3ca9a0a44e08cd4423ee3fb979d51', 
'd8d6a3ca9a0a44e08cd4423ee3fb979d52', 'd8d6a3ca9a0a44e08cd4423ee3fb979d53', 'd8d6a3ca9a0a44e08cd4423ee3fb979d54', 'd8d6a3ca9a0a44e08cd4423ee3fb979d55', 'd8d6a3ca9a0a44e08cd4423ee3fb979d56', 'd8d6a3ca9a0a44e08cd4423ee3fb979d57', 'd8d6a3ca9a0a44e08cd4423ee3fb979d58', 'd8d6a3ca9a0a44e08cd4423ee3fb979d59', 'd8d6a3ca9a0a44e08cd4423ee3fb979d60', 'd8d6a3ca9a0a44e08cd4423ee3fb979d61', 'd8d6a3ca9a0a44e08cd4423ee3fb979d62', 'd8d6a3ca9a0a44e08cd4423ee3fb979d63', 'd8d6a3ca9a0a44e08cd4423ee3fb979d64', 'd8d6a3ca9a0a44e08cd4423ee3fb979d65', 'd8d6a3ca9a0a44e08cd4423ee3fb979d66', 'd8d6a3ca9a0a44e08cd4423ee3fb979d67', 'd8d6a3ca9a0a44e08cd4423ee3fb979d68', 'd8d6a3ca9a0a44e08cd4423ee3fb979d69', 'd8d6a3ca9a0a44e08cd4423ee3fb979d70', 'd8d6a3ca9a0a44e08cd4423ee3fb979d71', 'd8d6a3ca9a0a44e08cd4423ee3fb979d72', 'd8d6a3ca9a0a44e08cd4423ee3fb979d73', 'd8d6a3ca9a0a44e08cd4423ee3fb979d74', 'd8d6a3ca9a0a44e08cd4423ee3fb979d75', 'd8d6a3ca9a0a44e08cd4423ee3fb979d76', 'd8d6a3ca9a0a44e08cd4423ee3fb979d77', 'd8d6a3ca9a0a44e08cd4423ee3fb979d78', 'd8d6a3ca9a0a44e08cd4423ee3fb979d79', 'd8d6a3ca9a0a44e08cd4423ee3fb979d80', 'd8d6a3ca9a0a44e08cd4423ee3fb979d81', 'd8d6a3ca9a0a44e08cd4423ee3fb979d82', 'd8d6a3ca9a0a44e08cd4423ee3fb979d83', 'd8d6a3ca9a0a44e08cd4423ee3fb979d84', 'd8d6a3ca9a0a44e08cd4423ee3fb979d85', 'd8d6a3ca9a0a44e08cd4423ee3fb979d86', 'd8d6a3ca9a0a44e08cd4423ee3fb979d87', 'd8d6a3ca9a0a44e08cd4423ee3fb979d88', 'd8d6a3ca9a0a44e08cd4423ee3fb979d89', 'd8d6a3ca9a0a44e08cd4423ee3fb979d90', 'd8d6a3ca9a0a44e08cd4423ee3fb979d91', 'd8d6a3ca9a0a44e08cd4423ee3fb979d92', 'd8d6a3ca9a0a44e08cd4423ee3fb979d93', 'd8d6a3ca9a0a44e08cd4423ee3fb979d94', 'd8d6a3ca9a0a44e08cd4423ee3fb979d95', 'd8d6a3ca9a0a44e08cd4423ee3fb979d96', 'd8d6a3ca9a0a44e08cd4423ee3fb979d97', 'd8d6a3ca9a0a44e08cd4423ee3fb979d98', 'd8d6a3ca9a0a44e08cd4423ee3fb979d99', 'd8d6a3ca9a0a44e08cd4423ee3fb979d100', 'd8d6a3ca9a0a44e08cd4423ee3fb979d101', 'd8d6a3ca9a0a44e08cd4423ee3fb979d102', 'd8d6a3ca9a0a44e08cd4423ee3fb979d103', 
'd8d6a3ca9a0a44e08cd4423ee3fb979d104', 'd8d6a3ca9a0a44e08cd4423ee3fb979d105', 'd8d6a3ca9a0a44e08cd4423ee3fb979d106', 'd8d6a3ca9a0a44e08cd4423
"['2b6e45cd99ff46b08242282a423642d40', '2b6e45cd99ff46b08242282a423642d41', '2b6e45cd99ff46b08242282a423642d42', '2b6e45cd99ff46b08242282a423642d43', '2b6e45cd99ff46b08242282a423642d44', '2b6e45cd99ff46b08242282a423642d45']\n",
"['05524682d2e9425c83c9b57693182c500', '05524682d2e9425c83c9b57693182c501', '05524682d2e9425c83c9b57693182c502', '05524682d2e9425c83c9b57693182c503', '05524682d2e9425c83c9b57693182c504', '05524682d2e9425c83c9b57693182c505', '05524682d2e9425c83c9b57693182c506']\n",
"['4eb170648fbe47c3b87b2831a97f0dd80', '4eb170648fbe47c3b87b2831a97f0dd81', '4eb170648fbe47c3b87b2831a97f0dd82', '4eb170648fbe47c3b87b2831a97f0dd83', '4eb170648fbe47c3b87b2831a97f0dd84', '4eb170648fbe47c3b87b2831a97f0dd85']\n",
"['cec3e82f0432402e940a0299bfa086fe0', 'cec3e82f0432402e940a0299bfa086fe1', 'cec3e82f0432402e940a0299bfa086fe2', 'cec3e82f0432402e940a0299bfa086fe3', 'cec3e82f0432402e940a0299bfa086fe4', 'cec3e82f0432402e940a0299bfa086fe5', 'cec3e82f0432402e940a0299bfa086fe6', 'cec3e82f0432402e940a0299bfa086fe7', 'cec3e82f0432402e940a0299bfa086fe8', 'cec3e82f0432402e940a0299bfa086fe9']\n",
"Embeddings saved\n"
2024-08-06 22:37:11 +01:00
]
}
],
"source": [
2024-08-07 17:50:40 +01:00
"# creating the vector store\n",
"vector_store = create_vector_store(embeddings, documents, docs_id, num_pages)"
2024-08-06 22:37:11 +01:00
]
},
{
"cell_type": "code",
2024-08-07 17:50:40 +01:00
"execution_count": null,
2024-08-05 21:08:29 +01:00
"metadata": {},
"outputs": [],
"source": [
2024-08-07 17:50:40 +01:00
"# creating a function to add documents to the vector store\n",
"def add_documents_to_vector_store(embeddings, documents: list, docs_id: list, num_pages: list):\n",
"    \"\"\"Append documents to the saved vector store and persist the result.\n",
"\n",
"    Args:\n",
"        embeddings: Embedding model passed to ``load_embedded_data``.\n",
"        documents: One list of pages/chunks per source file.\n",
"        docs_id: One id string per source file, parallel to ``documents``.\n",
"        num_pages: Page/chunk count per source file, parallel to ``documents``.\n",
"\n",
"    Returns:\n",
"        The updated vector store.\n",
"    \"\"\"\n",
"    # loading the vector store\n",
"    vector_store = load_embedded_data(embeddings)\n",
"    for doc_index, pages in enumerate(documents):\n",
"        if not pages:\n",
"            # nothing to index for this file; never send an empty batch to the index\n",
"            continue\n",
"        doc_id = docs_id[doc_index]\n",
"        page_ids = [doc_id + str(page_number) for page_number in range(num_pages[doc_index])]\n",
"        vector_store.add_documents(documents=pages, ids=page_ids)\n",
"    # the store used to be neither saved nor returned, so every addition was\n",
"    # lost as soon as this function finished\n",
"    save_embedded_data(vector_store)\n",
"    print (\"Documents added to the vector store\")\n",
"    return vector_store"
2024-08-06 22:37:11 +01:00
]
},
{
"cell_type": "code",
2024-08-07 17:50:40 +01:00
"execution_count": 83,
2024-08-05 21:08:29 +01:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-08-07 17:50:40 +01:00
"* [SIM=0.182989] 202 4-5. Using the driving support systems\n",
2024-08-05 21:08:29 +01:00
"COROLLA_UInside of displayed lines is \n",
"black\n",
"Indicates that the system is not able to recognize white (yellow) \n",
"lines or a course\n",
"* or is temporar-\n",
"ily canceled.\n",
"*: Boundary between asphalt and \n",
"the side of the road, such as \n",
"grass, soil, or a curb\n",
"Follow-up cruising display\n",
"Displayed when the multi-informa-tion display is switched to the driv-ing support system information screen.\n",
"Indicates that steering assist of the \n",
"lane centering function is operating by monitoring the position of a pre-ceding vehicle.\n",
"When the follow-up cruising display \n",
"is displayed, if the preceding vehi-cle moves, your vehicle may move in the same way. A lways pay care-\n",
"ful attention to your surroundings and operate the steering wheel as necessary to correct the path of the vehicle and ensure safety.\n",
"■Operation conditions of each \n",
"function\n",
"●Lane departure alert function\n",
"This function oper ates when all of \n",
"the following cond itions are met.\n",
"• LTA is turned on.• Vehicle speed is approximately 32 \n",
"mph (50 km/h) or more.*1\n",
"• System recognizes white (yellow) \n",
"lane lines or a course*2. (When a \n",
"white [yellow] line or course*2 is \n",
"recognized on only one side, the system will operate only for the \n",
"recognized side.)\n",
"• Width of traffic lane is approxi-\n",
"mately 9.8 ft. (3 m) or more.\n",
"• Turn signal lever is not operated.\n",
"(Vehicles with a Blind Spot Moni-\n",
"tor: Except when another vehicle \n",
"is in the lane on the side where the turn signal was operated)\n",
"• Vehicle is not being driven around \n",
"a sharp curve.\n",
"• No system malfunctions are \n",
"detected. ( P.204)\n",
"*1:The function oper ates even if the \n",
"vehicle speed is less than \n",
"approximately 32 mph (50 km/h) when the lane centering function is operating.\n",
"*2:Boundary between asphalt and \n",
"the side of the road, such as grass, soil, or a curb\n",
"●Steering assist function\n",
"This function operates when all of the following conditions are met in addition to the operation conditions for the lane departure alert function.\n",
"• Setting for “Steering Assist” in \n",
"of the multi-information display is \n",
"set to “ON”. ( P.548)\n",
"• Vehicle is not accelerated or \n",
"decelerated by a fixed amount or more.\n",
"• Steering wheel is not operated \n",
"with a steering force level suitable \n",
"for changing lanes.\n",
"• ABS, VSC, TRAC and PCS are \n",
"not operating.\n",
"• TRAC or VSC is not turned off.\n",
"• Hands off steering wheel warning \n",
"is not displayed. ( P.204)\n",
"●Vehicle sway warning function\n",
"This function operates when all of \n",
2024-08-07 17:50:40 +01:00
"https://www.MyCarManual.com [{'source': 'data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 201}]\n",
"* [SIM=0.184764] 199 4-5. Using the driving support systems\n",
"COROLLA_U4Drivingthat the vehicle might depart \n",
"from its lane or course*, the sys-\n",
"tem provides assistance as nec-\n",
"essary by operating the steering wheel in small amounts for a short period of time to keep the vehicle in its lane.\n",
"If the system detects that the steer-\n",
"ing wheel has not been operated \n",
"for a fixed amount of time or the steering wheel is not being firmly gripped, a warning is displayed on the multi-information display and the function is t emporarily can-\n",
"celed.\n",
"Vehicles with a Blind Spot Monitor: \n",
"When the system de termines that \n",
"the vehicle might depart from its lane and that the possibility of a col-lision with an over taking vehicle in \n",
"the adjacent lane is high, the steer-ing assist function will operate even if the turn signals are operating.\n",
"*: Boundary between asphalt and \n",
"the side of the road, such as grass, soil, or a curb■Vehicle sway warning func-\n",
"tion\n",
"When the vehicle is swaying within a lane, the warning buzzer will sound and a mes-sage will be displayed on the multi-information display to alert the driver.\n",
"https://www.MyCarManual.com [{'source': 'data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 198}]\n",
"* [SIM=0.188099] 200 4-5. Using the driving support systems\n",
"COROLLA_U■Lane centering function\n",
"This function is linked with \n",
"dynamic radar cruise control with full-speed range and pro-vides the required assistance by operating the steering wheel to keep the vehicle in its current lane.\n",
"When dynamic radar cruise control \n",
"with full-speed range is not operat-\n",
"ing, the lane centering function does not operate.\n",
"In situations where the white (yel-\n",
"low) lane lines are difficult to see or are not visible, such as when in a traffic jam, this function will operate to help follow a preceding vehicle by monitoring the position of the preceding vehicle.\n",
"If the system detects that the steer-\n",
"ing wheel has not been operated for a fixed amount of time or the steering wheel is not being firmly gripped, a warning is displayed on the multi-information display and the function is temporarily can-celed.\n",
"Press the LTA switch to turn the \n",
"LTA system on.\n",
"The LTA indicator illuminates and a \n",
"message is displayed on the multi-information display.\n",
"Press the LTA swit ch again to turn \n",
"the LTA system off.\n",
"When the LTA system is turned on \n",
"or off, operation of the LTA system continues in the same condition the next time the engine is started.Turning LTA system on\n",
"https://www.MyCarManual.com [{'source': 'data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 199}]\n",
"* [SIM=0.203011] 211 4-5. Using the driving support systems\n",
"COROLLA_U4Driving*: Boundary between asphalt and \n",
"the side of the road, such as \n",
"grass, soil, or a curb\n",
"■Operation conditions of each \n",
"function\n",
"●Lane departure alert function\n",
"This function oper ates when all of \n",
"the following cond itions are met.\n",
"• LDA is turned on.• Vehicle speed is approximately 32 \n",
"mph (50 km/h) or more.\n",
"• System recognizes white (yellow) \n",
"lane lines or a course*. (When a \n",
"white [yellow] line or course* is \n",
"recognized on only one side, the system will operat e only for the \n",
"recognized side.)\n",
"• Width of traffic lane is approxi-\n",
"mately 9.8 ft. ( 3 m) or more.\n",
"• Turn signal lever is not operated.\n",
"(Vehicles with a Blind Spot Moni-tor: Except when a nother vehicle \n",
"is in the lane on the side where \n",
"the turn signal was operated)\n",
"• Vehicle is not being driven around \n",
"a sharp curve.\n",
"• No system malfunctions are \n",
"detected. ( P.212)\n",
"*: Boundary between asphalt and \n",
"the side of the road, such as \n",
"grass, soil, or a curb\n",
"●Steering assist function\n",
"This function oper ates when all of \n",
"the following conditions are met in addition to the operation conditions for the lane departure alert function.\n",
"• Setting for “Steering Assist” in \n",
"of the multi-information display is \n",
"set to “ON”. ( P.548)\n",
"• Vehicle is not accelerated or \n",
"decelerated by a fixed amount or more.\n",
"• Steering wheel is not operated \n",
"with a steering force level suitable for changing lanes.\n",
"• ABS, VSC, TRAC and PCS are \n",
"not operating.• TRAC or VSC is not turned off.• Hands off steering wheel warning \n",
"is not displayed. ( P.212)\n",
"●Vehicle sway warning function\n",
"This function operates when all of \n",
"the following conditions are met.\n",
"• Setting for “Sway Warning” in \n",
"of the multi-information display is \n",
"set to “ON”. ( P.548)\n",
"• Vehicle speed is approximately 32 \n",
"mph (50 km/h) or more.\n",
"• Width of traffic lane is approxi-\n",
"mately 9.8 ft. (3 m) or more.\n",
"• No system malfunctions are \n",
"detected. ( P.212)\n",
"■Temporary cancelation of func-\n",
"tions\n",
"When operation conditions are no longer met, a func tion may be tem-\n",
"porarily canceled. However, when \n",
"the operation conditions are met \n",
"again, operation of the function is \n",
"automatically restored. ( P.211)\n",
"■Steering assist function\n",
"●Depending on the vehicle speed, lane departure situation, road con-ditions, etc., the driver may not \n",
"feel the function is operating or \n",
"the function may not operate at \n",
"all.\n",
"●The steering control of the func-tion is overridden by the driver’ s \n",
"steering wheel operation.\n",
"●Do not attempt to test the opera-tion of the steering assist function.\n",
"■Lane departure alert function\n",
"●The warning buzzer may be diffi-cult to hear due to external noise, \n",
"audio playback, etc.\n",
"●If the edge of the course* is not \n",
"clear or straight, the lane depar-\n",
"ture alert function may not oper-\n",
"ate.\n",
"●Vehicles with a Blind Spot Monitor: It may not be possible for the sys-tem to determine if there is a dan-\n",
"ger of a collision with a vehicle in \n",
"an adjacent lane.\n",
"https://www.MyCarManual.com [{'source': 'data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 210}]\n"
2024-08-05 21:08:29 +01:00
]
}
],
"source": [
2024-08-06 22:37:11 +01:00
"# Example of searching\n",
"query = \"Steering assist function/lane centering function\"\n",
"results = vector_store.similarity_search_with_score(query, k=4)\n",
"# the store is built on an L2 index, so a smaller score means a closer match\n",
"for res, score in results:\n",
"    # '.3f' gives three decimals; the previous '3f' was only a minimum width\n",
"    print(f\"* [SIM={score:.3f}] {res.page_content} [{res.metadata}]\")"
2024-08-05 21:08:29 +01:00
]
},
2024-08-06 22:37:11 +01:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data Search"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"query = \"Steering assist function/lane centering function\"\n",
"# `load_db` was never defined; reload the saved index so this cell also\n",
"# works after Restart Kernel -> Run All\n",
"load_db = load_embedded_data(embeddings)\n",
"docs = load_db.similarity_search(query)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(docs[0].page_content)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
2024-08-05 21:08:29 +01:00
"source": [
"print(docs[0].metadata['page'])"
]
},
{
"cell_type": "code",
2024-08-06 22:37:11 +01:00
"execution_count": null,
2024-08-05 21:08:29 +01:00
"metadata": {},
"outputs": [],
"source": [
"def search(db, query, k=4):\n",
"    \"\"\"Run a similarity search and summarise the hits.\n",
"\n",
"    Args:\n",
"        db: Object exposing ``similarity_search(query, k)``.\n",
"        query: Text to search for.\n",
"        k: Number of hits requested.\n",
"\n",
"    Returns:\n",
"        ``(best_text, combined_text, pages)``: the content of the first hit,\n",
"        the content of all hits joined with newlines, and each hit's ``page``\n",
"        metadata (``None`` where a hit has none). An empty result gives\n",
"        ``(\"\", \"\", [])`` instead of raising IndexError.\n",
"    \"\"\"\n",
"    docs = db.similarity_search(query, k)\n",
"    if not docs:\n",
"        return \"\", \"\", []\n",
"    # named combined_text so the builtin all() is not shadowed\n",
"    combined_text = \"\".join(f\"{doc.page_content}\\n\" for doc in docs)\n",
"    pages = [doc.metadata.get('page') for doc in docs]\n",
"    return docs[0].page_content, combined_text, pages"
2024-08-05 21:08:29 +01:00
]
},
{
"cell_type": "code",
2024-08-06 22:37:11 +01:00
"execution_count": null,
2024-08-05 21:08:29 +01:00
"metadata": {},
2024-08-06 22:37:11 +01:00
"outputs": [],
2024-08-05 21:08:29 +01:00
"source": [
2024-08-05 22:14:19 +01:00
"# `db` was never defined in this notebook; query the store built earlier.\n",
"# The combined text is bound to `all_text` so the builtin `all` stays usable.\n",
"search_result, all_text, pages = search(vector_store, \"What is LDA\")\n",
"print( search_result )"
]
},
2024-08-05 22:14:19 +01:00
{
"cell_type": "code",
2024-08-06 22:37:11 +01:00
"execution_count": null,
2024-08-05 22:14:19 +01:00
"metadata": {},
2024-08-06 22:37:11 +01:00
"outputs": [],
2024-08-05 22:14:19 +01:00
"source": [
"pages"
]
},
2024-08-06 22:37:11 +01:00
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
2024-08-05 21:08:29 +01:00
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "ai_index",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}