Files
ds_fire_fighter/doc-experiment.ipynb
T
2024-08-06 22:37:11 +01:00

767 lines
38 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"# Docx2txtLoader depends on the docx2txt package; the PyPI package named docx is unrelated.\n",
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"%pip install -q docx2txt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Library imports"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"from uuid import uuid4\n",
"\n",
"import faiss\n",
"import numpy as np\n",
"import torch\n",
"from langchain_community.docstore.in_memory import InMemoryDocstore\n",
"from langchain_community.document_loaders import Docx2txtLoader\n",
"from langchain_community.document_loaders import PyPDFLoader\n",
"from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.document_loaders import UnstructuredWordDocumentLoader\n",
"from langchain_community.embeddings import HuggingFaceBgeEmbeddings\n",
"from langchain_community.vectorstores import FAISS\n",
"from langchain_core.documents import Document\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Loading the embeddings model"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\timmy_3aupohg\\anaconda3\\envs\\smog_env\\Lib\\site-packages\\sentence_transformers\\cross_encoder\\CrossEncoder.py:11: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n",
" from tqdm.autonotebook import tqdm, trange\n"
]
}
],
"source": [
"# Initialize embedding\n",
"model_name = \"BAAI/bge-small-en\"\n",
"# Use the GPU when one is visible and fall back to CPU otherwise, so the\n",
"# notebook still runs on machines without CUDA.\n",
"model_kwargs = {\"device\": \"cuda\" if torch.cuda.is_available() else \"cpu\"}\n",
"encode_kwargs = {\"normalize_embeddings\": True}\n",
"embeddings = HuggingFaceBgeEmbeddings(\n",
"    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Document loading (PDF, TXT, DOCX)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"# creating a function that checks the document type and loads the document\n",
"def load_pdf_document(document_path):\n",
"    \"\"\"Load a PDF through PyPDFLoader and return the result of load_and_split().\n",
"\n",
"    Raises ValueError when `document_path` does not end with \".pdf\".\n",
"    \"\"\"\n",
"    # Guard clause: reject anything that is not a .pdf path before touching the loader\n",
"    if not document_path.endswith(\".pdf\"):\n",
"        raise ValueError(f\"Unsupported document type for {document_path}\")\n",
"    return PyPDFLoader(document_path).load_and_split()\n",
"\n",
"\n",
"def load_txt_document(document_path):\n",
"    \"\"\"Read a plain-text file through TextLoader and return the result of load().\n",
"\n",
"    Raises ValueError when `document_path` does not end with \".txt\".\n",
"    \"\"\"\n",
"    is_txt = document_path.endswith(\".txt\")\n",
"    if not is_txt:\n",
"        raise ValueError(f\"Unsupported document type for {document_path}\")\n",
"    return TextLoader(document_path).load()\n",
" \n",
" \n",
"def load_docx_document(document_path):\n",
"    \"\"\"Read a Word file through Docx2txtLoader and return the result of load().\n",
"\n",
"    Raises ValueError when `document_path` does not end with \".docx\".\n",
"    \"\"\"\n",
"    if not document_path.endswith(\".docx\"):\n",
"        # Same message as the other loaders so callers can handle them uniformly\n",
"        raise ValueError(f\"Unsupported document type for {document_path}\")\n",
"    loader = Docx2txtLoader(document_path)\n",
"    return loader.load()\n",
"\n",
"\n",
"def save_embedded_data(embeddings, key=\"data\"):\n",
"    \"\"\"Save a vector store under vec-db/index/faiss_index_<key>.\n",
"\n",
"    Despite its name, `embeddings` must be the object exposing `save_local`\n",
"    (this notebook passes the FAISS vector store), not the embedding model.\n",
"    \"\"\"\n",
"    target_dir = f\"vec-db/index/faiss_index_{key}\"\n",
"    embeddings.save_local(target_dir)\n",
"    print(\"Embeddings saved\")\n",
"\n",
"def load_embedded_data(embeddings, key=\"data\"):\n",
"    \"\"\"Load the FAISS store saved under vec-db/index/faiss_index_<key>.\n",
"\n",
"    `embeddings` is the embedding model handed to FAISS.load_local.\n",
"    \"\"\"\n",
"    # NOTE(review): allow_dangerous_deserialization=True opts in to loading\n",
"    # serialized (pickle-style) data; only load index folders written by this notebook.\n",
"    index_dir = f\"vec-db/index/faiss_index_{key}\"\n",
"    return FAISS.load_local(\n",
"        index_dir, embeddings, allow_dangerous_deserialization=True\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"# Load the document \n",
"document_path = \"data/corolla-2020-toyota-owners-manual.pdf\"\n",
"pdf_pages = load_pdf_document(document_path)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"document_path_ = \"data/Car-Repair-Receipt-service.pdf\"\n",
"pdf_pages_ = load_pdf_document(document_path_)"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'source': 'data/Car-Repair-Receipt-service.pdf', 'page': 0}, page_content='CAR REPAIR RECEIPT \\nCompany Name: abc repair Date: August 1, 2024 \\nStreet Address: washington Receipt #: _______________ \\nCity, State, Zip: __________________________ \\nPhone: 11221122 \\nFax: __________________________ \\nEmail: __________________________ \\nWebsite: __________________________ \\n \\nCustomer Information \\nName: ________________________ City : __________________________ \\nPhone: ________________________ Year, Make, Model: __________________________ \\n \\nServices Rendered Price Parts Qty./Price Total \\ngeneral service 2000 1 \\noil change 500 1 \\nbrake pads 1000 2 \\ncatalytic 1000 1 \\n \\n \\n \\nAmount Paid: ____________ \\n Subtotal \\nTotal 5500 \\nName(s) of Service Person(s): ___________________________________________________ \\n____________________________________________________________________________ \\nPrinted Name: __________________________'),\n",
" Document(metadata={'source': 'data/Car-Repair-Receipt-service.pdf', 'page': 1}, page_content='Page 1 of 1')]"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pdf_pages_"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Document(metadata={'source': 'data/How to change engine oil and filter on TOYOTA Corolla.txt'}, page_content=\"How to change engine oil\\nand filter on TOYOTA\\nCorolla Verso (AR10) \\nreplacement guide\\nSIMILAR VIDEO TUTORIAL\\nThis video shows the replacement procedure of a similar car part on another vehicle\\nCLUB.AUTODOC.CO.UK 110\\nThis replacement procedure can be used for:\\nTOYOTA Corolla Verso (AR10) 1.6 (ZNR10_), TOYOTA Corolla Verso (AR10) 1.8\\n(ZNR11_), TOYOTA Corolla Verso (AR10) 1.5, TOYOTA Corolla Verso (AR10) 1.8\\nThe steps may slightly vary depending on the car design.\\nImportant!\\nThis tutorial was created based on the replacement procedure for a similar car part\\non: TOYOTA Aygo (AB10) 1\\nBuy tools\\nCLUB.AUTODOC.CO.UK 210\\nREPLACEMENT: ENGINE OIL AND FILTER TOYOTA\\nCOROLLA VERSO (AR10). LIST OF THE TOOLS YOU'LL\\nNEED:\\nAll-purpose cleaning spray\\nOil filter cup socket 76/14-F\\nImpact socket №15\\nRatchet wrench\\nTorque wrench\\nOil drain pan\\nFunnel\\nFender cover\\nCARRY OUT REPLACEMENT IN THE FOLLOWING ORDER:\\n1\\nOpen the hood.\\n2\\nUse a fender protection cover to prevent damaging paintwork and plastic parts of the car.\\n3\\nUnscrew the oil filler plug.\\n4\\nLift the car using a jack or place it over an inspection pit.\\nCLUB.AUTODOC.CO.UK 310\\nReplace the oil filter every time when changing motor oil.\\nPlease note: all work on the car TOYOTA Corolla Verso (AR10) should be\\ndone with the engine switched off.\\nWear gloves to prevent contact with hot liquid.\\nThe vehicle must be on even keel, and if it is at an angle, the drain plug must be\\nat the lowest point.\\nAll work should be done with the engine stopped.\\nAUTODOC recommends:\\nReplacement: engine oil and filter TOYOTA Corolla Verso (AR10). Tip from\\nAUTODOC experts:\\n5\\nPut a waste oil container with at least 4 l capacity under the drainage hole.\\n6\\nUnscrew the drain plug. Use a drive socket #15. 
Use a ratchet wrench.\\n7\\nDrain the used oil.\\nCLUB.AUTODOC.CO.UK 410\\nCaution! The oil may be hot.\\nWait until the oil has completely drained from the drainage hole.\\nReplacement: engine oil and filter TOYOTA Corolla Verso (AR10). Tip from\\nAUTODOC:\\n8 Move the container under the oil filter.\\n9\\nUnscrew the oil filter. Use the 76/14-F socket.\\n10\\nPrepare a container for the old oil filter. Put the old oil filter in the container.\\n11\\nClean the oil filter seat.\\nCLUB.AUTODOC.CO.UK 510\\nMove up the container. The oil starts to drain out of the drainage hole\\nimmediately after you have removed the oil filter.\\nReplacement: engine oil and filter TOYOTA Corolla Verso (AR10). Tip:\\n12\\nLubricate the rubber seal of the new filter with a little amount of fresh oil.\\n13\\nScrew the new filter into its place. Use the 76/14-F socket. Use a torque wrench. Tighten it to 23 nm torque.\\n14\\nClean the area of the oil pan drainage hole.\\n15\\nScrew in the new drainage plug and tighten it. Use a drive socket #15. Use a torque wrench. Tighten it to 27 nm\\ntorque.\\nCLUB.AUTODOC.CO.UK 610\\n16\\nClean the area of the oil pan drainage hole. Use all-purpose cleaning spray.\\n17\\nLower the car.\\n18\\nRemove the oil filler cap. Insert the funnel.\\n19\\nPour oil into the engine through the filler. Required amount of oil: 3.6 L.\\nCLUB.AUTODOC.CO.UK 710\\nUse the oil recommended by the manufacturer.\\nReplacement: engine oil and filter TOYOTA Corolla Verso (AR10). Professionals recommend:\\n20\\nCheck the oil level using the dipstick. Top up oil if necessary.\\n21\\nTighten the oil filler cap.\\n22\\nRun the engine for a few minutes. 
After the oil pressure warning light goes out, stop the engine.\\n23\\nCheck the oil level 5 minutes after the stop of the engine operation using a dipstick and add oil if necessary.\\nCLUB.AUTODOC.CO.UK 810\\nThe oil level should be between the Min and Max marks.\\nPlease note!\\n24\\nRemove the fender protection cover.\\n25\\nClose the hood.\\nVIEW MORE TUTORIALS\\nCLUB.AUTODOC.CO.UK 910\\nThe oil level should be between the Min and Max marks.\\nMake sure that the oil filler cap is properly tightened.\\nCheck the threaded plug of the drainage hole and the oil filter seal for tightness.\\nIf necessary, carefully tighten them up.\\nNote that oil pressure warning light, which indicates insufficient pressure in the\\nlubrication system, may be on for a longer time than usual. After a few seconds,\\nthe oil will fill all the channels, and the light will go out.\\nTo protect the environment from pollution, be sure to dispose of the used filters\\nat special collection points.\\nWELL DONE!\\nReplacement: engine oil and filter TOYOTA Corolla Verso (AR10). AUTODOC\\nrecommends:\\nReplacement: engine oil and filter TOYOTA Corolla Verso (AR10). AUTODOC\\nexperts recommend:\\nAUTODOC — TOP QUALITY AND AFFORDABLE CAR\\nPARTS ONLINE\\nA GREAT SELECTION OF SPARE PARTS FOR YOUR CAR\\nOIL FILTER: A WIDE SELECTION\\nThe document contains only general recommendations that may be useful for you when you perform repair or replacement work. AUTODOC shall not be liable\\nfor any loss, injury, damage of property occurring in the repair or replacement process due to incorrect use or misinterpretation of the provided information.\\nAUTODOC shall not be liable for any possible mistakes and uncertainties in this guide. The information provided is for information purposes only and cannot\\nreplace advice from specialists.\\nAUTODOC shall not be liable for incorrect or hazardous usage of equipment, tools and car parts. 
AUTODOC strongly recommends to be careful and observe\\nthe safety rules when performing repair or replacement works. Remember: usage of low quality auto parts does not guarantee you the appropriate level of\\nroad safety.\\n© Copyright 2022 All the contents of this website, in particular texts, photographs and graphics, are protected by copyright. All rights, including\\nreproduction, publication, editing and translation rights, are reserved by AUTODOC GmbH.\\nCLUB.AUTODOC.CO.UK 1010\\nDISCLAIMER:\\nAUTODOC MOBILE APP: GREAT DEALS AND CONVENIENT\\nSHOPPING\")]\n"
]
}
],
"source": [
"# example with loading a txt document\n",
"document_path_1 = \"data/How to change engine oil and filter on TOYOTA Corolla.txt\"\n",
"txt_pages_1 = load_txt_document(document_path_1)\n",
"print(txt_pages_1)"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Document(metadata={'source': 'data/How to change rear windshield wipers on TOYOTA Corolla.docx'}, page_content=\"How to change rear\\n\\nwindshield wipers on\\n\\nTOYOTA Corolla VI\\n\\nLiftback (E90)\\n\\n \\n\\nreplacement guide\\n\\nHow to change rear\\n\\nwindshield wipers on\\n\\nTOYOTA Corolla VI\\n\\nLiftback (E90)\\n\\n \\n\\nreplacement guide\\n\\n\\n\\n\\nSIMILAR VIDEO TUTORIAL\\n\\nThis video shows the replacement procedure of a similar car part on another vehicle\\n\\nThis replacement procedure can be used for:\\n\\nTOYOTA Corolla VI Liftback (E90) 1.6, TOYOTA Corolla VI Liftback (E90) 1.8 D,\\n\\nTOYOTA Corolla VI Liftback (E90) 1.3 i, TOYOTA Corolla VI Liftback (E90) 1.6 GTI\\n\\n(\\n\\nAE92_), TOYOTA Corolla VI Liftback (E90) 1.6 4WD, TOYOTA Corolla VI Liftback\\n\\n(\\n\\nE90) 1.3, TOYOTA Corolla VI Liftback (E90) 1.4, TOYOTA Corolla VI Liftback \\n\\n(E90) 1.6\\n\\ni, TOYOTA Corolla VI Liftback (E90) 1.6 EFi\\n\\nThe steps may slightly vary depending on the car design.\\n\\nImportant!\\n\\nSIMILAR VIDEO TUTORIAL\\n\\nThis video shows the replacement procedure of a similar car part on another vehicle\\n\\nThis replacement procedure can be used for:\\n\\nTOYOTA Corolla VI Liftback (E90) 1.6, TOYOTA Corolla VI Liftback (E90) 1.8 D,\\n\\nTOYOTA Corolla VI Liftback (E90) 1.3 i, TOYOTA Corolla VI Liftback (E90) 1.6 GTI\\n\\n(\\n\\nAE92_), TOYOTA Corolla VI Liftback (E90) 1.6 4WD, TOYOTA Corolla VI Liftback\\n\\n(\\n\\nE90) 1.3, TOYOTA Corolla VI Liftback (E90) 1.4, TOYOTA Corolla VI Liftback \\n\\n(E90) 1.6\\n\\ni, TOYOTA Corolla VI Liftback (E90) 1.6 EFi\\n\\nThe steps may slightly vary depending on the car design.\\n\\nImportant!\\n\\nThis tutorial was created based on the replacement procedure for a similar car part on: TOYOTA Land Cruiser Prado 120 (J120) 3.0 D-4D\\n\\n\\n\\nBuy tool\\n\\ns\\n\\nREPLACEMENT: WINDSHIELD WIPERS TOYOTA\\n\\nCOROLLA VI LIFTBACK (E90). 
TOOLS YOU MIGHT NEED:\\n\\nFlat Screwdriver\\n\\nBuy tool\\n\\ns\\n\\nREPLACEMENT: WINDSHIELD WIPERS TOYOTA\\n\\nCOROLLA VI LIFTBACK (E90). TOOLS YOU MIGHT NEED:\\n\\nFlat Screwdriver\\n\\n\\n\\nPlease note: all work on the car TOYOTA Corolla VI Liftback (E90) should be\\n\\ndone with the engine switched off.\\n\\nAUTODOC recommends:\\n\\nPlease note: all work on the car TOYOTA Corolla VI Liftback (E90) should be\\n\\ndone with the engine switched off.\\n\\nAUTODOC recommends:\\n\\nCARRY OUT REPLACEMENT IN THE FOLLOWING ORDER:\\n\\n\\n\\n1\\n\\nPrepare the new windscreen wipers.\\n\\n2\\n\\nPull the wiper arm away from the glass surface until it stops.\\n\\n3\\n\\nPress the clip. Use a flat screwdriver.\\n\\n1\\n\\nPrepare the new windscreen wipers.\\n\\n2\\n\\nPull the wiper arm away from the glass surface until it stops.\\n\\n3\\n\\nPress the clip. Use a flat screwdriver.\\n\\n\\n\\n4\\n\\nRemove the blade from the wiper arm.\\n\\n5\\n\\nInstall the new wiper blade and carefully press the wiper arm down to the glass.\\n\\nWhen replacing the wiper blade, take caution to prevent the spring-loaded wiper\\n\\narm from hitting the glass.\\n\\nDon't touch the wiper blade at the working rubber edge to prevent damage to the\\n\\ngraphite coating. \\n\\nEnsure that the blade rubber strip fits tightly to the glass along the entire length.\\n\\nReplacement: windshield wipers TOYOTA Corolla VI Liftback (E90). Tip from\\n\\nAUTODOC experts:\\n\\nReplacement: windshield wipers TOYOTA Corolla VI Liftback (E90).\\n\\nAUTODOC recommends:\\n\\n4\\n\\nRemove the blade from the wiper arm.\\n\\n5\\n\\nInstall the new wiper blade and carefully press the wiper arm down to the glass.\\n\\nWhen replacing the wiper blade, take caution to prevent the spring-loaded wiper\\n\\narm from hitting the glass.\\n\\nDon't touch the wiper blade at the working rubber edge to prevent damage to the\\n\\ngraphite coating. 
\\n\\nEnsure that the blade rubber strip fits tightly to the glass along the entire length.\\n\\nReplacement: windshield wipers TOYOTA Corolla VI Liftback (E90). Tip from\\n\\nAUTODOC experts:\\n\\nReplacement: windshield wipers TOYOTA Corolla VI Liftback (E90).\\n\\nAUTODOC recommends:\\n\\n\\n\\n6\\n\\n7\\n\\n6\\n\\n7Switch on the ignition.\\n\\nAfter installation check the wipers performance.\\n\\n\\n\\nVIEW MORE TUTORIAL\\n\\nS\\n\\nWELL DONE!\\n\\nVIEW MORE TUTORIAL\\n\\nS\\n\\nWELL DONE!\\n\\n\\nAUTODOC — TOP QUALITY AND AFFORDABLE CAR\\n\\nPARTS ONLINE\\n\\n\\n\\nAUTODOC MOBILE APP: GREAT DEALS AND CONVENIEN\\n\\nT\\n\\nSHOPPIN\\n\\nG\\n\\nAUTODOC MOBILE APP: GREAT DEALS AND CONVENIEN\\n\\nT\\n\\nSHOPPIN\\n\\nG\\n\\n\\n\\nA GREAT SELECTION OF SPARE PARTS FOR YOUR CA\\n\\nR\\n\\nWINDSHIELD WIPERS: A WIDE SELECTIO\\n\\nN\\n\\nA GREAT SELECTION OF SPARE PARTS FOR YOUR CA\\n\\nR\\n\\nWINDSHIELD WIPERS: A WIDE SELECTIO\\n\\nN\\n\\n DISCLAIMER:\\n\\nThe document contains only general recommendations that may be useful for you when you perform repair or replacement work. AUTODOC shall not be liable for any loss, injury, damage of property occurring in the repair or replacement process due to incorrect use or misinterpretation of the provided information.\\n\\nAUTODOC shall not be liable for any possible mistakes and uncertainties in this guide. The information provided is for information purposes only and cannot replace advice from specialists.\\n\\nAUTODOC shall not be liable for incorrect or hazardous usage of equipment, tools and car parts. AUTODOC strongly recommends to be careful and observe the safety rules when performing repair or replacement works. Remember: usage of low quality auto parts does not guarantee you the appropriate level of road safety.\\n\\n© Copyright 2021 All the contents of this website, in particular texts, photographs and graphics, are protected by copyright. 
All rights, including reproduction, publication, editing and translation rights, are reserved by AUTODOC GmbH.\\n\\n\\t\\t\\tCLUB.AUTODOC.CO.UK\\t26\\n\\n\\n\\n\\t\\t\\tCLUB.AUTODOC.CO.UK\\t26\")]\n"
]
}
],
"source": [
"# example with loading a docx document\n",
"document_path_2 = \"data/How to change rear windshield wipers on TOYOTA Corolla.docx\"\n",
"docx_pages_2 = load_docx_document(document_path_2)\n",
"print(docx_pages_2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A function to create vector store\n",
"def create_vector_store(embeddings, documents: list, docs_id: list):\n",
"    \"\"\"Build a FAISS vector store from loaded documents and save it to disk.\n",
"\n",
"    Args:\n",
"        embeddings: Embedding model handed to FAISS as its embedding function.\n",
"        documents: One entry per source file; each entry is the list of page\n",
"            Documents returned by one of the loaders.\n",
"        docs_id: One entry per source file; each entry is the list of string\n",
"            ids for that file's pages. NOTE(review): no call site is visible,\n",
"            so this per-file layout is an assumption to confirm.\n",
"\n",
"    Returns:\n",
"        The populated FAISS vector store, already saved under key \"data\".\n",
"\n",
"    Raises:\n",
"        ValueError: If there is nothing to index or ids and pages do not line up.\n",
"    \"\"\"\n",
"    if not documents:\n",
"        raise ValueError(\"documents must contain at least one loaded document\")\n",
"    if len(documents) != len(docs_id):\n",
"        raise ValueError(\"docs_id must contain one list of ids per document\")\n",
"\n",
"    # Flatten every file's pages (not only the first file's) into one batch,\n",
"    # pairing each page with its caller-supplied id.\n",
"    pages, page_ids = [], []\n",
"    for doc_pages, doc_page_ids in zip(documents, docs_id):\n",
"        if len(doc_pages) != len(doc_page_ids):\n",
"            raise ValueError(\"each document needs exactly one id per page\")\n",
"        pages.extend(doc_pages)\n",
"        page_ids.extend(doc_page_ids)\n",
"    if not pages:\n",
"        raise ValueError(\"documents contain no pages to index\")\n",
"\n",
"    # Size the index from a probe embedding so it matches the model's dimension\n",
"    index = faiss.IndexFlatL2(len(embeddings.embed_query(\"dimension probe\")))\n",
"    # Initialize the FAISS vector store\n",
"    vector_store = FAISS(\n",
"        embedding_function=embeddings,\n",
"        index=index,\n",
"        docstore=InMemoryDocstore(),\n",
"        index_to_docstore_id={},\n",
"    )\n",
"    # add_documents embeds the pages and keeps the index, the id mapping and the\n",
"    # docstore in step. It replaces manual bookkeeping that read the global\n",
"    # `pdf_pages` and passed an unsupported `ids` argument to the docstore.\n",
"    vector_store.add_documents(documents=pages, ids=page_ids)\n",
"\n",
"    # saving the vector store automatically\n",
"    save_embedded_data(vector_store, key=\"data\")\n",
"\n",
"    return vector_store"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# creating a function to add documents to the vector store\n",
"def add_documents_to_vector_store(embeddings, documents: list, docs_id: list):\n",
"    \"\"\"Append documents to the saved vector store and persist the result.\n",
"\n",
"    Args:\n",
"        embeddings: Embedding model used to reload the saved store.\n",
"        documents: Documents passed straight to `add_documents`.\n",
"        docs_id: Ids passed straight to `add_documents` as `ids`.\n",
"\n",
"    Returns:\n",
"        The updated vector store.\n",
"    \"\"\"\n",
"    # loading the vector store\n",
"    vector_store = load_embedded_data(embeddings)\n",
"    # adding the documents to the vector store\n",
"    vector_store.add_documents(documents=documents, ids=docs_id)\n",
"    # Write the grown index back to disk; otherwise the additions only live in\n",
"    # this local object and disappear when the function returns.\n",
"    save_embedded_data(vector_store)\n",
"\n",
"    print(\"Documents added to the vector store\")\n",
"    return vector_store"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\timmy_3aupohg\\anaconda3\\envs\\smog_env\\Lib\\site-packages\\transformers\\models\\bert\\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\\cb\\pytorch_1000000000000\\work\\aten\\src\\ATen\\native\\transformers\\cuda\\sdp_utils.cpp:555.)\n",
" attn_output = torch.nn.functional.scaled_dot_product_attention(\n"
]
}
],
"source": [
"# Embed the pages as passages in one batched call. embed_query is meant for\n",
"# search queries and prepends the BGE query instruction to its input.\n",
"page_embeddings = embeddings.embed_documents([page.page_content for page in pdf_pages])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"588"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(page_embeddings)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"384"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(page_embeddings[3])"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Initialize FAISS index with the dimension of the embeddings\n",
"index = faiss.IndexFlatL2(len(page_embeddings[0]))"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# Initialize the FAISS vector store\n",
"vector_store = FAISS(\n",
" embedding_function=embeddings,\n",
" index=index,\n",
" docstore=InMemoryDocstore(),\n",
" index_to_docstore_id={},\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# Prepare documents to be added\n",
"docs_to_add = {}\n",
"for position, (vector, pdf_page) in enumerate(zip(page_embeddings, pdf_pages)):\n",
"    doc_key = str(position)  # Use string ID\n",
"    # FAISS expects a 2D float32 array, so present the single vector as one row\n",
"    index.add(np.asarray(vector, dtype=np.float32).reshape(1, -1))\n",
"    vector_store.index_to_docstore_id[position] = doc_key\n",
"    docs_to_add[doc_key] = Document(\n",
"        page_content=pdf_page.page_content, metadata=pdf_page.metadata\n",
"    )\n",
"\n",
"# Add documents to the InMemoryDocstore\n",
"vector_store.docstore.add(docs_to_add)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<langchain_community.vectorstores.faiss.FAISS at 0x28d188a3710>"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vector_store"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* [SIM=0.181231] 206 4-5. Using the driving support systems\n",
"COROLLA_UWARNING\n",
"■Before using LDA system\n",
"●Do not rely solely upon the LDA \n",
"system. The LDA system does \n",
"not automatically drive the vehi-cle or reduce the amount of \n",
"attention that must be paid to \n",
"the area in front of the vehicle. The driver must always assume \n",
"full responsibilit y for driving \n",
"safely by paying careful atten-\n",
"tion to the surrounding condi-tions and operating the steering \n",
"wheel to correct the path of the \n",
"vehicle. Also, the driver must take adequate breaks when \n",
"fatigued, such as from driving \n",
"for a long period of time.\n",
"●Failure to perform appropriate \n",
"driving operations and pay care-\n",
"ful attention may lead to an \n",
"accident, resulting in death or serious injury.\n",
"●When not using the LDA sys-\n",
"tem, use the LDA switch to turn \n",
"the system off.\n",
"■Situations unsuitable for LDA system\n",
"In the following situations, use the LDA switch to turn the system off. \n",
"Failure to do so may lead to an \n",
"accident, resulting in death or serious injury.\n",
"●Vehicle is driven on a road sur-\n",
"face which is slippery due to \n",
"rainy weather, fallen snow, freezing, etc.\n",
"●Vehicle is driven on a snow-cov-\n",
"ered road.\n",
"●White (yellow) lin es are difficult \n",
"to see due to rain, snow, fog, \n",
"dust, etc.\n",
"●A spare tire, tire chains, etc. are \n",
"equipped.●When the tires have been excessively worn, or when the \n",
"tire inflation p ressure is low.\n",
"●When tires of a size other than specified are installed.\n",
"●Vehicle is driven in traffic lanes \n",
"other than that highways and \n",
"freeways.\n",
"●During emergency towing.\n",
"■Preventing LDA system mal-functions and operations per-\n",
"formed by mistake\n",
"●Do not modify the headlights or place stickers, etc. on the sur-\n",
"face of the lights.\n",
"●Do not modify the suspension etc. If the suspension etc. needs \n",
"to be replaced, contact your \n",
"Toyota dealer.\n",
"●Do not install or place anything on the hoo d or grille. Also, do \n",
"not install a gr ille guard (bull \n",
"bars, kangaroo bar, etc.).\n",
"●If your windshield needs repairs, contact your Toyota \n",
"dealer.\n",
"■Conditions in which functions \n",
"may not operate properly\n",
"In the following situations, the \n",
"functions may not operate prop-erly and the vehicle may depart \n",
"from its lane. Drive safely by \n",
"always paying careful attention to your surroundings and operate \n",
"the steering whee l to correct the \n",
"path of the vehicle without relying \n",
"solely on the functions.\n",
"●Vehicle is being driven around a sharp curve.\n",
"https://www.MyCarManual.com [{'source': 'data/corolla-2020-toyota-owners-manual.pdf', 'page': 205}]\n",
"* [SIM=0.197417] 205 4-5. Using the driving support systems\n",
"COROLLA_U4Driving●“LTA Unavailable at Current \n",
"Speed”\n",
"The function cannot be used as the vehicle speed exceeds the LTA \n",
"operation range. Drive slower.\n",
"■If a battery terminal has been \n",
"disconnected and reconnected\n",
"The system needs to be initialized. \n",
"To initialize the system, drive the vehicle straight ahead for 5 seconds \n",
"or more at a speed of approximately \n",
"22 mph (35 km/h) or more.\n",
"■Customization\n",
"Function settings can be changed. (P.548)*: If equippedLDA (Lane Departure \n",
"Alert with steering control)\n",
"*\n",
"When driving on highways \n",
"and freeways with white (yellow) lane lines, this function alerts the driver when the vehicle might depart from its lane or \n",
"course\n",
"* and provides assis-\n",
"tance by operating the \n",
"steering wheel to keep the vehicle in its lane or \n",
"course\n",
"*.\n",
"The LDA system recog-\n",
"nizes white (yellow) lane \n",
"lines or a course* using the \n",
"front camera. Additionally, it \n",
"detects preceding vehicles using the front camera and radar.\n",
"*: Boundary between asphalt and \n",
"the side of the road, such as \n",
"grass, soil, or a curb\n",
"https://www.MyCarManual.com [{'source': 'data/corolla-2020-toyota-owners-manual.pdf', 'page': 204}]\n",
"* [SIM=0.200788] 202 4-5. Using the driving support systems\n",
"COROLLA_UInside of displayed lines is \n",
"black\n",
"Indicates that the system is not able to recognize white (yellow) \n",
"lines or a course\n",
"* or is temporar-\n",
"ily canceled.\n",
"*: Boundary between asphalt and \n",
"the side of the road, such as \n",
"grass, soil, or a curb\n",
"Follow-up cruising display\n",
"Displayed when the multi-informa-tion display is switched to the driv-ing support system information screen.\n",
"Indicates that steering assist of the \n",
"lane centering function is operating by monitoring the position of a pre-ceding vehicle.\n",
"When the follow-up cruising display \n",
"is displayed, if the preceding vehi-cle moves, your vehicle may move in the same way. A lways pay care-\n",
"ful attention to your surroundings and operate the steering wheel as necessary to correct the path of the vehicle and ensure safety.\n",
"■Operation conditions of each \n",
"function\n",
"●Lane departure alert function\n",
"This function oper ates when all of \n",
"the following cond itions are met.\n",
"• LTA is turned on.• Vehicle speed is approximately 32 \n",
"mph (50 km/h) or more.*1\n",
"• System recognizes white (yellow) \n",
"lane lines or a course*2. (When a \n",
"white [yellow] line or course*2 is \n",
"recognized on only one side, the system will operate only for the \n",
"recognized side.)\n",
"• Width of traffic lane is approxi-\n",
"mately 9.8 ft. (3 m) or more.\n",
"• Turn signal lever is not operated.\n",
"(Vehicles with a Blind Spot Moni-\n",
"tor: Except when another vehicle \n",
"is in the lane on the side where the turn signal was operated)\n",
"• Vehicle is not being driven around \n",
"a sharp curve.\n",
"• No system malfunctions are \n",
"detected. ( P.204)\n",
"*1:The function oper ates even if the \n",
"vehicle speed is less than \n",
"approximately 32 mph (50 km/h) when the lane centering function is operating.\n",
"*2:Boundary between asphalt and \n",
"the side of the road, such as grass, soil, or a curb\n",
"●Steering assist function\n",
"This function operates when all of the following conditions are met in addition to the operation conditions for the lane departure alert function.\n",
"• Setting for “Steering Assist” in \n",
"of the multi-information display is \n",
"set to “ON”. ( P.548)\n",
"• Vehicle is not accelerated or \n",
"decelerated by a fixed amount or more.\n",
"• Steering wheel is not operated \n",
"with a steering force level suitable \n",
"for changing lanes.\n",
"• ABS, VSC, TRAC and PCS are \n",
"not operating.\n",
"• TRAC or VSC is not turned off.\n",
"• Hands off steering wheel warning \n",
"is not displayed. ( P.204)\n",
"●Vehicle sway warning function\n",
"This function operates when all of \n",
"https://www.MyCarManual.com [{'source': 'data/corolla-2020-toyota-owners-manual.pdf', 'page': 201}]\n",
"* [SIM=0.201506] 214 4-5. Using the driving support systems\n",
"COROLLA_USet speed\n",
"Indicators\n",
"■Operation switches\n",
"Vehicle-to-vehicle distance \n",
"switch\n",
"“+RES” switchCruise control main switchCancel switch“-SET” switch\n",
"WARNING\n",
"■Before using dynamic radar \n",
"cruise control with full-speed \n",
"range\n",
"●Driving safely is the sole responsibility of the driver. Do not rely solely o n the system, \n",
"and drive safely by always pay-ing careful attention to your sur-roundings.\n",
"●The dynamic radar cruise con-\n",
"trol with full-speed range pro-\n",
"vides driving assistance to reduce the drivers burden. \n",
"However, there are limitations to \n",
"the assistance provided.\n",
"Read the following conditions carefully. Do not overly rely on this system and always drive carefully.\n",
"• When the sensor may not be \n",
"correctly detect ing the vehicle \n",
"ahead: \n",
"P.222• Conditions under which the \n",
"vehicle-to-vehicle distance con-\n",
"trol mode may not function cor-rectly: P.222\n",
"●Set the speed appropriately \n",
"depending on the speed limit, \n",
"traffic flow, road conditions, weather conditions, etc. The \n",
"driver is responsible for check-\n",
"ing the set speed.\n",
"●Even when the system is func-tioning normally, the condition of \n",
"the preceding vehicle as \n",
"detected by the system may dif-fer from the condition observed \n",
"by the driver. Therefore, the \n",
"driver must always remain alert, assess the danger of each situ-\n",
"ation and drive safely. Relying \n",
"solely on this system or assum-ing the system ensures safety \n",
"while driving can lead to an \n",
"accident, result ing in death or \n",
"serious injury.\n",
"●Switch the dynamic radar cruise \n",
"control with full-speed range \n",
"setting to off, using the cruise control main switch when not in \n",
"use.\n",
"■Cautions regarding the driv-\n",
"ing assist systems\n",
"Observe the following precau-tions, as there ar e limitations to \n",
"the assistance provided by the \n",
"system. Failure to do so may \n",
"cause an accident resulting in \n",
"death or serious injury.\n",
"https://www.MyCarManual.com [{'source': 'data/corolla-2020-toyota-owners-manual.pdf', 'page': 213}]\n"
]
}
],
"source": [
"# Example of searching\n",
"query = \"Steering assist function/lane centering function\"\n",
"results = vector_store.similarity_search_with_score(\n",
" query, k=4\n",
")\n",
"for res, score in results:\n",
" print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Embeddings saved\n"
]
}
],
"source": [
"save_embedded_data(vector_store)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"load_db = load_embedded_data(embeddings)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data Search"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"query = \"Steering assist function/lane centering function\"\n",
"docs = load_db.similarity_search(query)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(docs[0].page_content)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(docs[0].metadata['page'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def search(db, query, k=4):\n",
"    \"\"\"Run a similarity search and summarise the hits.\n",
"\n",
"    Args:\n",
"        db: Object exposing `similarity_search(query, k)`.\n",
"        query: Text to search for.\n",
"        k: Number of hits to request.\n",
"\n",
"    Returns:\n",
"        A tuple `(top_content, combined_content, pages)`: the first hit's text,\n",
"        every hit's text concatenated with a newline after each, and each hit's\n",
"        `page` metadata (None when a hit has no page number, as with the\n",
"        .txt and .docx samples). Without hits the result is three empty values:\n",
"        two empty strings and an empty list.\n",
"    \"\"\"\n",
"    docs = db.similarity_search(query, k)\n",
"    if not docs:\n",
"        # Nothing matched: return empty values instead of failing on docs[0]\n",
"        return \"\", \"\", []\n",
"    combined = \"\".join(f\"{doc.page_content}\\n\" for doc in docs)\n",
"    # .get avoids a KeyError for sources whose metadata only carries 'source'\n",
"    pages = [doc.metadata.get(\"page\") for doc in docs]\n",
"    return docs[0].page_content, combined, pages"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# `db` is never defined in this notebook; the store reloaded from disk is `load_db`.\n",
"# The combined text is bound to `all_text` so the builtin `all` is not shadowed.\n",
"search_result, all_text, pages = search(load_db, \"What is LDA\")\n",
"print(search_result)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pages"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "ai_index",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}