diff --git a/data/documents.json b/data/documents.json new file mode 100644 index 00000000..f7608a2f --- /dev/null +++ b/data/documents.json @@ -0,0 +1 @@ +{"doc_names": ["Car-Repair-Receipt-repair", "Car-Repair-Receipt-service", "Car-Repair-Receipt-tire", "Car-Repair-Receipt-tuning", "Car-Repair-Receipt-wash", "corolla-2020-toyota-owners-manual", "How to change engine oil and filter on TOYOTA Corolla", "How to change front brake pads on TOYOTA Corolla", "How to change rear windshield wipers on TOYOTA Corolla", "How to change spark plugs on TOYOTA COROLLA"], "docs_id": ["320bc9710952499baa9553d3f0d4e727", "6ba07e1cf09a4ae6b54863040f901328", "dd067c452bd146e4becd61bde8602a3c", "640493ad16b546d38851216917d3e82b", "08cf1c3c8eab4efe9f81efcf8ce770be", "d8d6a3ca9a0a44e08cd4423ee3fb979d", "2b6e45cd99ff46b08242282a423642d4", "05524682d2e9425c83c9b57693182c50", "4eb170648fbe47c3b87b2831a97f0dd8", "cec3e82f0432402e940a0299bfa086fe"], "num_pages": [1, 2, 2, 2, 1, 588, 6, 7, 6, 10]} \ No newline at end of file diff --git a/data_ingestion/__pycache__/utils.cpython-311.pyc b/data_ingestion/__pycache__/utils.cpython-311.pyc index d392cb33..bba3716f 100644 Binary files a/data_ingestion/__pycache__/utils.cpython-311.pyc and b/data_ingestion/__pycache__/utils.cpython-311.pyc differ diff --git a/data_ingestion/utils.py b/data_ingestion/utils.py index 817e18fc..d55241f7 100644 --- a/data_ingestion/utils.py +++ b/data_ingestion/utils.py @@ -1,5 +1,15 @@ from langchain_community.embeddings import HuggingFaceBgeEmbeddings +from langchain_text_splitters import RecursiveCharacterTextSplitter +import faiss +from langchain_community.docstore.in_memory import InMemoryDocstore from langchain_community.vectorstores import FAISS +from langchain_community.document_loaders import PyPDFLoader +from langchain_community.document_loaders import TextLoader +from langchain_community.document_loaders import Docx2txtLoader +from uuid import uuid4 +from langchain_core.documents import Document +import os +import json # loading the embedding model @@ -16,21 +26,170 @@ def load_embedding_model(): embeddings = load_embedding_model() -# A function to create the vector store -def create_vector_store(document, embeddings=embeddings): - embed_db = FAISS.from_documents(document, embeddings) - return embed_db +def create_documents(doc): + text = doc[0].page_content + metadata = doc[0].metadata + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=1000, + chunk_overlap=10, + length_function=len, + is_separator_regex=False, + ) + docs = text_splitter.create_documents([text]) + # converting the text into documents + documents = [] + for i, chunk in enumerate(docs): + # Increment page number based on the chunk index + doc_metadata = metadata.copy() + doc_metadata['page'] = i # Assign page number based on chunk index + document = Document(page_content=chunk.page_content, metadata=doc_metadata) + documents.append(document) + return documents -# A function to save the embedded data -def save_embedded_data(docs, key="pdf"): - docs.save_local(f"vec-db/index/faiss_index_{key}") + +def load_txt_document(document_path): + try: + txt_doc = TextLoader(document_path) + text = txt_doc.load() + # implementig document splitting + docs = create_documents(text) + return docs + except: + raise ValueError(f"Error loading -- {document_path}") + + +def load_docx_document(document_path): + try: + docx_doc = Docx2txtLoader(document_path) + text = docx_doc.load() + # implementig document splitting + docs = create_documents(text) + return docs + except: + raise ValueError(f"Error loading -- {document_path}") + + +# creating a function that checks the document type and loads the document +def load_pdf_document(document_path): + try: + pdf_doc = PyPDFLoader(document_path) + pages = pdf_doc.load_and_split() + return pages + except: + raise ValueError(f"Error loading -- {document_path}") + + + + +# A general function that loads textual documents +def load_document(document_path): + if document_path.endswith(".pdf"): + return load_pdf_document(document_path) + elif document_path.endswith(".txt"): + return load_txt_document(document_path) + elif document_path.endswith(".docx"): + return load_docx_document(document_path) + else: + raise ValueError(f"Unsupported document type for {document_path}") + + + +def save_embedded_data(embeddings, key="data"): + embeddings.save_local(f"vec-db/index/faiss_index_{key}") print("Embeddings saved") -# A function to load the embedded data -def load_embedded_data(embeddings=embeddings, key="pdf"): +def load_embedded_data(embeddings=embeddings, key="data"): embed_db = FAISS.load_local(f"vec-db/index/faiss_index_{key}", embeddings, allow_dangerous_deserialization=True) return embed_db + +# creating a function to load all documents from a directory. +def load_documents_from_directory(directory_path: str): + text_doc = ['pdf', 'txt', 'docx', 'doc', 'md'] + image_doc = ['jpg', 'jpeg', 'png', 'gif', 'bmp'] + audio_doc = ['mp3', 'wav', 'flac', 'ogg', 'm4a'] + video_doc = ['mp4', 'avi', 'mkv', 'flv', 'mov'] + + # accessing the name of the files in the directory + files = os.listdir(directory_path) + # creating a list to store the documents + documents = [] + # another list for the document names + doc_names = [] + # counting the number of pages in the document + num_pages= [] + # iterating through the files in the directory + for file in files: + # updating the path + path = os.path.join(directory_path, file) + # getting the file extension and doc name + doc_name, extension = file.split('.')[0] , file.split('.')[-1] + # checking if the file is a text document + if extension in text_doc: + # loading the document + doc = load_document(path) + # appending the document to the documents list + documents.append(doc) + # appending the number of pages in the document + num_pages.append(len(doc)) + # adding the document name to the doc_names list + doc_names.append(doc_name) + print(f"Document {doc_name} loaded") + + # so we need to create a document id for each document + docs_id = [uuid4().hex for i in range(len(documents))] + # creating a json file to store the documents, checking if it exists then open it, else create it + json_file = f"{directory_path}/documents.json" + if os.path.exists(json_file): + with open(json_file, 'r') as f: + data = json.load(f) + data['doc_names'] = doc_names + data['docs_id'] = docs_id + data['num_pages'] = num_pages + with open(json_file, 'w') as f: + json.dump(data, f) + else: + data = {'doc_names': doc_names, 'docs_id': docs_id, 'num_pages': num_pages} + with open(json_file, 'w') as f: + json.dump(data, f) + + # returning the documents, and doc ids + return documents, docs_id, num_pages + + +# A function to create vector store +def create_vector_store(embeddings, documents: list, docs_id: list, num_pages: list): + # index set up with the embedding dimension + index = faiss.IndexFlatL2(384) + # Initialize the FAISS vector store + vector_store = FAISS( + embedding_function=embeddings, + index=index, + docstore=InMemoryDocstore(), + index_to_docstore_id={}, + ) + # Now adding other documents to the store. + for i in range(len(documents)): + doc_id = docs_id[i] + page_ids = [doc_id+ str(i) for i in range(num_pages[i])] + vector_store.add_documents(documents=documents[i], ids=page_ids) + + # saving the vector store automatically + save_embedded_data(vector_store, key="data") + + return vector_store + +# creating a function to add documents to the vector store +def add_documents_to_vector_store(embeddings, documents: list, docs_id: list, num_pages: list): + # loading the vector store + vector_store = load_embedded_data(embeddings) + for i in range(len(documents)): + doc_id = docs_id[i] + page_ids = [doc_id+ str(i) for i in range(num_pages[i])] + vector_store.add_documents(documents=documents[i], ids=page_ids) + print ("Documents added to the vector store") + + # A document search function def search(db, query, k=4): docs = db.similarity_search(query, k) diff --git a/doc-experiment.ipynb b/doc-experiment.ipynb index 4abdb68c..165b4234 100644 --- a/doc-experiment.ipynb +++ b/doc-experiment.ipynb @@ -1,14 +1,5 @@ { "cells": [ - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install -q docx" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -18,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 39, "metadata": {}, "outputs": [], "source": [ @@ -33,7 +24,9 @@ "from langchain_community.document_loaders import UnstructuredWordDocumentLoader\n", "from uuid import uuid4\n", "import numpy as np\n", - "from langchain_core.documents import Document" + "from langchain_core.documents import Document\n", + "import os\n", + "import json" ] }, { @@ -76,38 +69,79 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ - "# creating a function that checks the document type and loads the document\n", - "def load_pdf_document(document_path):\n", - " if document_path.endswith(\".pdf\"):\n", - " pdf_doc = PyPDFLoader(document_path)\n", - " pages = pdf_doc.load_and_split()\n", - " return pages\n", - " else:\n", - " raise ValueError(f\"Unsupported document type for {document_path}\")\n", + "# this function is meant to create a document for non paginated text data\n", + "def create_documents(doc):\n", + " text = doc[0].page_content\n", + " metadata = doc[0].metadata\n", + " text_splitter = RecursiveCharacterTextSplitter(\n", + " chunk_size=1000,\n", + " chunk_overlap=10,\n", + " length_function=len,\n", + " is_separator_regex=False,\n", + " )\n", + " docs = text_splitter.create_documents([text])\n", + " # converting the text into documents\n", + " documents = []\n", + " for i, chunk in enumerate(docs):\n", + " # Increment page number based on the chunk index\n", + " doc_metadata = metadata.copy()\n", + " doc_metadata['page'] = i # Assign page number based on chunk index\n", + " document = Document(page_content=chunk.page_content, metadata=doc_metadata)\n", + " documents.append(document)\n", + " return documents\n", "\n", "\n", "def load_txt_document(document_path):\n", - " if document_path.endswith(\".txt\"):\n", + " try:\n", " txt_doc = TextLoader(document_path)\n", " text = txt_doc.load()\n", - " return text\n", - " else:\n", - " raise ValueError(f\"Unsupported document type for {document_path}\")\n", + " # implementig document splitting\n", + " docs = create_documents(text)\n", + " return docs\n", + " except:\n", + " raise ValueError(f\"Error loading -- {document_path}\")\n", " \n", " \n", "def load_docx_document(document_path):\n", - " if document_path.endswith(\".docx\"):\n", + " try:\n", " docx_doc = Docx2txtLoader(document_path)\n", " text = docx_doc.load()\n", - " return text\n", + " # implementig document splitting\n", + " docs = create_documents(text)\n", + " return docs\n", + " except:\n", + " raise ValueError(f\"Error loading -- {document_path}\")\n", + "\n", + " \n", + "# creating a function that checks the document type and loads the document\n", + "def load_pdf_document(document_path):\n", + " try:\n", + " pdf_doc = PyPDFLoader(document_path)\n", + " pages = pdf_doc.load_and_split()\n", + " return pages\n", + " except:\n", + " raise ValueError(f\"Error loading -- {document_path}\")\n", + " \n", + "\n", + "\n", + "\n", + "# A general function that loads textual documents\n", + "def load_document(document_path):\n", + " if document_path.endswith(\".pdf\"):\n", + " return load_pdf_document(document_path)\n", + " elif document_path.endswith(\".txt\"):\n", + " return load_txt_document(document_path)\n", + " elif document_path.endswith(\".docx\"):\n", + " return load_docx_document(document_path)\n", " else:\n", " raise ValueError(f\"Unsupported document type for {document_path}\")\n", "\n", "\n", + "\n", "def save_embedded_data(embeddings, key=\"data\"):\n", " embeddings.save_local(f\"vec-db/index/faiss_index_{key}\")\n", " print(\"Embeddings saved\")\n", @@ -119,107 +153,129 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 66, "metadata": {}, "outputs": [], "source": [ - "# Load the document \n", - "document_path = \"data/corolla-2020-toyota-owners-manual.pdf\"\n", - "pdf_pages = load_pdf_document(document_path)" + "# creating a function to load all documents from a directory. \n", + "def load_documents_from_directory(directory_path: str):\n", + " text_doc = ['pdf', 'txt', 'docx', 'doc', 'md']\n", + " image_doc = ['jpg', 'jpeg', 'png', 'gif', 'bmp']\n", + " audio_doc = ['mp3', 'wav', 'flac', 'ogg', 'm4a']\n", + " video_doc = ['mp4', 'avi', 'mkv', 'flv', 'mov']\n", + " \n", + " # accessing the name of the files in the directory\n", + " files = os.listdir(directory_path)\n", + " # creating a list to store the documents\n", + " documents = []\n", + " # another list for the document names \n", + " doc_names = []\n", + " # counting the number of pages in the document\n", + " num_pages= []\n", + " # iterating through the files in the directory\n", + " for file in files: \n", + " # updating the path\n", + " path = os.path.join(directory_path, file)\n", + " # getting the file extension and doc name\n", + " doc_name, extension = file.split('.')[0] , file.split('.')[-1]\n", + " # checking if the file is a text document\n", + " if extension in text_doc:\n", + " # loading the document\n", + " doc = load_document(path)\n", + " # appending the document to the documents list\n", + " documents.append(doc)\n", + " # appending the number of pages in the document\n", + " num_pages.append(len(doc))\n", + " # adding the document name to the doc_names list\n", + " doc_names.append(doc_name)\n", + " print(f\"Document {doc_name} loaded\")\n", + " \n", + " # so we need to create a document id for each document\n", + " docs_id = [uuid4().hex for i in range(len(documents))]\n", + " # creating a json file to store the documents, checking if it exists then open it, else create it\n", + " json_file = f\"{directory_path}/documents.json\"\n", + " if os.path.exists(json_file):\n", + " with open(json_file, 'r') as f:\n", + " data = json.load(f)\n", + " data['doc_names'] = doc_names\n", + " data['docs_id'] = docs_id\n", + " data['num_pages'] = num_pages\n", + " with open(json_file, 'w') as f:\n", + " json.dump(data, f)\n", + " else:\n", + " data = {'doc_names': doc_names, 'docs_id': docs_id, 'num_pages': num_pages}\n", + " with open(json_file, 'w') as f:\n", + " json.dump(data, f)\n", + " \n", + " # returning the documents, and doc ids\n", + " return documents, docs_id, num_pages" ] }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 67, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document Car-Repair-Receipt-repair loaded\n", + "Document Car-Repair-Receipt-service loaded\n", + "Document Car-Repair-Receipt-tire loaded\n", + "Document Car-Repair-Receipt-tuning loaded\n", + "Document Car-Repair-Receipt-wash loaded\n", + "Document corolla-2020-toyota-owners-manual loaded\n", + "Document How to change engine oil and filter on TOYOTA Corolla loaded\n", + "Document How to change front brake pads on TOYOTA Corolla loaded\n", + "Document How to change rear windshield wipers on TOYOTA Corolla loaded\n", + "Document How to change spark plugs on TOYOTA COROLLA loaded\n" + ] + } + ], "source": [ - "document_path_ = \"data/Car-Repair-Receipt-service.pdf\"\n", - "pdf_pages_ = load_pdf_document(document_path_)" + "documents, docs_id, num_pages = load_documents_from_directory('data') " ] }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 68, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[Document(metadata={'source': 'data/Car-Repair-Receipt-service.pdf', 'page': 0}, page_content='CAR REPAIR RECEIPT \\nCompany Name: abc repair Date: August 1, 2024 \\nStreet Address: washington Receipt #: _______________ \\nCity, State, Zip: __________________________ \\nPhone: 11221122 \\nFax: __________________________ \\nEmail: __________________________ \\nWebsite: __________________________ \\n \\nCustomer Information \\nName: ________________________ City : __________________________ \\nPhone: ________________________ Year, Make, Model: __________________________ \\n \\nServices Rendered Price Parts Qty./Price Total \\ngeneral service 2000 1 \\noil change 500 1 \\nbrake pads 1000 2 \\ncatalytic 1000 1 \\n \\n \\n \\nAmount Paid: ____________ \\n Subtotal \\nTotal 5500 \\nName(s) of Service Person(s): ___________________________________________________ \\n____________________________________________________________________________ \\nPrinted Name: __________________________'),\n", - " Document(metadata={'source': 'data/Car-Repair-Receipt-service.pdf', 'page': 1}, page_content='Page 1 of 1')]" + "['320bc9710952499baa9553d3f0d4e727',\n", + " '6ba07e1cf09a4ae6b54863040f901328',\n", + " 'dd067c452bd146e4becd61bde8602a3c',\n", + " '640493ad16b546d38851216917d3e82b',\n", + " '08cf1c3c8eab4efe9f81efcf8ce770be',\n", + " 'd8d6a3ca9a0a44e08cd4423ee3fb979d',\n", + " '2b6e45cd99ff46b08242282a423642d4',\n", + " '05524682d2e9425c83c9b57693182c50',\n", + " '4eb170648fbe47c3b87b2831a97f0dd8',\n", + " 'cec3e82f0432402e940a0299bfa086fe']" ] }, - "execution_count": 52, + "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pdf_pages_" + "docs_id" ] }, { "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Document(metadata={'source': 'data/How to change engine oil and filter on TOYOTA Corolla.txt'}, page_content=\"How to change engine oil\\nand filter on TOYOTA\\nCorolla Verso (AR10) –\\nreplacement guide\\nSIMILAR VIDEO TUTORIAL\\nThis video shows the replacement procedure of a similar car part on another vehicle\\nCLUB.AUTODOC.CO.UK 1–10\\nThis replacement procedure can be used for:\\nTOYOTA Corolla Verso (AR10) 1.6 (ZNR10_), TOYOTA Corolla Verso (AR10) 1.8\\n(ZNR11_), TOYOTA Corolla Verso (AR10) 1.5, TOYOTA Corolla Verso (AR10) 1.8\\nThe steps may slightly vary depending on the car design.\\nImportant!\\nThis tutorial was created based on the replacement procedure for a similar car part\\non: TOYOTA Aygo (AB10) 1\\nBuy tools\\nCLUB.AUTODOC.CO.UK 2–10\\nREPLACEMENT: ENGINE OIL AND FILTER – TOYOTA\\nCOROLLA VERSO (AR10). LIST OF THE TOOLS YOU'LL\\nNEED:\\nAll-purpose cleaning spray\\nOil filter cup socket 76/14-F\\nImpact socket №15\\nRatchet wrench\\nTorque wrench\\nOil drain pan\\nFunnel\\nFender cover\\nCARRY OUT REPLACEMENT IN THE FOLLOWING ORDER:\\n1\\nOpen the hood.\\n2\\nUse a fender protection cover to prevent damaging paintwork and plastic parts of the car.\\n3\\nUnscrew the oil filler plug.\\n4\\nLift the car using a jack or place it over an inspection pit.\\nCLUB.AUTODOC.CO.UK 3–10\\nReplace the oil filter every time when changing motor oil.\\nPlease note: all work on the car – TOYOTA Corolla Verso (AR10) – should be\\ndone with the engine switched off.\\nWear gloves to prevent contact with hot liquid.\\nThe vehicle must be on even keel, and if it is at an angle, the drain plug must be\\nat the lowest point.\\nAll work should be done with the engine stopped.\\nAUTODOC recommends:\\nReplacement: engine oil and filter – TOYOTA Corolla Verso (AR10). Tip from\\nAUTODOC experts:\\n5\\nPut a waste oil container with at least 4 l capacity under the drainage hole.\\n6\\nUnscrew the drain plug. Use a drive socket #15. Use a ratchet wrench.\\n7\\nDrain the used oil.\\nCLUB.AUTODOC.CO.UK 4–10\\nCaution! The oil may be hot.\\nWait until the oil has completely drained from the drainage hole.\\nReplacement: engine oil and filter – TOYOTA Corolla Verso (AR10). Tip from\\nAUTODOC:\\n8 Move the container under the oil filter.\\n9\\nUnscrew the oil filter. Use the 76/14-F socket.\\n10\\nPrepare a container for the old oil filter. Put the old oil filter in the container.\\n11\\nClean the oil filter seat.\\nCLUB.AUTODOC.CO.UK 5–10\\nMove up the container. The oil starts to drain out of the drainage hole\\nimmediately after you have removed the oil filter.\\nReplacement: engine oil and filter – TOYOTA Corolla Verso (AR10). Tip:\\n12\\nLubricate the rubber seal of the new filter with a little amount of fresh oil.\\n13\\nScrew the new filter into its place. Use the 76/14-F socket. Use a torque wrench. Tighten it to 23 nm torque.\\n14\\nClean the area of the oil pan drainage hole.\\n15\\nScrew in the new drainage plug and tighten it. Use a drive socket #15. Use a torque wrench. Tighten it to 27 nm\\ntorque.\\nCLUB.AUTODOC.CO.UK 6–10\\n16\\nClean the area of the oil pan drainage hole. Use all-purpose cleaning spray.\\n17\\nLower the car.\\n18\\nRemove the oil filler cap. Insert the funnel.\\n19\\nPour oil into the engine through the filler. Required amount of oil: 3.6 L.\\nCLUB.AUTODOC.CO.UK 7–10\\nUse the oil recommended by the manufacturer.\\nReplacement: engine oil and filter – TOYOTA Corolla Verso (AR10). Professionals recommend:\\n20\\nCheck the oil level using the dipstick. Top up oil if necessary.\\n21\\nTighten the oil filler cap.\\n22\\nRun the engine for a few minutes. After the oil pressure warning light goes out, stop the engine.\\n23\\nCheck the oil level 5 minutes after the stop of the engine operation using a dipstick and add oil if necessary.\\nCLUB.AUTODOC.CO.UK 8–10\\nThe oil level should be between the Min and Max marks.\\nPlease note!\\n24\\nRemove the fender protection cover.\\n25\\nClose the hood.\\nVIEW MORE TUTORIALS\\nCLUB.AUTODOC.CO.UK 9–10\\nThe oil level should be between the Min and Max marks.\\nMake sure that the oil filler cap is properly tightened.\\nCheck the threaded plug of the drainage hole and the oil filter seal for tightness.\\nIf necessary, carefully tighten them up.\\nNote that oil pressure warning light, which indicates insufficient pressure in the\\nlubrication system, may be on for a longer time than usual. After a few seconds,\\nthe oil will fill all the channels, and the light will go out.\\nTo protect the environment from pollution, be sure to dispose of the used filters\\nat special collection points.\\nWELL DONE!\\nReplacement: engine oil and filter – TOYOTA Corolla Verso (AR10). AUTODOC\\nrecommends:\\nReplacement: engine oil and filter – TOYOTA Corolla Verso (AR10). AUTODOC\\nexperts recommend:\\nAUTODOC — TOP QUALITY AND AFFORDABLE CAR\\nPARTS ONLINE\\nA GREAT SELECTION OF SPARE PARTS FOR YOUR CAR\\nOIL FILTER: A WIDE SELECTION\\nThe document contains only general recommendations that may be useful for you when you perform repair or replacement work. AUTODOC shall not be liable\\nfor any loss, injury, damage of property occurring in the repair or replacement process due to incorrect use or misinterpretation of the provided information.\\nAUTODOC shall not be liable for any possible mistakes and uncertainties in this guide. The information provided is for information purposes only and cannot\\nreplace advice from specialists.\\nAUTODOC shall not be liable for incorrect or hazardous usage of equipment, tools and car parts. AUTODOC strongly recommends to be careful and observe\\nthe safety rules when performing repair or replacement works. Remember: usage of low quality auto parts does not guarantee you the appropriate level of\\nroad safety.\\n© Copyright 2022 – All the contents of this website, in particular texts, photographs and graphics, are protected by copyright. All rights, including\\nreproduction, publication, editing and translation rights, are reserved by AUTODOC GmbH.\\nCLUB.AUTODOC.CO.UK 10–10\\nDISCLAIMER:\\nAUTODOC MOBILE APP: GREAT DEALS AND CONVENIENT\\nSHOPPING\")]\n" - ] - } - ], - "source": [ - "# example with loading a txt document\n", - "document_path_1 = \"data/How to change engine oil and filter on TOYOTA Corolla.txt\"\n", - "txt_pages_1 = load_txt_document(document_path_1)\n", - "print(txt_pages_1)" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Document(metadata={'source': 'data/How to change rear windshield wipers on TOYOTA Corolla.docx'}, page_content=\"How to change rear\\n\\nwindshield wipers on\\n\\nTOYOTA Corolla VI\\n\\nLiftback (E90)\\n\\n –\\n\\nreplacement guide\\n\\nHow to change rear\\n\\nwindshield wipers on\\n\\nTOYOTA Corolla VI\\n\\nLiftback (E90)\\n\\n –\\n\\nreplacement guide\\n\\n\\n\\n\\nSIMILAR VIDEO TUTORIAL\\n\\nThis video shows the replacement procedure of a similar car part on another vehicle\\n\\nThis replacement procedure can be used for:\\n\\nTOYOTA Corolla VI Liftback (E90) 1.6, TOYOTA Corolla VI Liftback (E90) 1.8 D,\\n\\nTOYOTA Corolla VI Liftback (E90) 1.3 i, TOYOTA Corolla VI Liftback (E90) 1.6 GTI\\n\\n(\\n\\nAE92_), TOYOTA Corolla VI Liftback (E90) 1.6 4WD, TOYOTA Corolla VI Liftback\\n\\n(\\n\\nE90) 1.3, TOYOTA Corolla VI Liftback (E90) 1.4, TOYOTA Corolla VI Liftback \\n\\n(E90) 1.6\\n\\ni, TOYOTA Corolla VI Liftback (E90) 1.6 EFi\\n\\nThe steps may slightly vary depending on the car design.\\n\\nImportant!\\n\\nSIMILAR VIDEO TUTORIAL\\n\\nThis video shows the replacement procedure of a similar car part on another vehicle\\n\\nThis replacement procedure can be used for:\\n\\nTOYOTA Corolla VI Liftback (E90) 1.6, TOYOTA Corolla VI Liftback (E90) 1.8 D,\\n\\nTOYOTA Corolla VI Liftback (E90) 1.3 i, TOYOTA Corolla VI Liftback (E90) 1.6 GTI\\n\\n(\\n\\nAE92_), TOYOTA Corolla VI Liftback (E90) 1.6 4WD, TOYOTA Corolla VI Liftback\\n\\n(\\n\\nE90) 1.3, TOYOTA Corolla VI Liftback (E90) 1.4, TOYOTA Corolla VI Liftback \\n\\n(E90) 1.6\\n\\ni, TOYOTA Corolla VI Liftback (E90) 1.6 EFi\\n\\nThe steps may slightly vary depending on the car design.\\n\\nImportant!\\n\\nThis tutorial was created based on the replacement procedure for a similar car part on: TOYOTA Land Cruiser Prado 120 (J120) 3.0 D-4D\\n\\n\\n\\nBuy tool\\n\\ns\\n\\nREPLACEMENT: WINDSHIELD WIPERS – TOYOTA\\n\\nCOROLLA VI LIFTBACK (E90). TOOLS YOU MIGHT NEED:\\n\\nFlat Screwdriver\\n\\nBuy tool\\n\\ns\\n\\nREPLACEMENT: WINDSHIELD WIPERS – TOYOTA\\n\\nCOROLLA VI LIFTBACK (E90). TOOLS YOU MIGHT NEED:\\n\\nFlat Screwdriver\\n\\n\\n\\nPlease note: all work on the car – TOYOTA Corolla VI Liftback (E90) – should be\\n\\ndone with the engine switched off.\\n\\nAUTODOC recommends:\\n\\nPlease note: all work on the car – TOYOTA Corolla VI Liftback (E90) – should be\\n\\ndone with the engine switched off.\\n\\nAUTODOC recommends:\\n\\nCARRY OUT REPLACEMENT IN THE FOLLOWING ORDER:\\n\\n\\n\\n1\\n\\nPrepare the new windscreen wipers.\\n\\n2\\n\\nPull the wiper arm away from the glass surface until it stops.\\n\\n3\\n\\nPress the clip. Use a flat screwdriver.\\n\\n1\\n\\nPrepare the new windscreen wipers.\\n\\n2\\n\\nPull the wiper arm away from the glass surface until it stops.\\n\\n3\\n\\nPress the clip. Use a flat screwdriver.\\n\\n\\n\\n4\\n\\nRemove the blade from the wiper arm.\\n\\n5\\n\\nInstall the new wiper blade and carefully press the wiper arm down to the glass.\\n\\nWhen replacing the wiper blade, take caution to prevent the spring-loaded wiper\\n\\narm from hitting the glass.\\n\\nDon't touch the wiper blade at the working rubber edge to prevent damage to the\\n\\ngraphite coating. \\n\\nEnsure that the blade rubber strip fits tightly to the glass along the entire length.\\n\\nReplacement: windshield wipers – TOYOTA Corolla VI Liftback (E90). Tip from\\n\\nAUTODOC experts:\\n\\nReplacement: windshield wipers – TOYOTA Corolla VI Liftback (E90).\\n\\nAUTODOC recommends:\\n\\n4\\n\\nRemove the blade from the wiper arm.\\n\\n5\\n\\nInstall the new wiper blade and carefully press the wiper arm down to the glass.\\n\\nWhen replacing the wiper blade, take caution to prevent the spring-loaded wiper\\n\\narm from hitting the glass.\\n\\nDon't touch the wiper blade at the working rubber edge to prevent damage to the\\n\\ngraphite coating. \\n\\nEnsure that the blade rubber strip fits tightly to the glass along the entire length.\\n\\nReplacement: windshield wipers – TOYOTA Corolla VI Liftback (E90). Tip from\\n\\nAUTODOC experts:\\n\\nReplacement: windshield wipers – TOYOTA Corolla VI Liftback (E90).\\n\\nAUTODOC recommends:\\n\\n\\n\\n6\\n\\n7\\n\\n6\\n\\n7Switch on the ignition.\\n\\nAfter installation check the wipers performance.\\n\\n\\n\\nVIEW MORE TUTORIAL\\n\\nS\\n\\nWELL DONE!\\n\\nVIEW MORE TUTORIAL\\n\\nS\\n\\nWELL DONE!\\n\\n\\nAUTODOC — TOP QUALITY AND AFFORDABLE CAR\\n\\nPARTS ONLINE\\n\\n\\n\\nAUTODOC MOBILE APP: GREAT DEALS AND CONVENIEN\\n\\nT\\n\\nSHOPPIN\\n\\nG\\n\\nAUTODOC MOBILE APP: GREAT DEALS AND CONVENIEN\\n\\nT\\n\\nSHOPPIN\\n\\nG\\n\\n\\n\\nA GREAT SELECTION OF SPARE PARTS FOR YOUR CA\\n\\nR\\n\\nWINDSHIELD WIPERS: A WIDE SELECTIO\\n\\nN\\n\\nA GREAT SELECTION OF SPARE PARTS FOR YOUR CA\\n\\nR\\n\\nWINDSHIELD WIPERS: A WIDE SELECTIO\\n\\nN\\n\\n DISCLAIMER:\\n\\nThe document contains only general recommendations that may be useful for you when you perform repair or replacement work. AUTODOC shall not be liable for any loss, injury, damage of property occurring in the repair or replacement process due to incorrect use or misinterpretation of the provided information.\\n\\nAUTODOC shall not be liable for any possible mistakes and uncertainties in this guide. The information provided is for information purposes only and cannot replace advice from specialists.\\n\\nAUTODOC shall not be liable for incorrect or hazardous usage of equipment, tools and car parts. AUTODOC strongly recommends to be careful and observe the safety rules when performing repair or replacement works. Remember: usage of low quality auto parts does not guarantee you the appropriate level of road safety.\\n\\n© Copyright 2021 – All the contents of this website, in particular texts, photographs and graphics, are protected by copyright. All rights, including reproduction, publication, editing and translation rights, are reserved by AUTODOC GmbH.\\n\\n\\t\\t\\tCLUB.AUTODOC.CO.UK\\t2–6\\n\\n\\n\\n\\t\\t\\tCLUB.AUTODOC.CO.UK\\t2–6\")]\n" - ] - } - ], - "source": [ - "# example with loading a docx document\n", - "document_path_2 = \"data/How to change rear windshield wipers on TOYOTA Corolla.docx\"\n", - "docx_pages_2 = load_docx_document(document_path_2)\n", - "print(docx_pages_2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 81, "metadata": {}, "outputs": [], "source": [ "# A function to create vector store\n", - "def create_vector_store(embeddings, documents: list, docs_id: list):\n", - " # Cretaing the vector store with the first document\n", - " doc_1 = documents[0]\n", - " # embedding the pages\n", - " page_embeddings = [embeddings.embed_query(page.page_content) for page in doc_1]\n", - " # index set up\n", - " index = faiss.IndexFlatL2(len(page_embeddings[0]))\n", + "def create_vector_store(embeddings, documents: list, docs_id: list, num_pages: list):\n", + " # index set up with the embedding dimension\n", + " index = faiss.IndexFlatL2(384)\n", " # Initialize the FAISS vector store\n", " vector_store = FAISS(\n", " embedding_function=embeddings,\n", @@ -227,19 +283,11 @@ " docstore=InMemoryDocstore(),\n", " index_to_docstore_id={},\n", " )\n", - " # Prepare documents to be added\n", - " docs_to_add = {}\n", - " for i, (embedding, page) in enumerate(zip(page_embeddings, pdf_pages)):\n", - " index.add(np.array([embedding], dtype=np.float32)) # Ensure the embedding is a 2D array\n", - " vector_store.index_to_docstore_id[i] = str(i) # Use string ID\n", - " docs_to_add[str(i)] = Document(page_content=page.page_content, metadata=page.metadata)\n", - "\n", - " # Add documents to the InMemoryDocstore\n", - " vector_store.docstore.add(docs_to_add, ids=docs_id[0])\n", - " \n", " # Now adding other documents to the store. \n", - " if len(documents) > 1:\n", - " vector_store.add_documents(documents=documents[1:], ids=docs_id[1:])\n", + " for i in range(len(documents)):\n", + " doc_id = docs_id[i]\n", + " page_ids = [doc_id+ str(i) for i in range(num_pages[i])]\n", + " vector_store.add_documents(documents=documents[i], ids=page_ids)\n", " \n", " # saving the vector store automatically\n", " save_embedded_data(vector_store, key=\"data\")\n", @@ -249,255 +297,60 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# creating a function to add documents to the vector store\n", - "def add_documents_to_vector_store(embeddings, documents: list, docs_id: list):\n", - " # loading the vector store\n", - " vector_store = load_embedded_data(embeddings)\n", - " # adding the documents to the vector store\n", - " vector_store.add_documents(documents=documents, ids=docs_id)\n", - " \n", - " print (\"Documents added to the vector store\")\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Users\\timmy_3aupohg\\anaconda3\\envs\\smog_env\\Lib\\site-packages\\transformers\\models\\bert\\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\\cb\\pytorch_1000000000000\\work\\aten\\src\\ATen\\native\\transformers\\cuda\\sdp_utils.cpp:555.)\n", - " attn_output = torch.nn.functional.scaled_dot_product_attention(\n" - ] - } - ], - "source": [ - "page_embeddings = [embeddings.embed_query(page.page_content) for page in pdf_pages]" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "588" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(page_embeddings)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "384" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(page_embeddings[3])" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "# Initialize FAISS index with the dimension of the embeddings\n", - "index = faiss.IndexFlatL2(len(page_embeddings[0]))" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "# Initialize the FAISS vector store\n", - "vector_store = FAISS(\n", - " embedding_function=embeddings,\n", - " index=index,\n", - " docstore=InMemoryDocstore(),\n", - " index_to_docstore_id={},\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "# Prepare documents to be added\n", - "docs_to_add = {}\n", - "for i, (embedding, page) in enumerate(zip(page_embeddings, pdf_pages)):\n", - " index.add(np.array([embedding], dtype=np.float32)) # Ensure the embedding is a 2D array\n", - " vector_store.index_to_docstore_id[i] = str(i) # Use string ID\n", - " docs_to_add[str(i)] = Document(page_content=page.page_content, metadata=page.metadata)\n", - "\n", - "# Add documents to the InMemoryDocstore\n", - "vector_store.docstore.add(docs_to_add)" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vector_store" - ] - }, - { - "cell_type": "code", - "execution_count": 31, + "execution_count": 82, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "* [SIM=0.181231] 206 4-5. Using the driving support systems\n", - "COROLLA_UWARNING\n", - "■Before using LDA system\n", - "●Do not rely solely upon the LDA \n", - "system. The LDA system does \n", - "not automatically drive the vehi-cle or reduce the amount of \n", - "attention that must be paid to \n", - "the area in front of the vehicle. The driver must always assume \n", - "full responsibilit y for driving \n", - "safely by paying careful atten-\n", - "tion to the surrounding condi-tions and operating the steering \n", - "wheel to correct the path of the \n", - "vehicle. Also, the driver must take adequate breaks when \n", - "fatigued, such as from driving \n", - "for a long period of time.\n", - "●Failure to perform appropriate \n", - "driving operations and pay care-\n", - "ful attention may lead to an \n", - "accident, resulting in death or serious injury.\n", - "●When not using the LDA sys-\n", - "tem, use the LDA switch to turn \n", - "the system off.\n", - "■Situations unsuitable for LDA system\n", - "In the following situations, use the LDA switch to turn the system off. \n", - "Failure to do so may lead to an \n", - "accident, resulting in death or serious injury.\n", - "●Vehicle is driven on a road sur-\n", - "face which is slippery due to \n", - "rainy weather, fallen snow, freezing, etc.\n", - "●Vehicle is driven on a snow-cov-\n", - "ered road.\n", - "●White (yellow) lin es are difficult \n", - "to see due to rain, snow, fog, \n", - "dust, etc.\n", - "●A spare tire, tire chains, etc. are \n", - "equipped.●When the tires have been excessively worn, or when the \n", - "tire inflation p ressure is low.\n", - "●When tires of a size other than specified are installed.\n", - "●Vehicle is driven in traffic lanes \n", - "other than that highways and \n", - "freeways.\n", - "●During emergency towing.\n", - "■Preventing LDA system mal-functions and operations per-\n", - "formed by mistake\n", - "●Do not modify the headlights or place stickers, etc. on the sur-\n", - "face of the lights.\n", - "●Do not modify the suspension etc. If the suspension etc. needs \n", - "to be replaced, contact your \n", - "Toyota dealer.\n", - "●Do not install or place anything on the hoo d or grille. Also, do \n", - "not install a gr ille guard (bull \n", - "bars, kangaroo bar, etc.).\n", - "●If your windshield needs repairs, contact your Toyota \n", - "dealer.\n", - "■Conditions in which functions \n", - "may not operate properly\n", - "In the following situations, the \n", - "functions may not operate prop-erly and the vehicle may depart \n", - "from its lane. Drive safely by \n", - "always paying careful attention to your surroundings and operate \n", - "the steering whee l to correct the \n", - "path of the vehicle without relying \n", - "solely on the functions.\n", - "●Vehicle is being driven around a sharp curve.\n", - "https://www.MyCarManual.com [{'source': 'data/corolla-2020-toyota-owners-manual.pdf', 'page': 205}]\n", - "* [SIM=0.197417] 205 4-5. Using the driving support systems\n", - "COROLLA_U4Driving●“LTA Unavailable at Current \n", - "Speed”\n", - "The function cannot be used as the vehicle speed exceeds the LTA \n", - "operation range. Drive slower.\n", - "■If a battery terminal has been \n", - "disconnected and reconnected\n", - "The system needs to be initialized. \n", - "To initialize the system, drive the vehicle straight ahead for 5 seconds \n", - "or more at a speed of approximately \n", - "22 mph (35 km/h) or more.\n", - "■Customization\n", - "Function settings can be changed. (P.548)*: If equippedLDA (Lane Departure \n", - "Alert with steering control)\n", - "*\n", - "When driving on highways \n", - "and freeways with white (yellow) lane lines, this function alerts the driver when the vehicle might depart from its lane or \n", - "course\n", - "* and provides assis-\n", - "tance by operating the \n", - "steering wheel to keep the vehicle in its lane or \n", - "course\n", - "*.\n", - "The LDA system recog-\n", - "nizes white (yellow) lane \n", - "lines or a course* using the \n", - "front camera. Additionally, it \n", - "detects preceding vehicles using the front camera and radar.\n", - "*: Boundary between asphalt and \n", - "the side of the road, such as \n", - "grass, soil, or a curb\n", - "https://www.MyCarManual.com [{'source': 'data/corolla-2020-toyota-owners-manual.pdf', 'page': 204}]\n", - "* [SIM=0.200788] 202 4-5. Using the driving support systems\n", + "['320bc9710952499baa9553d3f0d4e7270']\n", + "['6ba07e1cf09a4ae6b54863040f9013280', '6ba07e1cf09a4ae6b54863040f9013281']\n", + "['dd067c452bd146e4becd61bde8602a3c0', 'dd067c452bd146e4becd61bde8602a3c1']\n", + "['640493ad16b546d38851216917d3e82b0', '640493ad16b546d38851216917d3e82b1']\n", + "['08cf1c3c8eab4efe9f81efcf8ce770be0']\n", + "['d8d6a3ca9a0a44e08cd4423ee3fb979d0', 'd8d6a3ca9a0a44e08cd4423ee3fb979d1', 'd8d6a3ca9a0a44e08cd4423ee3fb979d2', 'd8d6a3ca9a0a44e08cd4423ee3fb979d3', 'd8d6a3ca9a0a44e08cd4423ee3fb979d4', 'd8d6a3ca9a0a44e08cd4423ee3fb979d5', 'd8d6a3ca9a0a44e08cd4423ee3fb979d6', 'd8d6a3ca9a0a44e08cd4423ee3fb979d7', 'd8d6a3ca9a0a44e08cd4423ee3fb979d8', 'd8d6a3ca9a0a44e08cd4423ee3fb979d9', 'd8d6a3ca9a0a44e08cd4423ee3fb979d10', 'd8d6a3ca9a0a44e08cd4423ee3fb979d11', 'd8d6a3ca9a0a44e08cd4423ee3fb979d12', 'd8d6a3ca9a0a44e08cd4423ee3fb979d13', 'd8d6a3ca9a0a44e08cd4423ee3fb979d14', 'd8d6a3ca9a0a44e08cd4423ee3fb979d15', 'd8d6a3ca9a0a44e08cd4423ee3fb979d16', 'd8d6a3ca9a0a44e08cd4423ee3fb979d17', 'd8d6a3ca9a0a44e08cd4423ee3fb979d18', 'd8d6a3ca9a0a44e08cd4423ee3fb979d19', 'd8d6a3ca9a0a44e08cd4423ee3fb979d20', 'd8d6a3ca9a0a44e08cd4423ee3fb979d21', 'd8d6a3ca9a0a44e08cd4423ee3fb979d22', 'd8d6a3ca9a0a44e08cd4423ee3fb979d23', 'd8d6a3ca9a0a44e08cd4423ee3fb979d24', 'd8d6a3ca9a0a44e08cd4423ee3fb979d25', 'd8d6a3ca9a0a44e08cd4423ee3fb979d26', 'd8d6a3ca9a0a44e08cd4423ee3fb979d27', 'd8d6a3ca9a0a44e08cd4423ee3fb979d28', 'd8d6a3ca9a0a44e08cd4423ee3fb979d29', 'd8d6a3ca9a0a44e08cd4423ee3fb979d30', 'd8d6a3ca9a0a44e08cd4423ee3fb979d31', 'd8d6a3ca9a0a44e08cd4423ee3fb979d32', 'd8d6a3ca9a0a44e08cd4423ee3fb979d33', 'd8d6a3ca9a0a44e08cd4423ee3fb979d34', 'd8d6a3ca9a0a44e08cd4423ee3fb979d35', 'd8d6a3ca9a0a44e08cd4423ee3fb979d36', 'd8d6a3ca9a0a44e08cd4423ee3fb979d37', 'd8d6a3ca9a0a44e08cd4423ee3fb979d38', 'd8d6a3ca9a0a44e08cd4423ee3fb979d39', 'd8d6a3ca9a0a44e08cd4423ee3fb979d40', 'd8d6a3ca9a0a44e08cd4423ee3fb979d41', 'd8d6a3ca9a0a44e08cd4423ee3fb979d42', 'd8d6a3ca9a0a44e08cd4423ee3fb979d43', 'd8d6a3ca9a0a44e08cd4423ee3fb979d44', 'd8d6a3ca9a0a44e08cd4423ee3fb979d45', 'd8d6a3ca9a0a44e08cd4423ee3fb979d46', 'd8d6a3ca9a0a44e08cd4423ee3fb979d47', 'd8d6a3ca9a0a44e08cd4423ee3fb979d48', 'd8d6a3ca9a0a44e08cd4423ee3fb979d49', 'd8d6a3ca9a0a44e08cd4423ee3fb979d50', 'd8d6a3ca9a0a44e08cd4423ee3fb979d51', 'd8d6a3ca9a0a44e08cd4423ee3fb979d52', 'd8d6a3ca9a0a44e08cd4423ee3fb979d53', 'd8d6a3ca9a0a44e08cd4423ee3fb979d54', 'd8d6a3ca9a0a44e08cd4423ee3fb979d55', 'd8d6a3ca9a0a44e08cd4423ee3fb979d56', 'd8d6a3ca9a0a44e08cd4423ee3fb979d57', 'd8d6a3ca9a0a44e08cd4423ee3fb979d58', 'd8d6a3ca9a0a44e08cd4423ee3fb979d59', 'd8d6a3ca9a0a44e08cd4423ee3fb979d60', 'd8d6a3ca9a0a44e08cd4423ee3fb979d61', 'd8d6a3ca9a0a44e08cd4423ee3fb979d62', 'd8d6a3ca9a0a44e08cd4423ee3fb979d63', 'd8d6a3ca9a0a44e08cd4423ee3fb979d64', 'd8d6a3ca9a0a44e08cd4423ee3fb979d65', 'd8d6a3ca9a0a44e08cd4423ee3fb979d66', 'd8d6a3ca9a0a44e08cd4423ee3fb979d67', 'd8d6a3ca9a0a44e08cd4423ee3fb979d68', 'd8d6a3ca9a0a44e08cd4423ee3fb979d69', 'd8d6a3ca9a0a44e08cd4423ee3fb979d70', 'd8d6a3ca9a0a44e08cd4423ee3fb979d71', 'd8d6a3ca9a0a44e08cd4423ee3fb979d72', 'd8d6a3ca9a0a44e08cd4423ee3fb979d73', 'd8d6a3ca9a0a44e08cd4423ee3fb979d74', 'd8d6a3ca9a0a44e08cd4423ee3fb979d75', 'd8d6a3ca9a0a44e08cd4423ee3fb979d76', 'd8d6a3ca9a0a44e08cd4423ee3fb979d77', 'd8d6a3ca9a0a44e08cd4423ee3fb979d78', 'd8d6a3ca9a0a44e08cd4423ee3fb979d79', 'd8d6a3ca9a0a44e08cd4423ee3fb979d80', 'd8d6a3ca9a0a44e08cd4423ee3fb979d81', 'd8d6a3ca9a0a44e08cd4423ee3fb979d82', 'd8d6a3ca9a0a44e08cd4423ee3fb979d83', 'd8d6a3ca9a0a44e08cd4423ee3fb979d84', 'd8d6a3ca9a0a44e08cd4423ee3fb979d85', 'd8d6a3ca9a0a44e08cd4423ee3fb979d86', 'd8d6a3ca9a0a44e08cd4423ee3fb979d87', 'd8d6a3ca9a0a44e08cd4423ee3fb979d88', 'd8d6a3ca9a0a44e08cd4423ee3fb979d89', 'd8d6a3ca9a0a44e08cd4423ee3fb979d90', 'd8d6a3ca9a0a44e08cd4423ee3fb979d91', 'd8d6a3ca9a0a44e08cd4423ee3fb979d92', 'd8d6a3ca9a0a44e08cd4423ee3fb979d93', 'd8d6a3ca9a0a44e08cd4423ee3fb979d94', 'd8d6a3ca9a0a44e08cd4423ee3fb979d95', 'd8d6a3ca9a0a44e08cd4423ee3fb979d96', 'd8d6a3ca9a0a44e08cd4423ee3fb979d97', 'd8d6a3ca9a0a44e08cd4423ee3fb979d98', 'd8d6a3ca9a0a44e08cd4423ee3fb979d99', 'd8d6a3ca9a0a44e08cd4423ee3fb979d100', 'd8d6a3ca9a0a44e08cd4423ee3fb979d101', 'd8d6a3ca9a0a44e08cd4423ee3fb979d102', 'd8d6a3ca9a0a44e08cd4423ee3fb979d103', 'd8d6a3ca9a0a44e08cd4423ee3fb979d104', 'd8d6a3ca9a0a44e08cd4423ee3fb979d105', 'd8d6a3ca9a0a44e08cd4423ee3fb979d106', 'd8d6a3ca9a0a44e08cd4423ee3fb979d107', 'd8d6a3ca9a0a44e08cd4423ee3fb979d108', 'd8d6a3ca9a0a44e08cd4423ee3fb979d109', 'd8d6a3ca9a0a44e08cd4423ee3fb979d110', 'd8d6a3ca9a0a44e08cd4423ee3fb979d111', 'd8d6a3ca9a0a44e08cd4423ee3fb979d112', 'd8d6a3ca9a0a44e08cd4423ee3fb979d113', 'd8d6a3ca9a0a44e08cd4423ee3fb979d114', 'd8d6a3ca9a0a44e08cd4423ee3fb979d115', 'd8d6a3ca9a0a44e08cd4423ee3fb979d116', 'd8d6a3ca9a0a44e08cd4423ee3fb979d117', 'd8d6a3ca9a0a44e08cd4423ee3fb979d118', 'd8d6a3ca9a0a44e08cd4423ee3fb979d119', 'd8d6a3ca9a0a44e08cd4423ee3fb979d120', 'd8d6a3ca9a0a44e08cd4423ee3fb979d121', 'd8d6a3ca9a0a44e08cd4423ee3fb979d122', 'd8d6a3ca9a0a44e08cd4423ee3fb979d123', 'd8d6a3ca9a0a44e08cd4423ee3fb979d124', 'd8d6a3ca9a0a44e08cd4423ee3fb979d125', 'd8d6a3ca9a0a44e08cd4423ee3fb979d126', 'd8d6a3ca9a0a44e08cd4423ee3fb979d127', 'd8d6a3ca9a0a44e08cd4423ee3fb979d128', 'd8d6a3ca9a0a44e08cd4423ee3fb979d129', 'd8d6a3ca9a0a44e08cd4423ee3fb979d130', 'd8d6a3ca9a0a44e08cd4423ee3fb979d131', 'd8d6a3ca9a0a44e08cd4423ee3fb979d132', 'd8d6a3ca9a0a44e08cd4423ee3fb979d133', 'd8d6a3ca9a0a44e08cd4423ee3fb979d134', 'd8d6a3ca9a0a44e08cd4423ee3fb979d135', 'd8d6a3ca9a0a44e08cd4423ee3fb979d136', 'd8d6a3ca9a0a44e08cd4423ee3fb979d137', 'd8d6a3ca9a0a44e08cd4423ee3fb979d138', 'd8d6a3ca9a0a44e08cd4423ee3fb979d139', 'd8d6a3ca9a0a44e08cd4423ee3fb979d140', 'd8d6a3ca9a0a44e08cd4423ee3fb979d141', 'd8d6a3ca9a0a44e08cd4423ee3fb979d142', 'd8d6a3ca9a0a44e08cd4423ee3fb979d143', 'd8d6a3ca9a0a44e08cd4423ee3fb979d144', 'd8d6a3ca9a0a44e08cd4423ee3fb979d145', 'd8d6a3ca9a0a44e08cd4423ee3fb979d146', 'd8d6a3ca9a0a44e08cd4423ee3fb979d147', 'd8d6a3ca9a0a44e08cd4423ee3fb979d148', 'd8d6a3ca9a0a44e08cd4423ee3fb979d149', 'd8d6a3ca9a0a44e08cd4423ee3fb979d150', 'd8d6a3ca9a0a44e08cd4423ee3fb979d151', 'd8d6a3ca9a0a44e08cd4423ee3fb979d152', 'd8d6a3ca9a0a44e08cd4423ee3fb979d153', 'd8d6a3ca9a0a44e08cd4423ee3fb979d154', 'd8d6a3ca9a0a44e08cd4423ee3fb979d155', 'd8d6a3ca9a0a44e08cd4423ee3fb979d156', 'd8d6a3ca9a0a44e08cd4423ee3fb979d157', 'd8d6a3ca9a0a44e08cd4423ee3fb979d158', 'd8d6a3ca9a0a44e08cd4423ee3fb979d159', 'd8d6a3ca9a0a44e08cd4423ee3fb979d160', 'd8d6a3ca9a0a44e08cd4423ee3fb979d161', 'd8d6a3ca9a0a44e08cd4423ee3fb979d162', 'd8d6a3ca9a0a44e08cd4423ee3fb979d163', 'd8d6a3ca9a0a44e08cd4423ee3fb979d164', 'd8d6a3ca9a0a44e08cd4423ee3fb979d165', 'd8d6a3ca9a0a44e08cd4423ee3fb979d166', 'd8d6a3ca9a0a44e08cd4423ee3fb979d167', 'd8d6a3ca9a0a44e08cd4423ee3fb979d168', 'd8d6a3ca9a0a44e08cd4423ee3fb979d169', 'd8d6a3ca9a0a44e08cd4423ee3fb979d170', 'd8d6a3ca9a0a44e08cd4423ee3fb979d171', 'd8d6a3ca9a0a44e08cd4423ee3fb979d172', 'd8d6a3ca9a0a44e08cd4423ee3fb979d173', 'd8d6a3ca9a0a44e08cd4423ee3fb979d174', 'd8d6a3ca9a0a44e08cd4423ee3fb979d175', 'd8d6a3ca9a0a44e08cd4423ee3fb979d176', 'd8d6a3ca9a0a44e08cd4423ee3fb979d177', 'd8d6a3ca9a0a44e08cd4423ee3fb979d178', 'd8d6a3ca9a0a44e08cd4423ee3fb979d179', 'd8d6a3ca9a0a44e08cd4423ee3fb979d180', 'd8d6a3ca9a0a44e08cd4423ee3fb979d181', 'd8d6a3ca9a0a44e08cd4423ee3fb979d182', 'd8d6a3ca9a0a44e08cd4423ee3fb979d183', 'd8d6a3ca9a0a44e08cd4423ee3fb979d184', 'd8d6a3ca9a0a44e08cd4423ee3fb979d185', 'd8d6a3ca9a0a44e08cd4423ee3fb979d186', 'd8d6a3ca9a0a44e08cd4423ee3fb979d187', 'd8d6a3ca9a0a44e08cd4423ee3fb979d188', 'd8d6a3ca9a0a44e08cd4423ee3fb979d189', 'd8d6a3ca9a0a44e08cd4423ee3fb979d190', 'd8d6a3ca9a0a44e08cd4423ee3fb979d191', 'd8d6a3ca9a0a44e08cd4423ee3fb979d192', 'd8d6a3ca9a0a44e08cd4423ee3fb979d193', 'd8d6a3ca9a0a44e08cd4423ee3fb979d194', 'd8d6a3ca9a0a44e08cd4423ee3fb979d195', 'd8d6a3ca9a0a44e08cd4423ee3fb979d196', 'd8d6a3ca9a0a44e08cd4423ee3fb979d197', 'd8d6a3ca9a0a44e08cd4423ee3fb979d198', 'd8d6a3ca9a0a44e08cd4423ee3fb979d199', 'd8d6a3ca9a0a44e08cd4423ee3fb979d200', 'd8d6a3ca9a0a44e08cd4423ee3fb979d201', 'd8d6a3ca9a0a44e08cd4423ee3fb979d202', 'd8d6a3ca9a0a44e08cd4423ee3fb979d203', 'd8d6a3ca9a0a44e08cd4423ee3fb979d204', 'd8d6a3ca9a0a44e08cd4423ee3fb979d205', 'd8d6a3ca9a0a44e08cd4423ee3fb979d206', 'd8d6a3ca9a0a44e08cd4423ee3fb979d207', 'd8d6a3ca9a0a44e08cd4423ee3fb979d208', 'd8d6a3ca9a0a44e08cd4423ee3fb979d209', 'd8d6a3ca9a0a44e08cd4423ee3fb979d210', 'd8d6a3ca9a0a44e08cd4423ee3fb979d211', 'd8d6a3ca9a0a44e08cd4423ee3fb979d212', 'd8d6a3ca9a0a44e08cd4423ee3fb979d213', 'd8d6a3ca9a0a44e08cd4423ee3fb979d214', 'd8d6a3ca9a0a44e08cd4423ee3fb979d215', 'd8d6a3ca9a0a44e08cd4423ee3fb979d216', 'd8d6a3ca9a0a44e08cd4423ee3fb979d217', 'd8d6a3ca9a0a44e08cd4423ee3fb979d218', 'd8d6a3ca9a0a44e08cd4423ee3fb979d219', 'd8d6a3ca9a0a44e08cd4423ee3fb979d220', 'd8d6a3ca9a0a44e08cd4423ee3fb979d221', 'd8d6a3ca9a0a44e08cd4423ee3fb979d222', 'd8d6a3ca9a0a44e08cd4423ee3fb979d223', 'd8d6a3ca9a0a44e08cd4423ee3fb979d224', 'd8d6a3ca9a0a44e08cd4423ee3fb979d225', 'd8d6a3ca9a0a44e08cd4423ee3fb979d226', 'd8d6a3ca9a0a44e08cd4423ee3fb979d227', 'd8d6a3ca9a0a44e08cd4423ee3fb979d228', 'd8d6a3ca9a0a44e08cd4423ee3fb979d229', 'd8d6a3ca9a0a44e08cd4423ee3fb979d230', 'd8d6a3ca9a0a44e08cd4423ee3fb979d231', 'd8d6a3ca9a0a44e08cd4423ee3fb979d232', 'd8d6a3ca9a0a44e08cd4423ee3fb979d233', 'd8d6a3ca9a0a44e08cd4423ee3fb979d234', 'd8d6a3ca9a0a44e08cd4423ee3fb979d235', 'd8d6a3ca9a0a44e08cd4423ee3fb979d236', 'd8d6a3ca9a0a44e08cd4423ee3fb979d237', 'd8d6a3ca9a0a44e08cd4423ee3fb979d238', 'd8d6a3ca9a0a44e08cd4423ee3fb979d239', 'd8d6a3ca9a0a44e08cd4423ee3fb979d240', 'd8d6a3ca9a0a44e08cd4423ee3fb979d241', 'd8d6a3ca9a0a44e08cd4423ee3fb979d242', 'd8d6a3ca9a0a44e08cd4423ee3fb979d243', 'd8d6a3ca9a0a44e08cd4423ee3fb979d244', 'd8d6a3ca9a0a44e08cd4423ee3fb979d245', 'd8d6a3ca9a0a44e08cd4423ee3fb979d246', 'd8d6a3ca9a0a44e08cd4423ee3fb979d247', 'd8d6a3ca9a0a44e08cd4423ee3fb979d248', 'd8d6a3ca9a0a44e08cd4423ee3fb979d249', 'd8d6a3ca9a0a44e08cd4423ee3fb979d250', 'd8d6a3ca9a0a44e08cd4423ee3fb979d251', 'd8d6a3ca9a0a44e08cd4423ee3fb979d252', 'd8d6a3ca9a0a44e08cd4423ee3fb979d253', 'd8d6a3ca9a0a44e08cd4423ee3fb979d254', 'd8d6a3ca9a0a44e08cd4423ee3fb979d255', 'd8d6a3ca9a0a44e08cd4423ee3fb979d256', 'd8d6a3ca9a0a44e08cd4423ee3fb979d257', 'd8d6a3ca9a0a44e08cd4423ee3fb979d258', 'd8d6a3ca9a0a44e08cd4423ee3fb979d259', 'd8d6a3ca9a0a44e08cd4423ee3fb979d260', 'd8d6a3ca9a0a44e08cd4423ee3fb979d261', 'd8d6a3ca9a0a44e08cd4423ee3fb979d262', 'd8d6a3ca9a0a44e08cd4423ee3fb979d263', 'd8d6a3ca9a0a44e08cd4423ee3fb979d264', 'd8d6a3ca9a0a44e08cd4423ee3fb979d265', 'd8d6a3ca9a0a44e08cd4423ee3fb979d266', 'd8d6a3ca9a0a44e08cd4423ee3fb979d267', 'd8d6a3ca9a0a44e08cd4423ee3fb979d268', 'd8d6a3ca9a0a44e08cd4423ee3fb979d269', 'd8d6a3ca9a0a44e08cd4423ee3fb979d270', 'd8d6a3ca9a0a44e08cd4423ee3fb979d271', 'd8d6a3ca9a0a44e08cd4423ee3fb979d272', 'd8d6a3ca9a0a44e08cd4423ee3fb979d273', 'd8d6a3ca9a0a44e08cd4423ee3fb979d274', 'd8d6a3ca9a0a44e08cd4423ee3fb979d275', 'd8d6a3ca9a0a44e08cd4423ee3fb979d276', 'd8d6a3ca9a0a44e08cd4423ee3fb979d277', 'd8d6a3ca9a0a44e08cd4423ee3fb979d278', 'd8d6a3ca9a0a44e08cd4423ee3fb979d279', 'd8d6a3ca9a0a44e08cd4423ee3fb979d280', 'd8d6a3ca9a0a44e08cd4423ee3fb979d281', 'd8d6a3ca9a0a44e08cd4423ee3fb979d282', 'd8d6a3ca9a0a44e08cd4423ee3fb979d283', 'd8d6a3ca9a0a44e08cd4423ee3fb979d284', 'd8d6a3ca9a0a44e08cd4423ee3fb979d285', 'd8d6a3ca9a0a44e08cd4423ee3fb979d286', 'd8d6a3ca9a0a44e08cd4423ee3fb979d287', 'd8d6a3ca9a0a44e08cd4423ee3fb979d288', 'd8d6a3ca9a0a44e08cd4423ee3fb979d289', 'd8d6a3ca9a0a44e08cd4423ee3fb979d290', 'd8d6a3ca9a0a44e08cd4423ee3fb979d291', 'd8d6a3ca9a0a44e08cd4423ee3fb979d292', 'd8d6a3ca9a0a44e08cd4423ee3fb979d293', 'd8d6a3ca9a0a44e08cd4423ee3fb979d294', 'd8d6a3ca9a0a44e08cd4423ee3fb979d295', 'd8d6a3ca9a0a44e08cd4423ee3fb979d296', 'd8d6a3ca9a0a44e08cd4423ee3fb979d297', 'd8d6a3ca9a0a44e08cd4423ee3fb979d298', 'd8d6a3ca9a0a44e08cd4423ee3fb979d299', 'd8d6a3ca9a0a44e08cd4423ee3fb979d300', 'd8d6a3ca9a0a44e08cd4423ee3fb979d301', 'd8d6a3ca9a0a44e08cd4423ee3fb979d302', 'd8d6a3ca9a0a44e08cd4423ee3fb979d303', 'd8d6a3ca9a0a44e08cd4423ee3fb979d304', 'd8d6a3ca9a0a44e08cd4423ee3fb979d305', 'd8d6a3ca9a0a44e08cd4423ee3fb979d306', 'd8d6a3ca9a0a44e08cd4423ee3fb979d307', 'd8d6a3ca9a0a44e08cd4423ee3fb979d308', 'd8d6a3ca9a0a44e08cd4423ee3fb979d309', 'd8d6a3ca9a0a44e08cd4423ee3fb979d310', 'd8d6a3ca9a0a44e08cd4423ee3fb979d311', 'd8d6a3ca9a0a44e08cd4423ee3fb979d312', 'd8d6a3ca9a0a44e08cd4423ee3fb979d313', 'd8d6a3ca9a0a44e08cd4423ee3fb979d314', 'd8d6a3ca9a0a44e08cd4423ee3fb979d315', 'd8d6a3ca9a0a44e08cd4423ee3fb979d316', 'd8d6a3ca9a0a44e08cd4423ee3fb979d317', 'd8d6a3ca9a0a44e08cd4423ee3fb979d318', 'd8d6a3ca9a0a44e08cd4423ee3fb979d319', 'd8d6a3ca9a0a44e08cd4423ee3fb979d320', 'd8d6a3ca9a0a44e08cd4423ee3fb979d321', 'd8d6a3ca9a0a44e08cd4423ee3fb979d322', 'd8d6a3ca9a0a44e08cd4423ee3fb979d323', 'd8d6a3ca9a0a44e08cd4423ee3fb979d324', 'd8d6a3ca9a0a44e08cd4423ee3fb979d325', 'd8d6a3ca9a0a44e08cd4423ee3fb979d326', 'd8d6a3ca9a0a44e08cd4423ee3fb979d327', 'd8d6a3ca9a0a44e08cd4423ee3fb979d328', 'd8d6a3ca9a0a44e08cd4423ee3fb979d329', 'd8d6a3ca9a0a44e08cd4423ee3fb979d330', 'd8d6a3ca9a0a44e08cd4423ee3fb979d331', 'd8d6a3ca9a0a44e08cd4423ee3fb979d332', 'd8d6a3ca9a0a44e08cd4423ee3fb979d333', 'd8d6a3ca9a0a44e08cd4423ee3fb979d334', 'd8d6a3ca9a0a44e08cd4423ee3fb979d335', 'd8d6a3ca9a0a44e08cd4423ee3fb979d336', 'd8d6a3ca9a0a44e08cd4423ee3fb979d337', 'd8d6a3ca9a0a44e08cd4423ee3fb979d338', 'd8d6a3ca9a0a44e08cd4423ee3fb979d339', 'd8d6a3ca9a0a44e08cd4423ee3fb979d340', 'd8d6a3ca9a0a44e08cd4423ee3fb979d341', 'd8d6a3ca9a0a44e08cd4423ee3fb979d342', 'd8d6a3ca9a0a44e08cd4423ee3fb979d343', 'd8d6a3ca9a0a44e08cd4423ee3fb979d344', 'd8d6a3ca9a0a44e08cd4423ee3fb979d345', 'd8d6a3ca9a0a44e08cd4423ee3fb979d346', 'd8d6a3ca9a0a44e08cd4423ee3fb979d347', 'd8d6a3ca9a0a44e08cd4423ee3fb979d348', 'd8d6a3ca9a0a44e08cd4423ee3fb979d349', 'd8d6a3ca9a0a44e08cd4423ee3fb979d350', 'd8d6a3ca9a0a44e08cd4423ee3fb979d351', 'd8d6a3ca9a0a44e08cd4423ee3fb979d352', 'd8d6a3ca9a0a44e08cd4423ee3fb979d353', 'd8d6a3ca9a0a44e08cd4423ee3fb979d354', 'd8d6a3ca9a0a44e08cd4423ee3fb979d355', 'd8d6a3ca9a0a44e08cd4423ee3fb979d356', 'd8d6a3ca9a0a44e08cd4423ee3fb979d357', 'd8d6a3ca9a0a44e08cd4423ee3fb979d358', 'd8d6a3ca9a0a44e08cd4423ee3fb979d359', 'd8d6a3ca9a0a44e08cd4423ee3fb979d360', 'd8d6a3ca9a0a44e08cd4423ee3fb979d361', 'd8d6a3ca9a0a44e08cd4423ee3fb979d362', 'd8d6a3ca9a0a44e08cd4423ee3fb979d363', 'd8d6a3ca9a0a44e08cd4423ee3fb979d364', 'd8d6a3ca9a0a44e08cd4423ee3fb979d365', 'd8d6a3ca9a0a44e08cd4423ee3fb979d366', 'd8d6a3ca9a0a44e08cd4423ee3fb979d367', 'd8d6a3ca9a0a44e08cd4423ee3fb979d368', 'd8d6a3ca9a0a44e08cd4423ee3fb979d369', 'd8d6a3ca9a0a44e08cd4423ee3fb979d370', 'd8d6a3ca9a0a44e08cd4423ee3fb979d371', 'd8d6a3ca9a0a44e08cd4423ee3fb979d372', 'd8d6a3ca9a0a44e08cd4423ee3fb979d373', 'd8d6a3ca9a0a44e08cd4423ee3fb979d374', 'd8d6a3ca9a0a44e08cd4423ee3fb979d375', 'd8d6a3ca9a0a44e08cd4423ee3fb979d376', 'd8d6a3ca9a0a44e08cd4423ee3fb979d377', 'd8d6a3ca9a0a44e08cd4423ee3fb979d378', 'd8d6a3ca9a0a44e08cd4423ee3fb979d379', 'd8d6a3ca9a0a44e08cd4423ee3fb979d380', 'd8d6a3ca9a0a44e08cd4423ee3fb979d381', 'd8d6a3ca9a0a44e08cd4423ee3fb979d382', 'd8d6a3ca9a0a44e08cd4423ee3fb979d383', 'd8d6a3ca9a0a44e08cd4423ee3fb979d384', 'd8d6a3ca9a0a44e08cd4423ee3fb979d385', 'd8d6a3ca9a0a44e08cd4423ee3fb979d386', 'd8d6a3ca9a0a44e08cd4423ee3fb979d387', 'd8d6a3ca9a0a44e08cd4423ee3fb979d388', 'd8d6a3ca9a0a44e08cd4423ee3fb979d389', 'd8d6a3ca9a0a44e08cd4423ee3fb979d390', 'd8d6a3ca9a0a44e08cd4423ee3fb979d391', 'd8d6a3ca9a0a44e08cd4423ee3fb979d392', 'd8d6a3ca9a0a44e08cd4423ee3fb979d393', 'd8d6a3ca9a0a44e08cd4423ee3fb979d394', 'd8d6a3ca9a0a44e08cd4423ee3fb979d395', 'd8d6a3ca9a0a44e08cd4423ee3fb979d396', 'd8d6a3ca9a0a44e08cd4423ee3fb979d397', 'd8d6a3ca9a0a44e08cd4423ee3fb979d398', 'd8d6a3ca9a0a44e08cd4423ee3fb979d399', 'd8d6a3ca9a0a44e08cd4423ee3fb979d400', 'd8d6a3ca9a0a44e08cd4423ee3fb979d401', 'd8d6a3ca9a0a44e08cd4423ee3fb979d402', 'd8d6a3ca9a0a44e08cd4423ee3fb979d403', 'd8d6a3ca9a0a44e08cd4423ee3fb979d404', 'd8d6a3ca9a0a44e08cd4423ee3fb979d405', 'd8d6a3ca9a0a44e08cd4423ee3fb979d406', 'd8d6a3ca9a0a44e08cd4423ee3fb979d407', 'd8d6a3ca9a0a44e08cd4423ee3fb979d408', 'd8d6a3ca9a0a44e08cd4423ee3fb979d409', 'd8d6a3ca9a0a44e08cd4423ee3fb979d410', 'd8d6a3ca9a0a44e08cd4423ee3fb979d411', 'd8d6a3ca9a0a44e08cd4423ee3fb979d412', 'd8d6a3ca9a0a44e08cd4423ee3fb979d413', 'd8d6a3ca9a0a44e08cd4423ee3fb979d414', 'd8d6a3ca9a0a44e08cd4423ee3fb979d415', 'd8d6a3ca9a0a44e08cd4423ee3fb979d416', 'd8d6a3ca9a0a44e08cd4423ee3fb979d417', 'd8d6a3ca9a0a44e08cd4423ee3fb979d418', 'd8d6a3ca9a0a44e08cd4423ee3fb979d419', 'd8d6a3ca9a0a44e08cd4423ee3fb979d420', 'd8d6a3ca9a0a44e08cd4423ee3fb979d421', 'd8d6a3ca9a0a44e08cd4423ee3fb979d422', 'd8d6a3ca9a0a44e08cd4423ee3fb979d423', 'd8d6a3ca9a0a44e08cd4423ee3fb979d424', 'd8d6a3ca9a0a44e08cd4423ee3fb979d425', 'd8d6a3ca9a0a44e08cd4423ee3fb979d426', 'd8d6a3ca9a0a44e08cd4423ee3fb979d427', 'd8d6a3ca9a0a44e08cd4423ee3fb979d428', 'd8d6a3ca9a0a44e08cd4423ee3fb979d429', 'd8d6a3ca9a0a44e08cd4423ee3fb979d430', 'd8d6a3ca9a0a44e08cd4423ee3fb979d431', 'd8d6a3ca9a0a44e08cd4423ee3fb979d432', 'd8d6a3ca9a0a44e08cd4423ee3fb979d433', 'd8d6a3ca9a0a44e08cd4423ee3fb979d434', 'd8d6a3ca9a0a44e08cd4423ee3fb979d435', 'd8d6a3ca9a0a44e08cd4423ee3fb979d436', 'd8d6a3ca9a0a44e08cd4423ee3fb979d437', 'd8d6a3ca9a0a44e08cd4423ee3fb979d438', 'd8d6a3ca9a0a44e08cd4423ee3fb979d439', 'd8d6a3ca9a0a44e08cd4423ee3fb979d440', 'd8d6a3ca9a0a44e08cd4423ee3fb979d441', 'd8d6a3ca9a0a44e08cd4423ee3fb979d442', 'd8d6a3ca9a0a44e08cd4423ee3fb979d443', 'd8d6a3ca9a0a44e08cd4423ee3fb979d444', 'd8d6a3ca9a0a44e08cd4423ee3fb979d445', 'd8d6a3ca9a0a44e08cd4423ee3fb979d446', 'd8d6a3ca9a0a44e08cd4423ee3fb979d447', 'd8d6a3ca9a0a44e08cd4423ee3fb979d448', 'd8d6a3ca9a0a44e08cd4423ee3fb979d449', 'd8d6a3ca9a0a44e08cd4423ee3fb979d450', 'd8d6a3ca9a0a44e08cd4423ee3fb979d451', 'd8d6a3ca9a0a44e08cd4423ee3fb979d452', 'd8d6a3ca9a0a44e08cd4423ee3fb979d453', 'd8d6a3ca9a0a44e08cd4423ee3fb979d454', 'd8d6a3ca9a0a44e08cd4423ee3fb979d455', 'd8d6a3ca9a0a44e08cd4423ee3fb979d456', 'd8d6a3ca9a0a44e08cd4423ee3fb979d457', 'd8d6a3ca9a0a44e08cd4423ee3fb979d458', 'd8d6a3ca9a0a44e08cd4423ee3fb979d459', 'd8d6a3ca9a0a44e08cd4423ee3fb979d460', 'd8d6a3ca9a0a44e08cd4423ee3fb979d461', 'd8d6a3ca9a0a44e08cd4423ee3fb979d462', 'd8d6a3ca9a0a44e08cd4423ee3fb979d463', 'd8d6a3ca9a0a44e08cd4423ee3fb979d464', 'd8d6a3ca9a0a44e08cd4423ee3fb979d465', 'd8d6a3ca9a0a44e08cd4423ee3fb979d466', 'd8d6a3ca9a0a44e08cd4423ee3fb979d467', 'd8d6a3ca9a0a44e08cd4423ee3fb979d468', 'd8d6a3ca9a0a44e08cd4423ee3fb979d469', 'd8d6a3ca9a0a44e08cd4423ee3fb979d470', 'd8d6a3ca9a0a44e08cd4423ee3fb979d471', 'd8d6a3ca9a0a44e08cd4423ee3fb979d472', 'd8d6a3ca9a0a44e08cd4423ee3fb979d473', 'd8d6a3ca9a0a44e08cd4423ee3fb979d474', 'd8d6a3ca9a0a44e08cd4423ee3fb979d475', 'd8d6a3ca9a0a44e08cd4423ee3fb979d476', 'd8d6a3ca9a0a44e08cd4423ee3fb979d477', 'd8d6a3ca9a0a44e08cd4423ee3fb979d478', 'd8d6a3ca9a0a44e08cd4423ee3fb979d479', 'd8d6a3ca9a0a44e08cd4423ee3fb979d480', 'd8d6a3ca9a0a44e08cd4423ee3fb979d481', 'd8d6a3ca9a0a44e08cd4423ee3fb979d482', 'd8d6a3ca9a0a44e08cd4423ee3fb979d483', 'd8d6a3ca9a0a44e08cd4423ee3fb979d484', 'd8d6a3ca9a0a44e08cd4423ee3fb979d485', 'd8d6a3ca9a0a44e08cd4423ee3fb979d486', 'd8d6a3ca9a0a44e08cd4423ee3fb979d487', 'd8d6a3ca9a0a44e08cd4423ee3fb979d488', 'd8d6a3ca9a0a44e08cd4423ee3fb979d489', 'd8d6a3ca9a0a44e08cd4423ee3fb979d490', 'd8d6a3ca9a0a44e08cd4423ee3fb979d491', 'd8d6a3ca9a0a44e08cd4423ee3fb979d492', 'd8d6a3ca9a0a44e08cd4423ee3fb979d493', 'd8d6a3ca9a0a44e08cd4423ee3fb979d494', 'd8d6a3ca9a0a44e08cd4423ee3fb979d495', 'd8d6a3ca9a0a44e08cd4423ee3fb979d496', 'd8d6a3ca9a0a44e08cd4423ee3fb979d497', 'd8d6a3ca9a0a44e08cd4423ee3fb979d498', 'd8d6a3ca9a0a44e08cd4423ee3fb979d499', 'd8d6a3ca9a0a44e08cd4423ee3fb979d500', 'd8d6a3ca9a0a44e08cd4423ee3fb979d501', 'd8d6a3ca9a0a44e08cd4423ee3fb979d502', 'd8d6a3ca9a0a44e08cd4423ee3fb979d503', 'd8d6a3ca9a0a44e08cd4423ee3fb979d504', 'd8d6a3ca9a0a44e08cd4423ee3fb979d505', 'd8d6a3ca9a0a44e08cd4423ee3fb979d506', 'd8d6a3ca9a0a44e08cd4423ee3fb979d507', 'd8d6a3ca9a0a44e08cd4423ee3fb979d508', 'd8d6a3ca9a0a44e08cd4423ee3fb979d509', 'd8d6a3ca9a0a44e08cd4423ee3fb979d510', 'd8d6a3ca9a0a44e08cd4423ee3fb979d511', 'd8d6a3ca9a0a44e08cd4423ee3fb979d512', 'd8d6a3ca9a0a44e08cd4423ee3fb979d513', 'd8d6a3ca9a0a44e08cd4423ee3fb979d514', 'd8d6a3ca9a0a44e08cd4423ee3fb979d515', 'd8d6a3ca9a0a44e08cd4423ee3fb979d516', 'd8d6a3ca9a0a44e08cd4423ee3fb979d517', 'd8d6a3ca9a0a44e08cd4423ee3fb979d518', 'd8d6a3ca9a0a44e08cd4423ee3fb979d519', 'd8d6a3ca9a0a44e08cd4423ee3fb979d520', 'd8d6a3ca9a0a44e08cd4423ee3fb979d521', 'd8d6a3ca9a0a44e08cd4423ee3fb979d522', 'd8d6a3ca9a0a44e08cd4423ee3fb979d523', 'd8d6a3ca9a0a44e08cd4423ee3fb979d524', 'd8d6a3ca9a0a44e08cd4423ee3fb979d525', 'd8d6a3ca9a0a44e08cd4423ee3fb979d526', 'd8d6a3ca9a0a44e08cd4423ee3fb979d527', 'd8d6a3ca9a0a44e08cd4423ee3fb979d528', 'd8d6a3ca9a0a44e08cd4423ee3fb979d529', 'd8d6a3ca9a0a44e08cd4423ee3fb979d530', 'd8d6a3ca9a0a44e08cd4423ee3fb979d531', 'd8d6a3ca9a0a44e08cd4423ee3fb979d532', 'd8d6a3ca9a0a44e08cd4423ee3fb979d533', 'd8d6a3ca9a0a44e08cd4423ee3fb979d534', 'd8d6a3ca9a0a44e08cd4423ee3fb979d535', 'd8d6a3ca9a0a44e08cd4423ee3fb979d536', 'd8d6a3ca9a0a44e08cd4423ee3fb979d537', 'd8d6a3ca9a0a44e08cd4423ee3fb979d538', 'd8d6a3ca9a0a44e08cd4423ee3fb979d539', 'd8d6a3ca9a0a44e08cd4423ee3fb979d540', 'd8d6a3ca9a0a44e08cd4423ee3fb979d541', 'd8d6a3ca9a0a44e08cd4423ee3fb979d542', 'd8d6a3ca9a0a44e08cd4423ee3fb979d543', 'd8d6a3ca9a0a44e08cd4423ee3fb979d544', 'd8d6a3ca9a0a44e08cd4423ee3fb979d545', 'd8d6a3ca9a0a44e08cd4423ee3fb979d546', 'd8d6a3ca9a0a44e08cd4423ee3fb979d547', 'd8d6a3ca9a0a44e08cd4423ee3fb979d548', 'd8d6a3ca9a0a44e08cd4423ee3fb979d549', 'd8d6a3ca9a0a44e08cd4423ee3fb979d550', 'd8d6a3ca9a0a44e08cd4423ee3fb979d551', 'd8d6a3ca9a0a44e08cd4423ee3fb979d552', 'd8d6a3ca9a0a44e08cd4423ee3fb979d553', 'd8d6a3ca9a0a44e08cd4423ee3fb979d554', 'd8d6a3ca9a0a44e08cd4423ee3fb979d555', 'd8d6a3ca9a0a44e08cd4423ee3fb979d556', 'd8d6a3ca9a0a44e08cd4423ee3fb979d557', 'd8d6a3ca9a0a44e08cd4423ee3fb979d558', 'd8d6a3ca9a0a44e08cd4423ee3fb979d559', 'd8d6a3ca9a0a44e08cd4423ee3fb979d560', 'd8d6a3ca9a0a44e08cd4423ee3fb979d561', 'd8d6a3ca9a0a44e08cd4423ee3fb979d562', 'd8d6a3ca9a0a44e08cd4423ee3fb979d563', 'd8d6a3ca9a0a44e08cd4423ee3fb979d564', 'd8d6a3ca9a0a44e08cd4423ee3fb979d565', 'd8d6a3ca9a0a44e08cd4423ee3fb979d566', 'd8d6a3ca9a0a44e08cd4423ee3fb979d567', 'd8d6a3ca9a0a44e08cd4423ee3fb979d568', 'd8d6a3ca9a0a44e08cd4423ee3fb979d569', 'd8d6a3ca9a0a44e08cd4423ee3fb979d570', 'd8d6a3ca9a0a44e08cd4423ee3fb979d571', 'd8d6a3ca9a0a44e08cd4423ee3fb979d572', 'd8d6a3ca9a0a44e08cd4423ee3fb979d573', 'd8d6a3ca9a0a44e08cd4423ee3fb979d574', 'd8d6a3ca9a0a44e08cd4423ee3fb979d575', 'd8d6a3ca9a0a44e08cd4423ee3fb979d576', 'd8d6a3ca9a0a44e08cd4423ee3fb979d577', 'd8d6a3ca9a0a44e08cd4423ee3fb979d578', 'd8d6a3ca9a0a44e08cd4423ee3fb979d579', 'd8d6a3ca9a0a44e08cd4423ee3fb979d580', 'd8d6a3ca9a0a44e08cd4423ee3fb979d581', 'd8d6a3ca9a0a44e08cd4423ee3fb979d582', 'd8d6a3ca9a0a44e08cd4423ee3fb979d583', 'd8d6a3ca9a0a44e08cd4423ee3fb979d584', 'd8d6a3ca9a0a44e08cd4423ee3fb979d585', 'd8d6a3ca9a0a44e08cd4423ee3fb979d586', 'd8d6a3ca9a0a44e08cd4423ee3fb979d587']\n", + "['2b6e45cd99ff46b08242282a423642d40', '2b6e45cd99ff46b08242282a423642d41', '2b6e45cd99ff46b08242282a423642d42', '2b6e45cd99ff46b08242282a423642d43', '2b6e45cd99ff46b08242282a423642d44', '2b6e45cd99ff46b08242282a423642d45']\n", + "['05524682d2e9425c83c9b57693182c500', '05524682d2e9425c83c9b57693182c501', '05524682d2e9425c83c9b57693182c502', '05524682d2e9425c83c9b57693182c503', '05524682d2e9425c83c9b57693182c504', '05524682d2e9425c83c9b57693182c505', '05524682d2e9425c83c9b57693182c506']\n", + "['4eb170648fbe47c3b87b2831a97f0dd80', '4eb170648fbe47c3b87b2831a97f0dd81', '4eb170648fbe47c3b87b2831a97f0dd82', '4eb170648fbe47c3b87b2831a97f0dd83', '4eb170648fbe47c3b87b2831a97f0dd84', '4eb170648fbe47c3b87b2831a97f0dd85']\n", + "['cec3e82f0432402e940a0299bfa086fe0', 'cec3e82f0432402e940a0299bfa086fe1', 'cec3e82f0432402e940a0299bfa086fe2', 'cec3e82f0432402e940a0299bfa086fe3', 'cec3e82f0432402e940a0299bfa086fe4', 'cec3e82f0432402e940a0299bfa086fe5', 'cec3e82f0432402e940a0299bfa086fe6', 'cec3e82f0432402e940a0299bfa086fe7', 'cec3e82f0432402e940a0299bfa086fe8', 'cec3e82f0432402e940a0299bfa086fe9']\n", + "Embeddings saved\n" + ] + } + ], + "source": [ + "# creating the vector store\n", + "vector_store = create_vector_store(embeddings, documents, docs_id, num_pages)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# creating a function to add documents to the vector store\n", + "def add_documents_to_vector_store(embeddings, documents: list, docs_id: list, num_pages: list):\n", + " # loading the vector store\n", + " vector_store = load_embedded_data(embeddings)\n", + " for i in range(len(documents)):\n", + " doc_id = docs_id[i]\n", + " page_ids = [doc_id+ str(i) for i in range(num_pages[i])]\n", + " vector_store.add_documents(documents=documents[i], ids=page_ids)\n", + " print (\"Documents added to the vector store\")\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* [SIM=0.182989] 202 4-5. Using the driving support systems\n", "COROLLA_UInside of displayed lines is \n", "black\n", "Indicates that the system is not able to recognize white (yellow) \n", @@ -558,59 +411,125 @@ "is not displayed. ( P.204)\n", "●Vehicle sway warning function\n", "This function operates when all of \n", - "https://www.MyCarManual.com [{'source': 'data/corolla-2020-toyota-owners-manual.pdf', 'page': 201}]\n", - "* [SIM=0.201506] 214 4-5. Using the driving support systems\n", - "COROLLA_USet speed\n", - "Indicators\n", - "■Operation switches\n", - "Vehicle-to-vehicle distance \n", - "switch\n", - "“+RES” switchCruise control main switchCancel switch“-SET” switch\n", - "WARNING\n", - "■Before using dynamic radar \n", - "cruise control with full-speed \n", - "range\n", - "●Driving safely is the sole responsibility of the driver. Do not rely solely o n the system, \n", - "and drive safely by always pay-ing careful attention to your sur-roundings.\n", - "●The dynamic radar cruise con-\n", - "trol with full-speed range pro-\n", - "vides driving assistance to reduce the driver’s burden. \n", - "However, there are limitations to \n", - "the assistance provided.\n", - "Read the following conditions carefully. Do not overly rely on this system and always drive carefully.\n", - "• When the sensor may not be \n", - "correctly detect ing the vehicle \n", - "ahead: \n", - "P.222• Conditions under which the \n", - "vehicle-to-vehicle distance con-\n", - "trol mode may not function cor-rectly: P.222\n", - "●Set the speed appropriately \n", - "depending on the speed limit, \n", - "traffic flow, road conditions, weather conditions, etc. The \n", - "driver is responsible for check-\n", - "ing the set speed.\n", - "●Even when the system is func-tioning normally, the condition of \n", - "the preceding vehicle as \n", - "detected by the system may dif-fer from the condition observed \n", - "by the driver. Therefore, the \n", - "driver must always remain alert, assess the danger of each situ-\n", - "ation and drive safely. Relying \n", - "solely on this system or assum-ing the system ensures safety \n", - "while driving can lead to an \n", - "accident, result ing in death or \n", - "serious injury.\n", - "●Switch the dynamic radar cruise \n", - "control with full-speed range \n", - "setting to off, using the cruise control main switch when not in \n", - "use.\n", - "■Cautions regarding the driv-\n", - "ing assist systems\n", - "Observe the following precau-tions, as there ar e limitations to \n", - "the assistance provided by the \n", - "system. Failure to do so may \n", - "cause an accident resulting in \n", - "death or serious injury.\n", - "https://www.MyCarManual.com [{'source': 'data/corolla-2020-toyota-owners-manual.pdf', 'page': 213}]\n" + "https://www.MyCarManual.com [{'source': 'data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 201}]\n", + "* [SIM=0.184764] 199 4-5. Using the driving support systems\n", + "COROLLA_U4Drivingthat the vehicle might depart \n", + "from its lane or course*, the sys-\n", + "tem provides assistance as nec-\n", + "essary by operating the steering wheel in small amounts for a short period of time to keep the vehicle in its lane.\n", + "If the system detects that the steer-\n", + "ing wheel has not been operated \n", + "for a fixed amount of time or the steering wheel is not being firmly gripped, a warning is displayed on the multi-information display and the function is t emporarily can-\n", + "celed.\n", + "Vehicles with a Blind Spot Monitor: \n", + "When the system de termines that \n", + "the vehicle might depart from its lane and that the possibility of a col-lision with an over taking vehicle in \n", + "the adjacent lane is high, the steer-ing assist function will operate even if the turn signals are operating.\n", + "*: Boundary between asphalt and \n", + "the side of the road, such as grass, soil, or a curb■Vehicle sway warning func-\n", + "tion\n", + "When the vehicle is swaying within a lane, the warning buzzer will sound and a mes-sage will be displayed on the multi-information display to alert the driver.\n", + "https://www.MyCarManual.com [{'source': 'data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 198}]\n", + "* [SIM=0.188099] 200 4-5. Using the driving support systems\n", + "COROLLA_U■Lane centering function\n", + "This function is linked with \n", + "dynamic radar cruise control with full-speed range and pro-vides the required assistance by operating the steering wheel to keep the vehicle in its current lane.\n", + "When dynamic radar cruise control \n", + "with full-speed range is not operat-\n", + "ing, the lane centering function does not operate.\n", + "In situations where the white (yel-\n", + "low) lane lines are difficult to see or are not visible, such as when in a traffic jam, this function will operate to help follow a preceding vehicle by monitoring the position of the preceding vehicle.\n", + "If the system detects that the steer-\n", + "ing wheel has not been operated for a fixed amount of time or the steering wheel is not being firmly gripped, a warning is displayed on the multi-information display and the function is temporarily can-celed.\n", + "Press the LTA switch to turn the \n", + "LTA system on.\n", + "The LTA indicator illuminates and a \n", + "message is displayed on the multi-information display.\n", + "Press the LTA swit ch again to turn \n", + "the LTA system off.\n", + "When the LTA system is turned on \n", + "or off, operation of the LTA system continues in the same condition the next time the engine is started.Turning LTA system on\n", + "https://www.MyCarManual.com [{'source': 'data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 199}]\n", + "* [SIM=0.203011] 211 4-5. Using the driving support systems\n", + "COROLLA_U4Driving*: Boundary between asphalt and \n", + "the side of the road, such as \n", + "grass, soil, or a curb\n", + "■Operation conditions of each \n", + "function\n", + "●Lane departure alert function\n", + "This function oper ates when all of \n", + "the following cond itions are met.\n", + "• LDA is turned on.• Vehicle speed is approximately 32 \n", + "mph (50 km/h) or more.\n", + "• System recognizes white (yellow) \n", + "lane lines or a course*. (When a \n", + "white [yellow] line or course* is \n", + "recognized on only one side, the system will operat e only for the \n", + "recognized side.)\n", + "• Width of traffic lane is approxi-\n", + "mately 9.8 ft. ( 3 m) or more.\n", + "• Turn signal lever is not operated.\n", + "(Vehicles with a Blind Spot Moni-tor: Except when a nother vehicle \n", + "is in the lane on the side where \n", + "the turn signal was operated)\n", + "• Vehicle is not being driven around \n", + "a sharp curve.\n", + "• No system malfunctions are \n", + "detected. ( P.212)\n", + "*: Boundary between asphalt and \n", + "the side of the road, such as \n", + "grass, soil, or a curb\n", + "●Steering assist function\n", + "This function oper ates when all of \n", + "the following conditions are met in addition to the operation conditions for the lane departure alert function.\n", + "• Setting for “Steering Assist” in \n", + "of the multi-information display is \n", + "set to “ON”. ( P.548)\n", + "• Vehicle is not accelerated or \n", + "decelerated by a fixed amount or more.\n", + "• Steering wheel is not operated \n", + "with a steering force level suitable for changing lanes.\n", + "• ABS, VSC, TRAC and PCS are \n", + "not operating.• TRAC or VSC is not turned off.• Hands off steering wheel warning \n", + "is not displayed. ( P.212)\n", + "●Vehicle sway warning function\n", + "This function operates when all of \n", + "the following conditions are met.\n", + "• Setting for “Sway Warning” in \n", + "of the multi-information display is \n", + "set to “ON”. ( P.548)\n", + "• Vehicle speed is approximately 32 \n", + "mph (50 km/h) or more.\n", + "• Width of traffic lane is approxi-\n", + "mately 9.8 ft. (3 m) or more.\n", + "• No system malfunctions are \n", + "detected. ( P.212)\n", + "■Temporary cancelation of func-\n", + "tions\n", + "When operation conditions are no longer met, a func tion may be tem-\n", + "porarily canceled. However, when \n", + "the operation conditions are met \n", + "again, operation of the function is \n", + "automatically restored. ( P.211)\n", + "■Steering assist function\n", + "●Depending on the vehicle speed, lane departure situation, road con-ditions, etc., the driver may not \n", + "feel the function is operating or \n", + "the function may not operate at \n", + "all.\n", + "●The steering control of the func-tion is overridden by the driver’s \n", + "steering wheel operation.\n", + "●Do not attempt to test the opera-tion of the steering assist function.\n", + "■Lane departure alert function\n", + "●The warning buzzer may be diffi-cult to hear due to external noise, \n", + "audio playback, etc.\n", + "●If the edge of the course* is not \n", + "clear or straight, the lane depar-\n", + "ture alert function may not oper-\n", + "ate.\n", + "●Vehicles with a Blind Spot Monitor: It may not be possible for the sys-tem to determine if there is a dan-\n", + "ger of a collision with a vehicle in \n", + "an adjacent lane.\n", + "https://www.MyCarManual.com [{'source': 'data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 210}]\n" ] } ], @@ -624,32 +543,6 @@ " print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")" ] }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Embeddings saved\n" - ] - } - ], - "source": [ - "save_embedded_data(vector_store)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "load_db = load_embedded_data(embeddings)" - ] - }, { "cell_type": "markdown", "metadata": {}, diff --git a/loggings/app.log b/loggings/app.log index 762510e3..179a0c21 100644 --- a/loggings/app.log +++ b/loggings/app.log @@ -26,3 +26,6 @@ 2024-08-05 22:12:55,205 - INFO - Receiving the search query 2024-08-05 22:13:04,060 - INFO - Searching for what is LDA? 2024-08-05 22:13:04,241 - INFO - Search completed +2024-08-07 17:49:19,962 - INFO - Receiving the search query +2024-08-07 17:49:29,498 - INFO - Searching for what is lda? +2024-08-07 17:49:29,876 - INFO - Search completed diff --git a/ocr.py b/ocr.py new file mode 100644 index 00000000..cf7bd2fe --- /dev/null +++ b/ocr.py @@ -0,0 +1,88 @@ +from flask_restx import Namespace, Resource, fields +from flask import request, jsonify, current_app as app, send_file +from ...services.ocr import OCRService +from ...utils.decorators.auth import protected_route +from .models.errors import error_404, error_500 +from .models.response import response +import json +import os +import numpy as np +from werkzeug.datastructures import FileStorage + +api = Namespace('OCR', + description='Description', + path='/v2/api/tools/ocr') + +upload_parser = api.parser() +upload_parser.add_argument('file', location='files', + type=FileStorage, required=True) +# OCR Data Model +ocr_model = api.model('OCR', { + 'format': fields.String(required=True), + 'data': fields.String(required=False), + +}) + +success_response = api.clone('OCR Model Response', response, { + 'model': fields.Nested(ocr_model) +}) + + +@api.route('') +@api.doc(security='apikey') +class OCRResource(Resource): + @api.doc('get_text') + @api.expect(upload_parser) + @protected_route + def post(self): + output_format = request.get_json().get('format') + + if not output_format: + output_format = 'txt' + + args = upload_parser.parse_args() + + try: + if 'file' not in args: + raise ValueError("Invalid file") + + file = args['file'] + + if file.filename == '': + raise ValueError("Invalid file") + + if file: + # Save the uploaded file to the UPLOAD_FOLDER + filename = os.path.join(file.filename) + file.save(filename) + ocr_service = OCRService(image_directory='', + export_directory=os.path.join(app.config['UPLOAD_FOLDER']), + language='en') + output_format = 'text' + result = ocr_service.read_text(filename, output_format=output_format) + if output_format == 'text': + txt_file, message = result + if txt_file: + + return send_file(os.path.join("..",'..','..', txt_file), mimetype='text/plain', as_attachment=True, download_name=txt_file) + else: + return jsonify(error=message) + + elif output_format == 'json': + json_data, message = result + if json_data: + result_json = json.dumps(json_data, default=np_encoder) + return {'model': { + 'format': output_format, + 'data': result_json + }} + api.abort(code=500, message="Invalid Format", error=True) + + + except Exception as e: + api.abort(code=500, message="Something went wrong", error=True) + + +def np_encoder(object): + if isinstance(object, np.generic): + return object.item() diff --git a/search.py b/search.py index 258ee37f..2a46e898 100644 --- a/search.py +++ b/search.py @@ -21,5 +21,4 @@ if __name__ == "__main__": logger.info(f"Page content: {page_content}") print(f"Page content: {page_content}") print(f"Pages: {pages}") - print(f"All: {all}") print("Search completed") \ No newline at end of file diff --git a/text_extractor.py b/text_extractor.py new file mode 100644 index 00000000..efd435e9 --- /dev/null +++ b/text_extractor.py @@ -0,0 +1,158 @@ +import pytesseract +from PIL import Image +import pdfplumber +import platform +import os +import io + + +class TextExtractor: + def __init__(self): + self.set_tesseract_path() + + def set_tesseract_path(self): + """ + Sets the path to the Tesseract executable based on the detected platform. + """ + # Get the current platform + current_platform = platform.system() + + # Set path to Tesseract executable based on platform + if current_platform == 'Linux': + pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract' + elif current_platform == 'Windows': + pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe' + elif current_platform == 'Darwin': + pytesseract.pytesseract.tesseract_cmd = '/usr/local/bin/tesseract' + else: + print("Unsupported platform. Please set the Tesseract path manually.") + + def read_text_from_image(self, image_path): + """ + Reads text from an image using pytesseract. + + Args: + image_path (str): Path to the image file. + + Returns: + str: Extracted text from the image. + """ + try: + with Image.open(image_path) as img: + text = pytesseract.image_to_string(img) + return text + except Exception as e: + print(f"Error reading text from image: {e}") + return "" + finally: + os.remove(image_path) + + def read_text_from_pdf(self, pdf_path): + """ + Reads text from a PDF file using pytesseract. + + Args: + pdf_path (str): Path to the PDF file. + + Returns: + str: Extracted text from the PDF. + """ + try: + text = "" + with pdfplumber.open(pdf_path) as pdf: + for page in pdf.pages: + text += page.extract_text() + return text + except Exception as e: + print(f"Error reading text from PDF: {e}") + return "" + finally: + os.remove(pdf_path) + + def extract_text_from_pdf(self, pdf_path): + """ + Reads text from a PDF file. + + Args: + pdf_path (str): Path to the PDF file. + + Returns: + str: Extracted text from the PDF. + """ + try: + print("path", pdf_path) + text = "" + with pdfplumber.open(pdf_path) as pdf: + for page in pdf.pages: + if self._has_images(page): + text += self._extract_text_from_images(page) + else: + text += page.extract_text() + return text + except Exception as e: + print(f"Error reading text from PDF: {e}") + return "" + finally: + os.remove(pdf_path) + + def _has_images(self, page): + """ + Checks if a PDF page contains images. + + Args: + page (pdfplumber.Page): PDF page object. + + Returns: + bool: True if the page contains images, False otherwise. + """ + for obj in page.objects: + if page.objects[obj][0]['object_type'] == "image": + return True + return False + + def _extract_text_from_images(self, page): + """ + Extracts text from images within a PDF page using pytesseract. + + Args: + page (pdfplumber.Page): PDF page object. + + Returns: + str: Extracted text from the images. + """ + text = "" + for obj in page.objects: + if page.objects[obj][0]['object_type'] == "image": + obj = page.objects[obj][0] + text += self._read_text_from_image(obj["x0"], obj["y0"], obj["x1"], obj["y1"], obj['stream']) + return text + + def _read_text_from_image(self, x0, y0, x1, y1, stream): + """ + Reads text from a specified region of the page image using pytesseract. + + Args: + x0, y0, x1, y1 (float): Coordinates of the region to read text from. + page (pdfplumber.Page): PDF page object. + + Returns: + str: Extracted text from the specified region of the page image. + """ + try: + # Convert the PDF image object to a PIL Image object + raw_image = stream.get_rawdata() + + # Convert the raw image data to a PIL Image object + pil_image = Image.open(io.BytesIO(raw_image)) + + # Crop the PIL Image to the specified region + # pil_image = pil_image.crop((x0, y0, x1, y1)) + + # Perform OCR on the image and extract text + text = pytesseract.image_to_string(pil_image) + return text + except Exception as e: + print(f"Error extracting text from image: {e}") + return "" + + diff --git a/vec-db/index/faiss_index_data/index.faiss b/vec-db/index/faiss_index_data/index.faiss new file mode 100644 index 00000000..0381e07d Binary files /dev/null and b/vec-db/index/faiss_index_data/index.faiss differ diff --git a/vec-db/index/faiss_index_pdf/index.pkl b/vec-db/index/faiss_index_data/index.pkl similarity index 87% rename from vec-db/index/faiss_index_pdf/index.pkl rename to vec-db/index/faiss_index_data/index.pkl index 7a07d4c7..0eaf1b3f 100644 Binary files a/vec-db/index/faiss_index_pdf/index.pkl and b/vec-db/index/faiss_index_data/index.pkl differ diff --git a/vec-db/index/faiss_index_pdf/index.faiss b/vec-db/index/faiss_index_pdf/index.faiss deleted file mode 100644 index 841a51c3..00000000 Binary files a/vec-db/index/faiss_index_pdf/index.faiss and /dev/null differ