{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# !pip install langchain-groq" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-08-16 16:06:32,880 - INFO - Loading the embedding model\n", "2024-08-16 16:06:38,758 - WARNING - c:\\Users\\timmy_3aupohg\\anaconda3\\envs\\smog_env\\Lib\\site-packages\\sentence_transformers\\cross_encoder\\CrossEncoder.py:11: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n", " from tqdm.autonotebook import tqdm, trange\n", "\n", "2024-08-16 16:06:47,268 - INFO - PyTorch version 2.4.0+cu124 available.\n", "2024-08-16 16:06:47,868 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en\n", "2024-08-16 16:06:55,638 - INFO - Embedding model loaded\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from utils import search\n", "import sys, os\n", "from dotenv import load_dotenv\n", "from langchain_groq import ChatGroq\n", "from langchain_core.prompts.prompt import PromptTemplate\n", "from langchain_core.output_parsers import StrOutputParser\n", "from collections import defaultdict\n", "import json\n", "load_dotenv()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# setting up groq api key\n", "# os.environ[\"GROQ_API_KEY\"] = os.getenv('GROQ_API_KEY')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "\n", "# # chat set up\n", "# GROQ_LLM = ChatGroq(temperature=0, model_name=\"llama3-8b-8192\", max_tokens=100)\n", "\n", "\n", "# ### Chains #####\n", "# # Initiator\n", "# def doc_summarizer(document_page: list) -> str:\n", "# initiator_prompt = PromptTemplate(\n", "# template=\"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n", "# Create a short summary of the document based on the provided text. \n", " \n", "# Start with: This document is about...\n", " \n", "# <|eot_id|><|start_header_id|>user<|end_header_id|>\n", "# DOCUMENT: {document_page} \\n\n", " \n", "# <|eot_id|><|start_header_id|>assistant<|end_header_id|>\"\"\",\n", "# input_variables=[\"document_page\"],\n", "# )\n", "\n", "# initiator_router = initiator_prompt | GROQ_LLM | StrOutputParser()\n", "# output = initiator_router.invoke({\"document_page\":document_page})\n", "# return output\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "document_page = 'Wirebrush WD 40'\n", "# testing the function\n", "# summary = doc_summarizer(document_page)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-08-16 16:06:55,717 - INFO - Searching for Wirebrush WD 40\n", "2024-08-16 16:06:55,717 - INFO - Loading embedded data\n", "2024-08-16 16:06:56,487 - WARNING - c:\\Users\\timmy_3aupohg\\anaconda3\\envs\\smog_env\\Lib\\site-packages\\transformers\\models\\bert\\modeling_bert.py:439: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\\cb\\pytorch_1000000000000\\work\\aten\\src\\ATen\\native\\transformers\\cuda\\sdp_utils.cpp:555.)\n", " attn_output = torch.nn.functional.scaled_dot_product_attention(\n", "\n", "2024-08-16 16:06:56,628 - INFO - Search completed\n" ] } ], "source": [ "docs = search(document_page)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'source': './data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 424},\n", " {'source': 'How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]',\n", " 'timestamp': '0:30-1:0',\n", " 'file_type': 'video'},\n", " {'source': 'How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]',\n", " 'timestamp': '2:00-2:00',\n", " 'file_type': 'video'},\n", " {'source': 'How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]',\n", " 'timestamp': '2:30-3:0',\n", " 'file_type': 'video'},\n", " {'source': 'How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]',\n", " 'timestamp': '4:00-4:00',\n", " 'file_type': 'video'},\n", " {'source': 'How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]',\n", " 'timestamp': '5:30-6:0',\n", " 'file_type': 'video'},\n", " {'source': 'How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]',\n", " 'timestamp': '8:00-8:00',\n", " 'file_type': 'video'},\n", " {'source': 'How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]',\n", " 'timestamp': '8:30-9:0',\n", " 'file_type': 'video'},\n", " {'source': 'How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]',\n", " 'timestamp': '10:00-10:00',\n", " 'file_type': 'video'},\n", " {'source': 'How to change rear wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]',\n", " 'timestamp': '3:30-4:0',\n", " 'file_type': 'video'},\n", " {'source': 'How to change rear wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]',\n", " 'timestamp': '5:30-6:0',\n", " 'file_type': 'video'},\n", " {'source': './data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 329},\n", " {'source': './data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 264},\n", " {'source': './data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 290},\n", " {'source': './data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 201},\n", " {'source': './data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 326},\n", " {'source': './data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 315},\n", " {'source': './data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 317},\n", " {'source': './data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 325},\n", " {'source': './data\\\\corolla-2020-toyota-owners-manual.pdf', 'page': 422}]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "docs" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# a function to get data description\n", "def get_data_description(data_path):\n", " # ensuring no // or / or extension is present\n", " data_name = data_path.split('/')[-1].split('\\\\')[-1].split('.')[0]\n", " # print(data_name)\n", " # open the data.json file\n", " with open('data/data.json') as f:\n", " data = json.load(f)\n", " existing_data = data.keys()\n", " if data_name in existing_data:\n", " return data[data_name]['doc_summary']\n", " else:\n", " return 'No description available'" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"This document is about a video tutorial series on replacing car parts, specifically the latest installment of AutoDoc's video tutorials.\"" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_data_description('How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]')" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "# getting data thumbnais. \n", "def get_data_thumbnail(data_path, timestamp = None):\n", " # ensuring no // or / or extension is present\n", " file_name = data_path.split('/')[-1].split('\\\\')[-1].split('.')[0]\n", " # first check is to see if the file_name has a .png image in the thumbnail folder\n", " if os.path.exists(f'data/thumbnails/{file_name}.png'):\n", " return f'data/thumbnails/{file_name}.png'\n", " # the second check is to see if we have a folder with this file_name\n", " elif os.path.exists(f'data/{file_name}'):\n", " # so now we want to access the first timestamp\n", " if timestamp:\n", " first = timestamp[0]\n", " # split by -\n", " start, end = first.split('-')\n", " # we want to convert something like 03:00, 04:00, 03:30 which is in min:sec to seconds\n", " start = int(start.split(':')[0])*60 + int(start.split(':')[1])\n", " end = int(end.split(':')[0])*60 + int(end.split(':')[1])\n", " # bringing them together\n", " image_file = f\"{start}-{end}s.png\"\n", " # niw checkin if the file exists\n", " if os.path.exists(f'data/{file_name}/{image_file}'):\n", " return f'data/{file_name}/{image_file}'" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'data/How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]/210-240s.png'" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_data_thumbnail('How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]', timestamp=['3:30-4:0', '5:30-6:0'])" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'data/thumbnails/corolla-2020-toyota-owners-manual.png'" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_data_thumbnail(\"./data\\\\corolla-2020-toyota-owners-manual.pdf'\")" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "def summarize_doc_search(data):\n", " summary = {}\n", "\n", " for item in data:\n", " source = item['source']\n", " if source not in summary:\n", " summary[source] = {'pages': [], 'timestamps': [], 'file_type': item.get('file_type', 'pdf')}\n", " \n", " if 'page' in item:\n", " summary[source]['pages'].append(item['page'])\n", " if 'timestamp' in item:\n", " summary[source]['timestamps'].append(item['timestamp'])\n", " \n", " # Formatting the summary as a list of dictionaries\n", " summarized_list = [\n", " {'filename': key.split(\"\\\\\")[-1], \n", " 'pages': value['pages'], \n", " 'timestamps': value['timestamps'], \n", " 'file_type': value['file_type']}\n", " for key, value in summary.items()\n", " ]\n", " \n", " # getting the file description and thumbnail\n", " for item in summarized_list:\n", " item['description'] = get_data_description(item['filename'])\n", " # ehcking if we have an empty timestamp list\n", " if len(item['timestamps']) > 0:\n", " item['thumbnail'] = get_data_thumbnail(item['filename'], item['timestamps'])\n", " else:\n", " item['thumbnail'] = get_data_thumbnail(item['filename'])\n", " \n", " return summarized_list" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "doc_summary = summarize_doc_search(docs)" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'filename': 'corolla-2020-toyota-owners-manual.pdf',\n", " 'pages': [424, 329, 264, 290, 201, 326, 315, 317, 325, 422],\n", " 'timestamps': [],\n", " 'file_type': 'pdf',\n", " 'description': \"This document is about the user manual for a Toyota Corolla, providing information and instructions on various aspects of the vehicle, including safety and security, vehicle status, driving operations, interior features, maintenance, and troubleshooting. The manual covers topics such as child seat installation, theft deterrent systems, reading driving-related information, operating the Entune audio system, and caring for the vehicle's interior and exterior. It also includes information on reporting safety defects and provides instructions for Canadian owners on seat belt and SRS air\",\n", " 'thumbnail': 'data/thumbnails/corolla-2020-toyota-owners-manual.png'},\n", " {'filename': 'How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]',\n", " 'pages': [],\n", " 'timestamps': ['0:30-1:0',\n", " '2:00-2:00',\n", " '2:30-3:0',\n", " '4:00-4:00',\n", " '5:30-6:0',\n", " '8:00-8:00',\n", " '8:30-9:0',\n", " '10:00-10:00'],\n", " 'file_type': 'video',\n", " 'description': \"This document is about a video tutorial series on replacing car parts, specifically the latest installment of AutoDoc's video tutorials.\",\n", " 'thumbnail': 'data/How to change front wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]/30-60s.png'},\n", " {'filename': 'How to change rear wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]',\n", " 'pages': [],\n", " 'timestamps': ['3:30-4:0', '5:30-6:0'],\n", " 'file_type': 'video',\n", " 'description': \"This document is about a video tutorial series on replacing car parts, specifically the latest installment of Auto-Doc's video tutorials.\",\n", " 'thumbnail': 'data/How to change rear wheel bearing on TOYOTA RAV4 II [TUTORIAL AUTODOC]/210-240s.png'}]" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "doc_summary" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "smog_env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 2 }