diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..0fe238ac
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+Ai indexing
+data
+images
\ No newline at end of file
diff --git a/__pycache__/text_extractor.cpython-311.pyc b/__pycache__/text_extractor.cpython-311.pyc
new file mode 100644
index 00000000..34fe646c
Binary files /dev/null and b/__pycache__/text_extractor.cpython-311.pyc differ
diff --git a/audio_experiment.ipynb b/audio_experiment.ipynb
new file mode 100644
index 00000000..e69de29b
diff --git a/data/documents.json b/data/documents.json
index c700f924..e8bf3afc 100644
--- a/data/documents.json
+++ b/data/documents.json
@@ -1 +1 @@
-{"doc_names": ["Car-Repair-Receipt-repair", "Car-Repair-Receipt-service", "Car-Repair-Receipt-tire", "Car-Repair-Receipt-tuning", "Car-Repair-Receipt-wash", "corolla-2020-toyota-owners-manual", "How to change engine oil and filter on TOYOTA Corolla", "How to change front brake pads on TOYOTA Corolla", "How to change rear windshield wipers on TOYOTA Corolla", "How to change spark plugs on TOYOTA COROLLA"], "docs_id": ["911dea9b7b714adf8ecafd483a37741b", "e9308cf998a64cab8aef9bde04795fc4", "1f013bd6ac464a07acd8d60a425142d7", "3c99eade18a344d4a568cd77e58558f3", "708f7ba5121442c692dba1346097c4e4", "9e134439a0b84f26a213a288cbe45ab5", "8eb0c0f04eb44e2bafba7640ed34b26b", "c4571cec94034cf38b5d2d59a694464e", "4253d6ea5aeb43f1a65b11a2a631389f", "e2c66cfac77b4099908b1d41a66a7fe2"], "num_pages": [1, 2, 2, 2, 1, 588, 6, 7, 6, 10]}
\ No newline at end of file
+{"doc_names": ["Car-Repair-Receipt-repair", "Car-Repair-Receipt-service", "Car-Repair-Receipt-tire", "Car-Repair-Receipt-tuning", "Car-Repair-Receipt-wash", "corolla-2020-toyota-owners-manual", "data\\dodge-challenger-auto-body-repair-after", "data\\dodge-challenger-auto-body-repair-before", "How to change engine oil and filter on TOYOTA Corolla", "How to change front brake pads on TOYOTA Corolla", "How to change rear windshield wipers on TOYOTA Corolla", "How to change spark plugs on TOYOTA COROLLA", "data\\hyundai-sonata-auto-body-repair-after", "data\\hyundai-sonata-auto-body-repair-before", "data\\IMG_1436", "data\\IMG_1437", "data\\IMG_1438", "data\\IMG_1440", "data\\IMG_1441", "data\\IMG_1442", "data\\IMG_1443", "data\\IMG_1444", "data\\pontiac-vibe-auto-body-repair-after", "data\\pontiac-vibe-auto-body-repair-before", "data\\toyota-tacoma-auto-body-repair-after", "data\\toyota-tacoma-auto-body-repair-before"], "docs_id": ["5f26879376a44a77bbc2b966b9189ca4", "51b1c6cab5f1440e9fd948b6d858e812", "1d63ef4a149d4addb0803370885d70c1", "749ea365f2244eb6b23bb17e28d9cd2e", "e6d3736c0e8f424382c2ff5298814534", "91b116993e4b4865b3dc7bceca9749f0", "77f9558bd9894daeaf9aaea4013ed20e", "d974631f67d242739343b3c32e91355c", "a18ad23b3c7641b3a61e77e0e143a265", "0b710683db314b14ae6f0e0919a12068", "136c808efffa4f8798c55e7595c768a1", "236dc9603c9c4e83840721175d3dc861", "5aa9f750dbdd403c94abb53883c0fad2", "0382e54d68a84021803b07c7cf7c3ad9", "a772d008c9bf4ee6a2026f00998f3f2c", "66afb44563f6449ca705a39c9a72440d", "59ef1e9cc81b41d3a32d5dcc069a0ace", "9991145202384596bc3f5ff666d213bd", "d7f49b6629e84ec7bfd1a0048d2ade76", "689296161d6b46e8b9e792dbdc8a155d", "ba6be2ab8ae74042a9c9da51c46b8f90", "d62daf66b833419fae17333395cd7b04", "7b109e03c62343fd8f8e23dcf6bdfd3b", "8254386611fc4feb85744f69e5120e18", "022d6bae08274a618921c49590040a1f", "719ed0e4d9a94fe39799c227eaac1e05"], "num_pages": [1, 2, 2, 2, 1, 588, 1, 1, 6, 7, 6, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
\ No newline at end of file
diff --git a/data_ingestion/__pycache__/utils.cpython-311.pyc b/data_ingestion/__pycache__/utils.cpython-311.pyc
index 403ae253..55171dc7 100644
Binary files a/data_ingestion/__pycache__/utils.cpython-311.pyc and b/data_ingestion/__pycache__/utils.cpython-311.pyc differ
diff --git a/data_ingestion/utils.py b/data_ingestion/utils.py
index 50ce4885..a4eaff52 100644
--- a/data_ingestion/utils.py
+++ b/data_ingestion/utils.py
@@ -8,6 +8,7 @@ from langchain_community.document_loaders import TextLoader
 from langchain_community.document_loaders import Docx2txtLoader
 from uuid import uuid4
 from langchain_core.documents  import Document
+from text_extractor import TextExtractor
 import os
 import json
 
@@ -90,6 +91,18 @@ def load_document(document_path):
     else:
         raise ValueError(f"Unsupported document type for {document_path}")
 
+def create_image_document(image_path):
+    """OCR the image at ``image_path`` and wrap the text in a one-item Document list.
+
+    The metadata 'filename' is the file's base name without directory or extension.
+    """
+    # os.path strips the directory with the OS separator; split('/') missed '\\' paths
+    image_name = os.path.splitext(os.path.basename(image_path))[0]
+    text = TextExtractor().read_text_from_image(image_path)
+    # drop special characters; letters, digits and whitespace (incl. newlines) stay
+    text = ''.join(ch for ch in text if ch.isalnum() or ch.isspace())
+    # wrapped in a list: load_documents_from_directory indexes the result as doc[0]
+    return [Document(page_content=text, metadata={'filename': image_name})]
 
 
 def save_embedded_data(embeddings, key="data"):
@@ -133,6 +146,16 @@ def load_documents_from_directory(directory_path: str):
             # adding the document name to the doc_names list
             doc_names.append(doc_name)
             print(f"Document {doc_name} loaded")
+        elif extension in image_doc:
+            # creating an image document
+            doc = create_image_document(path)
+            # appending the document to the documents list
+            documents.append(doc)
+            # appending the number of pages in the document
+            num_pages.append(1)
+            # adding the document name to the doc_names list
+            doc_names.append(doc[0].metadata['filename'])
+            print(f"Document {doc[0].metadata['filename']} loaded")
             
     # so we need to create a document id for each document
     docs_id = [uuid4().hex for i in range(len(documents))]
@@ -189,11 +212,14 @@ def add_documents_to_vector_store(embeddings, documents: list, docs_id: list, nu
     
 
 # A document search function
-def search(db, query, k=4):
+def search(db, query, k=3):
   docs = db.similarity_search(query, k)
   all = ""
   pages = []
   for doc in docs:
-      all += f"{doc.page_content}\n"
-      pages.append(doc.metadata['page'])
+    all += f"{doc.page_content}\n"
+    try:
+        pages.append(doc.metadata['page'])
+    except KeyError:  # image documents carry 'filename' instead of 'page'
+        pages.append(doc.metadata['filename'])
   return docs[0].page_content, all, pages
diff --git a/image_experiment.ipynb b/image_experiment.ipynb
new file mode 100644
index 00000000..9ff5adec
--- /dev/null
+++ b/image_experiment.ipynb
@@ -0,0 +1,161 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install -q pdfplumber"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from text_extractor import TextExtractor\n",
+    "from langchain_core.documents  import Document"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# creating a function to extract texts from image\n",
+    "def create_image_document(image_path):\n",
+    "    # getting the image name from the image path\n",
+    "    image_name = image_path.split('/')[-1].split('.')[0]\n",
+    "    # setting image name as metadata\n",
+    "    metadata = {'filename': image_name}\n",
+    "    text_extractor = TextExtractor()\n",
+    "    text = text_extractor.read_text_from_image(image_path)\n",
+    "    # removing special characters and line breaks\n",
+    "    text = ''.join(e for e in text if e.isalnum() or e.isspace() or e == '\\n')\n",
+    "    doc = Document(page_content=text, metadata=metadata)\n",
+    "    # returning the document\n",
+    "    return [doc]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[Document(metadata={'filename': 'IMG_1438'}, page_content='ex   a\\n\\nAccidented car before repair\\n')]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# testing the function\n",
+    "image_path = 'data/IMG_1438.jpeg'\n",
+    "text = create_image_document(image_path)\n",
+    "print(text)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'filename': 'IMG_1438'}"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "text[0].metadata"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "smog_env",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/loggings/app.log b/loggings/app.log
index 83cf54b1..35d6d571 100644
--- a/loggings/app.log
+++ b/loggings/app.log
@@ -50,3 +50,33 @@
 2024-08-07 18:46:38,939 - INFO - Loading the embeddings
 2024-08-07 18:46:38,939 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
 2024-08-07 18:47:03,089 - INFO - Embeddings loaded
+2024-08-08 14:03:36,111 - INFO - Loading the embeddings
+2024-08-08 14:03:36,113 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
+2024-08-08 14:03:39,637 - INFO - Embeddings loaded
+2024-08-08 14:03:39,637 - INFO - Loading data from ./data
+2024-08-08 14:04:29,085 - INFO - Data loaded
+2024-08-08 14:04:29,087 - INFO - Creating vector store
+2024-08-08 14:06:40,106 - INFO - Loading the embeddings
+2024-08-08 14:06:40,106 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
+2024-08-08 14:06:43,523 - INFO - Embeddings loaded
+2024-08-08 14:06:43,523 - INFO - Loading data from ./data
+2024-08-08 14:20:21,150 - INFO - Loading the embeddings
+2024-08-08 14:20:21,150 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en
+2024-08-08 14:20:25,150 - INFO - Embeddings loaded
+2024-08-08 14:20:25,150 - INFO - Loading data from ./data
+2024-08-08 14:21:13,769 - INFO - Data loaded
+2024-08-08 14:21:13,769 - INFO - Creating vector store
+2024-08-08 14:21:24,386 - INFO - Vector store created
+2024-08-08 14:21:24,386 - INFO - Saving the vector store
+2024-08-08 14:21:24,386 - INFO - Vector store saved
+2024-08-08 14:22:17,106 - INFO - Receiving the search query
+2024-08-08 14:22:23,740 - INFO - Searching for Accidented car before repair
+2024-08-08 14:24:45,013 - INFO - Receiving the search query
+2024-08-08 14:25:07,699 - INFO - Searching for Accidented car before repair
+2024-08-08 14:28:43,776 - INFO - Receiving the search query
+2024-08-08 14:28:46,944 - INFO - Searching for Accidented car before repair
+2024-08-08 14:29:13,295 - INFO - Receiving the search query
+2024-08-08 14:29:17,628 - INFO - Searching for Accidented car before repair
+2024-08-08 14:29:17,820 - INFO - Search completed
+2024-08-08 14:29:17,820 - INFO - Page content: Accidented car Before repair
+
diff --git a/requirements.txt b/requirements.txt
index 5f3958d6..6db0dfa7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,4 +11,6 @@ langchain-text-splitters
 unstructured[all-docs]
 docx2txt
 docx
-"fastapi[standard]"
\ No newline at end of file
+fastapi[standard]
+pdfplumber
+pytesseract
\ No newline at end of file
diff --git a/text_extractor.py b/text_extractor.py
index efd435e9..82d4572a 100644
--- a/text_extractor.py
+++ b/text_extractor.py
@@ -44,8 +44,8 @@ class TextExtractor:
         except Exception as e:
             print(f"Error reading text from image: {e}")
             return ""
-        finally:
-            os.remove(image_path)
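+        # NOTE: the former `finally: os.remove(image_path)` cleanup was dropped on
+        # purpose so the source image is kept on disk after its text is read.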
 
     def read_text_from_pdf(self, pdf_path):
         """
diff --git a/vec-db/index/faiss_index_data/index.faiss b/vec-db/index/faiss_index_data/index.faiss
index cdb06fcd..163e591d 100644
Binary files a/vec-db/index/faiss_index_data/index.faiss and b/vec-db/index/faiss_index_data/index.faiss differ
diff --git a/vec-db/index/faiss_index_data/index.pkl b/vec-db/index/faiss_index_data/index.pkl
index 4d6c0f41..cc9c1998 100644
Binary files a/vec-db/index/faiss_index_data/index.pkl and b/vec-db/index/faiss_index_data/index.pkl differ
diff --git a/video_experiment.ipynb b/video_experiment.ipynb
new file mode 100644
index 00000000..62f01dbc
--- /dev/null
+++ b/video_experiment.ipynb
@@ -0,0 +1,102 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}