image pipeline perfected. audio pipeline in progress

This commit is contained in:
timothyafolami
2024-08-08 22:06:39 +01:00
parent c54dc17989
commit f1aa34bef2
10 changed files with 319 additions and 63 deletions
+95 -57
View File
@@ -11,17 +11,93 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from text_extractor import TextExtractor\n",
"from langchain_core.documents import Document"
"from langchain_core.documents import Document\n",
"import os\n",
"import base64\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"load_dotenv()\n",
"\n",
"# OpenAI API Key\n",
"api_key = os.getenv('OPENAI_API_KEY')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Vision Model Set Up"
]
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Function to encode the image\n",
"def encode_image(image_path):\n",
" with open(image_path, \"rb\") as image_file:\n",
" return base64.b64encode(image_file.read()).decode('utf-8')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def process_image(image_path):\n",
" global api_key\n",
"\n",
" # Getting the base64 string\n",
" base64_image = encode_image(image_path)\n",
"\n",
" headers = {\n",
" \"Content-Type\": \"application/json\",\n",
" \"Authorization\": f\"Bearer {api_key}\"\n",
" }\n",
"\n",
" try:\n",
" payload = {\n",
" \"model\": \"gpt-4o-mini\",\n",
" \"messages\": [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": [\n",
" {\n",
" \"type\": \"text\",\n",
" \"text\": \"Whats in this image?\"\n",
" },\n",
" {\n",
" \"type\": \"image_url\",\n",
" \"image_url\": {\n",
" \"url\": f\"data:image/jpeg;base64,{base64_image}\"\n",
" }\n",
" }\n",
" ]\n",
" }\n",
" ],\n",
" \"max_tokens\": 300\n",
" }\n",
"\n",
" response = requests.post(\"https://api.openai.com/v1/chat/completions\", headers=headers, json=payload)\n",
" # extracting the generated text from the first choice of the JSON response\n",
" response = response.json()['choices'][0]['message']['content']\n",
" except Exception as e:\n",
" response = \"Image not good enough for processing\"\n",
"\n",
" return response"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
@@ -35,79 +111,41 @@
" text = text_extractor.read_text_from_image(image_path)\n",
" # removing special characters; letters, digits and whitespace (including line breaks) are kept\n",
" text = ''.join(e for e in text if e.isalnum() or e.isspace() or e == '\\n')\n",
" doc = Document(page_content=text, metadata=metadata)\n",
" # returning the document\n",
" return [doc]"
" \n",
" # if no text was extracted, fall back to processing the image with the OpenAI vision model\n",
" if text == '':\n",
" text = process_image(image_path)\n",
" \n",
" # process_image returns this exact message when it fails; only build and return a document when the text is not that failure message\n",
" if text != \"Image not good enough for processing\":\n",
" # creating a document from the text\n",
" doc = Document(page_content=text, metadata=metadata)\n",
" # returning the document\n",
" return [doc]\n",
" else:\n",
" pass # on failure nothing is returned explicitly, so the function returns None"
]
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Document(metadata={'filename': 'IMG_1438'}, page_content='ex a\\n\\nAccidented car before repair\\n')]\n"
"[Document(metadata={'filename': 'hyundai-sonata-auto-body-repair-before'}, page_content=\"The image shows a dark-colored car with visible damage on the driver's side. The damage appears to be a dent and scratches on the door and fender area. The car is parked indoors, likely in a garage.\")]\n"
]
}
],
"source": [
"# testing the function\n",
"image_path = 'data/IMG_1438.jpeg'\n",
"image_path = 'data/hyundai-sonata-auto-body-repair-before.jpg'\n",
"text = create_image_document(image_path)\n",
"print(text)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'filename': 'IMG_1438'}"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"text[0].metadata"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,