image pipeline perfected. audio pipeline in progress
This commit is contained in:
+95
-57
@@ -11,17 +11,93 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from text_extractor import TextExtractor\n",
|
||||
"from langchain_core.documents import Document"
|
||||
"from langchain_core.documents import Document\n",
|
||||
"import os\n",
|
||||
"import base64\n",
|
||||
"import requests\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"load_dotenv()\n",
|
||||
"\n",
|
||||
"# OpenAI API Key\n",
|
||||
"api_key = os.getenv('OPENAI_API_KEY')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Vision Model Set Up"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Function to encode the image\n",
|
||||
"def encode_image(image_path):\n",
|
||||
" with open(image_path, \"rb\") as image_file:\n",
|
||||
" return base64.b64encode(image_file.read()).decode('utf-8')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def process_image(image_path):\n",
|
||||
" global api_key\n",
|
||||
"\n",
|
||||
" # Getting the base64 string\n",
|
||||
" base64_image = encode_image(image_path)\n",
|
||||
"\n",
|
||||
" headers = {\n",
|
||||
" \"Content-Type\": \"application/json\",\n",
|
||||
" \"Authorization\": f\"Bearer {api_key}\"\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" try:\n",
|
||||
" payload = {\n",
|
||||
" \"model\": \"gpt-4o-mini\",\n",
|
||||
" \"messages\": [\n",
|
||||
" {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": [\n",
|
||||
" {\n",
|
||||
" \"type\": \"text\",\n",
|
||||
" \"text\": \"What’s in this image?\"\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"type\": \"image_url\",\n",
|
||||
" \"image_url\": {\n",
|
||||
" \"url\": f\"data:image/jpeg;base64,{base64_image}\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" \"max_tokens\": 300\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" response = requests.post(\"https://api.openai.com/v1/chat/completions\", headers=headers, json=payload)\n",
|
||||
" # returning the content of the response\n",
|
||||
" response = response.json()['choices'][0]['message']['content']\n",
|
||||
" except Exception as e:\n",
|
||||
" response = \"Image not good enough for processing\"\n",
|
||||
"\n",
|
||||
" return response"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -35,79 +111,41 @@
|
||||
" text = text_extractor.read_text_from_image(image_path)\n",
|
||||
" # removing special characters: keep only letters, digits and whitespace (line breaks are preserved)\n",
|
||||
" text = ''.join(e for e in text if e.isalnum() or e.isspace() or e == '\\n')\n",
|
||||
" doc = Document(page_content=text, metadata=metadata)\n",
|
||||
" # returning the document\n",
|
||||
" return [doc]"
|
||||
" \n",
|
||||
" # if the text is empty, then we will process the image with the OpenAI vision model\n",
|
||||
" if text == '':\n",
|
||||
" text = process_image(image_path)\n",
|
||||
" \n",
|
||||
" # checking that processing did not fail; we only return the text if it isn't the error message\n",
|
||||
" if text != \"Image not good enough for processing\":\n",
|
||||
" # creating a document from the text\n",
|
||||
" doc = Document(page_content=text, metadata=metadata)\n",
|
||||
" # returning the document\n",
|
||||
" return [doc]\n",
|
||||
" else:\n",
|
||||
" pass # if there's an error, we will return None"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Document(metadata={'filename': 'IMG_1438'}, page_content='ex a\\n\\nAccidented car before repair\\n')]\n"
|
||||
"[Document(metadata={'filename': 'hyundai-sonata-auto-body-repair-before'}, page_content=\"The image shows a dark-colored car with visible damage on the driver's side. The damage appears to be a dent and scratches on the door and fender area. The car is parked indoors, likely in a garage.\")]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# testing the function\n",
|
||||
"image_path = 'data/IMG_1438.jpeg'\n",
|
||||
"image_path = 'data/hyundai-sonata-auto-body-repair-before.jpg'\n",
|
||||
"text = create_image_document(image_path)\n",
|
||||
"print(text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'filename': 'IMG_1438'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text[0].metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
|
||||
Reference in New Issue
Block a user