Files
ds_fire_fighter/image_experiment.ipynb
T

200 lines
5.6 KiB
Plaintext
Raw Normal View History

2024-08-08 14:58:44 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"!pip install -q pdfplumber"
]
},
{
"cell_type": "code",
"execution_count": 2,
2024-08-08 14:58:44 +01:00
"metadata": {},
"outputs": [],
"source": [
"from text_extractor import TextExtractor\n",
"from langchain_core.documents import Document\n",
"import os\n",
"import base64\n",
"import requests\n",
"from dotenv import load_dotenv\n",
"load_dotenv()\n",
"\n",
"# OpenAI API Key\n",
"api_key = os.getenv('OPENAI_API_KEY')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Vision Model Set Up"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Function to encode the image\n",
"def encode_image(image_path):\n",
" with open(image_path, \"rb\") as image_file:\n",
" return base64.b64encode(image_file.read()).decode('utf-8')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def process_image(image_path):\n",
" global api_key\n",
"\n",
" # Getting the base64 string\n",
" base64_image = encode_image(image_path)\n",
"\n",
" headers = {\n",
" \"Content-Type\": \"application/json\",\n",
" \"Authorization\": f\"Bearer {api_key}\"\n",
" }\n",
"\n",
" try:\n",
" payload = {\n",
" \"model\": \"gpt-4o-mini\",\n",
" \"messages\": [\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": [\n",
" {\n",
" \"type\": \"text\",\n",
" \"text\": \"Whats in this image?\"\n",
" },\n",
" {\n",
" \"type\": \"image_url\",\n",
" \"image_url\": {\n",
" \"url\": f\"data:image/jpeg;base64,{base64_image}\"\n",
" }\n",
" }\n",
" ]\n",
" }\n",
" ],\n",
" \"max_tokens\": 300\n",
" }\n",
"\n",
" response = requests.post(\"https://api.openai.com/v1/chat/completions\", headers=headers, json=payload)\n",
" # returning the content of the response\n",
" response = response.json()['choices'][0]['message']['content']\n",
" except Exception as e:\n",
" response = \"Image not good enough for processing\"\n",
"\n",
" return response"
2024-08-08 14:58:44 +01:00
]
},
{
"cell_type": "code",
"execution_count": 13,
2024-08-08 14:58:44 +01:00
"metadata": {},
"outputs": [],
"source": [
"# creating a function to extract texts from image\n",
"def create_image_document(image_path):\n",
" # getting the image name from the image path\n",
" image_name = image_path.split('/')[-1].split('.')[0]\n",
" # setting image name as metadata\n",
" metadata = {'filename': image_name}\n",
" text_extractor = TextExtractor()\n",
" text = text_extractor.read_text_from_image(image_path)\n",
" # removing special characters and line breaks\n",
" text = ''.join(e for e in text if e.isalnum() or e.isspace() or e == '\\n')\n",
" \n",
" # if the text is empty, then we will process the image with OpenAI vision model\n",
" if text == '':\n",
" text = process_image(image_path)\n",
" \n",
" # checking if there's no value error or something, we will only return the text if there isnt any error\n",
" if text != \"Image not good enough for processing\":\n",
" # creating a document from the text\n",
" doc = Document(page_content=text, metadata=metadata)\n",
" # returning the document\n",
" return [doc]\n",
" else:\n",
" pass # if there's an error, we will return None"
2024-08-08 14:58:44 +01:00
]
},
{
"cell_type": "code",
"execution_count": 14,
2024-08-08 14:58:44 +01:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Document(metadata={'filename': 'hyundai-sonata-auto-body-repair-before'}, page_content=\"The image shows a dark-colored car with visible damage on the driver's side. The damage appears to be a dent and scratches on the door and fender area. The car is parked indoors, likely in a garage.\")]\n"
2024-08-08 14:58:44 +01:00
]
}
],
"source": [
"# testing the function\n",
"image_path = 'data/hyundai-sonata-auto-body-repair-before.jpg'\n",
2024-08-08 14:58:44 +01:00
"text = create_image_document(image_path)\n",
"print(text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "smog_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}