Image pipeline perfected; audio pipeline in progress.

This commit is contained in:
timothyafolami
2024-08-08 22:06:39 +01:00
parent c54dc17989
commit f1aa34bef2
10 changed files with 319 additions and 63 deletions
Binary file not shown.
+70 -3
View File
@@ -11,6 +11,13 @@ from langchain_core.documents import Document
from text_extractor import TextExtractor
import os
import json
import base64
import requests
from dotenv import load_dotenv
load_dotenv()
# OpenAI API Key
api_key = os.getenv('OPENAI_API_KEY')
# loading the embedding model
@@ -91,6 +98,56 @@ def load_document(document_path):
else:
raise ValueError(f"Unsupported document type for {document_path}")
# Read an image file and return its contents as base64 text
def encode_image(image_path):
    """Return the bytes of the file at ``image_path`` as a base64 string.

    The file is opened in binary mode, its full contents are base64-encoded,
    and the encoded bytes are decoded as UTF-8 so the result can be embedded
    in text such as a JSON payload.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
# Vision API to process the image
def process_image(image_path, timeout=60):
    """Ask the OpenAI chat-completions endpoint to describe an image.

    The image is base64-encoded with ``encode_image`` and sent inline as a
    data URL together with a fixed text prompt.

    Args:
        image_path: Path of the image file to describe.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` can block indefinitely.

    Returns:
        The ``content`` of the first choice in the API response, or the
        string "Image not good enough for processing" when the request
        fails or the response does not have the expected shape. Callers
        compare against that exact string, so it must not change.

    Raises:
        OSError: If the image file cannot be read (raised by
            ``encode_image`` before any request is made).
    """
    # Local imports keep this block self-contained; both are standard library.
    import logging
    import mimetypes

    # Getting the base64 string
    base64_image = encode_image(image_path)

    # Label the data URL with the real image type where it can be guessed
    # from the file name; fall back to JPEG as the previous code assumed.
    mime_type, _ = mimetypes.guess_type(str(image_path))
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    headers = {
        "Content-Type": "application/json",
        # api_key is the module-level value loaded from the environment
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Whats in this image?",
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}",
                        },
                    },
                ],
            }
        ],
        "max_tokens": 300,
    }

    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        # Surface HTTP-level failures (401, 429, 5xx) explicitly instead of
        # relying on a KeyError from the missing 'choices' field.
        response.raise_for_status()
        # returning the content of the response
        return response.json()['choices'][0]['message']['content']
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as err:
        # Best-effort by design: record why the call failed, then hand the
        # caller the sentinel it already checks for.
        logging.getLogger(__name__).warning(
            "Vision request failed for %s: %s", image_path, err
        )
        return "Image not good enough for processing"
# create image document
def create_image_document(image_path):
# getting the image name from the image path
image_name = image_path.split('/')[-1].split('.')[0]
@@ -100,9 +157,19 @@ def create_image_document(image_path):
text = text_extractor.read_text_from_image(image_path)
# removing special characters and line breaks
text = ''.join(e for e in text if e.isalnum() or e.isspace() or e == '\n')
doc = Document(page_content=text, metadata=metadata)
# returning the document in a list
return [doc]
# if the text is empty, then we will process the image with OpenAI vision model
if text == '':
text = process_image(image_path)
# only build a document when the text is not the failure message returned by process_image
if text != "Image not good enough for processing":
# creating a document from the text
doc = Document(page_content=text, metadata=metadata)
# returning the document
return [doc]
else:
pass # if there's an error, we will return None
def save_embedded_data(embeddings, key="data"):