image pipeline perfected. audio pipeline in progress
This commit is contained in:
Binary file not shown.
+70
-3
@@ -11,6 +11,13 @@ from langchain_core.documents import Document
|
||||
from text_extractor import TextExtractor
|
||||
import os
|
||||
import json
|
||||
import base64
|
||||
import requests
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
# OpenAI API Key
|
||||
api_key = os.getenv('OPENAI_API_KEY')
|
||||
|
||||
|
||||
# loading the embedding model
|
||||
@@ -91,6 +98,56 @@ def load_document(document_path):
|
||||
else:
|
||||
raise ValueError(f"Unsupported document type for {document_path}")
|
||||
|
||||
# Function to encode the image
|
||||
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is opened in binary mode, its bytes are base64-encoded, and the
    encoded bytes are decoded as UTF-8 so the result is a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')
|
||||
|
||||
# Vision API to process the image
|
||||
def process_image(image_path, timeout=60):
    """Ask the OpenAI chat completions API to describe an image.

    The image is base64-encoded with ``encode_image`` and sent as a data URL,
    together with the prompt "What’s in this image?", to the ``gpt-4o-mini``
    model. The module-level ``api_key`` is used as the bearer token.

    Args:
        image_path: Path of the image file to describe.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The message content of the first choice in the API response, or the
        string "Image not good enough for processing" when the request fails
        or the response does not have the expected shape.

    Raises:
        OSError: If the image file cannot be read; encoding happens before
            the request is attempted and is not part of the best-effort path.
    """
    import logging
    import mimetypes

    failure_message = "Image not good enough for processing"

    # Getting the base64 string
    base64_image = encode_image(image_path)

    # Label the data URL with the file's real image type when it can be
    # guessed from the extension; fall back to JPEG as before otherwise.
    guessed_type, _ = mimetypes.guess_type(image_path)
    if guessed_type and guessed_type.startswith("image/"):
        mime_type = guessed_type
    else:
        mime_type = "image/jpeg"

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "What’s in this image?",
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}",
                        },
                    },
                ],
            }
        ],
        "max_tokens": 300,
    }

    try:
        # A timeout keeps a stalled connection from blocking the pipeline.
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        # Surface HTTP errors (bad key, rate limit, ...) explicitly instead of
        # relying on a KeyError from the error body.
        response.raise_for_status()
        # returning the content of the response
        return response.json()['choices'][0]['message']['content']
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as exc:
        # Still best-effort: report the cause, then return the sentinel string.
        logging.getLogger(__name__).warning(
            "Vision request for %s failed: %s", image_path, exc
        )
        return failure_message
|
||||
|
||||
|
||||
# create image document
|
||||
def create_image_document(image_path):
|
||||
# getting the image name from the image path
|
||||
image_name = image_path.split('/')[-1].split('.')[0]
|
||||
@@ -100,9 +157,19 @@ def create_image_document(image_path):
|
||||
text = text_extractor.read_text_from_image(image_path)
|
||||
# removing special characters and line breaks
|
||||
text = ''.join(e for e in text if e.isalnum() or e.isspace() or e == '\n')
|
||||
doc = Document(page_content=text, metadata=metadata)
|
||||
# returning the document in a list
|
||||
return [doc]
|
||||
|
||||
# if the text is empty, then we will process the image with OpenAI vision model
|
||||
if text == '':
|
||||
text = process_image(image_path)
|
||||
|
||||
# Only build a document when process_image succeeded, i.e. it did not return its failure message
|
||||
if text != "Image not good enough for processing":
|
||||
# creating a document from the text
|
||||
doc = Document(page_content=text, metadata=metadata)
|
||||
# returning the document
|
||||
return [doc]
|
||||
else:
|
||||
pass # if there's an error, we will return None
|
||||
|
||||
|
||||
def save_embedded_data(embeddings, key="data"):
|
||||
|
||||
Reference in New Issue
Block a user