initial update, video snapshotting reduced
This commit is contained in:
@@ -10,6 +10,7 @@ from uuid import uuid4
|
||||
from langchain_core.documents import Document
|
||||
from text_extractor import TextExtractor
|
||||
import os
|
||||
import math
|
||||
import json
|
||||
from groq import Groq
|
||||
import re
|
||||
@@ -213,10 +214,10 @@ def split_audio_by_duration(audio_file_path, chunk_duration_minutes, print_outpu
|
||||
chunk_paths = []
|
||||
|
||||
if audio_duration_ms > chunk_length_ms:
|
||||
# Calculate the number of chunks
|
||||
num_chunks = audio_duration_ms // chunk_length_ms + (1 if audio_duration_ms % chunk_length_ms != 0 else 0)
|
||||
# Calculate the number of chunks, using math.ceil to ensure rounding up
|
||||
num_chunks = math.ceil(audio_duration_ms / chunk_length_ms)
|
||||
|
||||
for i in range(num_chunks):
|
||||
for i in range(int(num_chunks)):
|
||||
start_ms = i * chunk_length_ms
|
||||
end_ms = min(start_ms + chunk_length_ms, audio_duration_ms)
|
||||
chunk = audio[start_ms:end_ms]
|
||||
@@ -299,7 +300,7 @@ def preprocess_video_data(video_path: str, time_interval: int):
|
||||
video_name = os.path.splitext(os.path.basename(video_path))[0]
|
||||
|
||||
# Create a directory for snapshots using the video name
|
||||
snapshot_dir = os.path.join(os.path.dirname(video_path), f"{video_name}_snapshots")
|
||||
snapshot_dir = os.path.join(os.path.dirname(video_path), f"{video_name}")
|
||||
os.makedirs(snapshot_dir, exist_ok=True)
|
||||
|
||||
# Get the duration of the video using ffmpeg
|
||||
@@ -308,15 +309,19 @@ def preprocess_video_data(video_path: str, time_interval: int):
|
||||
|
||||
# Loop through the video and take one snapshot at the start of each interval (0, time_interval, 2 * time_interval, ...).
|
||||
for i in range(0, int(duration), time_interval):
|
||||
# Calculate the time for the current frame
|
||||
frame_time = i
|
||||
start_time = i
|
||||
end_time = min(i + time_interval, int(duration))
|
||||
|
||||
# Format the interval as 'start-end'
|
||||
interval_str = f"{start_time}-{end_time}"
|
||||
|
||||
# Save the snapshot as an image file in the created folder
|
||||
frame_img = os.path.join(snapshot_dir, f"frame_at_{frame_time//60}min.png")
|
||||
frame_img = os.path.join(snapshot_dir, f"{interval_str}s.png")
|
||||
|
||||
# Extract the frame using ffmpeg
|
||||
(
|
||||
ffmpeg
|
||||
.input(video_path, ss=frame_time)
|
||||
.input(video_path, ss=start_time)
|
||||
.output(frame_img, vframes=1)
|
||||
.run()
|
||||
)
|
||||
@@ -326,6 +331,9 @@ def preprocess_video_data(video_path: str, time_interval: int):
|
||||
|
||||
# now creating document from the audio file
|
||||
documents = create_audio_document(audio_path, chunk_duration_minutes=0.5, file_type='video')
|
||||
|
||||
# deleting the audio file
|
||||
os.remove(audio_path)
|
||||
return documents
|
||||
|
||||
|
||||
@@ -333,11 +341,11 @@ def preprocess_video_data(video_path: str, time_interval: int):
|
||||
#-----------------------------------------------------OTHERS--------------------------------------------------------------
|
||||
|
||||
def save_embedded_data(embeddings, key="data"):
|
||||
embeddings.save_local(f"vec-db/index/faiss_index_{key}")
|
||||
embeddings.save_local(f"index/faiss_index_{key}")
|
||||
print("Embeddings saved")
|
||||
|
||||
def load_embedded_data(embeddings=embeddings, key="data"):
|
||||
embed_db = FAISS.load_local(f"vec-db/index/faiss_index_{key}", embeddings, allow_dangerous_deserialization=True)
|
||||
embed_db = FAISS.load_local(f"index/faiss_index_{key}", embeddings, allow_dangerous_deserialization=True)
|
||||
return embed_db
|
||||
|
||||
|
||||
@@ -361,7 +369,7 @@ def load_documents_from_directory(directory_path: str):
|
||||
# updating the path
|
||||
path = os.path.join(directory_path, file)
|
||||
# getting the file extension and doc name
|
||||
doc_name, extension = file.split('.')[0] , file.split('.')[-1]
|
||||
doc_name, extension = path.split('/')[-1].split('.')[0] , file.split('.')[-1]
|
||||
# checking if the file is a text document
|
||||
if extension in text_doc:
|
||||
# loading the document
|
||||
|
||||
Reference in New Issue
Block a user