Add tiered summarization based on pricing plans
- Implement advanced AI summarization with action items for Pro plan
- Create basic bullet-point summarization for Freemium plan
- Add plan tier validation and feature differentiation
- Support speaker identification in transcripts
- Define plan limits (600 mins Pro/200 mins Freemium)
This commit is contained in:
+47
-18
@@ -33,11 +33,12 @@ Transcribes audio or video content from a URL.
|
|||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"media_url": "https://example.com/audio-file.mp3",
|
"media_url": "https://example.com/audio-file.mp3",
|
||||||
"media_type": "audio" // "audio" or "video"
|
"media_type": "audio", // "audio" or "video"
|
||||||
|
"plan_tier": "freemium" // "freemium" or "pro" (optional, defaults to "freemium")
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
**Response:**
|
**Pro Plan Response (with speaker identification):**
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
@@ -45,7 +46,33 @@ Transcribes audio or video content from a URL.
|
|||||||
"sentences": [
|
"sentences": [
|
||||||
{
|
{
|
||||||
"sentence": "Hello and welcome to the meeting.",
|
"sentence": "Hello and welcome to the meeting.",
|
||||||
"speaker": "speaker_0",
|
"speaker": "speaker_0", // Speaker identification included
|
||||||
|
"start": 0.0,
|
||||||
|
"end": 2.5,
|
||||||
|
"words": [
|
||||||
|
{
|
||||||
|
"word": "Hello",
|
||||||
|
"start": 0.0,
|
||||||
|
"end": 0.4
|
||||||
|
},
|
||||||
|
// Additional words...
|
||||||
|
]
|
||||||
|
},
|
||||||
|
// Additional sentences...
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Freemium Plan Response (without speaker identification):**
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"transcript": {
|
||||||
|
"sentences": [
|
||||||
|
{
|
||||||
|
"sentence": "Hello and welcome to the meeting.",
|
||||||
|
// No "speaker" field in freemium plan
|
||||||
"start": 0.0,
|
"start": 0.0,
|
||||||
"end": 2.5,
|
"end": 2.5,
|
||||||
"words": [
|
"words": [
|
||||||
@@ -271,14 +298,15 @@ headers = {
|
|||||||
"Content-Type": "application/json"
|
"Content-Type": "application/json"
|
||||||
}
|
}
|
||||||
|
|
||||||
# 1. Transcribe an audio file
|
# 1. Transcribe an audio file (Pro plan with speaker identification)
|
||||||
transcribe_payload = {
|
transcribe_payload = {
|
||||||
"media_url": "https://example.com/meeting-recording.mp3",
|
"media_url": "https://example.com/meeting-recording.mp3",
|
||||||
"media_type": "audio"
|
"media_type": "audio",
|
||||||
|
"plan_tier": "pro" # Specify "pro" for speaker identification or "freemium" for no speakers
|
||||||
}
|
}
|
||||||
|
|
||||||
transcribe_response = requests.post(
|
transcribe_response = requests.post(
|
||||||
f"{base_url}/microdot-ai/transcribe",
|
f"{base_url}/microdot-ai/transcribe",
|
||||||
headers=headers,
|
headers=headers,
|
||||||
json=transcribe_payload
|
json=transcribe_payload
|
||||||
)
|
)
|
||||||
@@ -287,19 +315,19 @@ transcribe_response = requests.post(
|
|||||||
if transcribe_response.status_code == 200:
|
if transcribe_response.status_code == 200:
|
||||||
transcript_data = transcribe_response.json()
|
transcript_data = transcribe_response.json()
|
||||||
transcript_json = json.dumps(transcript_data["transcript"])
|
transcript_json = json.dumps(transcript_data["transcript"])
|
||||||
|
|
||||||
# 2. Generate a summary (Pro plan)
|
# 2. Generate a summary (Pro plan)
|
||||||
summary_payload = {
|
summary_payload = {
|
||||||
"transcript": transcript_json,
|
"transcript": transcript_json,
|
||||||
"plan_tier": "pro"
|
"plan_tier": "pro"
|
||||||
}
|
}
|
||||||
|
|
||||||
summary_response = requests.post(
|
summary_response = requests.post(
|
||||||
f"{base_url}/microdot-ai/general-summary",
|
f"{base_url}/microdot-ai/general-summary",
|
||||||
headers=headers,
|
headers=headers,
|
||||||
json=summary_payload
|
json=summary_payload
|
||||||
)
|
)
|
||||||
|
|
||||||
if summary_response.status_code == 200:
|
if summary_response.status_code == 200:
|
||||||
summary_data = summary_response.json()
|
summary_data = summary_response.json()
|
||||||
print("Summary generated successfully!")
|
print("Summary generated successfully!")
|
||||||
@@ -316,7 +344,7 @@ else:
|
|||||||
const axios = require('axios');
|
const axios = require('axios');
|
||||||
|
|
||||||
// API endpoint
|
// API endpoint
|
||||||
const baseUrl = 'https://api.microdot.ai';
|
const baseUrl = 'http://0.0.0.0:5056';
|
||||||
|
|
||||||
// Your API key
|
// Your API key
|
||||||
const apiKey = 'your_api_key_here';
|
const apiKey = 'your_api_key_here';
|
||||||
@@ -331,34 +359,35 @@ const headers = {
|
|||||||
const transcribeAudio = async () => {
|
const transcribeAudio = async () => {
|
||||||
const transcribePayload = {
|
const transcribePayload = {
|
||||||
media_url: 'https://example.com/meeting-recording.mp3',
|
media_url: 'https://example.com/meeting-recording.mp3',
|
||||||
media_type: 'audio'
|
media_type: 'audio',
|
||||||
|
plan_tier: 'pro' // Specify 'pro' for speaker identification or 'freemium' for no speakers
|
||||||
};
|
};
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const transcribeResponse = await axios.post(
|
const transcribeResponse = await axios.post(
|
||||||
`${baseUrl}/microdot-ai/transcribe`,
|
`${baseUrl}/microdot-ai/transcribe`,
|
||||||
transcribePayload,
|
transcribePayload,
|
||||||
{ headers }
|
{ headers }
|
||||||
);
|
);
|
||||||
|
|
||||||
const transcriptData = transcribeResponse.data;
|
const transcriptData = transcribeResponse.data;
|
||||||
const transcriptJson = JSON.stringify(transcriptData.transcript);
|
const transcriptJson = JSON.stringify(transcriptData.transcript);
|
||||||
|
|
||||||
// 2. Generate a summary (Pro plan)
|
// 2. Generate a summary (Pro plan)
|
||||||
const summaryPayload = {
|
const summaryPayload = {
|
||||||
transcript: transcriptJson,
|
transcript: transcriptJson,
|
||||||
plan_tier: 'pro'
|
plan_tier: 'pro'
|
||||||
};
|
};
|
||||||
|
|
||||||
const summaryResponse = await axios.post(
|
const summaryResponse = await axios.post(
|
||||||
`${baseUrl}/microdot-ai/general-summary`,
|
`${baseUrl}/microdot-ai/general-summary`,
|
||||||
summaryPayload,
|
summaryPayload,
|
||||||
{ headers }
|
{ headers }
|
||||||
);
|
);
|
||||||
|
|
||||||
console.log('Summary generated successfully!');
|
console.log('Summary generated successfully!');
|
||||||
console.log(JSON.stringify(summaryResponse.data.transcript, null, 2));
|
console.log(JSON.stringify(summaryResponse.data.transcript, null, 2));
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error:', error.response ? error.response.data : error.message);
|
console.error('Error:', error.response ? error.response.data : error.message);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -55,6 +55,7 @@ async def get_api_key(api_key_header: str = Security(api_key_header)) -> str:
|
|||||||
class TranscribeRequest(BaseModel):
|
class TranscribeRequest(BaseModel):
|
||||||
media_url: Optional[str] = None
|
media_url: Optional[str] = None
|
||||||
media_type: Optional[str] # Corrected type hint for media_type
|
media_type: Optional[str] # Corrected type hint for media_type
|
||||||
|
plan_tier: Optional[str] = "freemium" # Default to freemium plan if not specified
|
||||||
|
|
||||||
class ChatResp(BaseModel): # Added BaseModel inheritance
|
class ChatResp(BaseModel): # Added BaseModel inheritance
|
||||||
error: Optional[str] = None
|
error: Optional[str] = None
|
||||||
@@ -78,6 +79,16 @@ async def chat_endpoint(
|
|||||||
api_key: str = Depends(get_api_key)
|
api_key: str = Depends(get_api_key)
|
||||||
):
|
):
|
||||||
try:
|
try:
|
||||||
|
# Get the plan tier from the request or default to freemium
|
||||||
|
plan_tier = request.plan_tier.lower() if request.plan_tier else "freemium"
|
||||||
|
|
||||||
|
# Validate plan tier using our PlanTier enum
|
||||||
|
valid_tiers = [t.value for t in PlanTier]
|
||||||
|
if plan_tier not in valid_tiers:
|
||||||
|
plan_tier = PlanTier.FREEMIUM.value # Default to freemium if invalid tier
|
||||||
|
|
||||||
|
# Check if the plan includes speaker identification
|
||||||
|
include_speakers = PlanLimits.get_limit(plan_tier, "speaker_identification")
|
||||||
|
|
||||||
# Use the transcribe_media function to transcribe the media
|
# Use the transcribe_media function to transcribe the media
|
||||||
if request.media_url:
|
if request.media_url:
|
||||||
@@ -88,7 +99,7 @@ async def chat_endpoint(
|
|||||||
|
|
||||||
# Parse response
|
# Parse response
|
||||||
words = transcription_response["results"]["channels"][0]["alternatives"][0]["words"]
|
words = transcription_response["results"]["channels"][0]["alternatives"][0]["words"]
|
||||||
transcript = group_words_into_sentences(words=words)
|
transcript = group_words_into_sentences(words=words, include_speakers=include_speakers)
|
||||||
return TranscriptResponse(
|
return TranscriptResponse(
|
||||||
transcript=transcript, # Corrected to return the transcript
|
transcript=transcript, # Corrected to return the transcript
|
||||||
error=None
|
error=None
|
||||||
|
|||||||
+52
-30
@@ -28,7 +28,7 @@ def extract_audio(url: str, output_template=os.path.join(UPLOAD_FOLDER, "%(title
|
|||||||
"""
|
"""
|
||||||
Download and extract audio from a video URL using yt-dlp.
|
Download and extract audio from a video URL using yt-dlp.
|
||||||
The file will be saved in the 'upload' folder.
|
The file will be saved in the 'upload' folder.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: The absolute path to the downloaded audio file (with a unique id appended).
|
str: The absolute path to the downloaded audio file (with a unique id appended).
|
||||||
"""
|
"""
|
||||||
@@ -42,51 +42,51 @@ def extract_audio(url: str, output_template=os.path.join(UPLOAD_FOLDER, "%(title
|
|||||||
}],
|
}],
|
||||||
"quiet": True,
|
"quiet": True,
|
||||||
}
|
}
|
||||||
|
|
||||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||||
info = ydl.extract_info(url, download=True)
|
info = ydl.extract_info(url, download=True)
|
||||||
# Prepare the filename from the info.
|
# Prepare the filename from the info.
|
||||||
# Note: prepare_filename returns the filename *before* postprocessing,
|
# Note: prepare_filename returns the filename *before* postprocessing,
|
||||||
# so we change the extension to mp3.
|
# so we change the extension to mp3.
|
||||||
original_filepath = os.path.splitext(ydl.prepare_filename(info))[0] + ".mp3"
|
original_filepath = os.path.splitext(ydl.prepare_filename(info))[0] + ".mp3"
|
||||||
|
|
||||||
# Debug: list files in the upload folder
|
# Debug: list files in the upload folder
|
||||||
if not os.path.exists(original_filepath):
|
if not os.path.exists(original_filepath):
|
||||||
files = os.listdir(UPLOAD_FOLDER)
|
files = os.listdir(UPLOAD_FOLDER)
|
||||||
print("Warning: Could not find expected file.")
|
print("Warning: Could not find expected file.")
|
||||||
print("Files in upload folder:", files)
|
print("Files in upload folder:", files)
|
||||||
raise FileNotFoundError(f"Expected audio file not found: {original_filepath}")
|
raise FileNotFoundError(f"Expected audio file not found: {original_filepath}")
|
||||||
|
|
||||||
# Get the video's title and sanitize it
|
# Get the video's title and sanitize it
|
||||||
title = info.get('title', 'audio')
|
title = info.get('title', 'audio')
|
||||||
safe_title = sanitize_filename(title)
|
safe_title = sanitize_filename(title)
|
||||||
|
|
||||||
# Generate a unique identifier
|
# Generate a unique identifier
|
||||||
unique_id = uuid.uuid4().hex # Unique identifier in hex format
|
unique_id = uuid.uuid4().hex # Unique identifier in hex format
|
||||||
|
|
||||||
# Construct the new filename with the unique id appended.
|
# Construct the new filename with the unique id appended.
|
||||||
new_audio_filename = f"{safe_title}_{unique_id}.mp3"
|
new_audio_filename = f"{safe_title}_{unique_id}.mp3"
|
||||||
new_audio_filepath = os.path.join(UPLOAD_FOLDER, new_audio_filename)
|
new_audio_filepath = os.path.join(UPLOAD_FOLDER, new_audio_filename)
|
||||||
|
|
||||||
# Rename the downloaded file to include the unique ID.
|
# Rename the downloaded file to include the unique ID.
|
||||||
os.rename(original_filepath, new_audio_filepath)
|
os.rename(original_filepath, new_audio_filepath)
|
||||||
print(f"Renamed file to: {new_audio_filepath}")
|
print(f"Renamed file to: {new_audio_filepath}")
|
||||||
|
|
||||||
# Return the absolute path to the renamed audio file.
|
# Return the absolute path to the renamed audio file.
|
||||||
return os.path.abspath(new_audio_filepath)
|
return os.path.abspath(new_audio_filepath)
|
||||||
|
|
||||||
def transcribe_media(file_loc: str, media_type: str = "audio"):
|
def transcribe_media(file_loc: str, media_type: str = "audio"):
|
||||||
"""
|
"""
|
||||||
Transcribe media using Deepgram.
|
Transcribe media using Deepgram.
|
||||||
|
|
||||||
If media_type is "audio" (remote URL), use Deepgram's URL transcription.
|
If media_type is "audio" (remote URL), use Deepgram's URL transcription.
|
||||||
If media_type is "video" (remote URL), extract audio locally (in the upload folder),
|
If media_type is "video" (remote URL), extract audio locally (in the upload folder),
|
||||||
transcribe via file, and then delete the local audio file.
|
transcribe via file, and then delete the local audio file.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file_loc (str): URL to the remote audio or video file.
|
file_loc (str): URL to the remote audio or video file.
|
||||||
media_type (str): "audio" or "video".
|
media_type (str): "audio" or "video".
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: The transcription response from Deepgram.
|
dict: The transcription response from Deepgram.
|
||||||
"""
|
"""
|
||||||
@@ -100,22 +100,22 @@ def transcribe_media(file_loc: str, media_type: str = "audio"):
|
|||||||
smart_format=True,
|
smart_format=True,
|
||||||
diarize=True,
|
diarize=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
if media_type.lower() == "audio":
|
if media_type.lower() == "audio":
|
||||||
# For remote audio files, use the URL transcription method.
|
# For remote audio files, use the URL transcription method.
|
||||||
response = deepgram.listen.rest.v("1").transcribe_url({"url": file_loc}, options)
|
response = deepgram.listen.rest.v("1").transcribe_url({"url": file_loc}, options)
|
||||||
|
|
||||||
elif media_type.lower() == "video":
|
elif media_type.lower() == "video":
|
||||||
# For remote video files, first extract the audio locally.
|
# For remote video files, first extract the audio locally.
|
||||||
local_audio_path = extract_audio(file_loc)
|
local_audio_path = extract_audio(file_loc)
|
||||||
print(f"Extracted audio to: {local_audio_path}")
|
print(f"Extracted audio to: {local_audio_path}")
|
||||||
|
|
||||||
# Transcribe using the local file method.
|
# Transcribe using the local file method.
|
||||||
with open(local_audio_path, "rb") as file:
|
with open(local_audio_path, "rb") as file:
|
||||||
buffer_data = file.read()
|
buffer_data = file.read()
|
||||||
payload: FileSource = {"buffer": buffer_data}
|
payload: FileSource = {"buffer": buffer_data}
|
||||||
response = deepgram.listen.rest.v("1").transcribe_file(payload, options)
|
response = deepgram.listen.rest.v("1").transcribe_file(payload, options)
|
||||||
|
|
||||||
# Clean up: delete the local audio file.
|
# Clean up: delete the local audio file.
|
||||||
if os.path.exists(local_audio_path):
|
if os.path.exists(local_audio_path):
|
||||||
os.remove(local_audio_path)
|
os.remove(local_audio_path)
|
||||||
@@ -123,7 +123,7 @@ def transcribe_media(file_loc: str, media_type: str = "audio"):
|
|||||||
else:
|
else:
|
||||||
raise ValueError("media_type must be either 'audio' or 'video'.")
|
raise ValueError("media_type must be either 'audio' or 'video'.")
|
||||||
|
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -137,54 +137,76 @@ def transcribe_media(file_loc: str, media_type: str = "audio"):
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
def group_words_into_sentences(words, max_words=15):
|
def group_words_into_sentences(words, max_words=15, include_speakers=True):
|
||||||
|
"""
|
||||||
|
Group words into sentences based on speaker changes.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
words: List of word objects from the transcription
|
||||||
|
max_words: Intended maximum number of words per sentence (accepted but not currently enforced; sentences are split only on speaker changes)
|
||||||
|
include_speakers: Whether to include speaker information in the output
|
||||||
|
(True for Pro plan, False for Freemium plan)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A dictionary containing the sentences
|
||||||
|
"""
|
||||||
sentences = []
|
sentences = []
|
||||||
current_sentence = []
|
current_sentence = []
|
||||||
current_speaker = None
|
current_speaker = None
|
||||||
start_time = None
|
start_time = None
|
||||||
|
|
||||||
for i, word_info in enumerate(words):
|
for i, word_info in enumerate(words):
|
||||||
word = word_info["punctuated_word"]
|
word = word_info["punctuated_word"]
|
||||||
speaker = word_info["speaker"]
|
speaker = word_info["speaker"] if include_speakers else "speaker_0" # Use a default speaker if not including speakers
|
||||||
start = word_info["start"]
|
start = word_info["start"]
|
||||||
end = word_info["end"]
|
end = word_info["end"]
|
||||||
|
|
||||||
# If the speaker changes, start a new sentence (max_words is not checked here)
|
# If the speaker changes, start a new sentence (max_words is not checked here)
|
||||||
if speaker != current_speaker:
|
if speaker != current_speaker:
|
||||||
if current_sentence:
|
if current_sentence:
|
||||||
sentences.append({
|
sentence_obj = {
|
||||||
"sentence": " ".join([w["word"] for w in current_sentence]),
|
"sentence": " ".join([w["word"] for w in current_sentence]),
|
||||||
"speaker": current_speaker,
|
|
||||||
"start": start_time,
|
"start": start_time,
|
||||||
"end": words[i-1]["end"],
|
"end": words[i-1]["end"],
|
||||||
"words": current_sentence
|
"words": current_sentence
|
||||||
})
|
}
|
||||||
|
|
||||||
|
# Only include speaker information if include_speakers is True
|
||||||
|
if include_speakers:
|
||||||
|
sentence_obj["speaker"] = current_speaker
|
||||||
|
|
||||||
|
sentences.append(sentence_obj)
|
||||||
current_sentence = []
|
current_sentence = []
|
||||||
current_speaker = speaker
|
current_speaker = speaker
|
||||||
start_time = start
|
start_time = start
|
||||||
|
|
||||||
# Append word with metadata inside the current sentence
|
# Append word with metadata inside the current sentence
|
||||||
current_sentence.append({"word": word, "start": start, "end": end})
|
current_sentence.append({"word": word, "start": start, "end": end})
|
||||||
|
|
||||||
# Append the last sentence if any words remain
|
# Append the last sentence if any words remain
|
||||||
if current_sentence:
|
if current_sentence:
|
||||||
sentences.append({
|
sentence_obj = {
|
||||||
"sentence": " ".join([w["word"] for w in current_sentence]),
|
"sentence": " ".join([w["word"] for w in current_sentence]),
|
||||||
"speaker": current_speaker,
|
|
||||||
"start": start_time,
|
"start": start_time,
|
||||||
"end": words[-1]["end"],
|
"end": words[-1]["end"],
|
||||||
"words": current_sentence
|
"words": current_sentence
|
||||||
})
|
}
|
||||||
|
|
||||||
|
# Only include speaker information if include_speakers is True
|
||||||
|
if include_speakers:
|
||||||
|
sentence_obj["speaker"] = current_speaker
|
||||||
|
|
||||||
|
sentences.append(sentence_obj)
|
||||||
|
|
||||||
return {"sentences": sentences}
|
return {"sentences": sentences}
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
|
|
||||||
audio_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/019933724441Business%20English%20Conversation%20Lesson%2045_%20Meeting%20a%20New%20Colleague.mp3"
|
audio_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/019933724441Business%20English%20Conversation%20Lesson%2045_%20Meeting%20a%20New%20Colleague.mp3"
|
||||||
video_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/038426704141Business%20English%20Conversation%20Lesson%2045_%20%20Meeting%20a%20New%20Colleague.mp4"
|
video_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/038426704141Business%20English%20Conversation%20Lesson%2045_%20%20Meeting%20a%20New%20Colleague.mp4"
|
||||||
|
|
||||||
# Folder for file uploads/downloads
|
# Folder for file uploads/downloads
|
||||||
|
|
||||||
response = transcribe_media(video_url, media_type="video")
|
response = transcribe_media(video_url, media_type="video")
|
||||||
print(response)
|
print(response)
|
||||||
+4
-2
@@ -15,13 +15,15 @@ class PlanLimits:
|
|||||||
"transcription_minutes": 200,
|
"transcription_minutes": 200,
|
||||||
"summary_type": "basic",
|
"summary_type": "basic",
|
||||||
"transcript_history_days": 7,
|
"transcript_history_days": 7,
|
||||||
"integrations": ["google_meet", "zoom"]
|
"integrations": ["google_meet", "zoom"],
|
||||||
|
"speaker_identification": False
|
||||||
},
|
},
|
||||||
PlanTier.PRO: {
|
PlanTier.PRO: {
|
||||||
"transcription_minutes": 600,
|
"transcription_minutes": 600,
|
||||||
"summary_type": "advanced",
|
"summary_type": "advanced",
|
||||||
"transcript_history_days": 30,
|
"transcript_history_days": 30,
|
||||||
"integrations": ["google_meet", "zoom", "slack", "notion", "asana", "microsoft_teams"]
|
"integrations": ["google_meet", "zoom", "slack", "notion", "asana", "microsoft_teams"],
|
||||||
|
"speaker_identification": True
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -19,33 +19,76 @@ headers = {
|
|||||||
# Audio URL from your notebook
|
# Audio URL from your notebook
|
||||||
audio_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/019933724441Business%20English%20Conversation%20Lesson%2045_%20Meeting%20a%20New%20Colleague.mp3"
|
audio_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/019933724441Business%20English%20Conversation%20Lesson%2045_%20Meeting%20a%20New%20Colleague.mp3"
|
||||||
|
|
||||||
# 1. First, transcribe the audio
|
# 1. First, transcribe the audio with the Pro plan (with speaker identification)
|
||||||
transcribe_payload = {
|
pro_transcribe_payload = {
|
||||||
"media_url": audio_url,
|
"media_url": audio_url,
|
||||||
"media_type": "audio"
|
"media_type": "audio",
|
||||||
|
"plan_tier": "pro" # Specify the pro plan to include speaker identification
|
||||||
}
|
}
|
||||||
|
|
||||||
transcribe_response = requests.post(
|
pro_transcribe_response = requests.post(
|
||||||
f"{base_url}/microdot-ai/transcribe",
|
f"{base_url}/microdot-ai/transcribe",
|
||||||
headers=headers,
|
headers=headers,
|
||||||
json=transcribe_payload
|
json=pro_transcribe_payload
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check if transcription was successful
|
# Check if Pro plan transcription was successful
|
||||||
if transcribe_response.status_code == 200:
|
if pro_transcribe_response.status_code == 200:
|
||||||
transcript_data = transcribe_response.json()
|
pro_transcript_data = pro_transcribe_response.json()
|
||||||
print("Transcription successful!")
|
print("Pro plan transcription successful!")
|
||||||
|
|
||||||
# Save the transcript for later use
|
|
||||||
transcript_json = json.dumps(transcript_data["transcript"], indent=4)
|
|
||||||
# Save the transcript to a file
|
|
||||||
with open("transcript.json", "w") as f:
|
|
||||||
f.write(transcript_json)
|
|
||||||
print("Transcript saved to transcript.json")
|
|
||||||
|
|
||||||
|
# Save the Pro plan transcript for later use
|
||||||
|
pro_transcript_json = json.dumps(pro_transcript_data["transcript"], indent=4)
|
||||||
|
# Save the Pro plan transcript to a file
|
||||||
|
with open("pro_transcript.json", "w") as f:
|
||||||
|
f.write(pro_transcript_json)
|
||||||
|
print("Pro plan transcript saved to pro_transcript.json")
|
||||||
|
|
||||||
|
# Check if the Pro plan transcript has speaker information
|
||||||
|
has_speaker_pro = "speaker" in pro_transcript_data["transcript"]["sentences"][0] if pro_transcript_data["transcript"]["sentences"] else False
|
||||||
|
print(f"Pro plan has speaker information: {has_speaker_pro}")
|
||||||
else:
|
else:
|
||||||
print(f"Transcription failed with status code: {transcribe_response.status_code}")
|
print(f"Pro plan transcription failed with status code: {pro_transcribe_response.status_code}")
|
||||||
print(transcribe_response.text)
|
print(pro_transcribe_response.text)
|
||||||
|
|
||||||
|
# 1b. Now transcribe with the Free plan (without speaker identification)
|
||||||
|
free_transcribe_payload = {
|
||||||
|
"media_url": audio_url,
|
||||||
|
"media_type": "audio",
|
||||||
|
"plan_tier": "freemium" # Specify the freemium plan to exclude speaker identification
|
||||||
|
}
|
||||||
|
|
||||||
|
free_transcribe_response = requests.post(
|
||||||
|
f"{base_url}/microdot-ai/transcribe",
|
||||||
|
headers=headers,
|
||||||
|
json=free_transcribe_payload
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check if Free plan transcription was successful
|
||||||
|
if free_transcribe_response.status_code == 200:
|
||||||
|
free_transcript_data = free_transcribe_response.json()
|
||||||
|
print("Free plan transcription successful!")
|
||||||
|
|
||||||
|
# Save the Free plan transcript for later use
|
||||||
|
free_transcript_json = json.dumps(free_transcript_data["transcript"], indent=4)
|
||||||
|
# Save the Free plan transcript to a file
|
||||||
|
with open("free_transcript.json", "w") as f:
|
||||||
|
f.write(free_transcript_json)
|
||||||
|
print("Free plan transcript saved to free_transcript.json")
|
||||||
|
|
||||||
|
# Check if the Free plan transcript has speaker information
|
||||||
|
has_speaker_free = "speaker" in free_transcript_data["transcript"]["sentences"][0] if free_transcript_data["transcript"]["sentences"] else False
|
||||||
|
print(f"Free plan has speaker information: {has_speaker_free}")
|
||||||
|
|
||||||
|
# Use the Pro plan transcript for the summary tests
|
||||||
|
transcript_json = pro_transcript_json
|
||||||
|
else:
|
||||||
|
print(f"Free plan transcription failed with status code: {free_transcribe_response.status_code}")
|
||||||
|
print(free_transcribe_response.text)
|
||||||
|
|
||||||
|
# If Free plan fails but Pro plan succeeded, use Pro plan transcript for summary tests
|
||||||
|
if pro_transcribe_response.status_code == 200:
|
||||||
|
transcript_json = pro_transcript_json
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -56,7 +99,7 @@ basic_summary_payload = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
basic_summary_response = requests.post(
|
basic_summary_response = requests.post(
|
||||||
f"{base_url}/microdot-ai/general-summary",
|
f"{base_url}/microdot-ai/general-summary",
|
||||||
headers=headers,
|
headers=headers,
|
||||||
json=basic_summary_payload
|
json=basic_summary_payload
|
||||||
)
|
)
|
||||||
@@ -83,7 +126,7 @@ advanced_summary_payload = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
advanced_summary_response = requests.post(
|
advanced_summary_response = requests.post(
|
||||||
f"{base_url}/microdot-ai/general-summary",
|
f"{base_url}/microdot-ai/general-summary",
|
||||||
headers=headers,
|
headers=headers,
|
||||||
json=advanced_summary_payload
|
json=advanced_summary_payload
|
||||||
)
|
)
|
||||||
@@ -93,7 +136,7 @@ if advanced_summary_response.status_code == 200:
|
|||||||
advanced_summary_data = advanced_summary_response.json()
|
advanced_summary_data = advanced_summary_response.json()
|
||||||
print("\n--- Advanced (Pro) Summary ---")
|
print("\n--- Advanced (Pro) Summary ---")
|
||||||
advanced_summary_json = json.dumps(advanced_summary_data, indent=2)
|
advanced_summary_json = json.dumps(advanced_summary_data, indent=2)
|
||||||
|
|
||||||
with open("advanced_summary.json", "w") as f:
|
with open("advanced_summary.json", "w") as f:
|
||||||
f.write(advanced_summary_json)
|
f.write(advanced_summary_json)
|
||||||
print("Advanced summary saved to advanced_summary.json")
|
print("Advanced summary saved to advanced_summary.json")
|
||||||
|
|||||||
@@ -0,0 +1,90 @@
|
|||||||
|
import os
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# API endpoint
|
||||||
|
base_url = "http://localhost:5056"
|
||||||
|
|
||||||
|
# Your API key
|
||||||
|
api_key = os.getenv("API_KEY_ACCESS")
|
||||||
|
|
||||||
|
# Headers
|
||||||
|
headers = {
|
||||||
|
"Authorization": f"Bearer {api_key}",
|
||||||
|
"Content-Type": "application/json"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Audio URL for testing
|
||||||
|
audio_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/019933724441Business%20English%20Conversation%20Lesson%2045_%20Meeting%20a%20New%20Colleague.mp3"
|
||||||
|
|
||||||
|
# Test function to compare freemium and pro plan transcriptions
|
||||||
|
def test_plan_differences():
|
||||||
|
# 1. Test the freemium plan (no speaker identification)
|
||||||
|
freemium_payload = {
|
||||||
|
"media_url": audio_url,
|
||||||
|
"media_type": "audio",
|
||||||
|
"plan_tier": "freemium"
|
||||||
|
}
|
||||||
|
|
||||||
|
print("Testing Freemium Plan (no speaker identification)...")
|
||||||
|
freemium_response = requests.post(
|
||||||
|
f"{base_url}/microdot-ai/transcribe",
|
||||||
|
headers=headers,
|
||||||
|
json=freemium_payload
|
||||||
|
)
|
||||||
|
|
||||||
|
# 2. Test the pro plan (with speaker identification)
|
||||||
|
pro_payload = {
|
||||||
|
"media_url": audio_url,
|
||||||
|
"media_type": "audio",
|
||||||
|
"plan_tier": "pro"
|
||||||
|
}
|
||||||
|
|
||||||
|
print("Testing Pro Plan (with speaker identification)...")
|
||||||
|
pro_response = requests.post(
|
||||||
|
f"{base_url}/microdot-ai/transcribe",
|
||||||
|
headers=headers,
|
||||||
|
json=pro_payload
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check if both requests were successful
|
||||||
|
if freemium_response.status_code == 200 and pro_response.status_code == 200:
|
||||||
|
freemium_data = freemium_response.json()
|
||||||
|
pro_data = pro_response.json()
|
||||||
|
|
||||||
|
# Save the transcripts for inspection
|
||||||
|
with open("freemium_transcript.json", "w") as f:
|
||||||
|
f.write(json.dumps(freemium_data, indent=4))
|
||||||
|
|
||||||
|
with open("pro_transcript.json", "w") as f:
|
||||||
|
f.write(json.dumps(pro_data, indent=4))
|
||||||
|
|
||||||
|
print("Transcripts saved to freemium_transcript.json and pro_transcript.json")
|
||||||
|
|
||||||
|
# Check if the freemium plan has speaker information
|
||||||
|
has_speaker_freemium = "speaker" in freemium_data["transcript"]["sentences"][0] if freemium_data["transcript"]["sentences"] else False
|
||||||
|
|
||||||
|
# Check if the pro plan has speaker information
|
||||||
|
has_speaker_pro = "speaker" in pro_data["transcript"]["sentences"][0] if pro_data["transcript"]["sentences"] else False
|
||||||
|
|
||||||
|
print(f"Freemium plan has speaker information: {has_speaker_freemium}")
|
||||||
|
print(f"Pro plan has speaker information: {has_speaker_pro}")
|
||||||
|
|
||||||
|
# Verify the expected behavior
|
||||||
|
if not has_speaker_freemium and has_speaker_pro:
|
||||||
|
print("✅ Test PASSED: Freemium plan doesn't show speakers, Pro plan does.")
|
||||||
|
else:
|
||||||
|
print("❌ Test FAILED: Expected behavior not observed.")
|
||||||
|
|
||||||
|
else:
|
||||||
|
print(f"Freemium request status: {freemium_response.status_code}")
|
||||||
|
print(f"Pro request status: {pro_response.status_code}")
|
||||||
|
if freemium_response.status_code != 200:
|
||||||
|
print(f"Freemium error: {freemium_response.text}")
|
||||||
|
if pro_response.status_code != 200:
|
||||||
|
print(f"Pro error: {pro_response.text}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_plan_differences()
|
||||||
Reference in New Issue
Block a user