Add tiered summarization based on pricing plans

- Implement advanced AI summarization with action items for Pro plan
- Create basic bullet-point summarization for Freemium plan
- Add plan tier validation and feature differentiation
- Support speaker identification in transcripts
- Define plan limits (600 mins Pro/200 mins Freemium)
This commit is contained in:
Michael Ikehi
2025-04-24 17:18:53 +01:00
parent 316e82b6cf
commit a91613efe2
6 changed files with 270 additions and 73 deletions
+36 -7
View File
@@ -33,11 +33,12 @@ Transcribes audio or video content from a URL.
```json ```json
{ {
"media_url": "https://example.com/audio-file.mp3", "media_url": "https://example.com/audio-file.mp3",
"media_type": "audio" // "audio" or "video" "media_type": "audio", // "audio" or "video"
"plan_tier": "freemium" // "freemium" or "pro" (optional, defaults to "freemium")
} }
``` ```
**Response:** **Pro Plan Response (with speaker identification):**
```json ```json
{ {
@@ -45,7 +46,33 @@ Transcribes audio or video content from a URL.
"sentences": [ "sentences": [
{ {
"sentence": "Hello and welcome to the meeting.", "sentence": "Hello and welcome to the meeting.",
"speaker": "speaker_0", "speaker": "speaker_0", // Speaker identification included
"start": 0.0,
"end": 2.5,
"words": [
{
"word": "Hello",
"start": 0.0,
"end": 0.4
},
// Additional words...
]
},
// Additional sentences...
]
}
}
```
**Freemium Plan Response (without speaker identification):**
```json
{
"transcript": {
"sentences": [
{
"sentence": "Hello and welcome to the meeting.",
// No "speaker" field in freemium plan
"start": 0.0, "start": 0.0,
"end": 2.5, "end": 2.5,
"words": [ "words": [
@@ -271,10 +298,11 @@ headers = {
"Content-Type": "application/json" "Content-Type": "application/json"
} }
# 1. Transcribe an audio file # 1. Transcribe an audio file (Pro plan with speaker identification)
transcribe_payload = { transcribe_payload = {
"media_url": "https://example.com/meeting-recording.mp3", "media_url": "https://example.com/meeting-recording.mp3",
"media_type": "audio" "media_type": "audio",
"plan_tier": "pro" # Specify "pro" for speaker identification or "freemium" for no speakers
} }
transcribe_response = requests.post( transcribe_response = requests.post(
@@ -316,7 +344,7 @@ else:
const axios = require('axios'); const axios = require('axios');
// API endpoint // API endpoint
const baseUrl = 'https://api.microdot.ai'; const baseUrl = 'http://0.0.0.0:5056';
// Your API key // Your API key
const apiKey = 'your_api_key_here'; const apiKey = 'your_api_key_here';
@@ -331,7 +359,8 @@ const headers = {
const transcribeAudio = async () => { const transcribeAudio = async () => {
const transcribePayload = { const transcribePayload = {
media_url: 'https://example.com/meeting-recording.mp3', media_url: 'https://example.com/meeting-recording.mp3',
media_type: 'audio' media_type: 'audio',
plan_tier: 'pro' // Specify 'pro' for speaker identification or 'freemium' for no speakers
}; };
try { try {
+12 -1
View File
@@ -55,6 +55,7 @@ async def get_api_key(api_key_header: str = Security(api_key_header)) -> str:
class TranscribeRequest(BaseModel): class TranscribeRequest(BaseModel):
media_url: Optional[str] = None media_url: Optional[str] = None
media_type: Optional[str] # Corrected type hint for media_type media_type: Optional[str] # Corrected type hint for media_type
plan_tier: Optional[str] = "freemium" # Default to freemium plan if not specified
class ChatResp(BaseModel): # Added BaseModel inheritance class ChatResp(BaseModel): # Added BaseModel inheritance
error: Optional[str] = None error: Optional[str] = None
@@ -78,6 +79,16 @@ async def chat_endpoint(
api_key: str = Depends(get_api_key) api_key: str = Depends(get_api_key)
): ):
try: try:
# Get the plan tier from the request or default to freemium
plan_tier = request.plan_tier.lower() if request.plan_tier else "freemium"
# Validate plan tier using our PlanTier enum
valid_tiers = [t.value for t in PlanTier]
if plan_tier not in valid_tiers:
plan_tier = PlanTier.FREEMIUM.value # Default to freemium if invalid tier
# Check if the plan includes speaker identification
include_speakers = PlanLimits.get_limit(plan_tier, "speaker_identification")
# Use the transcribe_media function to transcribe the media # Use the transcribe_media function to transcribe the media
if request.media_url: if request.media_url:
@@ -88,7 +99,7 @@ async def chat_endpoint(
# Parse response # Parse response
words = transcription_response["results"]["channels"][0]["alternatives"][0]["words"] words = transcription_response["results"]["channels"][0]["alternatives"][0]["words"]
transcript = group_words_into_sentences(words=words) transcript = group_words_into_sentences(words=words, include_speakers=include_speakers)
return TranscriptResponse( return TranscriptResponse(
transcript=transcript, # Corrected to return the transcript transcript=transcript, # Corrected to return the transcript
error=None error=None
+30 -8
View File
@@ -137,7 +137,19 @@ def transcribe_media(file_loc: str, media_type: str = "audio"):
def group_words_into_sentences(words, max_words=15): def group_words_into_sentences(words, max_words=15, include_speakers=True):
"""
Group words into sentences based on speaker changes.
Args:
words: List of word objects from the transcription
max_words: Maximum number of words per sentence
include_speakers: Whether to include speaker information in the output
(True for Pro plan, False for Freemium plan)
Returns:
A dictionary containing the sentences
"""
sentences = [] sentences = []
current_sentence = [] current_sentence = []
current_speaker = None current_speaker = None
@@ -145,20 +157,25 @@ def group_words_into_sentences(words, max_words=15):
for i, word_info in enumerate(words): for i, word_info in enumerate(words):
word = word_info["punctuated_word"] word = word_info["punctuated_word"]
speaker = word_info["speaker"] speaker = word_info["speaker"] if include_speakers else "speaker_0" # Use a default speaker if not including speakers
start = word_info["start"] start = word_info["start"]
end = word_info["end"] end = word_info["end"]
# If speaker changes or sentence reaches max length, start a new sentence # If speaker changes or sentence reaches max length, start a new sentence
if speaker != current_speaker: if speaker != current_speaker:
if current_sentence: if current_sentence:
sentences.append({ sentence_obj = {
"sentence": " ".join([w["word"] for w in current_sentence]), "sentence": " ".join([w["word"] for w in current_sentence]),
"speaker": current_speaker,
"start": start_time, "start": start_time,
"end": words[i-1]["end"], "end": words[i-1]["end"],
"words": current_sentence "words": current_sentence
}) }
# Only include speaker information if include_speakers is True
if include_speakers:
sentence_obj["speaker"] = current_speaker
sentences.append(sentence_obj)
current_sentence = [] current_sentence = []
current_speaker = speaker current_speaker = speaker
start_time = start start_time = start
@@ -168,13 +185,18 @@ def group_words_into_sentences(words, max_words=15):
# Append the last sentence if any words remain # Append the last sentence if any words remain
if current_sentence: if current_sentence:
sentences.append({ sentence_obj = {
"sentence": " ".join([w["word"] for w in current_sentence]), "sentence": " ".join([w["word"] for w in current_sentence]),
"speaker": current_speaker,
"start": start_time, "start": start_time,
"end": words[-1]["end"], "end": words[-1]["end"],
"words": current_sentence "words": current_sentence
}) }
# Only include speaker information if include_speakers is True
if include_speakers:
sentence_obj["speaker"] = current_speaker
sentences.append(sentence_obj)
return {"sentences": sentences} return {"sentences": sentences}
+4 -2
View File
@@ -15,13 +15,15 @@ class PlanLimits:
"transcription_minutes": 200, "transcription_minutes": 200,
"summary_type": "basic", "summary_type": "basic",
"transcript_history_days": 7, "transcript_history_days": 7,
"integrations": ["google_meet", "zoom"] "integrations": ["google_meet", "zoom"],
"speaker_identification": False
}, },
PlanTier.PRO: { PlanTier.PRO: {
"transcription_minutes": 600, "transcription_minutes": 600,
"summary_type": "advanced", "summary_type": "advanced",
"transcript_history_days": 30, "transcript_history_days": 30,
"integrations": ["google_meet", "zoom", "slack", "notion", "asana", "microsoft_teams"] "integrations": ["google_meet", "zoom", "slack", "notion", "asana", "microsoft_teams"],
"speaker_identification": True
} }
} }
+60 -17
View File
@@ -19,33 +19,76 @@ headers = {
# Audio URL from your notebook # Audio URL from your notebook
audio_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/019933724441Business%20English%20Conversation%20Lesson%2045_%20Meeting%20a%20New%20Colleague.mp3" audio_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/019933724441Business%20English%20Conversation%20Lesson%2045_%20Meeting%20a%20New%20Colleague.mp3"
# 1. First, transcribe the audio # 1. First, transcribe the audio with the Pro plan (with speaker identification)
transcribe_payload = { pro_transcribe_payload = {
"media_url": audio_url, "media_url": audio_url,
"media_type": "audio" "media_type": "audio",
"plan_tier": "pro" # Specify the pro plan to include speaker identification
} }
transcribe_response = requests.post( pro_transcribe_response = requests.post(
f"{base_url}/microdot-ai/transcribe", f"{base_url}/microdot-ai/transcribe",
headers=headers, headers=headers,
json=transcribe_payload json=pro_transcribe_payload
) )
# Check if transcription was successful # Check if Pro plan transcription was successful
if transcribe_response.status_code == 200: if pro_transcribe_response.status_code == 200:
transcript_data = transcribe_response.json() pro_transcript_data = pro_transcribe_response.json()
print("Transcription successful!") print("Pro plan transcription successful!")
# Save the transcript for later use # Save the Pro plan transcript for later use
transcript_json = json.dumps(transcript_data["transcript"], indent=4) pro_transcript_json = json.dumps(pro_transcript_data["transcript"], indent=4)
# Save the transcript to a file # Save the Pro plan transcript to a file
with open("transcript.json", "w") as f: with open("pro_transcript.json", "w") as f:
f.write(transcript_json) f.write(pro_transcript_json)
print("Transcript saved to transcript.json") print("Pro plan transcript saved to pro_transcript.json")
# Check if the Pro plan transcript has speaker information
has_speaker_pro = "speaker" in pro_transcript_data["transcript"]["sentences"][0] if pro_transcript_data["transcript"]["sentences"] else False
print(f"Pro plan has speaker information: {has_speaker_pro}")
else: else:
print(f"Transcription failed with status code: {transcribe_response.status_code}") print(f"Pro plan transcription failed with status code: {pro_transcribe_response.status_code}")
print(transcribe_response.text) print(pro_transcribe_response.text)
# 1b. Now transcribe with the Freemium plan (without speaker identification)
free_transcribe_payload = {
    "media_url": audio_url,
    "media_type": "audio",
    "plan_tier": "freemium"  # Specify the freemium plan to exclude speaker identification
}

free_transcribe_response = requests.post(
    f"{base_url}/microdot-ai/transcribe",
    headers=headers,
    json=free_transcribe_payload
)

# Check if Freemium plan transcription was successful
if free_transcribe_response.status_code == 200:
    free_transcript_data = free_transcribe_response.json()
    print("Free plan transcription successful!")

    # Save the Freemium plan transcript for later use
    free_transcript_json = json.dumps(free_transcript_data["transcript"], indent=4)

    # Save the Freemium plan transcript to a file
    with open("free_transcript.json", "w") as f:
        f.write(free_transcript_json)
    print("Free plan transcript saved to free_transcript.json")

    # Check if the Freemium plan transcript has speaker information
    # (an empty sentence list counts as "no speaker information")
    free_sentences = free_transcript_data["transcript"]["sentences"]
    has_speaker_free = "speaker" in free_sentences[0] if free_sentences else False
    print(f"Free plan has speaker information: {has_speaker_free}")
else:
    print(f"Free plan transcription failed with status code: {free_transcribe_response.status_code}")
    print(free_transcribe_response.text)

# Use the Pro plan transcript for the summary tests. pro_transcript_json is
# only bound when the Pro request returned 200, so the assignment is guarded
# on that status regardless of how the Freemium request went; referencing it
# unconditionally would raise NameError after a failed Pro request.
if pro_transcribe_response.status_code == 200:
    transcript_json = pro_transcript_json
+90
View File
@@ -0,0 +1,90 @@
import os
import requests
import json
from dotenv import load_dotenv
# Load variables from a local .env file (python-dotenv) before the
# environment is read for the API key below.
load_dotenv()
# API endpoint
# Base URL of the service under test; the request paths are appended to it.
base_url = "http://localhost:5056"
# Your API key
# os.getenv returns None when API_KEY_ACCESS is not set, in which case the
# Authorization header below is sent as the literal string "Bearer None".
api_key = os.getenv("API_KEY_ACCESS")
# Headers
# Sent with every request made by this script.
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
}
# Audio URL for testing
# Passed as "media_url" in the transcription payloads.
audio_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/019933724441Business%20English%20Conversation%20Lesson%2045_%20Meeting%20a%20New%20Colleague.mp3"
# (connect, read) timeouts in seconds for the transcription requests.
# Without a timeout, requests waits forever on an unresponsive server. The
# read timeout is deliberately generous because the endpoint transcribes
# the whole file before answering.
# NOTE(review): tune the read timeout to the longest expected transcription.
_REQUEST_TIMEOUT = (10, 600)


def _request_transcription(plan_tier):
    """POST a transcription request for ``audio_url`` under ``plan_tier``.

    Returns the ``requests`` response object; the status is not checked here.
    """
    payload = {
        "media_url": audio_url,
        "media_type": "audio",
        "plan_tier": plan_tier,
    }
    return requests.post(
        f"{base_url}/microdot-ai/transcribe",
        headers=headers,
        json=payload,
        timeout=_REQUEST_TIMEOUT,
    )


def _has_speaker(data):
    """Return True when the first transcript sentence has a "speaker" key.

    An empty sentence list yields False.
    """
    sentences = data["transcript"]["sentences"]
    return bool(sentences) and "speaker" in sentences[0]


def _save_json(path, data):
    """Write ``data`` to ``path`` as indented JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)


# Test function to compare freemium and pro plan transcriptions
def test_plan_differences():
    """Transcribe the same audio under both plans and compare speaker output.

    Sends one request with ``plan_tier="freemium"`` and one with
    ``plan_tier="pro"``. When both return HTTP 200, the responses are saved
    to ``freemium_transcript.json`` and ``pro_transcript.json`` and the
    function prints whether each transcript's first sentence carries a
    "speaker" key, followed by a PASSED/FAILED line. Otherwise it prints the
    status codes and the body of each failed response.

    The outcome is reported via ``print`` only; nothing is returned or
    asserted.

    Raises:
        requests.exceptions.RequestException: on connection errors or when a
            request exceeds ``_REQUEST_TIMEOUT``.
    """
    # 1. Test the freemium plan (no speaker identification)
    print("Testing Freemium Plan (no speaker identification)...")
    freemium_response = _request_transcription("freemium")

    # 2. Test the pro plan (with speaker identification)
    print("Testing Pro Plan (with speaker identification)...")
    pro_response = _request_transcription("pro")

    freemium_ok = freemium_response.status_code == 200
    pro_ok = pro_response.status_code == 200

    # Report failures first so the success path below is not nested
    if not (freemium_ok and pro_ok):
        print(f"Freemium request status: {freemium_response.status_code}")
        print(f"Pro request status: {pro_response.status_code}")
        if not freemium_ok:
            print(f"Freemium error: {freemium_response.text}")
        if not pro_ok:
            print(f"Pro error: {pro_response.text}")
        return

    freemium_data = freemium_response.json()
    pro_data = pro_response.json()

    # Save the transcripts for inspection
    _save_json("freemium_transcript.json", freemium_data)
    _save_json("pro_transcript.json", pro_data)
    print("Transcripts saved to freemium_transcript.json and pro_transcript.json")

    has_speaker_freemium = _has_speaker(freemium_data)
    has_speaker_pro = _has_speaker(pro_data)
    print(f"Freemium plan has speaker information: {has_speaker_freemium}")
    print(f"Pro plan has speaker information: {has_speaker_pro}")

    # Verify the expected behavior: speakers only on the Pro plan
    if not has_speaker_freemium and has_speaker_pro:
        print("✅ Test PASSED: Freemium plan doesn't show speakers, Pro plan does.")
    else:
        print("❌ Test FAILED: Expected behavior not observed.")


if __name__ == "__main__":
    test_plan_differences()