Compare commits
4 Commits
99b7668b03
..
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 2ee0d1638a | |||
| a91613efe2 | |||
| 316e82b6cf | |||
| c345538243 |
@@ -0,0 +1,397 @@
|
||||
# Microdot AI API Documentation
|
||||
|
||||
## Overview
|
||||
|
||||
Microdot AI provides a powerful API for transcribing audio/video content and generating AI-powered summaries. The API supports different pricing tiers with varying features and capabilities.
|
||||
|
||||
## Base URL
|
||||
|
||||
```
|
||||
http://localhost:5056
|
||||
```
|
||||
|
||||
## Authentication
|
||||
|
||||
All API requests require authentication using a Bearer token.
|
||||
|
||||
```
|
||||
Authorization: Bearer YOUR_API_KEY
|
||||
```
|
||||
|
||||
## Endpoints
|
||||
|
||||
### 1. Transcribe Media
|
||||
|
||||
Transcribes audio or video content from a URL.
|
||||
|
||||
**Endpoint:** `/microdot-ai/transcribe`
|
||||
|
||||
**Method:** POST
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"media_url": "https://example.com/audio-file.mp3",
|
||||
"media_type": "audio", // "audio" or "video"
|
||||
"plan_tier": "freemium" // "freemium" or "pro" (optional, defaults to "freemium")
|
||||
}
|
||||
```
|
||||
|
||||
**Pro Plan Response (with speaker identification):**
|
||||
|
||||
```json
|
||||
{
|
||||
"transcript": {
|
||||
"sentences": [
|
||||
{
|
||||
"sentence": "Hello and welcome to the meeting.",
|
||||
"speaker": "speaker_0", // Speaker identification included
|
||||
"start": 0.0,
|
||||
"end": 2.5,
|
||||
"words": [
|
||||
{
|
||||
"word": "Hello",
|
||||
"start": 0.0,
|
||||
"end": 0.4
|
||||
},
|
||||
// Additional words...
|
||||
]
|
||||
},
|
||||
// Additional sentences...
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Freemium Plan Response (without speaker identification):**
|
||||
|
||||
```json
|
||||
{
|
||||
"transcript": {
|
||||
"sentences": [
|
||||
{
|
||||
"sentence": "Hello and welcome to the meeting.",
|
||||
// No "speaker" field in freemium plan
|
||||
"start": 0.0,
|
||||
"end": 2.5,
|
||||
"words": [
|
||||
{
|
||||
"word": "Hello",
|
||||
"start": 0.0,
|
||||
"end": 0.4
|
||||
},
|
||||
// Additional words...
|
||||
]
|
||||
},
|
||||
// Additional sentences...
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Generate General Summary
|
||||
|
||||
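Generates a summary of a transcript based on the user's plan tier. The `transcript` field must be a JSON-encoded string (for example, the `transcript` object returned by the transcribe endpoint, serialized with `json.dumps` / `JSON.stringify`).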
|
||||
|
||||
**Endpoint:** `/microdot-ai/general-summary`
|
||||
|
||||
**Method:** POST
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"transcript": "JSON_STRING_OF_TRANSCRIPT",
|
||||
"plan_tier": "pro" // "freemium" or "pro" (optional, defaults to "pro")
|
||||
}
|
||||
```
|
||||
|
||||
#### Freemium Plan Response:
|
||||
|
||||
```json
|
||||
{
|
||||
"transcript": {
|
||||
"Key_Points": [
|
||||
{
|
||||
"text": "Team discussed Q3 marketing strategy.",
|
||||
"timestamp": 120.5
|
||||
},
|
||||
{
|
||||
"text": "Budget approval needed by Friday.",
|
||||
"timestamp": 360.2
|
||||
},
|
||||
{
|
||||
"text": "New product launch delayed until September.",
|
||||
"timestamp": 480.7
|
||||
}
|
||||
],
|
||||
"Summary": {
|
||||
"text": "Marketing team meeting to review Q3 plans and budget requirements. Team agreed on strategy but product launch delayed.",
|
||||
"duration_minutes": 15.5
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Pro Plan Response:
|
||||
|
||||
```json
|
||||
{
|
||||
"transcript": {
|
||||
"Purpose": {
|
||||
"text": "Discuss project progress and define upcoming milestones."
|
||||
},
|
||||
"Chapters": {
|
||||
"minutes_total": 3,
|
||||
"content": [
|
||||
{
|
||||
"chapter": "Project Overview",
|
||||
"time_stamp": {"start": 5.12, "end": 5.68},
|
||||
"content": [
|
||||
{"text": "- overview of the project's objectives.", "original_transcript_start": 3.4, "original_transcript_end": 5.7},
|
||||
// Additional content...
|
||||
],
|
||||
"words_time_stamp": [
|
||||
{"word": "Project", "timestamp": 5.12},
|
||||
{"word": "Overview", "timestamp": 5.12}
|
||||
]
|
||||
},
|
||||
// Additional chapters...
|
||||
]
|
||||
},
|
||||
"Outcomes": {
|
||||
"minutes_total": 3,
|
||||
"content": [
|
||||
// Outcome content...
|
||||
]
|
||||
},
|
||||
"Action_Items_Per_User": [
|
||||
{
|
||||
"speaker": "Speaker_A",
|
||||
"minutes_total": 3,
|
||||
"action_items": [
|
||||
// Action items...
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Generate Template Summary
|
||||
|
||||
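Generates a custom summary based on a user-defined template. Both `transcript` and `template` must be JSON-encoded strings; the template maps each desired section name to an instruction describing what that section should contain.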
|
||||
|
||||
**Endpoint:** `/microdot-ai/template-summary`
|
||||
|
||||
**Method:** POST
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"transcript": "JSON_STRING_OF_TRANSCRIPT",
|
||||
"template": "JSON_STRING_OF_TEMPLATE"
|
||||
}
|
||||
```
|
||||
|
||||
**Example Template:**
|
||||
|
||||
```json
|
||||
{
|
||||
"Key_Points": "Summarize the most critical discussion points from the meeting.",
|
||||
"Summary": "Provide a brief overall summary of what was discussed.",
|
||||
"Next_Steps": "List the next steps decided during the meeting, including any action items."
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"transcript": {
|
||||
"Key_Points": {
|
||||
"minutes_total": 3.5,
|
||||
"content": [
|
||||
{
|
||||
"text": "Introductions between Diane Taylor and Cody Smith.",
|
||||
"time_stamp": {"start": 5.12, "end": 5.68},
|
||||
"words_time_stamp": [
|
||||
{"word": "Introductions", "timestamp": 5.12},
|
||||
// Additional words...
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"Summary": {
|
||||
// Summary content...
|
||||
},
|
||||
"Next_Steps": {
|
||||
// Next steps content...
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Plan Features
|
||||
|
||||
### Freemium Plan
|
||||
|
||||
- 200 minutes of transcription per month
|
||||
- Basic AI summarization (short bullet points)
|
||||
- 7-day transcript history
|
||||
- Limited integrations (Google Meet & Zoom only)
|
||||
|
||||
### Pro Plan
|
||||
|
||||
- 600 minutes of transcription per month
|
||||
- Advanced AI summarization with action items
|
||||
- Speaker identification
|
||||
- 30-day transcript history
|
||||
- Multi-platform integrations (Slack, Notion, Asana, Microsoft Teams)
|
||||
|
||||
## Error Responses
|
||||
|
||||
All endpoints return standard HTTP status codes:
|
||||
|
||||
- `200 OK`: Request successful
|
||||
- `400 Bad Request`: Invalid request parameters
|
||||
- `401 Unauthorized`: Invalid or missing API key
- `402 Payment Required`: Plan's transcription minutes exhausted (see Rate Limits)
|
||||
- `500 Internal Server Error`: Server-side error
|
||||
|
||||
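Error response format (used for authentication errors; the summary endpoints currently return `detail` as a plain string message instead of an object):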
|
||||
|
||||
```json
|
||||
{
|
||||
"detail": {
|
||||
"error": "Error type",
|
||||
"message": "Detailed error message"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Rate Limits
|
||||
|
||||
- Freemium Plan: 100 requests per day
|
||||
- Pro Plan: 1000 requests per day
|
||||
|
||||
Exceeding your plan's transcription minutes will result in a `402 Payment Required` response until the next billing cycle.
|
||||
|
||||
## Code Examples
|
||||
|
||||
### Python
|
||||
|
||||
```python
|
||||
import requests
|
||||
import json
|
||||
|
||||
# API endpoint
|
||||
base_url = "http://localhost:5056"
|
||||
|
||||
# Your API key
|
||||
api_key = "your_api_key_here"
|
||||
|
||||
# Headers
|
||||
headers = {
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
|
||||
# 1. Transcribe an audio file (Pro plan with speaker identification)
|
||||
transcribe_payload = {
|
||||
"media_url": "https://example.com/meeting-recording.mp3",
|
||||
"media_type": "audio",
|
||||
"plan_tier": "pro" # Specify "pro" for speaker identification or "freemium" for no speakers
|
||||
}
|
||||
|
||||
transcribe_response = requests.post(
|
||||
f"{base_url}/microdot-ai/transcribe",
|
||||
headers=headers,
|
||||
json=transcribe_payload
|
||||
)
|
||||
|
||||
# Check if transcription was successful
|
||||
if transcribe_response.status_code == 200:
|
||||
transcript_data = transcribe_response.json()
|
||||
transcript_json = json.dumps(transcript_data["transcript"])
|
||||
|
||||
# 2. Generate a summary (Pro plan)
|
||||
summary_payload = {
|
||||
"transcript": transcript_json,
|
||||
"plan_tier": "pro"
|
||||
}
|
||||
|
||||
summary_response = requests.post(
|
||||
f"{base_url}/microdot-ai/general-summary",
|
||||
headers=headers,
|
||||
json=summary_payload
|
||||
)
|
||||
|
||||
if summary_response.status_code == 200:
|
||||
summary_data = summary_response.json()
|
||||
print("Summary generated successfully!")
|
||||
print(json.dumps(summary_data["transcript"], indent=2))
|
||||
else:
|
||||
print(f"Summary generation failed: {summary_response.text}")
|
||||
else:
|
||||
print(f"Transcription failed: {transcribe_response.text}")
|
||||
```
|
||||
|
||||
### JavaScript
|
||||
|
||||
```javascript
|
||||
const axios = require('axios');
|
||||
|
||||
// API endpoint
|
||||
const baseUrl = 'http://localhost:5056';
|
||||
|
||||
// Your API key
|
||||
const apiKey = 'your_api_key_here';
|
||||
|
||||
// Headers
|
||||
const headers = {
|
||||
'Authorization': `Bearer ${apiKey}`,
|
||||
'Content-Type': 'application/json'
|
||||
};
|
||||
|
||||
// 1. Transcribe an audio file
|
||||
const transcribeAudio = async () => {
|
||||
const transcribePayload = {
|
||||
media_url: 'https://example.com/meeting-recording.mp3',
|
||||
media_type: 'audio',
|
||||
plan_tier: 'pro' // Specify 'pro' for speaker identification or 'freemium' for no speakers
|
||||
};
|
||||
|
||||
try {
|
||||
const transcribeResponse = await axios.post(
|
||||
`${baseUrl}/microdot-ai/transcribe`,
|
||||
transcribePayload,
|
||||
{ headers }
|
||||
);
|
||||
|
||||
const transcriptData = transcribeResponse.data;
|
||||
const transcriptJson = JSON.stringify(transcriptData.transcript);
|
||||
|
||||
// 2. Generate a summary (Pro plan)
|
||||
const summaryPayload = {
|
||||
transcript: transcriptJson,
|
||||
plan_tier: 'pro'
|
||||
};
|
||||
|
||||
const summaryResponse = await axios.post(
|
||||
`${baseUrl}/microdot-ai/general-summary`,
|
||||
summaryPayload,
|
||||
{ headers }
|
||||
);
|
||||
|
||||
console.log('Summary generated successfully!');
|
||||
console.log(JSON.stringify(summaryResponse.data.transcript, null, 2));
|
||||
|
||||
} catch (error) {
|
||||
console.error('Error:', error.response ? error.response.data : error.message);
|
||||
}
|
||||
};
|
||||
|
||||
transcribeAudio();
|
||||
```
|
||||
@@ -3,24 +3,14 @@ from typing import Optional
|
||||
from fastapi import FastAPI, HTTPException, Security, Depends
|
||||
from fastapi.security import APIKeyHeader
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse
|
||||
from dotenv import load_dotenv
|
||||
import json
|
||||
from pydantic import BaseModel
|
||||
from langchain_openai import ChatOpenAI
|
||||
import requests
|
||||
import tempfile
|
||||
from typing import Dict, Any
|
||||
from fastapi.responses import Response
|
||||
from datetime import datetime
|
||||
from fastapi import HTTPException
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, Union, Dict, Any
|
||||
import os
|
||||
import requests
|
||||
import os
|
||||
from scripts.transcriber import transcribe_media,group_words_into_sentences # Import the transcribe_media function
|
||||
from scripts.generate_summary import general_summary,custom_summary
|
||||
from scripts.transcriber import transcribe_media, group_words_into_sentences
|
||||
from scripts.generate_summary import general_summary, custom_summary
|
||||
from src.models import PlanTier, PlanLimits
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
API_KEY = os.getenv("API_KEY_ACCESS")
|
||||
@@ -52,19 +42,20 @@ async def get_api_key(api_key_header: str = Security(api_key_header)) -> str:
|
||||
status_code=401,
|
||||
detail={"error": "Unauthorized", "message": "API key is missing or invalid."}
|
||||
)
|
||||
|
||||
|
||||
token = api_key_header.split(' ')[1]
|
||||
if token != API_KEY:
|
||||
raise HTTPException(
|
||||
status_code=401,
|
||||
detail={"error": "Unauthorized", "message": "API key does not match."}
|
||||
)
|
||||
|
||||
|
||||
return token
|
||||
|
||||
class TranscribeRequest(BaseModel):
    # Request body for the transcription endpoint.
    # URL of the remote media; the handler only calls transcribe_media() when this is set.
    media_url: Optional[str] = None
    # Forwarded to transcribe_media(), which accepts "audio" or "video" (case-insensitive).
    # NOTE(review): no default is declared here, and transcribe_media() calls
    # media_type.lower(), so a null value would fail there — confirm intended handling.
    media_type: Optional[str]
    # Plan tier name; the handler lower-cases it and falls back to freemium when
    # the value is missing or not a known PlanTier.
    plan_tier: Optional[str] = "freemium"
|
||||
|
||||
class ChatResp(BaseModel): # Added BaseModel inheritance
|
||||
error: Optional[str] = None
|
||||
@@ -75,7 +66,8 @@ class TranscriptResponse(BaseModel):
|
||||
|
||||
class GeneralSummaryRequest(BaseModel):
    # Request body for the general-summary endpoint.
    # JSON-encoded transcript; the handler rejects empty values and json.loads() the rest.
    transcript: Optional[str] = None
    # Plan tier name; the handler lower-cases it and falls back to pro when the
    # value is missing or not a known PlanTier.
    plan_tier: Optional[str] = "pro"
|
||||
|
||||
class TemplateSummaryRequest(BaseModel):
|
||||
transcript: Optional[str] = None
|
||||
template: Optional[str] = None
|
||||
@@ -87,7 +79,17 @@ async def chat_endpoint(
|
||||
api_key: str = Depends(get_api_key)
|
||||
):
|
||||
try:
|
||||
|
||||
# Get the plan tier from the request or default to freemium
|
||||
plan_tier = request.plan_tier.lower() if request.plan_tier else "freemium"
|
||||
|
||||
# Validate plan tier using our PlanTier enum
|
||||
valid_tiers = [t.value for t in PlanTier]
|
||||
if plan_tier not in valid_tiers:
|
||||
plan_tier = PlanTier.FREEMIUM.value # Default to freemium if invalid tier
|
||||
|
||||
# Check if the plan includes speaker identification
|
||||
include_speakers = PlanLimits.get_limit(plan_tier, "speaker_identification")
|
||||
|
||||
# Use the transcribe_media function to transcribe the media
|
||||
if request.media_url:
|
||||
transcription_response = transcribe_media(request.media_url, media_type=request.media_type)
|
||||
@@ -97,12 +99,12 @@ async def chat_endpoint(
|
||||
|
||||
# Parse response
|
||||
words = transcription_response["results"]["channels"][0]["alternatives"][0]["words"]
|
||||
transcript = group_words_into_sentences(words=words)
|
||||
transcript = group_words_into_sentences(words=words, include_speakers=include_speakers)
|
||||
return TranscriptResponse(
|
||||
transcript=transcript, # Corrected to return the transcript
|
||||
error=None
|
||||
)
|
||||
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error processing chat request: {str(e)}") # Print statement added
|
||||
raise HTTPException(
|
||||
@@ -112,55 +114,67 @@ async def chat_endpoint(
|
||||
|
||||
|
||||
@app.post("/microdot-ai/general-summary")
async def general_summary_endpoint(
    request: GeneralSummaryRequest,
    api_key: str = Depends(get_api_key)
):
    """Summarize a transcript according to the caller's plan tier.

    Args:
        request: Body carrying the JSON-encoded ``transcript`` and an optional
            ``plan_tier`` (unknown or missing tiers fall back to pro).
        api_key: Validated API key injected by ``get_api_key``; unused beyond
            enforcing authentication.

    Returns:
        TranscriptResponse whose ``transcript`` holds the generated summary.

    Raises:
        HTTPException: 400 when the transcript is missing or is not valid JSON;
            500 when summary generation fails.
    """
    # Validate input before entering the generic error handler so client
    # mistakes are reported as 400 instead of being re-wrapped as 500.
    if not request.transcript:
        raise HTTPException(status_code=400, detail="Transcript is required.")

    try:
        transcript = json.loads(request.transcript)
    except json.JSONDecodeError as e:
        raise HTTPException(
            status_code=400,
            detail=f"Transcript is not valid JSON: {str(e)}"
        ) from e

    # Get the plan tier from the request or default to pro
    plan_tier = request.plan_tier.lower() if request.plan_tier else "pro"

    # Validate plan tier using our PlanTier enum
    valid_tiers = [t.value for t in PlanTier]
    if plan_tier not in valid_tiers:
        plan_tier = PlanTier.PRO.value  # Default to pro if invalid tier

    try:
        # Generate the summary based on the plan tier
        response = general_summary(transcript, plan_tier=plan_tier)
        return TranscriptResponse(
            transcript=response
        )
    except HTTPException:
        # Preserve deliberate HTTP errors raised further down the stack.
        raise
    except Exception as e:
        print(f"Error processing general summary request: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Error processing general summary request: {str(e)}"
        )
|
||||
|
||||
|
||||
|
||||
@app.post("/microdot-ai/template-summary")
async def template_summary_endpoint(
    request: TemplateSummaryRequest,
    api_key: str = Depends(get_api_key)
):
    """Summarize a transcript following a caller-supplied template.

    Args:
        request: Body carrying JSON-encoded ``transcript`` and ``template``
            strings; both are required.
        api_key: Validated API key injected by ``get_api_key``; unused beyond
            enforcing authentication.

    Returns:
        TranscriptResponse whose ``transcript`` holds the generated summary.

    Raises:
        HTTPException: 400 when either field is missing or is not valid JSON;
            500 when summary generation fails.
    """
    # Validate input before entering the generic error handler so client
    # mistakes are reported as 400 instead of being re-wrapped as 500.
    if not request.transcript:
        raise HTTPException(status_code=400, detail="Transcript is required.")

    if not request.template:
        raise HTTPException(status_code=400, detail="Template is required.")

    try:
        transcript = json.loads(request.transcript)
    except json.JSONDecodeError as e:
        raise HTTPException(
            status_code=400,
            detail=f"Transcript is not valid JSON: {str(e)}"
        ) from e

    try:
        template = json.loads(request.template)
    except json.JSONDecodeError as e:
        raise HTTPException(
            status_code=400,
            detail=f"Template is not valid JSON: {str(e)}"
        ) from e

    try:
        response = custom_summary(template, transcript)
        return TranscriptResponse(
            transcript=response
        )
    except HTTPException:
        # Preserve deliberate HTTP errors raised further down the stack.
        raise
    except Exception as e:
        print(f"Error processing template summary request: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Error processing template summary request: {str(e)}"
        )
|
||||
|
||||
|
||||
|
||||
@app.on_event("startup")
|
||||
async def startup_event():
|
||||
@@ -170,5 +184,4 @@ async def startup_event():
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
uvicorn.run("app:app", host="0.0.0.0", port=5056, reload=True)
|
||||
|
||||
|
||||
|
||||
|
||||
+4
-1
@@ -25,4 +25,7 @@ moviepy
|
||||
yt-dlp
|
||||
ffmpeg-python
|
||||
reportlab
|
||||
anthropic
|
||||
anthropic
|
||||
pytest
|
||||
pytest-mock
|
||||
langchain-anthropic
|
||||
+239
-29
@@ -1,42 +1,252 @@
|
||||
import anthropic
|
||||
import os
|
||||
import logging
|
||||
from typing import Dict, List, Any, Optional
|
||||
from dotenv import load_dotenv
|
||||
import json
|
||||
from src.prompt import general_summary_prompt,custom_template_prompt
|
||||
from pydantic import BaseModel
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
from langchain_core.messages import HumanMessage, SystemMessage
|
||||
from src.prompt import advanced_summary_prompt, basic_summary_prompt, custom_template_prompt
|
||||
|
||||
load_dotenv()
|
||||
|
||||
def general_summary(transcription):
|
||||
client = anthropic.Anthropic(
|
||||
api_key=os.getenv("ANTHTROPIC_API_KEY"),
|
||||
)
|
||||
message = client.messages.create(
|
||||
model="claude-3-5-sonnet-20241022",
|
||||
max_tokens=4000,
|
||||
messages=[
|
||||
{"role": "user", "content": f"{general_summary_prompt}"},
|
||||
{"role": "user", "content": f"Transcription: {transcription}"}
|
||||
]
|
||||
# Setup logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ============================================================================
|
||||
# Pydantic Models for Structured Outputs
|
||||
# ============================================================================
|
||||
|
||||
# Basic Summary Models (Freemium Plan)
|
||||
class KeyPoint(BaseModel):
    """A key point from the meeting"""
    # Element type of BasicSummary.Key_Points.
    text: str
    # Position of the key point in the recording — presumably seconds; TODO confirm
    # against the prompt that produces it.
    timestamp: float
|
||||
|
||||
class Summary(BaseModel):
    """Overall summary of the meeting"""
    # Type of the BasicSummary.Summary field.
    text: str
    # Meeting length, in minutes per the field name.
    duration_minutes: float
|
||||
|
||||
class BasicSummary(BaseModel):
    """Basic summary structure for freemium plan"""
    # Output schema general_summary() passes to with_structured_output() when
    # plan_tier is "freemium"; the capitalized field names become the keys of
    # the dict it returns via model_dump().
    Key_Points: List[KeyPoint]
    # NOTE(review): the field name is the same as the Summary model's name. The
    # annotation still resolves to the module-level class because nothing is
    # assigned to the name inside this class body — avoid adding a default here.
    Summary: Summary
|
||||
|
||||
|
||||
# Advanced Summary Models (Pro Plan)
|
||||
class Purpose(BaseModel):
    """Purpose of the meeting"""
    # Type of the AdvancedSummary.Purpose field.
    text: str
|
||||
|
||||
class ChapterContent(BaseModel):
    """Content item within a chapter"""
    # Element type of Chapter.content.
    text: str
    # Start/end of the transcript span this item was derived from — presumably
    # seconds; TODO confirm against the prompt.
    original_transcript_start: float
    original_transcript_end: float
|
||||
|
||||
class WordTimestamp(BaseModel):
    """Word-level timestamp"""
    # Element type of the words_time_stamp lists on Chapter, OutcomeContent and ActionItem.
    word: str
    # Presumably seconds from the start of the recording; TODO confirm.
    timestamp: float
|
||||
|
||||
class TimeStamp(BaseModel):
    """Time range"""
    # Type of the time_stamp field on Chapter, OutcomeContent and ActionItem.
    # Bounds are presumably seconds; TODO confirm.
    start: float
    end: float
|
||||
|
||||
class Chapter(BaseModel):
    """A chapter in the meeting"""
    # Element type of Chapters.content.
    chapter: str  # chapter title
    time_stamp: TimeStamp
    content: List[ChapterContent]
    words_time_stamp: List[WordTimestamp]
|
||||
|
||||
class Chapters(BaseModel):
    """Chapters section"""
    # Type of the AdvancedSummary.Chapters field.
    minutes_total: float
    content: List[Chapter]
|
||||
|
||||
class OutcomeContent(BaseModel):
    """Content item in outcomes"""
    # Element type of Outcomes.content.
    text: str
    time_stamp: TimeStamp
    words_time_stamp: List[WordTimestamp]
|
||||
|
||||
class Outcomes(BaseModel):
    """Outcomes section"""
    # Type of the AdvancedSummary.Outcomes field.
    minutes_total: float
    content: List[OutcomeContent]
|
||||
|
||||
class ActionItem(BaseModel):
    """An action item"""
    # Element type of ActionItemsPerUser.action_items.
    text: str
    time_stamp: TimeStamp
    words_time_stamp: List[WordTimestamp]
|
||||
|
||||
class ActionItemsPerUser(BaseModel):
    """Action items for a specific user"""
    # Element type of AdvancedSummary.Action_Items_Per_User.
    speaker: str  # speaker label the action items are attributed to
    minutes_total: float
    action_items: List[ActionItem]
|
||||
|
||||
class AdvancedSummary(BaseModel):
    """Advanced summary structure for pro plan"""
    # Output schema general_summary() passes to with_structured_output() for any
    # plan tier other than "freemium"; the capitalized field names become the
    # keys of the dict it returns via model_dump().
    # NOTE(review): Purpose, Chapters and Outcomes reuse their model's name as
    # the field name; the annotations resolve to the module-level classes
    # because nothing is assigned to those names inside this class body.
    Purpose: Purpose
    Chapters: Chapters
    Outcomes: Outcomes
    Action_Items_Per_User: List[ActionItemsPerUser]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Summary Generation Functions
|
||||
# ============================================================================
|
||||
|
||||
def _get_anthropic_api_key():
    """Return the Anthropic API key, also accepting the legacy misspelled variable."""
    api_key = os.getenv("ANTHROPIC_API_KEY") or os.getenv("ANTHTROPIC_API_KEY")
    if not api_key:
        raise ValueError("ANTHROPIC_API_KEY environment variable is required")
    return api_key


def _to_prompt_text(value):
    """Serialize prompt input: JSON for dicts and lists, ``str()`` for anything else."""
    if isinstance(value, (dict, list)):
        return json.dumps(value)
    return str(value)


def _response_text(response):
    """Flatten a chat model response into a single string.

    Message content may be a plain string or a list of content blocks; only
    string items and ``{"type": "text"}`` blocks contribute to the result.
    """
    if not hasattr(response, "content"):
        return str(response)
    content = response.content
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts = []
        for block in content:
            if isinstance(block, str):
                parts.append(block)
            elif isinstance(block, dict) and block.get("type") == "text":
                parts.append(block.get("text", ""))
        return "".join(parts)
    return str(content)


def _parse_json_reply(text):
    """Parse JSON from model output, tolerating a surrounding Markdown code fence.

    Raises:
        ValueError: if the text is neither valid JSON nor fenced
            (``json.JSONDecodeError`` when the fenced payload itself is invalid).
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    if "```json" in text:
        start = text.find("```json") + len("```json")
    elif "```" in text:
        start = text.find("```") + len("```")
    else:
        raise ValueError(f"Could not parse response as JSON: {text[:200]}")

    end = text.find("```", start)
    if end == -1:
        # Unterminated fence (e.g. truncated output): take everything after
        # the opener rather than silently dropping the final character.
        end = len(text)
    return json.loads(text[start:end].strip())


def general_summary(transcription, plan_tier="pro"):
    """
    Generate a summary of the transcription based on the user's plan tier.
    Uses LangChain Anthropic with structured outputs, falling back to a plain
    completion parsed as JSON if the structured call fails.

    Args:
        transcription: The transcription to summarize (dict/list or JSON string)
        plan_tier: The user's plan tier ("freemium" or "pro"); any other value,
            including None, is treated as "pro"

    Returns:
        A dict containing the summary (structured output)

    Raises:
        ValueError: if no Anthropic API key is configured, or the fallback
            reply cannot be parsed as JSON
    """
    api_key = _get_anthropic_api_key()

    tier = (plan_tier or "pro").lower()
    logger.info("Generating %s summary with structured output", tier)

    transcription_str = _to_prompt_text(transcription)

    # Select the appropriate prompt and schema based on the user's plan tier
    if tier == "freemium":
        prompt = basic_summary_prompt
        max_tokens = 2000
        output_schema = BasicSummary
    else:  # Default to pro
        prompt = advanced_summary_prompt
        max_tokens = 4000
        output_schema = AdvancedSummary

    # Initialize LangChain Anthropic model
    model = ChatAnthropic(
        model="claude-sonnet-4-5-20250929",
        api_key=api_key,
        temperature=0.2,
        max_tokens=max_tokens
    )

    # Create messages directly to avoid template variable parsing issues
    messages = [
        SystemMessage(content="You are an AI meeting transcript summary formatter. Follow the instructions carefully and return structured output."),
        HumanMessage(content=prompt + "\n\nTranscription: " + transcription_str)
    ]

    # Use structured output
    structured_model = model.with_structured_output(output_schema)

    try:
        result = structured_model.invoke(messages)

        # Convert Pydantic model to dict
        if isinstance(result, BaseModel):
            logger.info("Successfully generated %s summary with structured output", tier)
            return result.model_dump()

        logger.info("Successfully generated %s summary", tier)
        return result

    except Exception as e:
        logger.error("Error generating summary with structured output: %s", e)
        # Fallback: try without structured output
        try:
            logger.warning("Falling back to non-structured output")
            response = model.invoke(messages)
            # Plain completions are often wrapped in a Markdown fence, so use
            # the same tolerant parser as custom_summary().
            return _parse_json_reply(_response_text(response))
        except Exception as fallback_error:
            logger.error("Fallback also failed: %s", fallback_error)
            raise


def custom_summary(template, transcription):
    """
    Generate a custom summary based on a user-defined template.
    Uses LangChain Anthropic.

    Args:
        template: The custom template (dict/list or JSON string)
        transcription: The transcription to summarize (dict/list or JSON string)

    Returns:
        A dict containing the custom summary

    Raises:
        ValueError: if no Anthropic API key is configured, or the model reply
            cannot be parsed as JSON
    """
    api_key = _get_anthropic_api_key()

    logger.info("Generating custom summary")

    template_str = _to_prompt_text(template)
    transcription_str = _to_prompt_text(transcription)

    # Initialize LangChain Anthropic model
    model = ChatAnthropic(
        model="claude-sonnet-4-5-20250929",  # Using the same model as general_summary
        api_key=api_key,
        temperature=0.2,
        max_tokens=8000
    )

    # Create messages directly to avoid template variable parsing issues
    messages = [
        SystemMessage(content="You are an AI meeting transcript summary formatter. Follow the user-defined template structure exactly."),
        HumanMessage(content=custom_template_prompt + "\n\nTEMPLATE: " + template_str + "\n\nTranscription: " + transcription_str)
    ]

    try:
        response = model.invoke(messages)
        return _parse_json_reply(_response_text(response))
    except Exception as e:
        logger.error("Error generating custom summary: %s", e)
        raise
|
||||
|
||||
+52
-30
@@ -28,7 +28,7 @@ def extract_audio(url: str, output_template=os.path.join(UPLOAD_FOLDER, "%(title
|
||||
"""
|
||||
Download and extract audio from a video URL using yt-dlp.
|
||||
The file will be saved in the 'upload' folder.
|
||||
|
||||
|
||||
Returns:
|
||||
str: The absolute path to the downloaded audio file (with a unique id appended).
|
||||
"""
|
||||
@@ -42,51 +42,51 @@ def extract_audio(url: str, output_template=os.path.join(UPLOAD_FOLDER, "%(title
|
||||
}],
|
||||
"quiet": True,
|
||||
}
|
||||
|
||||
|
||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||
info = ydl.extract_info(url, download=True)
|
||||
# Prepare the filename from the info.
|
||||
# Note: prepare_filename returns the filename *before* postprocessing,
|
||||
# so we change the extension to mp3.
|
||||
original_filepath = os.path.splitext(ydl.prepare_filename(info))[0] + ".mp3"
|
||||
|
||||
|
||||
# Debug: list files in the upload folder
|
||||
if not os.path.exists(original_filepath):
|
||||
files = os.listdir(UPLOAD_FOLDER)
|
||||
print("Warning: Could not find expected file.")
|
||||
print("Files in upload folder:", files)
|
||||
raise FileNotFoundError(f"Expected audio file not found: {original_filepath}")
|
||||
|
||||
|
||||
# Get the video's title and sanitize it
|
||||
title = info.get('title', 'audio')
|
||||
safe_title = sanitize_filename(title)
|
||||
|
||||
|
||||
# Generate a unique identifier
|
||||
unique_id = uuid.uuid4().hex # Unique identifier in hex format
|
||||
|
||||
|
||||
# Construct the new filename with the unique id appended.
|
||||
new_audio_filename = f"{safe_title}_{unique_id}.mp3"
|
||||
new_audio_filepath = os.path.join(UPLOAD_FOLDER, new_audio_filename)
|
||||
|
||||
|
||||
# Rename the downloaded file to include the unique ID.
|
||||
os.rename(original_filepath, new_audio_filepath)
|
||||
print(f"Renamed file to: {new_audio_filepath}")
|
||||
|
||||
|
||||
# Return the absolute path to the renamed audio file.
|
||||
return os.path.abspath(new_audio_filepath)
|
||||
|
||||
def transcribe_media(file_loc: str, media_type: str = "audio"):
|
||||
"""
|
||||
Transcribe media using Deepgram.
|
||||
|
||||
|
||||
If media_type is "audio" (remote URL), use Deepgram's URL transcription.
|
||||
If media_type is "video" (remote URL), extract audio locally (in the upload folder),
|
||||
transcribe via file, and then delete the local audio file.
|
||||
|
||||
|
||||
Args:
|
||||
file_loc (str): URL to the remote audio or video file.
|
||||
media_type (str): "audio" or "video".
|
||||
|
||||
|
||||
Returns:
|
||||
dict: The transcription response from Deepgram.
|
||||
"""
|
||||
@@ -100,22 +100,22 @@ def transcribe_media(file_loc: str, media_type: str = "audio"):
|
||||
smart_format=True,
|
||||
diarize=True,
|
||||
)
|
||||
|
||||
|
||||
if media_type.lower() == "audio":
|
||||
# For remote audio files, use the URL transcription method.
|
||||
response = deepgram.listen.rest.v("1").transcribe_url({"url": file_loc}, options)
|
||||
|
||||
|
||||
elif media_type.lower() == "video":
|
||||
# For remote video files, first extract the audio locally.
|
||||
local_audio_path = extract_audio(file_loc)
|
||||
print(f"Extracted audio to: {local_audio_path}")
|
||||
|
||||
|
||||
# Transcribe using the local file method.
|
||||
with open(local_audio_path, "rb") as file:
|
||||
buffer_data = file.read()
|
||||
payload: FileSource = {"buffer": buffer_data}
|
||||
response = deepgram.listen.rest.v("1").transcribe_file(payload, options)
|
||||
|
||||
|
||||
# Clean up: delete the local audio file.
|
||||
if os.path.exists(local_audio_path):
|
||||
os.remove(local_audio_path)
|
||||
@@ -123,7 +123,7 @@ def transcribe_media(file_loc: str, media_type: str = "audio"):
|
||||
else:
|
||||
raise ValueError("media_type must be either 'audio' or 'video'.")
|
||||
|
||||
|
||||
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
@@ -137,54 +137,76 @@ def transcribe_media(file_loc: str, media_type: str = "audio"):
|
||||
|
||||
|
||||
|
||||
def _build_sentence(sentence_words, speaker, include_speakers):
    """Assemble one sentence record from the word entries collected for it."""
    sentence = {
        "sentence": " ".join(w["word"] for w in sentence_words),
        "start": sentence_words[0]["start"],
        "end": sentence_words[-1]["end"],
        "words": sentence_words,
    }
    # Only include speaker information if include_speakers is True
    if include_speakers:
        sentence["speaker"] = speaker
    return sentence


def group_words_into_sentences(words, max_words=15, include_speakers=True):
    """
    Group words into sentences based on speaker changes and a maximum length.

    A new sentence starts whenever the speaker changes (only tracked when
    ``include_speakers`` is True) or the current sentence already holds
    ``max_words`` words.

    Args:
        words: List of word objects from the transcription. Each entry is read
            via ``["punctuated_word"]``, ``["start"]`` and ``["end"]``;
            ``["speaker"]`` is read only when ``include_speakers`` is True.
            ``None`` is treated like an empty list.
        max_words: Maximum number of words per sentence. ``None`` or a value
            <= 0 disables the cap, so sentences are split on speaker changes only.
        include_speakers: Whether to include speaker information in the output
            (True for Pro plan, False for Freemium plan)

    Returns:
        A dictionary ``{"sentences": [...]}``. Every sentence has the keys
        "sentence", "start", "end" and "words", plus "speaker" when
        ``include_speakers`` is True.
    """
    cap_enabled = max_words is not None and max_words > 0

    sentences = []
    current_words = []
    current_speaker = None

    for word_info in words or ():
        # Without speaker output every word shares one placeholder speaker, so
        # only the length cap can split the transcript.
        speaker = word_info["speaker"] if include_speakers else None

        if current_words:
            speaker_changed = speaker != current_speaker
            too_long = cap_enabled and len(current_words) >= max_words
            if speaker_changed or too_long:
                sentences.append(
                    _build_sentence(current_words, current_speaker, include_speakers)
                )
                current_words = []

        current_speaker = speaker
        # Append word with metadata inside the current sentence
        current_words.append({
            "word": word_info["punctuated_word"],
            "start": word_info["start"],
            "end": word_info["end"],
        })

    # Append the last sentence if any words remain
    if current_words:
        sentences.append(
            _build_sentence(current_words, current_speaker, include_speakers)
        )

    return {"sentences": sentences}
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: transcribe a sample remote video and print the raw
    # response. The audio URL is kept alongside as a ready-made alternative input.
    audio_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/019933724441Business%20English%20Conversation%20Lesson%2045_%20Meeting%20a%20New%20Colleague.mp3"
    video_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/038426704141Business%20English%20Conversation%20Lesson%2045_%20%20Meeting%20a%20New%20Colleague.mp4"

    response = transcribe_media(video_url, media_type="video")
    print(response)
|
||||
@@ -0,0 +1,46 @@
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, Dict, List
|
||||
from enum import Enum
|
||||
|
||||
class PlanTier(str, Enum):
    """Plan tiers; each member is also a ``str`` equal to its value."""

    FREEMIUM = "freemium"
    PRO = "pro"
|
||||
|
||||
class PlanLimits:
    """
    Defines the limits for each plan tier
    """

    # Per-tier settings, keyed first by PlanTier member and then by limit name.
    LIMITS = {
        PlanTier.FREEMIUM: {
            "transcription_minutes": 200,
            "summary_type": "basic",
            "transcript_history_days": 7,
            "integrations": ["google_meet", "zoom"],
            "speaker_identification": False,
        },
        PlanTier.PRO: {
            "transcription_minutes": 600,
            "summary_type": "advanced",
            "transcript_history_days": 30,
            "integrations": ["google_meet", "zoom", "slack", "notion", "asana", "microsoft_teams"],
            "speaker_identification": True,
        },
    }

    @classmethod
    def get_limit(cls, tier: str, limit_name: str):
        """
        Get a specific limit for a plan tier

        Args:
            tier: The plan tier (freemium or pro), matched case-insensitively.
                Unknown values, ``None`` and non-string values fall back to
                the freemium tier instead of raising.
            limit_name: The name of the limit to retrieve

        Returns:
            The limit value or None if not found
        """
        # PlanTier members are str instances too, so they pass through the
        # same lower()/lookup path as plain strings.
        tier_enum = PlanTier.FREEMIUM
        if isinstance(tier, str):
            try:
                tier_enum = PlanTier(tier.lower())
            except ValueError:
                pass  # unrecognised tier name: keep the freemium default

        return cls.LIMITS.get(tier_enum, {}).get(limit_name)
|
||||
+51
-85
@@ -1,20 +1,21 @@
|
||||
general_summary_prompt = """
|
||||
# Advanced (Pro Plan) summary prompt
|
||||
advanced_summary_prompt = """
|
||||
You are an AI meeting transcript summary formatter. You will be provided with a detailed meeting transcript that includes sentence-level summaries with timestamps (in seconds), speaker details, and word-level timestamps. Your task is to generate a concise summary of the meeting organized into four sections:
|
||||
|
||||
1. **Purpose:**
|
||||
1. **Purpose:**
|
||||
- Provide a brief description of the meeting's purpose.
|
||||
|
||||
2. **Chapters:**
|
||||
- Provide a list of chapter titles that segment the meeting into key parts.
|
||||
- For each chapter, include a timestamp range (with "start" and "end" in seconds) indicating when that chapter begins and ends.
|
||||
2. **Chapters:**
|
||||
- Provide a list of chapter titles that segment the meeting into key parts.
|
||||
- For each chapter, include a timestamp range (with "start" and "end" in seconds) indicating when that chapter begins and ends.
|
||||
- Additionally, include a list of word-level timestamps for each word in the chapter. **Important:** For every word in a sentence, the timestamp must be the start timestamp of the sentence to which the word belongs.
|
||||
|
||||
3. **Outcomes:**
|
||||
- Provide a coherent description of the meeting outcomes.
|
||||
3. **Outcomes:**
|
||||
- Provide a coherent description of the meeting outcomes.
|
||||
- For each outcome, include a timestamp range (with "start" and "end" in seconds) corresponding to the relevant moment, and include word-level timestamps for each word (using the sentence’s start timestamp for every word).
|
||||
|
||||
4. **Action Items:**
|
||||
- Provide a list of actionable items derived from the meeting discussion.
|
||||
4. **Action Items:**
|
||||
- Provide a list of actionable items derived from the meeting discussion.
|
||||
- For each action item, include either a single timestamp or a timestamp range (if available) and a list of word-level timestamps for each word (again, each word's timestamp is the start timestamp of its parent sentence).
|
||||
|
||||
At the end of each section, include a field named "minutes_total" which represents the total duration in minutes for that section. Calculate this value by using the start time of the first sentence and the end time of the last sentence within the section. If the duration is not a whole number, express it as a decimal (e.g., 0.5).
|
||||
@@ -27,86 +28,48 @@ At the end of each section, include a field named "minutes_total" which represen
|
||||
- Ensure that for each sentence you generate, every word in that sentence is assigned the same timestamp—the start timestamp of that sentence.
|
||||
**Example Output JSON:**
|
||||
|
||||
{
|
||||
"Purpose": {
|
||||
"text": "Discuss project progress and define upcoming milestones."
|
||||
},
|
||||
"Chapters": {
|
||||
"minutes_total": 3,
|
||||
"content": [
|
||||
{
|
||||
"chapter": "Project Overview",
|
||||
"time_stamp": {"start": 5.12, "end": 5.68},
|
||||
"content": [
|
||||
{"text":"- overview of the project's objectives.","original_transcript_start":3.4,"original_transcript_end":5.7},
|
||||
{"text":"- It outlines the key milestones achieved so far.", "original_transcript_start":6.7, "original_transcript_end":10.5},
|
||||
{"text":"- main challenges faced during the project.", "original_transcript_start":10.8, "original_transcript_end":11.2}
|
||||
],
|
||||
"words_time_stamp": [
|
||||
{"word": "Project", "timestamp": 5.12},
|
||||
{"word": "Overview", "timestamp": 5.12}
|
||||
]
|
||||
},
|
||||
{
|
||||
"chapter": "Budget Review",
|
||||
"time_stamp": {"start": 10.50, "end": 11.20},
|
||||
"content": [
|
||||
{"text":"- review of the current budget allocations.","original_transcript_start":10.5,"original_transcript_end":11.0},
|
||||
{"text":"- discussion on potential cost-saving measures.", "original_transcript_start":11.1, "original_transcript_end":12.0},
|
||||
{"text":"- approval of the budget for the next quarter.", "original_transcript_start":12.1, "original_transcript_end":13.0}
|
||||
],
|
||||
"words_time_stamp": [
|
||||
{"word": "Budget", "timestamp": 10.50},
|
||||
{"word": "Review", "timestamp": 10.50}
|
||||
]
|
||||
}
|
||||
]
|
||||
"Outcomes": {
|
||||
"minutes_total": 3,
|
||||
"content": [
|
||||
{
|
||||
"text": "Key performance metrics were defined and improvement areas identified.",
|
||||
"time_stamp": {"start": 15.30, "end": 16.00},
|
||||
"words_time_stamp": [
|
||||
{"word": "Key", "timestamp": 15.30},
|
||||
{"word": "performance", "timestamp": 15.30},
|
||||
{"word": "metrics", "timestamp": 15.30},
|
||||
{"word": "were", "timestamp": 15.30},
|
||||
{"word": "defined", "timestamp": 15.30},
|
||||
{"word": "and", "timestamp": 15.30},
|
||||
{"word": "improvement", "timestamp": 15.30},
|
||||
{"word": "areas", "timestamp": 15.30},
|
||||
{"word": "identified", "timestamp": 15.30}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"Action_Items": {
|
||||
"minutes_total": 3,
|
||||
"content": [
|
||||
{
|
||||
"text": "Prepare a detailed budget report for the next meeting.",
|
||||
"time_stamp": {"start": 30.45, "end": 30.45},
|
||||
"words_time_stamp": [
|
||||
{"word": "Prepare", "timestamp": 30.45},
|
||||
{"word": "a", "timestamp": 30.45},
|
||||
{"word": "detailed", "timestamp": 30.45},
|
||||
{"word": "budget", "timestamp": 30.45},
|
||||
{"word": "report", "timestamp": 30.45},
|
||||
{"word": "for", "timestamp": 30.45},
|
||||
{"word": "the", "timestamp": 30.45},
|
||||
{"word": "next", "timestamp": 30.45},
|
||||
{"word": "meeting", "timestamp": 30.45}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
NOTE: Action points to the person who is to take the action and if not specified use "unassigned"
|
||||
NOTE: The content under each chapter provides a detailed bulleted explanation of the chapter. It includes "original_transcript_start" and "original_transcript_end," which indicate the timestamps for each bulleted point, referencing where to find it in the original transcript.
|
||||
Remember, every word in each sentence must have a single timestamp equal to the start timestamp of that sentence. Your output must strictly adhere to the provided structure, and the "minutes_total" for each section must be correctly calculated based on the start time of the first sentence and the end time of the last sentence, expressed as a decimal if necessary.
|
||||
NOTE : start and end time are in seconds , so take that into considerations when calculating the total time in mins
|
||||
NOTE: When creating action items per user, if the assigned user is among the speakers, use their associated speaker key that was presented in the sentence (do not infer names from context). If you can't determine the action item is for one of the speakers, make it "unassigned."
|
||||
NOTE: OUTPUT ONLY JSON, NO OTHER TEXT OR COMMENTS DO NOT ADD ```json before or after the text , just return the json output format please
|
||||
"""
|
||||
|
||||
# Basic (Freemium Plan) summary prompt
|
||||
basic_summary_prompt = """
|
||||
You are an AI meeting transcript summary formatter. You will be provided with a detailed meeting transcript that includes sentence-level summaries with timestamps (in seconds), speaker details, and word-level timestamps. Your task is to generate a very concise, bullet-point summary of the meeting with minimal detail.
|
||||
|
||||
Create a simple JSON response with just two sections:
|
||||
|
||||
1. **Key Points:**
|
||||
- Provide 3-5 short bullet points covering the main topics discussed.
|
||||
- Each bullet point should be no more than 15 words.
|
||||
- Include a timestamp for each bullet point.
|
||||
|
||||
2. **Summary:**
|
||||
- Provide a very brief overall summary of the meeting in 2-3 sentences.
|
||||
- Include the total duration of the meeting in minutes.
|
||||
|
||||
**Instructions:**
|
||||
- Keep the output extremely concise and simple.
|
||||
- Return a JSON response containing only the required fields with no additional commentary.
|
||||
- The JSON output must be properly formatted and valid.
|
||||
- Do not include any markdown or code block formatting markers in your output.
|
||||
|
||||
**Example Output JSON:**
|
||||
|
||||
|
||||
|
||||
Remember to keep your output extremely simple and concise, focusing only on the most important information from the meeting.
|
||||
|
||||
NOTE: OUTPUT ONLY JSON, NO OTHER TEXT OR COMMENTS DO NOT ADD ```json before or after the text , just return the json output format please
|
||||
"""
|
||||
|
||||
# Keeping the original as general_summary_prompt for backward compatibility
|
||||
general_summary_prompt = advanced_summary_prompt
|
||||
|
||||
custom_template_prompt = """ You are an AI meeting transcript summary formatter. You will be provided with a sentence-level and word-level summary of a meeting, which includes timestamps for each sentence (in seconds), speaker details, and word-level timestamps. Your task is to generate a structured summary of the meeting based on a user-defined template.
|
||||
|
||||
How It Works: The user will provide custom section headers along with descriptions of what each section should contain. You must generate a JSON response that exactly follows the user-defined structure. For each section that includes timestamps, ensure that the timestamps are accurately inferred from the provided sentence and word-level timestamps. For every sentence you generate, assign each word the same timestamp—the start timestamp of the sentence that the word belongs to. Word-level timestamps you generate should reflect the sentence’s start time for every word. At the end of each section, correctly calculate the total duration in minutes ("minutes_total") based on the start time of the first sentence and the end time of the last sentence. If the total duration is not a whole number, represent it as a decimal (e.g., 0.5 mins).
|
||||
@@ -125,4 +88,7 @@ Example Output JSON:
|
||||
{ "Key_Points": { "minutes_total": 3.5, "content": [ { "text": "Introductions between Diane Taylor and Cody Smith.", "time_stamp": {"start": 5.12, "end": 5.68}, "words_time_stamp": [ {"word": "Introductions", "timestamp": 5.12}, {"word": "between", "timestamp": 5.12}, {"word": "Diane", "timestamp": 5.12}, {"word": "Taylor", "timestamp": 5.12}, {"word": "and", "timestamp": 5.12}, {"word": "Cody", "timestamp": 5.12}, {"word": "Smith.", "timestamp": 5.12} ] } ] }, "Summary": { "minutes_total": 3.5, "content": [ { "text": "The meeting started with introductions, followed by a discussion of key topics.", "time_stamp": {"start": 5.12, "end": 10.12}, "words_time_stamp": [ {"word": "The", "timestamp": 5.12}, {"word": "meeting", "timestamp": 5.12}, {"word": "started", "timestamp": 5.12}, {"word": "with", "timestamp": 5.12}, {"word": "introductions,", "timestamp": 5.12}, {"word": "followed", "timestamp": 5.12}, {"word": "by", "timestamp": 5.12}, {"word": "a", "timestamp": 5.12}, {"word": "discussion", "timestamp": 5.12}, {"word": "of", "timestamp": 5.12}, {"word": "key", "timestamp": 5.12}, {"word": "topics.", "timestamp": 5.12} ] } ] }, "Next_Steps": { "minutes_total": 2.0, "content": [ { "text": "Diane will follow up with Cody regarding office management tasks.", "time_stamp": {"start": 30.45, "end": 30.45}, "words_time_stamp": [ {"word": "Diane", "timestamp": 30.45}, {"word": "will", "timestamp": 30.45}, {"word": "follow", "timestamp": 30.45}, {"word": "up", "timestamp": 30.45}, {"word": "with", "timestamp": 30.45}, {"word": "Cody", "timestamp": 30.45}, {"word": "regarding", "timestamp": 30.45}, {"word": "office", "timestamp": 30.45}, {"word": "management", "timestamp": 30.45}, {"word": "tasks.", "timestamp": 30.45} ] } ] } }
|
||||
|
||||
Remember, for every sentence generated in any section, every word must be assigned the sentence’s start timestamp as its "timestamp" value. Additionally, calculate the "minutes_total" for each section by using the start time of the first sentence and the end time of the last sentence; if the result is not a whole number, express it as a decimal (e.g., 0.5 mins). Your output must strictly adhere to the provided structure.
|
||||
NOTE : start and end time are in seconds , so take that into considerations when calculating the total time in mins"""
|
||||
NOTE : start and end time are in seconds , so take that into considerations when calculating the total time in mins
|
||||
|
||||
NOTE: OUTPUT ONLY JSON, NO OTHER TEXT OR COMMENTS DO NOT ADD ```json before or after the text , just return the json output format please
|
||||
"""
|
||||
-52
@@ -1,52 +0,0 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='[%(asctime)s]: %(message)s:')
|
||||
|
||||
project_name = "sql-copilot-dot"
|
||||
|
||||
list_of_files = [
|
||||
".github/workflows/.gitkeep",
|
||||
f"src/__init__.py",
|
||||
f"src/routes/__init__.py",
|
||||
f"src/models/__init__.py",
|
||||
|
||||
|
||||
|
||||
f"src/utils/__init__.py",
|
||||
f"src/utils/common.py",
|
||||
f"src/config/__init__.py",
|
||||
f"src/services/__init__.py",
|
||||
"main.py",
|
||||
"app.py",
|
||||
"Dockerfile",
|
||||
"requirements.txt",
|
||||
"setup.py",
|
||||
"research/trials.ipynb",
|
||||
"templates/index.html",
|
||||
"test.py",
|
||||
"notebooks/.gitkeep", # Added notebooks folder
|
||||
"config.py" # Added general config folder
|
||||
]
|
||||
|
||||
for filepath in list_of_files:
|
||||
filepath = Path(filepath)
|
||||
filedir, filename = os.path.split(filepath)
|
||||
|
||||
if filedir != "":
|
||||
os.makedirs(filedir, exist_ok=True)
|
||||
logging.info(f"Creating directory; {filedir} for the file: {filename}")
|
||||
|
||||
if (not os.path.exists(filepath)) or (os.path.getsize(filepath) == 0):
|
||||
with open(filepath, "w") as f:
|
||||
pass
|
||||
logging.info(f"Creating empty file: {filepath}")
|
||||
|
||||
else:
|
||||
logging.info(f"{filename} is already exists")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,145 @@
|
||||
import os
|
||||
import requests
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
# API endpoint
base_url = "http://localhost:5056"

# Your API key
api_key = os.getenv("API_KEY_ACCESS")

# Headers
headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json"
}

# Audio URL from your notebook
audio_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/019933724441Business%20English%20Conversation%20Lesson%2045_%20Meeting%20a%20New%20Colleague.mp3"


def _transcribe(label, plan_tier, out_path):
    """Transcribe ``audio_url`` under ``plan_tier`` and save the transcript.

    Args:
        label: Prefix used in the progress messages ("Pro" or "Free").
        plan_tier: Value sent as "plan_tier" in the request body.
        out_path: File the transcript JSON text is written to.

    Returns:
        The transcript serialised as JSON text, or None when the endpoint
        answered with a status code other than 200.
    """
    response = requests.post(
        f"{base_url}/microdot-ai/transcribe",
        headers=headers,
        json={
            "media_url": audio_url,
            "media_type": "audio",
            "plan_tier": plan_tier,
        },
    )

    if response.status_code != 200:
        print(f"{label} plan transcription failed with status code: {response.status_code}")
        print(response.text)
        return None

    transcript_data = response.json()
    print(f"{label} plan transcription successful!")

    # Save the transcript for later use
    transcript_text = json.dumps(transcript_data["transcript"], indent=4)
    with open(out_path, "w") as f:
        f.write(transcript_text)
    print(f"{label} plan transcript saved to {out_path}")

    # Check if the transcript has speaker information (first sentence only)
    sentences = transcript_data["transcript"]["sentences"]
    has_speaker = "speaker" in sentences[0] if sentences else False
    print(f"{label} plan has speaker information: {has_speaker}")

    return transcript_text


def _summarize(label, title, plan_tier, transcript_text, out_path):
    """Request a general summary for ``plan_tier`` and save the response JSON.

    Args:
        label: Prefix used in the status messages ("Basic" or "Advanced").
        title: Heading printed above a successful result.
        plan_tier: Value sent as "plan_tier" in the request body.
        transcript_text: Transcript JSON text sent as "transcript".
        out_path: File the summary JSON is written to.
    """
    response = requests.post(
        f"{base_url}/microdot-ai/general-summary",
        headers=headers,
        json={"transcript": transcript_text, "plan_tier": plan_tier},
    )

    if response.status_code != 200:
        print(f"{label} summarization failed with status code: {response.status_code}")
        print(response.text)
        return

    print(f"\n--- {title} Summary ---")
    with open(out_path, "w") as f:
        f.write(json.dumps(response.json(), indent=2))
    print(f"{label} summary saved to {out_path}")


# 1. First, transcribe the audio with the Pro plan (with speaker identification)
pro_transcript_json = _transcribe("Pro", "pro", "pro_transcript.json")

# 1b. Now transcribe with the Free plan (without speaker identification)
free_transcript_json = _transcribe("Free", "freemium", "free_transcript.json")

# Use the Pro plan transcript for the summary tests; fall back to the Free plan
# transcript when only that request succeeded.
transcript_json = (
    pro_transcript_json if pro_transcript_json is not None else free_transcript_json
)
if transcript_json is None:
    # Nothing to summarise: stop with a clear message instead of a NameError.
    raise SystemExit("No transcript available: both transcription requests failed.")

# 2. Test the basic (Freemium) summarization
_summarize("Basic", "Basic (Freemium)", "freemium", transcript_json, "basic_summary.json")

# 3. Test the advanced (Pro) summarization
_summarize("Advanced", "Advanced (Pro)", "pro", transcript_json, "advanced_summary.json")
|
||||
@@ -0,0 +1,90 @@
|
||||
import os
|
||||
import requests
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
# API endpoint
|
||||
base_url = "http://localhost:5056"
|
||||
|
||||
# Your API key
|
||||
api_key = os.getenv("API_KEY_ACCESS")
|
||||
|
||||
# Headers
|
||||
headers = {
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
|
||||
# Audio URL for testing
|
||||
audio_url = "https://s3.us-east-2.amazonaws.com/com.mkdlabs.images/baas/jordan/019933724441Business%20English%20Conversation%20Lesson%2045_%20Meeting%20a%20New%20Colleague.mp3"
|
||||
|
||||
# Test function to compare freemium and pro plan transcriptions
|
||||
def test_plan_differences():
    """Check that only the Pro plan transcript carries speaker information.

    Sends the same audio URL to the transcribe endpoint once per plan tier,
    saves both responses to JSON files for inspection, and compares whether
    the first sentence of each transcript has a "speaker" key.

    Raises:
        AssertionError: If either request does not return status 200, or if
            the speaker field is present for freemium / absent for pro. The
            diagnostic messages are printed before raising so the function
            fails visibly under a test runner as well as when run directly.
    """

    def _post(plan_tier):
        # Identical request for both tiers; only "plan_tier" differs.
        return requests.post(
            f"{base_url}/microdot-ai/transcribe",
            headers=headers,
            json={
                "media_url": audio_url,
                "media_type": "audio",
                "plan_tier": plan_tier,
            },
        )

    def _has_speaker(data):
        # An empty transcript counts as "no speaker information".
        sentences = data["transcript"]["sentences"]
        return "speaker" in sentences[0] if sentences else False

    # 1. Test the freemium plan (no speaker identification)
    print("Testing Freemium Plan (no speaker identification)...")
    freemium_response = _post("freemium")

    # 2. Test the pro plan (with speaker identification)
    print("Testing Pro Plan (with speaker identification)...")
    pro_response = _post("pro")

    # Check if both requests were successful
    if freemium_response.status_code != 200 or pro_response.status_code != 200:
        print(f"Freemium request status: {freemium_response.status_code}")
        print(f"Pro request status: {pro_response.status_code}")
        if freemium_response.status_code != 200:
            print(f"Freemium error: {freemium_response.text}")
        if pro_response.status_code != 200:
            print(f"Pro error: {pro_response.text}")
        raise AssertionError("Transcription request failed; see the printed status output.")

    freemium_data = freemium_response.json()
    pro_data = pro_response.json()

    # Save the transcripts for inspection
    with open("freemium_transcript.json", "w") as f:
        f.write(json.dumps(freemium_data, indent=4))

    with open("pro_transcript.json", "w") as f:
        f.write(json.dumps(pro_data, indent=4))

    print("Transcripts saved to freemium_transcript.json and pro_transcript.json")

    has_speaker_freemium = _has_speaker(freemium_data)
    has_speaker_pro = _has_speaker(pro_data)

    print(f"Freemium plan has speaker information: {has_speaker_freemium}")
    print(f"Pro plan has speaker information: {has_speaker_pro}")

    # Verify the expected behavior
    if not has_speaker_freemium and has_speaker_pro:
        print("✅ Test PASSED: Freemium plan doesn't show speakers, Pro plan does.")
    else:
        print("❌ Test FAILED: Expected behavior not observed.")
        raise AssertionError(
            "Expected speaker information only in the Pro plan transcript "
            f"(freemium={has_speaker_freemium}, pro={has_speaker_pro})."
        )
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_plan_differences()
|
||||
@@ -0,0 +1,2 @@
|
||||
# Tests package
|
||||
|
||||
@@ -0,0 +1,133 @@
|
||||
"""
|
||||
Shared fixtures and configuration for tests
|
||||
"""
|
||||
import pytest
|
||||
import os
|
||||
from unittest.mock import Mock, MagicMock
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
@pytest.fixture
def mock_anthropic_api_key() -> str:
    """Mock Anthropic API key for testing"""
    return "test-api-key-12345"
|
||||
|
||||
|
||||
@pytest.fixture
def sample_transcription_dict() -> dict:
    """Sample transcription dictionary for testing: two sentences, two speakers."""
    # NOTE(review): sentences here carry their text under "text"; confirm this
    # matches the key the code under test reads from a real transcript.
    return {
        "sentences": [
            {
                "text": "Hello, welcome to the meeting.",
                "start": 0.5,
                "end": 3.2,
                "speaker": "Speaker_A",
                "words": [
                    {"word": "Hello", "start": 0.5, "end": 0.8},
                    {"word": "welcome", "start": 0.9, "end": 1.3},
                    {"word": "to", "start": 1.4, "end": 1.5},
                    {"word": "the", "start": 1.6, "end": 1.7},
                    {"word": "meeting", "start": 1.8, "end": 2.3}
                ]
            },
            {
                "text": "Let's discuss the project timeline.",
                "start": 4.0,
                "end": 7.5,
                "speaker": "Speaker_B",
                "words": [
                    {"word": "Let's", "start": 4.0, "end": 4.3},
                    {"word": "discuss", "start": 4.4, "end": 5.0},
                    {"word": "the", "start": 5.1, "end": 5.2},
                    {"word": "project", "start": 5.3, "end": 5.8},
                    {"word": "timeline", "start": 5.9, "end": 6.5}
                ]
            }
        ]
    }
|
||||
|
||||
|
||||
@pytest.fixture
def sample_basic_summary_response() -> dict:
    """Sample basic summary response (freemium plan)"""
    return {
        "Key_Points": [
            {"text": "Team discussed project timeline.", "timestamp": 4.0},
            {"text": "Meeting started with introductions.", "timestamp": 0.5}
        ],
        "Summary": {
            "text": "Brief meeting to discuss project timeline and introductions.",
            "duration_minutes": 7.5
        }
    }
|
||||
|
||||
|
||||
@pytest.fixture
def sample_advanced_summary_response() -> dict:
    """Sample advanced summary response (pro plan)"""
    return {
        "Purpose": {
            "text": "Discuss project timeline and team introductions."
        },
        "Chapters": {
            "minutes_total": 0.125,
            "content": [
                {
                    "chapter": "Introduction",
                    "time_stamp": {"start": 0.5, "end": 3.2},
                    "content": [
                        {
                            "text": "- Welcome to the meeting.",
                            "original_transcript_start": 0.5,
                            "original_transcript_end": 3.2
                        }
                    ],
                    "words_time_stamp": [
                        {"word": "Introduction", "timestamp": 0.5}
                    ]
                }
            ]
        },
        "Outcomes": {
            "minutes_total": 0.125,
            "content": [
                {
                    "text": "Project timeline discussed.",
                    "time_stamp": {"start": 4.0, "end": 7.5},
                    "words_time_stamp": [
                        {"word": "Project", "timestamp": 4.0},
                        {"word": "timeline", "timestamp": 4.0}
                    ]
                }
            ]
        },
        "Action_Items_Per_User": [
            {
                "speaker": "Speaker_B",
                "minutes_total": 0.125,
                "action_items": [
                    {
                        "text": "Review project timeline.",
                        "time_stamp": {"start": 4.0, "end": 7.5},
                        "words_time_stamp": [
                            {"word": "Review", "timestamp": 4.0}
                        ]
                    }
                ]
            }
        ]
    }
|
||||
|
||||
|
||||
@pytest.fixture
def sample_template() -> dict:
    """Sample custom template mapping section headers to their descriptions."""
    return {
        "Key_Points": "Summarize the most critical discussion points from the meeting.",
        "Summary": "Provide a brief overall summary of what was discussed.",
        "Next_Steps": "List the next steps decided during the meeting."
    }
|
||||
|
||||
@@ -0,0 +1,417 @@
|
||||
"""
|
||||
Unit tests for generate_summary.py module
|
||||
"""
|
||||
import pytest
|
||||
import os
|
||||
import json
|
||||
from unittest.mock import Mock, MagicMock, patch, call
|
||||
from pydantic import ValidationError
|
||||
|
||||
# Import the functions and models to test
|
||||
import sys
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
||||
|
||||
from scripts.generate_summary import (
|
||||
general_summary,
|
||||
custom_summary,
|
||||
BasicSummary,
|
||||
AdvancedSummary,
|
||||
KeyPoint,
|
||||
Summary,
|
||||
Purpose,
|
||||
Chapters,
|
||||
Outcomes,
|
||||
ActionItemsPerUser
|
||||
)
|
||||
|
||||
|
||||
class TestPydanticModels:
    """Construction checks for the summary models imported from generate_summary."""

    def test_key_point_model(self):
        """A KeyPoint exposes the text and timestamp it was constructed with."""
        fields = {"text": "Test point", "timestamp": 10.5}
        point = KeyPoint(**fields)

        assert point.text == "Test point"
        assert point.timestamp == 10.5

    def test_summary_model(self):
        """A Summary exposes the text and duration it was constructed with."""
        fields = {"text": "Test summary", "duration_minutes": 15.5}
        built = Summary(**fields)

        assert built.text == "Test summary"
        assert built.duration_minutes == 15.5

    def test_basic_summary_model(self, sample_basic_summary_response):
        """BasicSummary accepts the sample basic response payload."""
        parsed = BasicSummary(**sample_basic_summary_response)
        overall = parsed.Summary

        assert len(parsed.Key_Points) == 2
        assert overall.text is not None
        assert overall.duration_minutes > 0

    def test_advanced_summary_model(self, sample_advanced_summary_response):
        """AdvancedSummary accepts the sample advanced response payload."""
        parsed = AdvancedSummary(**sample_advanced_summary_response)

        assert parsed.Purpose.text is not None
        # Every collection section of the sample payload must be non-empty.
        assert len(parsed.Chapters.content) > 0
        assert len(parsed.Outcomes.content) > 0
        assert len(parsed.Action_Items_Per_User) > 0
|
||||
|
||||
|
||||
class TestGeneralSummary:
    """Tests for ``general_summary`` with ChatAnthropic and ChatPromptTemplate patched."""

    @staticmethod
    def _wire_chain(mock_prompt_template, mock_chat_anthropic, chain_result):
        """Configure the patched classes so the piped chain returns ``chain_result``.

        Args:
            mock_prompt_template: Patched ``ChatPromptTemplate`` class.
            mock_chat_anthropic: Patched ``ChatAnthropic`` class.
            chain_result: Value returned by ``invoke`` on both the piped chain
                and the structured-output model.

        Returns:
            The mock model instance that the patched ``ChatAnthropic`` returns.
        """
        model = MagicMock()
        structured_model = MagicMock()
        structured_model.invoke.return_value = chain_result
        model.with_structured_output.return_value = structured_model
        mock_chat_anthropic.return_value = model

        chain = MagicMock()
        chain.invoke.return_value = chain_result

        # MagicMock accepts a plain function for a magic method as long as it
        # takes ``self``; whatever is piped onto the prompt yields ``chain``.
        prompt = MagicMock()
        prompt.__or__ = lambda self, other: chain
        mock_prompt_template.from_messages.return_value = prompt
        return model

    @patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'})
    @patch('scripts.generate_summary.ChatAnthropic')
    @patch('scripts.generate_summary.ChatPromptTemplate')
    def test_general_summary_freemium_plan(
        self,
        mock_prompt_template,
        mock_chat_anthropic,
        sample_transcription_dict,
        sample_basic_summary_response
    ):
        """Freemium plan returns the basic sections and uses the 2000-token model setup."""
        chain_result = BasicSummary(**sample_basic_summary_response)
        self._wire_chain(mock_prompt_template, mock_chat_anthropic, chain_result)

        result = general_summary(sample_transcription_dict, plan_tier="freemium")

        # The result must expose the basic-summary sections.
        assert result is not None
        assert "Key_Points" in result
        assert "Summary" in result
        assert len(result["Key_Points"]) > 0

        # The model must be constructed exactly once with the freemium settings.
        mock_chat_anthropic.assert_called_once()
        init_kwargs = mock_chat_anthropic.call_args.kwargs
        assert init_kwargs["model"] == "claude-sonnet-4-5-20250929"
        assert init_kwargs["max_tokens"] == 2000
        assert init_kwargs["temperature"] == 0.2

    @patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'})
    @patch('scripts.generate_summary.ChatAnthropic')
    @patch('scripts.generate_summary.ChatPromptTemplate')
    def test_general_summary_pro_plan(
        self,
        mock_prompt_template,
        mock_chat_anthropic,
        sample_transcription_dict,
        sample_advanced_summary_response
    ):
        """Pro plan returns the advanced sections and uses the 4000-token model setup."""
        chain_result = AdvancedSummary(**sample_advanced_summary_response)
        self._wire_chain(mock_prompt_template, mock_chat_anthropic, chain_result)

        result = general_summary(sample_transcription_dict, plan_tier="pro")

        # The result must expose every advanced-summary section.
        assert result is not None
        assert "Purpose" in result
        assert "Chapters" in result
        assert "Outcomes" in result
        assert "Action_Items_Per_User" in result

        # The model must be constructed exactly once with the pro settings.
        mock_chat_anthropic.assert_called_once()
        init_kwargs = mock_chat_anthropic.call_args.kwargs
        assert init_kwargs["model"] == "claude-sonnet-4-5-20250929"
        assert init_kwargs["max_tokens"] == 4000

    @patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'})
    @patch('scripts.generate_summary.ChatAnthropic')
    @patch('scripts.generate_summary.ChatPromptTemplate')
    def test_general_summary_with_string_transcription(
        self,
        mock_prompt_template,
        mock_chat_anthropic,
        sample_basic_summary_response
    ):
        """A JSON-string transcription is accepted in place of a dict."""
        transcription_str = json.dumps({"sentences": []})
        chain_result = BasicSummary(**sample_basic_summary_response)
        self._wire_chain(mock_prompt_template, mock_chat_anthropic, chain_result)

        result = general_summary(transcription_str, plan_tier="freemium")

        assert result is not None
        assert "Key_Points" in result

    def test_general_summary_missing_api_key(self, sample_transcription_dict):
        """A ValueError naming ANTHROPIC_API_KEY is raised when the variable is unset."""
        # clear=True empties the environment for the duration of the block.
        with patch.dict(os.environ, {}, clear=True):
            with pytest.raises(ValueError, match="ANTHROPIC_API_KEY"):
                general_summary(sample_transcription_dict, plan_tier="freemium")

    @patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'})
    @patch('scripts.generate_summary.ChatAnthropic')
    @patch('scripts.generate_summary.ChatPromptTemplate')
    def test_general_summary_fallback_on_error(
        self,
        mock_prompt_template,
        mock_chat_anthropic,
        sample_transcription_dict,
        sample_basic_summary_response
    ):
        """When the structured chain raises, the plain-model chain supplies the result."""
        raw_response = MagicMock()
        raw_response.content = json.dumps(sample_basic_summary_response)

        model = MagicMock()
        structured_model = MagicMock()
        structured_model.invoke.side_effect = Exception("Structured output failed")
        model.with_structured_output.return_value = structured_model
        mock_chat_anthropic.return_value = model

        # Both chains are fully configured up front so the pipe handler below
        # is a pure lookup and does not depend on the order of calls.
        failing_chain = MagicMock()
        failing_chain.invoke.side_effect = Exception("Structured output failed")
        fallback_chain = MagicMock()
        fallback_chain.invoke.return_value = raw_response

        def pipe(self, other):
            # Only piping the bare model yields the working chain; the
            # structured model (or anything unexpected) yields the failing one.
            if other is model:
                return fallback_chain
            return failing_chain

        prompt = MagicMock()
        prompt.__or__ = pipe
        mock_prompt_template.from_messages.return_value = prompt

        result = general_summary(sample_transcription_dict, plan_tier="freemium")

        assert result is not None
        assert "Key_Points" in result
        # Guard against the test passing without exercising the fallback path.
        assert fallback_chain.invoke.called
|
||||
|
||||
|
||||
class TestCustomSummary:
    """Tests for ``custom_summary`` with ChatAnthropic and ChatPromptTemplate patched."""

    @staticmethod
    def _wire_chain(mock_prompt_template, mock_chat_anthropic, content):
        """Configure the patched classes so the piped chain returns a raw response.

        Args:
            mock_prompt_template: Patched ``ChatPromptTemplate`` class.
            mock_chat_anthropic: Patched ``ChatAnthropic`` class.
            content: String exposed as ``.content`` on the response object that
                the chain's ``invoke`` returns.
        """
        response = MagicMock()
        response.content = content

        chain = MagicMock()
        chain.invoke.return_value = response

        # MagicMock accepts a plain function for a magic method as long as it
        # takes ``self``; whatever is piped onto the prompt yields ``chain``.
        prompt = MagicMock()
        prompt.__or__ = lambda self, other: chain
        mock_prompt_template.from_messages.return_value = prompt
        mock_chat_anthropic.return_value = MagicMock()

    @patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'})
    @patch('scripts.generate_summary.ChatAnthropic')
    @patch('scripts.generate_summary.ChatPromptTemplate')
    def test_custom_summary_success(
        self,
        mock_prompt_template,
        mock_chat_anthropic,
        sample_transcription_dict,
        sample_template
    ):
        """A plain JSON response yields every section and the 8000-token model setup."""
        expected_result = {
            "Key_Points": {"content": []},
            "Summary": {"content": []},
            "Next_Steps": {"content": []}
        }
        self._wire_chain(
            mock_prompt_template, mock_chat_anthropic, json.dumps(expected_result)
        )

        result = custom_summary(sample_template, sample_transcription_dict)

        assert result is not None
        # Every section in the mocked response must survive, not just one of them.
        for section in expected_result:
            assert section in result

        # The model must be constructed exactly once with the custom-summary settings.
        mock_chat_anthropic.assert_called_once()
        init_kwargs = mock_chat_anthropic.call_args.kwargs
        assert init_kwargs["model"] == "claude-sonnet-4-5-20250929"
        assert init_kwargs["max_tokens"] == 8000

    @patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'})
    @patch('scripts.generate_summary.ChatAnthropic')
    @patch('scripts.generate_summary.ChatPromptTemplate')
    def test_custom_summary_with_markdown_wrapper(
        self,
        mock_prompt_template,
        mock_chat_anthropic,
        sample_transcription_dict,
        sample_template
    ):
        """JSON wrapped in a Markdown ``json`` code fence is unwrapped and parsed."""
        expected_result = {"result": "test"}
        wrapped_json = f"```json\n{json.dumps(expected_result)}\n```"
        self._wire_chain(mock_prompt_template, mock_chat_anthropic, wrapped_json)

        result = custom_summary(sample_template, sample_transcription_dict)

        assert result == expected_result

    def test_custom_summary_missing_api_key(self, sample_transcription_dict, sample_template):
        """A ValueError naming ANTHROPIC_API_KEY is raised when the variable is unset."""
        # clear=True empties the environment for the duration of the block.
        with patch.dict(os.environ, {}, clear=True):
            with pytest.raises(ValueError, match="ANTHROPIC_API_KEY"):
                custom_summary(sample_template, sample_transcription_dict)

    @patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'})
    @patch('scripts.generate_summary.ChatAnthropic')
    @patch('scripts.generate_summary.ChatPromptTemplate')
    def test_custom_summary_invalid_json(
        self,
        mock_prompt_template,
        mock_chat_anthropic,
        sample_transcription_dict,
        sample_template
    ):
        """A response that is not JSON raises ValueError with the parse-failure message."""
        self._wire_chain(
            mock_prompt_template, mock_chat_anthropic, "This is not valid JSON"
        )

        with pytest.raises(ValueError, match="Could not parse response as JSON"):
            custom_summary(sample_template, sample_transcription_dict)
|
||||
|
||||
|
||||
class TestSchemaSwitching:
    """Check which schema ``general_summary`` passes to ``with_structured_output``."""

    @staticmethod
    def _wire_chain(mock_prompt_template, mock_chat_anthropic, chain_result):
        """Configure the patched classes so the piped chain returns ``chain_result``.

        Args:
            mock_prompt_template: Patched ``ChatPromptTemplate`` class.
            mock_chat_anthropic: Patched ``ChatAnthropic`` class.
            chain_result: Value returned by ``invoke`` on both the piped chain
                and the structured-output model.

        Returns:
            The mock model instance that the patched ``ChatAnthropic`` returns,
            so callers can inspect its ``with_structured_output`` calls.
        """
        model = MagicMock()
        structured_model = MagicMock()
        structured_model.invoke.return_value = chain_result
        model.with_structured_output.return_value = structured_model
        mock_chat_anthropic.return_value = model

        chain = MagicMock()
        chain.invoke.return_value = chain_result

        # MagicMock accepts a plain function for a magic method as long as it
        # takes ``self``; whatever is piped onto the prompt yields ``chain``.
        prompt = MagicMock()
        prompt.__or__ = lambda self, other: chain
        mock_prompt_template.from_messages.return_value = prompt
        return model

    @patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'})
    @patch('scripts.generate_summary.ChatAnthropic')
    @patch('scripts.generate_summary.ChatPromptTemplate')
    def test_schema_switching_freemium(
        self,
        mock_prompt_template,
        mock_chat_anthropic,
        sample_transcription_dict,
        sample_basic_summary_response
    ):
        """Freemium plan requests structured output with the BasicSummary schema."""
        model = self._wire_chain(
            mock_prompt_template,
            mock_chat_anthropic,
            BasicSummary(**sample_basic_summary_response),
        )

        general_summary(sample_transcription_dict, plan_tier="freemium")

        # The schema is expected as the first positional argument.
        model.with_structured_output.assert_called_once()
        assert model.with_structured_output.call_args.args[0] is BasicSummary

    @patch.dict(os.environ, {'ANTHROPIC_API_KEY': 'test-key'})
    @patch('scripts.generate_summary.ChatAnthropic')
    @patch('scripts.generate_summary.ChatPromptTemplate')
    def test_schema_switching_pro(
        self,
        mock_prompt_template,
        mock_chat_anthropic,
        sample_transcription_dict,
        sample_advanced_summary_response
    ):
        """Pro plan requests structured output with the AdvancedSummary schema."""
        model = self._wire_chain(
            mock_prompt_template,
            mock_chat_anthropic,
            AdvancedSummary(**sample_advanced_summary_response),
        )

        general_summary(sample_transcription_dict, plan_tier="pro")

        # The schema is expected as the first positional argument.
        model.with_structured_output.assert_called_once()
        assert model.with_structured_output.call_args.args[0] is AdvancedSummary
|
||||
|
||||
Reference in New Issue
Block a user