2025-04-24 10:15:13 +01:00
|
|
|
import os
|
2025-11-11 20:11:53 +00:00
|
|
|
import logging
|
|
|
|
|
from typing import Dict, List, Any, Optional
|
2025-04-24 10:15:13 +01:00
|
|
|
from dotenv import load_dotenv
|
|
|
|
|
import json
|
2025-11-11 20:11:53 +00:00
|
|
|
from pydantic import BaseModel
|
|
|
|
|
from langchain_anthropic import ChatAnthropic
|
|
|
|
|
from langchain_core.prompts import ChatPromptTemplate
|
|
|
|
|
from langchain_core.messages import HumanMessage, SystemMessage
|
2025-04-24 10:15:13 +01:00
|
|
|
from src.prompt import advanced_summary_prompt, basic_summary_prompt, custom_template_prompt
|
2025-11-11 20:11:53 +00:00
|
|
|
|
2025-04-24 10:15:13 +01:00
|
|
|
# Load variables from a .env file (if one is found) into the process
# environment at import time, before the os.getenv() API-key lookups in the
# summary functions of this module run.
load_dotenv()
|
|
|
|
|
|
2025-11-11 20:11:53 +00:00
|
|
|
# Setup logger
|
|
|
|
|
# Module-level logger named after this module (__name__); the summary
# functions in this file log their progress and errors through it.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
# ============================================================================
|
|
|
|
|
# Pydantic Models for Structured Outputs
|
|
|
|
|
# ============================================================================
|
|
|
|
|
|
|
|
|
|
# Basic Summary Models (Freemium Plan)
|
|
|
|
|
class KeyPoint(BaseModel):
    """A key point from the meeting"""

    # The key point itself, as free text.
    text: str

    # Position of the point within the meeting. The unit is not fixed by
    # this model (presumably seconds into the recording -- TODO confirm
    # against the prompt that produces it).
    timestamp: float
|
|
|
|
|
|
|
|
|
|
class Summary(BaseModel):
    """Overall summary of the meeting"""

    # The summary as free text.
    text: str

    # Meeting length; the field name indicates minutes.
    duration_minutes: float
|
|
|
|
|
|
|
|
|
|
class BasicSummary(BaseModel):
    """Basic summary structure for freemium plan"""

    # NOTE: the class docstring above is left verbatim on purpose; pydantic
    # exposes model docstrings as the schema description, so it is runtime
    # text rather than plain documentation.

    # Capitalised field names are part of the output contract: they become
    # the keys of the dict produced by model_dump() in general_summary.
    Key_Points: List[KeyPoint]

    # NOTE(review): the field name is identical to the Summary model class.
    # This resolves correctly only while the field has no default value;
    # assigning a default here would rebind the name before the annotation
    # is evaluated.
    Summary: Summary
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Advanced Summary Models (Pro Plan)
|
|
|
|
|
class Purpose(BaseModel):
    """Purpose of the meeting"""

    # Free-text statement of the meeting's purpose.
    text: str
|
|
|
|
|
|
|
|
|
|
class ChapterContent(BaseModel):
    """Content item within a chapter"""

    # Text of this content item.
    text: str

    # Start and end of the matching span in the source transcript. The unit
    # is not stated in this file (presumably seconds -- TODO confirm).
    original_transcript_start: float
    original_transcript_end: float
|
|
|
|
|
|
|
|
|
|
class WordTimestamp(BaseModel):
    """Word-level timestamp"""

    # A single word.
    word: str

    # Time associated with the word; unit not stated in this file
    # (presumably seconds -- TODO confirm).
    timestamp: float
|
|
|
|
|
|
|
|
|
|
class TimeStamp(BaseModel):
    """Time range"""

    # Range boundaries. Nothing in this model enforces start <= end, and the
    # unit is not stated in this file (presumably seconds -- TODO confirm).
    start: float
    end: float
|
|
|
|
|
|
|
|
|
|
class Chapter(BaseModel):
    """A chapter in the meeting"""

    # Presumably the chapter title/label -- TODO confirm against the prompt.
    chapter: str

    # Time range covered by the chapter.
    time_stamp: TimeStamp

    # Content items that make up the chapter.
    content: List[ChapterContent]

    # Word-level timestamps attached to the chapter.
    words_time_stamp: List[WordTimestamp]
|
|
|
|
|
|
|
|
|
|
class Chapters(BaseModel):
    """Chapters section"""

    # Total minutes for this section (per the field name); how the value is
    # derived is not shown in this file.
    minutes_total: float

    # The chapters themselves.
    content: List[Chapter]
|
|
|
|
|
|
|
|
|
|
class OutcomeContent(BaseModel):
    """Content item in outcomes"""

    # Text of the outcome.
    text: str

    # Time range the outcome refers to.
    time_stamp: TimeStamp

    # Word-level timestamps attached to the outcome.
    words_time_stamp: List[WordTimestamp]
|
|
|
|
|
|
|
|
|
|
class Outcomes(BaseModel):
    """Outcomes section"""

    # Total minutes for this section (per the field name); how the value is
    # derived is not shown in this file.
    minutes_total: float

    # The individual outcome entries.
    content: List[OutcomeContent]
|
|
|
|
|
|
|
|
|
|
class ActionItem(BaseModel):
    """An action item"""

    # Text of the action item.
    text: str

    # Time range the action item refers to.
    time_stamp: TimeStamp

    # Word-level timestamps attached to the action item.
    words_time_stamp: List[WordTimestamp]
|
|
|
|
|
|
|
|
|
|
class ActionItemsPerUser(BaseModel):
    """Action items for a specific user"""

    # Identifier of the speaker the items belong to; the exact format
    # (name, label, id) is not fixed by this model.
    speaker: str

    # Total minutes for this speaker (per the field name); how the value is
    # derived is not shown in this file.
    minutes_total: float

    # Action items assigned to this speaker.
    action_items: List[ActionItem]
|
|
|
|
|
|
|
|
|
|
class AdvancedSummary(BaseModel):
    """Advanced summary structure for pro plan"""

    # NOTE: the class docstring above is left verbatim on purpose; pydantic
    # exposes model docstrings as the schema description, so it is runtime
    # text rather than plain documentation.

    # NOTE(review): the first three field names are identical to the model
    # classes they are annotated with. This resolves correctly only while
    # the fields have no default value; assigning a default would rebind
    # the name before the annotation is evaluated.
    # The capitalised names are part of the output contract: they become the
    # keys of the dict produced by model_dump() in general_summary.
    Purpose: Purpose
    Chapters: Chapters
    Outcomes: Outcomes

    # One entry per speaker, each holding that speaker's action items.
    Action_Items_Per_User: List[ActionItemsPerUser]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ============================================================================
|
|
|
|
|
# Summary Generation Functions
|
|
|
|
|
# ============================================================================
|
|
|
|
|
|
2025-04-24 10:15:13 +01:00
|
|
|
def _strip_code_fence(text):
    """Return the payload of the first Markdown code fence found in ``text``.

    Text without a fence (or a non-string value) is returned unchanged so the
    caller's ``json.loads`` behaves exactly as it would on the raw reply. An
    optional ``json`` language tag after the opening fence is skipped, and an
    unterminated fence yields everything after the opening marker.
    """
    if not isinstance(text, str):
        return text

    fence = "```"
    opening = text.find(fence)
    if opening == -1:
        return text

    start = opening + len(fence)
    if text.startswith("json", start):
        start += len("json")

    closing = text.find(fence, start)
    if closing == -1:
        # Reply was cut off before the closing fence; keep what is there.
        closing = len(text)
    return text[start:closing].strip()


def general_summary(transcription, plan_tier="pro"):
    """
    Generate a summary of the transcription based on the user's plan tier.

    Uses LangChain Anthropic with structured outputs. If the structured call
    fails, the same messages are sent once more without structured output and
    the reply is parsed as JSON (a Markdown code fence around the JSON is
    tolerated).

    Args:
        transcription: The transcription to summarize (dict or JSON string).
            A dict is serialised with ``json.dumps``; anything else is passed
            through ``str()``.
        plan_tier: The user's plan tier ("freemium" or "pro"), compared
            case-insensitively. Any other value, including ``None``, selects
            the pro summary.

    Returns:
        A dict containing the summary. On the structured path this is
        ``model_dump()`` of a ``BasicSummary`` (freemium) or
        ``AdvancedSummary`` (pro); if the structured call returns something
        that is not a pydantic model it is returned as-is. On the fallback
        path it is whatever ``json.loads`` produces from the model reply.

    Raises:
        ValueError: If no Anthropic API key is present in the environment.
        Exception: Whatever the fallback call or its JSON parsing raises when
            both the structured attempt and the fallback fail.
    """
    # The misspelled variable name is a legacy spelling that is still honoured
    # so existing deployments keep working.
    api_key = os.getenv("ANTHROPIC_API_KEY") or os.getenv("ANTHTROPIC_API_KEY")
    if not api_key:
        raise ValueError("ANTHROPIC_API_KEY environment variable is required")

    logger.info(f"Generating {plan_tier} summary with structured output")

    # Convert transcription to string if it's a dict
    if isinstance(transcription, dict):
        transcription_str = json.dumps(transcription)
    else:
        transcription_str = str(transcription)

    # Select the appropriate prompt and schema based on the user's plan tier.
    # A missing tier (None) is treated like any other unknown tier: pro.
    tier = (plan_tier or "pro").lower()
    if tier == "freemium":
        prompt = basic_summary_prompt
        max_tokens = 2000
        output_schema = BasicSummary
    else:  # Default to pro
        prompt = advanced_summary_prompt
        max_tokens = 4000
        output_schema = AdvancedSummary

    # Initialize LangChain Anthropic model
    model = ChatAnthropic(
        model="claude-sonnet-4-5-20250929",
        api_key=api_key,
        temperature=0.2,
        max_tokens=max_tokens,
    )

    # Create messages directly to avoid template variable parsing issues
    messages = [
        SystemMessage(content="You are an AI meeting transcript summary formatter. Follow the instructions carefully and return structured output."),
        HumanMessage(content=prompt + "\n\nTranscription: " + transcription_str),
    ]

    # Use structured output
    structured_model = model.with_structured_output(output_schema)

    try:
        result = structured_model.invoke(messages)

        # Convert Pydantic model to dict
        if isinstance(result, BaseModel):
            logger.info(f"Successfully generated {plan_tier} summary with structured output")
            return result.model_dump()

        logger.info(f"Successfully generated {plan_tier} summary")
        return result

    except Exception as e:
        logger.error(f"Error generating summary with structured output: {e}")

        # Fallback: try without structured output
        try:
            logger.warning("Falling back to non-structured output")
            response = model.invoke(messages)
            text = response.content if hasattr(response, 'content') else str(response)
            # Plain chat replies are often wrapped in a Markdown code fence;
            # unwrap it first so the fallback does not fail on valid JSON.
            return json.loads(_strip_code_fence(text))
        except Exception as fallback_error:
            logger.error(f"Fallback also failed: {fallback_error}")
            raise
|
2025-04-24 10:15:13 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def _fenced_json_payload(text):
    """Return the text inside a Markdown code fence, or None if there is none.

    A fence opened with ```json is preferred; otherwise the first bare ```
    fence is used. If the closing fence is missing (for example because the
    reply was truncated), everything after the opening marker is returned
    instead of silently dropping the final character.
    """
    for marker in ("```json", "```"):
        opening = text.find(marker)
        if opening == -1:
            continue
        start = opening + len(marker)
        closing = text.find("```", start)
        if closing == -1:
            closing = len(text)
        return text[start:closing].strip()
    return None


def custom_summary(template, transcription):
    """
    Generate a custom summary based on a user-defined template.

    Uses LangChain Anthropic. The model reply is parsed as JSON; if direct
    parsing fails, the content of a Markdown code fence in the reply is
    parsed instead.

    Args:
        template: The custom template (dict or JSON string). A dict is
            serialised with ``json.dumps``; anything else goes through
            ``str()``.
        transcription: The transcription to summarize (dict or JSON string),
            converted the same way as ``template``.

    Returns:
        A dict containing the custom summary (the result of ``json.loads`` on
        the model reply).

    Raises:
        ValueError: If no Anthropic API key is present in the environment, or
            if the reply is not JSON and contains no code fence
            (``json.JSONDecodeError``, raised when fenced content is not
            valid JSON, is a ``ValueError`` subclass).
        Exception: Any error raised by the model call is logged and re-raised.
    """
    # The misspelled variable name is a legacy spelling that is still honoured
    # so existing deployments keep working.
    api_key = os.getenv("ANTHROPIC_API_KEY") or os.getenv("ANTHTROPIC_API_KEY")
    if not api_key:
        raise ValueError("ANTHROPIC_API_KEY environment variable is required")

    logger.info("Generating custom summary")

    # Convert to strings if needed
    if isinstance(template, dict):
        template_str = json.dumps(template)
    else:
        template_str = str(template)

    if isinstance(transcription, dict):
        transcription_str = json.dumps(transcription)
    else:
        transcription_str = str(transcription)

    # Initialize LangChain Anthropic model
    model = ChatAnthropic(
        model="claude-sonnet-4-5-20250929",  # Using the same model as general_summary
        api_key=api_key,
        temperature=0.2,
        max_tokens=8000,
    )

    # Create messages directly to avoid template variable parsing issues
    messages = [
        SystemMessage(content="You are an AI meeting transcript summary formatter. Follow the user-defined template structure exactly."),
        HumanMessage(content=custom_template_prompt + "\n\nTEMPLATE: " + template_str + "\n\nTranscription: " + transcription_str),
    ]

    try:
        response = model.invoke(messages)
        text = response.content if hasattr(response, 'content') else str(response)

        # Try to parse as JSON
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            # If it's wrapped in markdown code blocks, try to extract JSON
            payload = _fenced_json_payload(text)
            if payload is None:
                raise ValueError(f"Could not parse response as JSON: {text[:200]}")
            return json.loads(payload)

    except Exception as e:
        logger.error(f"Error generating custom summary: {e}")
        raise
|