Fix ChatPromptTemplate variable parsing issue and update tests
- Replace ChatPromptTemplate with direct HumanMessage/SystemMessage to avoid template variable parsing
- Fix f-string formatting issues in prompt strings
- Update test_general_summary_fallback_on_error to properly mock the fallback chain
- Add tests directory with comprehensive test coverage
This commit is contained in:
+222
-31
@@ -1,61 +1,252 @@
|
||||
import anthropic
|
||||
import os
|
||||
import logging
|
||||
from typing import Dict, List, Any, Optional
|
||||
from dotenv import load_dotenv
|
||||
import json
|
||||
from pydantic import BaseModel
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
from langchain_core.messages import HumanMessage, SystemMessage
|
||||
from src.prompt import advanced_summary_prompt, basic_summary_prompt, custom_template_prompt
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# Setup logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ============================================================================
|
||||
# Pydantic Models for Structured Outputs
|
||||
# ============================================================================
|
||||
|
||||
# Basic Summary Models (Freemium Plan)
|
||||
class KeyPoint(BaseModel):
    """One notable point extracted from the meeting.

    Element type of ``BasicSummary.Key_Points``.
    """

    # The key point itself, as text.
    text: str
    # Position of the point in the recording; the unit is not defined in
    # this module (presumably seconds -- TODO confirm against the prompt).
    timestamp: float
|
||||
|
||||
class Summary(BaseModel):
    """Whole-meeting summary text together with the meeting length."""

    # Summary text.
    text: str
    # Meeting duration, in minutes (per the field name).
    duration_minutes: float
|
||||
|
||||
class BasicSummary(BaseModel):
    """Structured-output schema for the freemium-tier summary.

    Selected by ``general_summary`` when ``plan_tier`` is ``"freemium"``.
    """

    # Key points extracted from the meeting.
    Key_Points: List[KeyPoint]
    # NOTE(review): this field shares its name with the ``Summary`` model it is
    # annotated with. That resolves to the module-level class only while the
    # field has no default value -- verify before ever adding one.
    Summary: Summary
|
||||
|
||||
|
||||
# Advanced Summary Models (Pro Plan)
|
||||
class Purpose(BaseModel):
    """Statement of why the meeting was held."""

    # Purpose, as text.
    text: str
|
||||
|
||||
class ChapterContent(BaseModel):
    """A single content entry inside a ``Chapter``."""

    # Text of the entry.
    text: str
    # Start and end of the transcript span this entry refers to; the unit is
    # not defined in this module (presumably seconds -- TODO confirm).
    original_transcript_start: float
    original_transcript_end: float
|
||||
|
||||
class WordTimestamp(BaseModel):
    """A single word paired with its timestamp."""

    # The word.
    word: str
    # Timestamp for the word; the unit is not defined in this module
    # (presumably seconds -- TODO confirm).
    timestamp: float
|
||||
|
||||
class TimeStamp(BaseModel):
    """A time range expressed as a start/end pair."""

    # Beginning of the range.
    start: float
    # End of the range.
    end: float
|
||||
|
||||
class Chapter(BaseModel):
    """One chapter (section) of the meeting."""

    # Chapter name/title.
    chapter: str
    # Time range covered by the chapter.
    time_stamp: TimeStamp
    # Content entries belonging to the chapter.
    content: List[ChapterContent]
    # Word-level timestamps associated with the chapter.
    words_time_stamp: List[WordTimestamp]
|
||||
|
||||
class Chapters(BaseModel):
    """The chapters section of an advanced summary."""

    # Total minutes for this section (per the field name).
    minutes_total: float
    # The individual chapters.
    content: List[Chapter]
|
||||
|
||||
class OutcomeContent(BaseModel):
    """A single entry in the outcomes section."""

    # Text of the outcome.
    text: str
    # Time range associated with the outcome.
    time_stamp: TimeStamp
    # Word-level timestamps associated with the outcome.
    words_time_stamp: List[WordTimestamp]
|
||||
|
||||
class Outcomes(BaseModel):
    """The outcomes section of an advanced summary."""

    # Total minutes for this section (per the field name).
    minutes_total: float
    # The individual outcome entries.
    content: List[OutcomeContent]
|
||||
|
||||
class ActionItem(BaseModel):
    """A single action item."""

    # Text of the action item.
    text: str
    # Time range associated with the action item.
    time_stamp: TimeStamp
    # Word-level timestamps associated with the action item.
    words_time_stamp: List[WordTimestamp]
|
||||
|
||||
class ActionItemsPerUser(BaseModel):
    """Action items grouped under one speaker."""

    # Speaker the action items belong to.
    speaker: str
    # Total minutes for this speaker's group (per the field name).
    minutes_total: float
    # The speaker's action items.
    action_items: List[ActionItem]
|
||||
|
||||
class AdvancedSummary(BaseModel):
    """Structured-output schema for the pro-tier summary.

    Selected by ``general_summary`` for every ``plan_tier`` other than
    ``"freemium"``.
    """

    # NOTE(review): the next three fields share their names with the models
    # they are annotated with. That resolves to the module-level classes only
    # while the fields have no default values -- verify before adding any.
    Purpose: Purpose
    Chapters: Chapters
    Outcomes: Outcomes
    # Action items, one group per speaker.
    Action_Items_Per_User: List[ActionItemsPerUser]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Summary Generation Functions
|
||||
# ============================================================================
|
||||
|
||||
def _general_summary_fallback_json(response):
    """Parse a plain (non-structured) chat response into JSON.

    Args:
        response: The object returned by ``model.invoke``. Its ``content``
            attribute is used when present, otherwise ``str(response)``.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If neither the raw text nor the contents of a
            Markdown code fence in it is valid JSON.
    """
    text = response.content if hasattr(response, "content") else str(response)
    if not isinstance(text, str):
        # Message content may be a list of strings / content-block dicts
        # rather than a single string; keep only the textual parts.
        text = "".join(
            part if isinstance(part, str) else str(part.get("text", ""))
            for part in text
            if isinstance(part, (str, dict))
        )

    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Models often wrap JSON in a Markdown fence; prefer a ```json fence,
        # then any fence. With no fence at all, surface the original error.
        for opener in ("```json", "```"):
            start = text.find(opener)
            if start != -1:
                break
        else:
            raise
        start += len(opener)
        end = text.find("```", start)
        if end == -1:
            # Unterminated fence: take everything up to the end of the text.
            end = len(text)
        return json.loads(text[start:end].strip())


def general_summary(transcription, plan_tier="pro"):
    """Generate a summary of the transcription based on the user's plan tier.

    Uses LangChain's ``ChatAnthropic`` with structured output. If the
    structured call fails, the same messages are sent once more without
    structured output and the reply is parsed as JSON.

    Args:
        transcription: The transcription to summarize (dict or JSON string).
            A dict is serialized with ``json.dumps``; anything else is passed
            through ``str``.
        plan_tier: The user's plan tier. ``"freemium"`` (case-insensitive)
            selects the basic prompt and ``BasicSummary`` schema; any other
            value, including ``None``, is treated as ``"pro"``.

    Returns:
        A dict containing the summary (structured output).

    Raises:
        ValueError: If no Anthropic API key is configured.
        Exception: Whatever the fallback call or its JSON parsing raises when
            both the structured and the fallback attempts fail.
    """
    # The misspelled variable name is still honoured so that environments
    # configured for the previous implementation keep working.
    api_key = os.getenv("ANTHROPIC_API_KEY") or os.getenv("ANTHTROPIC_API_KEY")
    if not api_key:
        raise ValueError("ANTHROPIC_API_KEY environment variable is required")

    logger.info("Generating %s summary with structured output", plan_tier)

    # Convert transcription to string if it's a dict
    if isinstance(transcription, dict):
        transcription_str = json.dumps(transcription)
    else:
        transcription_str = str(transcription)

    # Select the appropriate prompt and schema based on the user's plan tier
    if str(plan_tier or "pro").lower() == "freemium":
        prompt = basic_summary_prompt
        max_tokens = 2000  # Reduced token count for basic summaries
        output_schema = BasicSummary
    else:  # Default to pro
        prompt = advanced_summary_prompt
        max_tokens = 4000
        output_schema = AdvancedSummary

    # Initialize LangChain Anthropic model
    model = ChatAnthropic(
        model="claude-sonnet-4-5-20250929",
        api_key=api_key,
        temperature=0.2,
        max_tokens=max_tokens,
    )

    # Create messages directly to avoid template variable parsing issues
    # (the prompt and transcription may contain literal braces).
    messages = [
        SystemMessage(content="You are an AI meeting transcript summary formatter. Follow the instructions carefully and return structured output."),
        HumanMessage(content=prompt + "\n\nTranscription: " + transcription_str),
    ]

    structured_model = model.with_structured_output(output_schema)

    try:
        result = structured_model.invoke(messages)
    except Exception as e:
        logger.error("Error generating summary with structured output: %s", e)
        # Fallback: try without structured output
        try:
            logger.warning("Falling back to non-structured output")
            return _general_summary_fallback_json(model.invoke(messages))
        except Exception as fallback_error:
            logger.error("Fallback also failed: %s", fallback_error)
            raise

    # Convert Pydantic model to dict
    if isinstance(result, BaseModel):
        logger.info("Successfully generated %s summary with structured output", plan_tier)
        return result.model_dump()

    logger.info("Successfully generated %s summary", plan_tier)
    return result
|
||||
|
||||
|
||||
def _custom_summary_json(response):
    """Parse a chat response into JSON, tolerating Markdown code fences.

    Args:
        response: The object returned by ``model.invoke``. Its ``content``
            attribute is used when present, otherwise ``str(response)``.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If the text is not JSON and contains no code fence.
        json.JSONDecodeError: If a code fence is present but its contents are
            not valid JSON.
    """
    text = response.content if hasattr(response, "content") else str(response)
    if not isinstance(text, str):
        # Message content may be a list of strings / content-block dicts
        # rather than a single string; keep only the textual parts.
        text = "".join(
            part if isinstance(part, str) else str(part.get("text", ""))
            for part in text
            if isinstance(part, (str, dict))
        )

    # Try to parse as JSON
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # If it's wrapped in markdown code blocks, try to extract JSON;
    # a ```json fence takes precedence over a bare fence.
    for opener in ("```json", "```"):
        start = text.find(opener)
        if start == -1:
            continue
        start += len(opener)
        end = text.find("```", start)
        if end == -1:
            # Unterminated fence: use the rest of the text instead of slicing
            # with -1, which would silently drop the final character.
            end = len(text)
        return json.loads(text[start:end].strip())

    raise ValueError(f"Could not parse response as JSON: {text[:200]}")


def custom_summary(template, transcription):
    """Generate a custom summary based on a user-defined template.

    Uses LangChain's ``ChatAnthropic`` and parses the reply as JSON.

    Args:
        template: The custom template (dict or JSON string). A dict is
            serialized with ``json.dumps``; anything else goes through ``str``.
        transcription: The transcription to summarize (dict or JSON string),
            converted the same way as ``template``.

    Returns:
        A dict containing the custom summary.

    Raises:
        ValueError: If no Anthropic API key is configured, or the reply cannot
            be parsed as JSON.
        Exception: Any error raised by the model call is logged and re-raised.
    """
    # The misspelled variable name is still honoured so that environments
    # configured for the previous implementation keep working.
    api_key = os.getenv("ANTHROPIC_API_KEY") or os.getenv("ANTHTROPIC_API_KEY")
    if not api_key:
        raise ValueError("ANTHROPIC_API_KEY environment variable is required")

    logger.info("Generating custom summary")

    # Convert to strings if needed
    if isinstance(template, dict):
        template_str = json.dumps(template)
    else:
        template_str = str(template)

    if isinstance(transcription, dict):
        transcription_str = json.dumps(transcription)
    else:
        transcription_str = str(transcription)

    # Initialize LangChain Anthropic model
    model = ChatAnthropic(
        model="claude-sonnet-4-5-20250929",  # Using the same model as general_summary
        api_key=api_key,
        temperature=0.2,
        max_tokens=8000,
    )

    # Create messages directly to avoid template variable parsing issues
    # (the template and transcription may contain literal braces).
    messages = [
        SystemMessage(content="You are an AI meeting transcript summary formatter. Follow the user-defined template structure exactly."),
        HumanMessage(content=custom_template_prompt + "\n\nTEMPLATE: " + template_str + "\n\nTranscription: " + transcription_str),
    ]

    try:
        return _custom_summary_json(model.invoke(messages))
    except Exception as e:
        logger.error("Error generating custom summary: %s", e)
        raise
|
||||
|
||||
Reference in New Issue
Block a user