102 lines
3.5 KiB
Python
102 lines
3.5 KiB
Python
import json
|
|
import os
|
|
from typing import Dict, List
|
|
|
|
from groq import Groq
|
|
|
|
|
|
def _format_messages_for_context(
    messages: List[dict], max_body_chars: int = 1000
) -> str:
    """Render messages as a plain-text transcript for the LLM prompt.

    Each message becomes one entry made of a header line
    ``[date] [IN|OUT] sender -> recipient``, a ``Subject:`` line and the
    stripped body. Entries are separated by a ``---`` rule surrounded by
    blank lines.

    Args:
        messages: Message dicts. The keys read are ``is_incoming`` (a
            message is treated as incoming when the key is absent),
            ``date_sent``, ``subject``, ``from_email``, ``to_email`` and
            ``body``. Missing or ``None`` text fields render as empty text.
            ``None`` is accepted and treated as an empty list.
        max_body_chars: Bodies longer than this are cut to this many
            characters and suffixed with ``...`` to bound the prompt size.

    Returns:
        The joined transcript, or ``""`` when there are no messages.
    """
    entries = []
    for msg in messages or []:
        direction = "IN" if msg.get("is_incoming", True) else "OUT"

        # A missing date would otherwise be interpolated as the literal
        # text "None", which is just noise in the prompt.
        date = msg.get("date_sent")
        if date is None:
            date = "unknown date"

        subject = msg.get("subject") or ""
        sender = msg.get("from_email") or ""
        recipient = msg.get("to_email") or ""

        body = (msg.get("body") or "").strip()
        if len(body) > max_body_chars:
            body = body[:max_body_chars] + "..."

        entries.append(
            f"[{date}] [{direction}] {sender} -> {recipient}\n"
            f"Subject: {subject}\n"
            f"{body}"
        )
    return "\n\n---\n\n".join(entries)
|
|
|
|
|
|
def _heuristic_analyze(messages: List[dict]) -> Dict:
    """Classify a thread without an LLM, using simple keyword matching.

    Only the bodies of the last four messages are examined. The thread is
    flagged actionable when that text contains a question mark or one of a
    few request phrases (case-insensitive substring match).

    Args:
        messages: Message dicts, oldest first. Only ``body`` and, for the
            final message, ``subject`` are read. ``None`` or an empty list
            yields a non-actionable result with empty summary and subject.

    Returns:
        Dict with the keys ``actionable`` (bool), ``summary`` (the examined
        text, cut to 350 characters plus ``...`` when longer), ``subject``
        (subject of the last message, or ``""``), ``confidence`` (always
        0.35) and ``model`` (always ``"heuristic"``).
    """
    recent = (messages or [])[-4:]

    # Strip bodies and drop empty ones so a thread of blank messages does
    # not produce a "summary" consisting only of separator newlines.
    stripped = ((m.get("body") or "").strip() for m in recent)
    combined = "\n\n".join(text for text in stripped if text)

    # Lower-case once instead of once per phrase.
    lowered = combined.lower()
    request_phrases = (
        "could you",
        "can you",
        "please",
        "let me know",
        "need",
        "request",
    )
    actionable = "?" in combined or any(
        phrase in lowered for phrase in request_phrases
    )

    summary = combined[:350] + "..." if len(combined) > 350 else combined
    last_subject = (recent[-1].get("subject") or "") if recent else ""

    return {
        "actionable": actionable,
        "summary": summary,
        "subject": last_subject,
        "confidence": 0.35,
        "model": "heuristic",
    }
|
|
|
|
|
|
def analyze_thread(
    thread_subject: str, messages: List[dict], max_messages: int = 4
) -> Dict:
    """Triage an email thread with the Groq LLM.

    The last ``max_messages`` messages are formatted into a prompt and the
    model is asked for a strict JSON verdict. Any failure on the LLM path
    (client construction, the API call, an unparsable or malformed reply)
    is logged and the heuristic analyzer is used instead, so this function
    does not raise for those failures.

    Args:
        thread_subject: Subject of the thread; ``None`` is treated as ``""``.
        messages: Message dicts, oldest first. ``None`` is treated as an
            empty list.
        max_messages: How many trailing messages to analyze. ``0``, ``None``
            or a negative value means "use all messages".

    Returns:
        Dict with keys:
        - actionable: bool
        - summary: str
        - subject: str
        - confidence: float (0..1)
        - model: str
        When ``GROQ_API_KEY`` is unset or the LLM path fails, the dict comes
        from ``_heuristic_analyze`` instead.
    """
    # Function-scope import so this block does not depend on a file-level
    # ``import logging`` being present.
    import logging

    all_messages = messages or []
    if max_messages and max_messages > 0:
        msgs = all_messages[-max_messages:]
    else:
        msgs = all_messages

    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        return _heuristic_analyze(msgs)

    # Read once so the model requested and the model reported always agree.
    # NOTE(review): hosted model ids get retired over time -- confirm this
    # default is still served, or set GROQ_MODEL explicitly.
    model_name = os.getenv("GROQ_MODEL", "llama-3.1-70b-versatile")

    system_prompt = (
        "You are a helpful assistant that triages email threads and writes concise summaries. "
        "Decide if the thread requires a reply from our side now, based on the last few messages. "
        "Ignore newsletters/automations (e.g., from no-reply), and focus on whether there's a clear question or request. "
        "Return a strict JSON object with keys: actionable (true/false), summary (<= 80 words), confidence (0..1)."
    )

    user_prompt = (
        f"Thread subject: {thread_subject or ''}\n\n"
        "Recent messages (oldest to newest):\n\n"
        f"{_format_messages_for_context(msgs)}\n\n"
        "Respond with only JSON, no extra commentary."
    )

    try:
        # The client is built inside the ``try`` so that a construction
        # error also falls back instead of propagating to the caller.
        client = Groq(api_key=api_key)
        completion = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.2,
            max_tokens=300,
        )
        content = (completion.choices[0].message.content or "").strip()
        data = _extract_json_object(content)

        # Basic validation of the two fields callers cannot do without.
        if not isinstance(data.get("actionable"), bool) or not isinstance(
            data.get("summary"), str
        ):
            raise ValueError("Invalid schema from model")
    except Exception as exc:
        # Deliberately broad: this is a best-effort boundary and any LLM
        # failure must degrade to the heuristic -- but leave a trace.
        logging.getLogger(__name__).warning(
            "Groq thread analysis failed (%s); using heuristic fallback", exc
        )
        return _heuristic_analyze(msgs)

    data.setdefault("subject", thread_subject or "")
    data.setdefault("model", model_name)

    # The docstring promises a float in 0..1; the model may omit the key or
    # send a string or out-of-range number. 0.5 is used as a neutral value
    # when nothing usable was supplied.
    try:
        confidence = float(data.get("confidence"))
    except (TypeError, ValueError):
        confidence = 0.5
    data["confidence"] = min(1.0, max(0.0, confidence))

    return data


def _extract_json_object(text: str) -> dict:
    """Return the first JSON object embedded in *text*.

    Models sometimes wrap the JSON in Markdown code fences or add a sentence
    before or after it, so this scans for a ``{`` from which a complete
    object can be decoded and ignores whatever surrounds it.

    Raises:
        ValueError: If no JSON object can be decoded from *text*.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            parsed, _ = decoder.raw_decode(text[start:])
        except json.JSONDecodeError:
            # Not a valid object at this brace; try the next one.
            start = text.find("{", start + 1)
            continue
        return parsed
    raise ValueError("No JSON object found in model reply")
|