# email_alerts_v2/ai.py

import json
import logging
import os
from typing import Dict, List

from groq import Groq


def _format_messages_for_context(messages: List[dict]) -> str:
    lines = []
    for m in messages:
        direction = "IN" if m.get("is_incoming", True) else "OUT"
        date = m.get("date_sent")
        subj = m.get("subject") or ""
        from_email = m.get("from_email") or ""
        to_email = m.get("to_email") or ""
        body = (m.get("body") or "").strip()
        if len(body) > 1000:
            body = body[:1000] + "..."
        lines.append(
            f"[{date}] [{direction}] {from_email} -> {to_email}\nSubject: {subj}\n{body}"
        )
    return "\n\n---\n\n".join(lines)


def _heuristic_analyze(messages: List[dict]) -> Dict:
    # Simple fallback if Groq isn't available
    body_concat = "\n\n".join([(m.get("body") or "") for m in messages[-4:]])
    question_like = "?" in body_concat or any(
        kw in body_concat.lower()
        for kw in ["could you", "can you", "please", "let me know", "need", "request"]
    )
    last_subj = (messages[-1].get("subject") or "") if messages else ""
    return {
        "actionable": bool(question_like),
        "summary": (body_concat[:350] + "...")
        if len(body_concat) > 350
        else body_concat,
        "subject": last_subj,
        "confidence": 0.35,
        "model": "heuristic",
    }


def _extract_json_object(text: str) -> Dict:
    """Parse *text* as a JSON object, tolerating surrounding non-JSON text."""
    try:
        parsed = json.loads(text)
    except ValueError:
        # Models frequently wrap the payload in Markdown fences or add a short
        # preamble; retry on the outermost {...} span before giving up.
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end < start:
            raise
        parsed = json.loads(text[start : end + 1])
    if not isinstance(parsed, dict):
        raise ValueError("Model response is not a JSON object")
    return parsed


def _coerce_confidence(value, default: float = 0.5) -> float:
    """Return *value* as a float clamped to [0, 1], or *default* if unusable."""
    try:
        score = float(value)
    except (TypeError, ValueError):
        return default
    if score != score:  # NaN: json.loads accepts the bare literal NaN
        return default
    return min(1.0, max(0.0, score))


def analyze_thread(
    thread_subject: str, messages: List[dict], max_messages: int = 4
) -> Dict:
    """Triage an email thread with the Groq LLM, with a heuristic fallback.

    Args:
        thread_subject: Subject of the thread; ``None``/empty is sent as "".
        messages: Message dicts, oldest first (the prompt states this order).
        max_messages: Only the last ``max_messages`` messages are analyzed.
            A falsy or non-positive value disables the limit.

    Returns:
        A dict with keys:
          - actionable: bool
          - summary: str
          - subject: str
          - confidence: float (0..1)
          - model: str
        When ``GROQ_API_KEY`` is unset, or the client, the request or the
        response validation fails, the result of ``_heuristic_analyze`` is
        returned instead; failures are logged as warnings, never raised.
    """
    # Guard against negative limits, which would otherwise slice from the
    # front (messages[2:] for max_messages=-2) instead of keeping the tail.
    limit_applies = bool(max_messages) and max_messages > 0
    msgs = messages[-max_messages:] if limit_applies else messages

    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        return _heuristic_analyze(msgs)

    # NOTE(review): confirm this default model id is still served by Groq.
    model_name = os.getenv("GROQ_MODEL", "llama-3.1-70b-versatile")

    system_prompt = (
        "You are a helpful assistant that triages email threads and writes concise summaries. "
        "Decide if the thread requires a reply from our side now, based on the last few messages. "
        "Ignore newsletters/automations (e.g., from no-reply), and focus on whether there's a clear question or request. "
        "Return a strict JSON object with keys: actionable (true/false), summary (<= 80 words), confidence (0..1)."
    )

    user_prompt = (
        f"Thread subject: {thread_subject or ''}\n\n"
        "Recent messages (oldest to newest):\n\n"
        f"{_format_messages_for_context(msgs)}\n\n"
        "Respond with only JSON, no extra commentary."
    )

    try:
        # Client construction lives inside the try so that a failure there
        # also takes the documented fallback path.
        client = Groq(api_key=api_key)
        completion = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.2,
            max_tokens=300,
        )
        # content may be None; treat it as empty so parsing fails cleanly.
        content = (completion.choices[0].message.content or "").strip()
        data = _extract_json_object(content)

        # Basic validation of the fields the prompt asks for.
        if not isinstance(data.get("actionable"), bool) or not isinstance(
            data.get("summary"), str
        ):
            raise ValueError("Invalid schema from model")

        # Honor the documented contract: always a float within 0..1.
        data["confidence"] = _coerce_confidence(data.get("confidence"))
        data.setdefault("subject", thread_subject or "")
        data.setdefault("model", model_name)
        return data
    except Exception:
        # Deliberate best-effort boundary: any SDK, network or parsing error
        # degrades to the heuristic, but is logged rather than hidden.
        logging.getLogger(__name__).warning(
            "Groq thread analysis failed; using heuristic fallback",
            exc_info=True,
        )
        return _heuristic_analyze(msgs)