Refactor AI analysis and email fetching functionality

- Removed AI analysis guide and related documentation files.
- Updated email fetching logic to derive the fetch start date from the latest email date stored in the database, falling back to `days_back` when no emails are stored yet.
- Enhanced the app module to include a button for re-analyzing threads with AI.
- Improved database interactions to trigger AI analysis after ingesting sent emails.
- Adjusted the UI to display additional information about threads, including formatted latest message dates.
- Removed outdated test scripts for AI analysis and email fetching.
- Updated styles for better responsiveness and layout in the frontend.
This commit is contained in:
bolade
2025-08-12 09:54:10 +01:00
parent 75a0a3fde7
commit 1fd3a95093
14 changed files with 167 additions and 923 deletions
+113 -34
View File
@@ -5,6 +5,7 @@ import os
from contextlib import suppress
from typing import List
from dotenv import load_dotenv
from fastapi import BackgroundTasks, Depends, FastAPI, HTTPException, Request
from fastapi.responses import HTMLResponse, RedirectResponse
from fastapi.staticfiles import StaticFiles
@@ -22,7 +23,6 @@ from src.database import (
ingest_emails,
)
from src.zoho_client import ZohoClient
from dotenv import load_dotenv
load_dotenv()
@@ -50,15 +50,63 @@ app.mount("/static", StaticFiles(directory="static"), name="static")
@app.get("/", response_class=HTMLResponse)
def home(request: Request, db: Session = Depends(get_db), account: str | None = None):
q = db.query(Thread).order_by(Thread.updated_at.desc())
from datetime import datetime
from sqlalchemy import func
# Subquery to get the latest message date for each thread
latest_message_subq = (
db.query(
Message.thread_id, func.max(Message.date_sent).label("latest_message_date")
)
.group_by(Message.thread_id)
.subquery()
)
# Main query joining threads with their latest message dates
q = (
db.query(Thread, latest_message_subq.c.latest_message_date)
.outerjoin(latest_message_subq, Thread.id == latest_message_subq.c.thread_id)
.order_by(latest_message_subq.c.latest_message_date.desc().nulls_last())
)
if account:
q = q.filter(Thread.account_email == account.lower())
threads = q.limit(100).all()
results = q.limit(100).all()
# Create threads with additional info including sequential frontend ID and formatted latest message date
threads_with_info = []
for i, (thread, latest_message_date) in enumerate(results, 1):
# Format the latest message date as DD/MM/YY followed by a 12-hour time (hours and minutes with AM/PM)
formatted_date = None
if latest_message_date:
if isinstance(latest_message_date, str):
try:
# Try parsing if it's a string
dt = datetime.fromisoformat(
latest_message_date.replace("Z", "+00:00")
)
formatted_date = dt.strftime("%d/%m/%y %I:%M %p")
except Exception:
formatted_date = latest_message_date
elif hasattr(latest_message_date, "strftime"):
# It's already a datetime object
formatted_date = latest_message_date.strftime("%d/%m/%y %I:%M %p")
thread_info = {
"frontend_id": i,
"thread": thread,
"latest_message_date": latest_message_date,
"formatted_date": formatted_date or "N/A",
}
threads_with_info.append(thread_info)
return templates.TemplateResponse(
"threads.html",
{
"request": request,
"threads": threads,
"threads": threads_with_info,
"account": account or "",
"status": _status_for_templates(),
},
@@ -66,36 +114,67 @@ def home(request: Request, db: Session = Depends(get_db), account: str | None =
@app.get("/thread/{thread_id}", response_class=HTMLResponse)
def show_thread(thread_id: int, request: Request, db: Session = Depends(get_db)):
def show_thread(
thread_id: int,
request: Request,
db: Session = Depends(get_db),
force_analyze: bool = False,
):
thread = db.query(Thread).filter(Thread.id == thread_id).one_or_none()
if not thread:
raise HTTPException(status_code=404, detail="Thread not found")
messages: List[Message] = get_thread_messages(db, thread_id)
# Convert for AI analyzer and template
msg_dicts = [
{
"id": m.id,
"date_sent": m.date_sent,
"subject": m.subject,
"from_email": m.from_email,
"to_email": m.to_email,
"body": m.body,
"is_incoming": m.is_incoming,
}
for m in messages
]
ai = analyze_thread(thread.subject or "", msg_dicts)
# Save AI info on the thread for listing and downstream alerts
try:
from datetime import datetime, timezone
thread.actionable = bool(ai.get("actionable", False))
thread.ai_summary = ai.get("summary")
thread.ai_confidence = ai.get("confidence")
thread.last_analyzed_at = datetime.now(timezone.utc)
db.commit()
except Exception:
pass
# Check if we should use existing AI analysis or perform a new one
ai = None
should_analyze = force_analyze or not thread.last_analyzed_at
# Check if new messages have been added since last analysis
if not should_analyze and thread.last_analyzed_at and messages:
# Check if any messages are newer than the last analysis
newest_message_date = max(m.created_at for m in messages)
if newest_message_date > thread.last_analyzed_at:
should_analyze = True
# If we have existing analysis and don't need to re-analyze, use it
if not should_analyze and thread.last_analyzed_at:
ai = {
"actionable": thread.actionable,
"summary": thread.ai_summary,
"confidence": thread.ai_confidence,
"analyzed_at": thread.last_analyzed_at.isoformat()
if thread.last_analyzed_at
else None,
}
# Only analyze if we don't have existing data, force_analyze is True, or there are new messages
if should_analyze:
# Convert for AI analyzer
msg_dicts = [
{
"id": m.id,
"date_sent": m.date_sent,
"subject": m.subject,
"from_email": m.from_email,
"to_email": m.to_email,
"body": m.body,
"is_incoming": m.is_incoming,
}
for m in messages
]
ai = analyze_thread(thread.subject or "", msg_dicts)
# Save AI info on the thread for listing and downstream alerts
try:
from datetime import datetime, timezone
thread.actionable = bool(ai.get("actionable", False))
thread.ai_summary = ai.get("summary")
thread.ai_confidence = ai.get("confidence")
thread.last_analyzed_at = datetime.now(timezone.utc)
db.commit()
except Exception:
pass
return templates.TemplateResponse(
"thread_detail.html",
{
@@ -332,11 +411,11 @@ def _sync_emails_background_task():
cfg = load_config() # Reload in case it was modified
cfg.update(
{
"sync_in_progress": False,
"last_sync_status": "success",
"last_sync_at": datetime.now(timezone.utc).isoformat(),
"last_sync_count": count,
"last_sync_error": None,
"sync_in_progress": False,
"last_sync_status": "success",
"last_sync_at": datetime.now(timezone.utc).strftime("%d/%m/%y %I:%M %p"),
"last_sync_count": count,
"last_sync_error": None,
}
)
save_config(cfg)
+5 -1
View File
@@ -350,7 +350,7 @@ def ingest_emails(
Expected fields per email dict: subject, from, date, snippet/body, messageId, optional inReplyTo, optional to.
"""
from datetime import datetime
folder = default_folder
for e in emails:
# Map common keys from ZohoClient output
message_id = e.get("messageId") or e.get("id")
@@ -393,6 +393,10 @@ def ingest_emails(
db.commit()
if folder == "Sent":
analyze_and_update_threads(
account_email=account_email, max_concurrent=3, only_unanalyzed=True
)
def get_latest_email_date(
db: Session, account_email: str, folder: str = None
+7 -3
View File
@@ -70,7 +70,7 @@ class ZohoClient:
# Determine the start date for fetching emails
start_date = None
first_time = True
# If database session and account email are provided, try to get the latest email date
if db_session and account_email:
try:
@@ -82,11 +82,13 @@ class ZohoClient:
if latest_date:
# Add a small buffer (1 minute) to avoid missing emails with same timestamp
start_date = latest_date - timedelta(minutes=1)
first_time = False
print(f"📅 Using latest email date from database: {start_date}")
else:
print(
f"📅 No emails found in database, using days_back: {days_back}"
)
latest_date = datetime.now(timezone.utc) + timedelta(days=1)
except Exception as e:
print(
f"⚠️ Error getting latest date from database: {e}, falling back to days_back"
@@ -130,16 +132,18 @@ class ZohoClient:
email_message = email.message_from_bytes(raw_email)
date_header = email_message.get("Date", "")
email_date = parse_email_date_safely(date_header)
print(f"📅 Email date: {email_date} Latest: {latest_date}")
# Ensure both dates are timezone-aware for comparison
if email_date and latest_date:
print(f"📅 Email date: {email_date} Latest: {latest_date}")
# If latest_date is timezone-naive, make it timezone-aware (assume UTC)
if latest_date.tzinfo is None:
latest_date = latest_date.replace(tzinfo=timezone.utc)
if email_date > latest_date:
if (email_date > latest_date) or first_time:
# Extract headers
print(f"📅 Email date: {email_date} Latest: {latest_date}")
subject = self._decode_header(email_message.get("Subject", ""))
from_header = self._decode_header(email_message.get("From", ""))
to_header = self._decode_header(email_message.get("To", ""))