"""SQLAlchemy models and data-access helpers for threaded e-mail storage.

The module defines two tables (``threads`` and ``messages``), a FastAPI
session dependency, and helper functions that ingest e-mails, attach them to
threads and maintain each thread's ``requires_reply`` flag.
"""

from datetime import datetime, timezone
from email.utils import parseaddr, parsedate_to_datetime
from typing import Annotated, Iterable, List, Optional, Tuple

from fastapi import Depends
from sqlalchemy import (
    Boolean,
    Column,
    DateTime,
    ForeignKey,
    Index,
    Integer,
    String,
    Text,
    UniqueConstraint,
    create_engine,
    func,
)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session, relationship, sessionmaker

SQLALCHEMY_DATABASE_URL = "sqlite:///./sql_app.db"

engine = create_engine(
    SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
)
# NOTE: autoflush is disabled, so pending changes are NOT visible to queries
# until db.flush() is called explicitly. The DAO helpers below rely on that.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)


def get_db():
    """Yield a new session and close it once the consumer is done with it."""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()


db_dependency = Annotated[Session, Depends(get_db)]

Base = declarative_base()


def create_db_tables():
    """Create every table declared on ``Base`` using the module engine."""
    Base.metadata.create_all(bind=engine)


class Thread(Base):
    """A conversation: a group of messages belonging to one mailbox."""

    __tablename__ = "threads"

    id = Column(Integer, primary_key=True, index=True)
    # The mailbox this thread belongs to (scopes data when analyzing multiple inboxes)
    account_email = Column(String, nullable=False, index=True)
    # A stable key for the thread, typically the root message-id (or a synthetic key)
    thread_key = Column(String, nullable=False)
    subject = Column(String, index=True)
    requires_reply = Column(Boolean, nullable=False, default=False)
    created_at = Column(DateTime, nullable=False, server_default=func.now())
    updated_at = Column(
        DateTime, nullable=False, server_default=func.now(), onupdate=func.now()
    )

    # Ensure uniqueness per account
    __table_args__ = (
        UniqueConstraint("account_email", "thread_key", name="uq_thread_account_key"),
        Index("ix_threads_account_updated", "account_email", "updated_at"),
    )

    # ORM relationship
    messages = relationship(
        "Message",
        back_populates="thread",
        cascade="all, delete-orphan",
        order_by="Message.date_sent",
    )


class Message(Base):
    """A single e-mail, linked to exactly one :class:`Thread`."""

    __tablename__ = "messages"

    id = Column(Integer, primary_key=True, index=True)
    # Links to Thread
    thread_id = Column(Integer, ForeignKey("threads.id"), nullable=False, index=True)

    # RFC 5322 identifiers for threading
    # NOTE(review): message_id is unique across ALL accounts, not per account.
    message_id = Column(String, nullable=False, unique=True, index=True)
    in_reply_to = Column(String, index=True)  # parent message-id if any

    # Headers / metadata
    subject = Column(String, index=True)
    from_email = Column(String, index=True)
    to_email = Column(String, index=True)
    folder = Column(String, index=True)  # e.g. INBOX, Sent
    is_incoming = Column(Boolean, nullable=False, default=True)
    date_sent = Column(DateTime, index=True)
    body = Column(Text)
    created_at = Column(DateTime, nullable=False, server_default=func.now())
    updated_at = Column(
        DateTime, nullable=False, server_default=func.now(), onupdate=func.now()
    )

    thread = relationship("Thread", back_populates="messages")

    __table_args__ = (
        Index("ix_messages_thread_date", "thread_id", "date_sent"),
        # NOTE(review): in_reply_to already declares index=True, so this
        # explicit index covers the same single column a second time. Kept
        # because removing it would change the schema.
        Index("ix_messages_inreplyto", "in_reply_to"),
    )


# ----------------------
# Utility / DAO functions
# ----------------------

# Exceptions treated as "this string is not an RFC 2822 date".
_DATE_PARSE_ERRORS = (TypeError, ValueError, OverflowError)


def _normalize_email(addr: Optional[str]) -> str:
    """Return the lower-cased address part of an e-mail header value.

    ``"Jane <Jane@X.org>"`` becomes ``"jane@x.org"``. A missing or empty
    value yields ``""``.
    """
    if not addr:
        return ""
    return parseaddr(addr)[1].lower()


def _is_incoming_message(account_email: str, from_email: str) -> bool:
    """Return True when the sender is known and is not the account itself."""
    account = (account_email or "").lower()
    sender = _normalize_email(from_email)
    # If sender is the account itself, it's outgoing; otherwise incoming.
    # An empty/unparseable sender is never counted as incoming.
    return sender != account and sender != ""


def _to_naive_utc(value: datetime) -> datetime:
    """Convert an offset-aware datetime to naive UTC; leave naive ones alone.

    Storing every timestamp in one zone keeps ORDER BY date_sent meaningful
    when messages were sent from different UTC offsets.
    """
    if value.tzinfo is None:
        return value
    return value.astimezone(timezone.utc).replace(tzinfo=None)


def _parse_date_string(raw: str) -> datetime:
    """Parse a date string, falling back to the current UTC time.

    Tries the RFC 2822 e-mail date format first, then ISO 8601. The result
    may be naive or offset-aware.
    """
    try:
        return parsedate_to_datetime(raw)
    except _DATE_PARSE_ERRORS:
        pass
    try:
        return datetime.fromisoformat(raw)
    except ValueError:
        # Best effort: an unparseable date must not abort the ingest.
        return datetime.now(timezone.utc)


def find_or_create_thread(
    db: Session,
    *,
    account_email: str,
    subject: Optional[str],
    message_id: str,
    in_reply_to: Optional[str] = None,
) -> Thread:
    """Resolve the thread a message belongs to, creating one if needed.

    Strategy:
    - If in_reply_to matches an existing message of this account, reuse that
      message's thread.
    - Else if a message with message_id already exists for this account,
      reuse its thread.
    - Else if a thread keyed by message_id already exists for this account,
      reuse it.
    - Else create a new thread using message_id as thread_key.

    Args:
        db: Active session. A newly created thread is flushed, not committed.
        account_email: Mailbox owning the thread; compared lower-cased.
        subject: Subject stored on a newly created thread.
        message_id: Identifier of the message being placed.
        in_reply_to: Identifier of the parent message, if any.

    Returns:
        The matching or newly created thread.
    """
    account_email = (account_email or "").lower()

    # 1) Try to find parent by in_reply_to
    if in_reply_to:
        parent_msg: Optional[Message] = (
            db.query(Message)
            .join(Thread, Message.thread_id == Thread.id)
            .filter(
                Message.message_id == in_reply_to,
                Thread.account_email == account_email,
            )
            .one_or_none()
        )
        if parent_msg:
            return parent_msg.thread

    # 2) If message exists already, reuse its thread (idempotent ingest)
    existing_msg = (
        db.query(Message)
        .join(Thread, Message.thread_id == Thread.id)
        .filter(Message.message_id == message_id, Thread.account_email == account_email)
        .one_or_none()
    )
    if existing_msg:
        return existing_msg.thread

    # 3) Reuse a thread already keyed by this message_id. Inserting a second
    #    one would violate uq_thread_account_key.
    thread = (
        db.query(Thread)
        .filter(
            Thread.account_email == account_email,
            Thread.thread_key == message_id,
        )
        .one_or_none()
    )
    if thread is not None:
        return thread

    # 4) Create a new thread using message_id as the thread_key
    thread = Thread(account_email=account_email, thread_key=message_id, subject=subject)
    db.add(thread)
    db.flush()  # assign id
    return thread


def upsert_message(
    db: Session,
    *,
    account_email: str,
    message_id: str,
    subject: Optional[str],
    from_email: Optional[str],
    to_email: Optional[str],
    date_sent,
    body: Optional[str],
    in_reply_to: Optional[str] = None,
    folder: Optional[str] = None,
) -> Tuple[Message, Thread]:
    """Insert or update a message, linking it to the proper thread.

    Args:
        db: Active session. Changes are flushed, not committed.
        account_email: Mailbox the message was fetched for.
        message_id: Identifier used to find an existing row.
        subject: Message subject.
        from_email: Raw sender header; stored normalized.
        to_email: Raw recipient header; stored normalized.
        date_sent: Send time. Offset-aware datetimes are stored as naive UTC;
            any other value is stored unchanged.
        body: Message body.
        in_reply_to: Identifier of the parent message, if any.
        folder: Mail folder; defaults to "INBOX" when empty.

    Returns:
        The ``(message, thread)`` pair.
    """
    thread = find_or_create_thread(
        db,
        account_email=account_email,
        subject=subject,
        message_id=message_id,
        in_reply_to=in_reply_to,
    )

    # NOTE(review): this lookup is not scoped to the account, so a message
    # already stored for another account is re-linked to this thread.
    msg = db.query(Message).filter_by(message_id=message_id).one_or_none()
    if msg is None:
        msg = Message(message_id=message_id, thread_id=thread.id)
        db.add(msg)

    if isinstance(date_sent, datetime):
        date_sent = _to_naive_utc(date_sent)

    msg.thread_id = thread.id
    msg.in_reply_to = in_reply_to
    msg.subject = subject
    msg.from_email = _normalize_email(from_email)
    msg.to_email = _normalize_email(to_email)
    msg.date_sent = date_sent
    msg.body = body
    msg.folder = folder or "INBOX"
    msg.is_incoming = _is_incoming_message(account_email, msg.from_email)

    # Keep thread subject if missing; otherwise prefer the earliest subject
    if not thread.subject and subject:
        thread.subject = subject

    # Update requires_reply flag after inserting/updating the message
    update_thread_requires_reply(db, thread, account_email)
    return msg, thread


def update_thread_requires_reply(
    db: Session, thread: Thread, account_email: str
) -> None:
    """Set thread.requires_reply based on the latest message direction.

    Rule: If the most recent message in the thread is incoming (from someone
    else), then the thread requires a reply. Otherwise, it doesn't.

    Args:
        db: Active session. Changes are flushed, not committed.
        thread: Thread whose flag is recomputed.
        account_email: Currently unused; kept for interface compatibility.
    """
    # SessionLocal disables autoflush, so pending inserts/updates have to be
    # written out first or the query below would miss the newest message.
    db.flush()

    latest: Optional[Message] = (
        db.query(Message)
        .filter(Message.thread_id == thread.id)
        .order_by(Message.date_sent.desc(), Message.id.desc())
        .first()
    )
    thread.requires_reply = False if latest is None else latest.is_incoming

    # Touch updated_at
    thread.updated_at = func.now()
    db.flush()


def get_thread_messages(db: Session, thread_id: int) -> List[Message]:
    """Return the thread's messages ordered by date_sent, then id, ascending."""
    return (
        db.query(Message)
        .filter(Message.thread_id == thread_id)
        .order_by(Message.date_sent.asc(), Message.id.asc())
        .all()
    )


def get_threads_requiring_reply(db: Session, account_email: str) -> List[Thread]:
    """Return the account's threads flagged requires_reply, newest update first."""
    # Same None-safe normalization as the other helpers in this module.
    account = (account_email or "").lower()
    return (
        db.query(Thread)
        .filter(
            Thread.account_email == account,
            Thread.requires_reply.is_(True),
        )
        .order_by(Thread.updated_at.desc())
        .all()
    )


def ingest_emails(
    db: Session,
    account_email: str,
    emails: Iterable[dict],
    default_folder: str = "INBOX",
) -> None:
    """Bulk-ingest emails fetched via ZohoClient.fetch_emails.

    Expected fields per email dict: subject, from, date, snippet/body,
    messageId, optional inReplyTo, optional to. The snake_case spellings
    (from_email, to_email, in_reply_to, date_sent) and "id" are accepted as
    fallbacks. Entries without a message identifier are skipped.

    Args:
        db: Active session; committed once after all entries are processed.
        account_email: Mailbox the e-mails belong to.
        emails: E-mail dicts to store.
        default_folder: Folder used when an entry does not name one.
    """
    for e in emails:
        # Map common keys from ZohoClient output
        message_id = e.get("messageId") or e.get("id")
        if not message_id:
            # Skip messages without identifiers
            continue

        # Parse date if it's a string; other values are passed through as-is
        date_val = e.get("date") or e.get("date_sent")
        if isinstance(date_val, str):
            date_sent = _parse_date_string(date_val)
        else:
            date_sent = date_val

        upsert_message(
            db,
            account_email=account_email,
            message_id=message_id,
            subject=e.get("subject"),
            from_email=e.get("from") or e.get("from_email"),
            to_email=e.get("to") or e.get("to_email"),
            date_sent=date_sent,
            body=e.get("body") or e.get("snippet"),
            in_reply_to=e.get("inReplyTo") or e.get("in_reply_to"),
            folder=e.get("folder") or default_folder,
        )

    db.commit()