feat: Increase max email fetch results and enhance email body extraction

This commit is contained in:
bolade
2025-08-13 09:00:18 +01:00
parent 3ea27caca6
commit 411f47e039
3 changed files with 92 additions and 19 deletions
+1 -1
View File
@@ -364,7 +364,7 @@ def _sync_emails_once(cfg: dict) -> int:
days_back = max(1, delta_days)
except Exception:
pass
max_results = 5
max_results = 100
client = ZohoClient(
email=cfg.get("zoho_email") or account_email,
app_password=cfg.get("zoho_app_password"),
+57 -14
View File
@@ -132,21 +132,28 @@ class ZohoClient:
email_message = email.message_from_bytes(raw_email)
date_header = email_message.get("Date", "")
email_date = parse_email_date_safely(date_header)
# Ensure both dates are timezone-aware for comparison
if email_date and latest_date:
print(f"📅 Email date: {email_date} Latest: {latest_date}")
# If latest_date is timezone-naive, make it timezone-aware (assume UTC)
if latest_date.tzinfo is None:
latest_date = latest_date.replace(tzinfo=timezone.utc)
if (email_date > latest_date) or first_time:
# Extract headers
print(f"📅 Email date: {email_date} Latest: {latest_date}")
subject = self._decode_header(email_message.get("Subject", ""))
from_header = self._decode_header(email_message.get("From", ""))
to_header = self._decode_header(email_message.get("To", ""))
# Extract headers
print(
f"📅 Email date: {email_date} Latest: {latest_date}"
)
subject = self._decode_header(
email_message.get("Subject", "")
)
from_header = self._decode_header(
email_message.get("From", "")
)
to_header = self._decode_header(
email_message.get("To", "")
)
message_id = email_message.get("Message-ID", "")
in_reply_to = email_message.get("In-Reply-To", "")
@@ -156,7 +163,6 @@ class ZohoClient:
# Get email body snippet
body = self._get_email_body(email_message)
email_data = {
"id": num.decode(),
@@ -170,7 +176,7 @@ class ZohoClient:
"folder": folder,
"snippet": body,
}
emails.append(email_data)
logging.info(f"Long body: {body}")
except Exception as e:
@@ -227,18 +233,22 @@ class ZohoClient:
return str(header_value)
def _get_email_body(self, email_message) -> str:
"""Extract email body text"""
"""Extract email body text - get only the main content, not quoted replies"""
body = ""
if email_message.is_multipart():
# Get only the first text/plain part (main content)
for part in email_message.walk():
if part.get_content_type() == "text/plain":
if part.get_content_type() == "text/plain" and not part.get_filename():
try:
body += part.get_payload(decode=True).decode(
content = part.get_payload(decode=True).decode(
"utf-8", errors="ignore"
)
# Take only the first text part we find
body = content
break # Stop after first text/plain part
except Exception:
pass
continue
else:
try:
body = email_message.get_payload(decode=True).decode(
@@ -247,8 +257,41 @@ class ZohoClient:
except Exception:
pass
# Optional: Clean up the body by removing quoted content
body = self._clean_email_body(body)
return body
def _clean_email_body(self, body: str) -> str:
"""Clean email body by removing quoted content and signatures"""
if not body:
return ""
lines = body.split("\n")
cleaned_lines = []
for line in lines:
line = line.strip()
# Stop at common quote indicators
if (
line.startswith("---- On ")
or line.startswith("On ")
and "wrote:" in line
or line.startswith("From:")
or line.startswith("> ")
or line.startswith("-----Original Message-----")
or line.startswith("---------- Forwarded message ---------")
):
break
cleaned_lines.append(line)
# Remove trailing empty lines
while cleaned_lines and not cleaned_lines[-1]:
cleaned_lines.pop()
return "\n".join(cleaned_lines)
def get_thread_messages(self, thread_id: str) -> List[Dict[str, Any]]:
"""Get all messages in a thread (simplified for IMAP)"""
# For IMAP, we'll return a single message since thread grouping is more complex