feat: Increase max email fetch results and enhance email body extraction
This commit is contained in:
+1
-1
@@ -364,7 +364,7 @@ def _sync_emails_once(cfg: dict) -> int:
|
||||
days_back = max(1, delta_days)
|
||||
except Exception:
|
||||
pass
|
||||
max_results = 5
|
||||
max_results = 100
|
||||
client = ZohoClient(
|
||||
email=cfg.get("zoho_email") or account_email,
|
||||
app_password=cfg.get("zoho_app_password"),
|
||||
|
||||
+57
-14
@@ -132,21 +132,28 @@ class ZohoClient:
|
||||
email_message = email.message_from_bytes(raw_email)
|
||||
date_header = email_message.get("Date", "")
|
||||
email_date = parse_email_date_safely(date_header)
|
||||
|
||||
|
||||
|
||||
# Ensure both dates are timezone-aware for comparison
|
||||
if email_date and latest_date:
|
||||
print(f"📅 Email date: {email_date} Latest: {latest_date}")
|
||||
# If latest_date is timezone-naive, make it timezone-aware (assume UTC)
|
||||
if latest_date.tzinfo is None:
|
||||
latest_date = latest_date.replace(tzinfo=timezone.utc)
|
||||
|
||||
|
||||
if (email_date > latest_date) or first_time:
|
||||
# Extract headers
|
||||
print(f"📅 Email date: {email_date} Latest: {latest_date}")
|
||||
subject = self._decode_header(email_message.get("Subject", ""))
|
||||
from_header = self._decode_header(email_message.get("From", ""))
|
||||
to_header = self._decode_header(email_message.get("To", ""))
|
||||
# Extract headers
|
||||
print(
|
||||
f"📅 Email date: {email_date} Latest: {latest_date}"
|
||||
)
|
||||
subject = self._decode_header(
|
||||
email_message.get("Subject", "")
|
||||
)
|
||||
from_header = self._decode_header(
|
||||
email_message.get("From", "")
|
||||
)
|
||||
to_header = self._decode_header(
|
||||
email_message.get("To", "")
|
||||
)
|
||||
|
||||
message_id = email_message.get("Message-ID", "")
|
||||
in_reply_to = email_message.get("In-Reply-To", "")
|
||||
@@ -156,7 +163,6 @@ class ZohoClient:
|
||||
|
||||
# Get email body snippet
|
||||
body = self._get_email_body(email_message)
|
||||
|
||||
|
||||
email_data = {
|
||||
"id": num.decode(),
|
||||
@@ -170,7 +176,7 @@ class ZohoClient:
|
||||
"folder": folder,
|
||||
"snippet": body,
|
||||
}
|
||||
|
||||
|
||||
emails.append(email_data)
|
||||
logging.info(f"Long body: {body}")
|
||||
except Exception as e:
|
||||
@@ -227,18 +233,22 @@ class ZohoClient:
|
||||
return str(header_value)
|
||||
|
||||
def _get_email_body(self, email_message) -> str:
|
||||
"""Extract email body text"""
|
||||
"""Extract email body text - get only the main content, not quoted replies"""
|
||||
body = ""
|
||||
|
||||
if email_message.is_multipart():
|
||||
# Get only the first text/plain part (main content)
|
||||
for part in email_message.walk():
|
||||
if part.get_content_type() == "text/plain":
|
||||
if part.get_content_type() == "text/plain" and not part.get_filename():
|
||||
try:
|
||||
body += part.get_payload(decode=True).decode(
|
||||
content = part.get_payload(decode=True).decode(
|
||||
"utf-8", errors="ignore"
|
||||
)
|
||||
# Take only the first text part we find
|
||||
body = content
|
||||
break # Stop after first text/plain part
|
||||
except Exception:
|
||||
pass
|
||||
continue
|
||||
else:
|
||||
try:
|
||||
body = email_message.get_payload(decode=True).decode(
|
||||
@@ -247,8 +257,41 @@ class ZohoClient:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Optional: Clean up the body by removing quoted content
|
||||
body = self._clean_email_body(body)
|
||||
return body
|
||||
|
||||
def _clean_email_body(self, body: str) -> str:
|
||||
"""Clean email body by removing quoted content and signatures"""
|
||||
if not body:
|
||||
return ""
|
||||
|
||||
lines = body.split("\n")
|
||||
cleaned_lines = []
|
||||
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
|
||||
# Stop at common quote indicators
|
||||
if (
|
||||
line.startswith("---- On ")
|
||||
or line.startswith("On ")
|
||||
and "wrote:" in line
|
||||
or line.startswith("From:")
|
||||
or line.startswith("> ")
|
||||
or line.startswith("-----Original Message-----")
|
||||
or line.startswith("---------- Forwarded message ---------")
|
||||
):
|
||||
break
|
||||
|
||||
cleaned_lines.append(line)
|
||||
|
||||
# Remove trailing empty lines
|
||||
while cleaned_lines and not cleaned_lines[-1]:
|
||||
cleaned_lines.pop()
|
||||
|
||||
return "\n".join(cleaned_lines)
|
||||
|
||||
def get_thread_messages(self, thread_id: str) -> List[Dict[str, Any]]:
|
||||
"""Get all messages in a thread (simplified for IMAP)"""
|
||||
# For IMAP, we'll return a single message since thread grouping is more complex
|
||||
|
||||
Reference in New Issue
Block a user