Skip to content

Commit 33f015b

Browse files
authored
fix(community): Handle UnicodeDecodeError in GmailSearch and ensure header robustness (#1226)
1 parent 13badbc commit 33f015b

File tree

1 file changed

+10
-6
lines changed
  • libs/community/langchain_google_community/gmail

1 file changed

+10
-6
lines changed

libs/community/langchain_google_community/gmail/search.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,8 @@ def _parse_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
8989

9090
email_msg = email.message_from_bytes(raw_message)
9191

92-
subject = email_msg["Subject"]
93-
sender = email_msg["From"]
92+
subject = email_msg.get("Subject", "")
93+
sender = email_msg.get("From", "")
9494

9595
message_body = ""
9696
if email_msg.is_multipart():
@@ -99,18 +99,22 @@ def _parse_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
9999
cdispo = str(part.get("Content-Disposition"))
100100
if ctype == "text/plain" and "attachment" not in cdispo:
101101
try:
102-
message_body = part.get_payload(decode=True).decode("utf-8") # type: ignore[union-attr]
102+
message_body = part.get_payload(decode=True).decode( # type: ignore[union-attr]
103+
"utf-8", errors="replace"
104+
)
103105
except UnicodeDecodeError:
104106
message_body = part.get_payload(decode=True).decode( # type: ignore[union-attr]
105-
"latin-1"
107+
"latin-1", errors="replace"
106108
)
107109
break
108110
else:
109111
try:
110-
message_body = email_msg.get_payload(decode=True).decode("utf-8") # type: ignore[union-attr]
112+
message_body = email_msg.get_payload(decode=True).decode( # type: ignore[union-attr]
113+
"utf-8", errors="replace"
114+
)
111115
except UnicodeDecodeError:
112116
message_body = email_msg.get_payload(decode=True).decode( # type: ignore[union-attr]
113-
"latin-1"
117+
"latin-1", errors="replace"
114118
)
115119

116120
body = clean_email_body(message_body)

0 commit comments

Comments
 (0)