Skip to content

Commit

Permalink
Parse the message first to get its content charset, then str_encode the raw email with that charset.
Browse files Browse the repository at this point in the history
  • Loading branch information
sangkaka committed Sep 5, 2022
1 parent 43e9b5b commit 5018ccf
Showing 1 changed file with 13 additions and 7 deletions.
20 changes: 13 additions & 7 deletions imbox/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,19 +194,25 @@ def parse_flags(headers):


def parse_email(raw_email, policy=None):
if isinstance(raw_email, bytes):
raw_email = str_encode(raw_email, 'utf-8', errors='ignore')
if policy is not None:
email_parse_kwargs = dict(policy=policy)
else:
email_parse_kwargs = {}

try:
email_message = email.message_from_string(
# Should first get content charset then str_encode with charset.
if isinstance(raw_email, bytes):
email_message = email.message_from_bytes(
raw_email, **email_parse_kwargs)
except UnicodeEncodeError:
email_message = email.message_from_string(
raw_email.encode('utf-8'), **email_parse_kwargs)
charset = email_message.get_content_charset('utf-8')
raw_email = str_encode(raw_email, charset, errors='ignore')
else:
try:
email_message = email.message_from_string(
raw_email, **email_parse_kwargs)
except UnicodeEncodeError:
email_message = email.message_from_string(
raw_email.encode('utf-8'), **email_parse_kwargs)

maintype = email_message.get_content_maintype()
parsed_email = {'raw_email': raw_email}

Expand Down

0 comments on commit 5018ccf

Please sign in to comment.