Skip to content

Commit

Permalink
🐛 Fix crash when email headers contained unencoded unicode
Browse files Browse the repository at this point in the history
  • Loading branch information
pajowu authored and stefanw committed Nov 15, 2024
1 parent b276924 commit f7779cf
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 4 deletions.
10 changes: 6 additions & 4 deletions froide/helper/email_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
from dataclasses import dataclass, field
from datetime import datetime
from datetime import timezone as dt_timezone
from email.header import decode_header
from email.message import EmailMessage
from email.header import Header, decode_header
from email.message import EmailMessage, Message
from email.parser import BytesParser as Parser
from email.utils import getaddresses, parseaddr, parsedate_to_datetime
from io import BytesIO
Expand Down Expand Up @@ -87,7 +87,7 @@ class EmailAttachment(BytesIO):


def parse_email_body(
msgobj: EmailMessage,
msgobj: Message,
) -> Tuple[List[str], List[str], List[EmailAttachment]]:
body = []
html = []
Expand Down Expand Up @@ -141,7 +141,7 @@ def parse_dispositions(dispo):
return dispo_name, dispo_dict


def parse_attachment(message_part, ignore_content_types=None):
def parse_attachment(message_part: Message, ignore_content_types=None):
# this gives more info than message_part.get_content_disposition()
content_disposition = message_part.get("Content-Disposition", None)
content_type = message_part.get_content_type()
Expand Down Expand Up @@ -245,6 +245,8 @@ def parse_header_field(field):
# But encoded words may have been split up!
# Let's remove newlines that are not preceded by
# encoded word terminator and try again
if isinstance(field, Header):
field = str(field)
field = re.sub(r"(?<!\?\=)\n ", "=20", field)
decodefrag = decode_header(field)

Expand Down
14 changes: 14 additions & 0 deletions froide/helper/tests/test_email_parsing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from io import BytesIO
from pathlib import Path

from ..email_parsing import parse_email
from .test_email_log_parsing import TEST_DATA_ROOT


def test_parse_utf8_subject():
    """Parse an email whose Subject mixes an encoded word with raw unicode.

    Putting unicode directly into an email header is undefined, so only the
    well-formatted leading part of the subject is checked.
    """
    fixture_path = Path(TEST_DATA_ROOT) / "email_utf8-subject.eml"
    raw_message = fixture_path.read_bytes()

    email = parse_email(BytesIO(raw_message))

    expected_prefix = "utf8-subject - äöüß - "
    assert email.subject.startswith(expected_prefix)
10 changes: 10 additions & 0 deletions froide/helper/tests/testdata/email_utf8-subject.eml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Return-Path: <MAILER-DAEMON>
Message-Id: <12345678@localhost>
From: Mitgliederservice <1234@localhost>
Subject: utf8-subject - =?utf-8?Q?=C3=A4=C3=B6=C3=BC=C3=9F?= - 🥲
To: mail@mail.example.com
Content-Transfer-Encoding: 7bit
Content-Type: text/html; charset=UTF-8
Date: Wed, 13 Nov 2024 20:49:29 +0100

EMail-Body

0 comments on commit f7779cf

Please sign in to comment.