Skip to content

Commit

Permalink
bpo-40597: email: Use CTE if lines are longer than max_line_length co…
Browse files Browse the repository at this point in the history
…nsistently (gh-20038) (gh-20084)

raw_data_manager (default for EmailPolicy, EmailMessage)
does correct wrapping of 'text' parts as long as the message contains
characters outside of the 7-bit US-ASCII set: base64 or quoted-printable
Content-Transfer-Encoding is applied if the lines would be too long
without it.  It did not, however, do this for ascii-only text,
which could result in lines that were longer than
policy.max_line_length or even the RFC 5322 maximum of 998 characters.

This changeset fixes the heuristic so that if lines are longer than
policy.max_line_length, it will always apply a
content-transfer-encoding so that the lines are wrapped correctly.
(cherry picked from commit 6f2f475)

Co-authored-by: Arkadiusz Hiler <arek.l1@gmail.com>
  • Loading branch information
miss-islington and ivyl authored May 18, 2020
1 parent 7a3522d commit c1f1ddf
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 7 deletions.
14 changes: 7 additions & 7 deletions Lib/email/contentmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,13 @@ def embedded_body(lines): return linesep.join(lines) + linesep
def normal_body(lines): return b'\n'.join(lines) + b'\n'
if cte==None:
# Use heuristics to decide on the "best" encoding.
try:
return '7bit', normal_body(lines).decode('ascii')
except UnicodeDecodeError:
pass
if (policy.cte_type == '8bit' and
max(len(x) for x in lines) <= policy.max_line_length):
return '8bit', normal_body(lines).decode('ascii', 'surrogateescape')
if max(len(x) for x in lines) <= policy.max_line_length:
try:
return '7bit', normal_body(lines).decode('ascii')
except UnicodeDecodeError:
pass
if policy.cte_type == '8bit':
return '8bit', normal_body(lines).decode('ascii', 'surrogateescape')
sniff = embedded_body(lines[:10])
sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'),
policy.max_line_length)
Expand Down
15 changes: 15 additions & 0 deletions Lib/test/test_email/test_contentmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,21 @@ def test_set_text_charset_latin_1(self):
self.assertEqual(m.get_payload(decode=True).decode('utf-8'), content)
self.assertEqual(m.get_content(), content)

def test_set_text_plain_long_line_heuristics(self):
m = self._make_message()
content = ("Simple but long message that is over 78 characters"
" long to force transfer encoding.\n")
raw_data_manager.set_content(m, content)
self.assertEqual(str(m), textwrap.dedent("""\
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Simple but long message that is over 78 characters long to =
force transfer encoding.
"""))
self.assertEqual(m.get_payload(decode=True).decode('utf-8'), content)
self.assertEqual(m.get_content(), content)

def test_set_text_short_line_minimal_non_ascii_heuristics(self):
m = self._make_message()
content = "et là il est monté sur moi et il commence à m'éto.\n"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
If text content lines are longer than policy.max_line_length, always use a content-transfer-encoding to make sure they are wrapped.

0 comments on commit c1f1ddf

Please sign in to comment.