From d3ca18a98e6876811a6dab9a23af15862e37466d Mon Sep 17 00:00:00 2001 From: Bas Bloemsaat Date: Sun, 14 Jul 2024 14:31:04 +0200 Subject: [PATCH 1/7] detect line splits, also at the end --- Lib/email/policy.py | 2 +- Lib/test/test_email/test_policy.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/email/policy.py b/Lib/email/policy.py index 46b7de5bb6d8ae..9150956ce2d5ed 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -140,7 +140,7 @@ def header_store_parse(self, name, value): """ if hasattr(value, 'name') and value.name.lower() == name.lower(): return (name, value) - if isinstance(value, str) and len(value.splitlines())>1: + if isinstance(value, str) and linesep_splitter.search(value): # XXX this error message isn't quite right when we use splitlines # (see issue 22233), but I'm not sure what should happen here. raise ValueError("Header values may not contain linefeed " diff --git a/Lib/test/test_email/test_policy.py b/Lib/test/test_email/test_policy.py index c6b9c80efe1b54..24a91d45d03d13 100644 --- a/Lib/test/test_email/test_policy.py +++ b/Lib/test/test_email/test_policy.py @@ -398,6 +398,9 @@ def test_header_store_parse_rejects_newlines(self): instance.header_store_parse, 'From', 'spam\negg@foo.py') + self.assertRaises(ValueError, + instance.header_store_parse, + 'Subject', 'te\nst') if __name__ == '__main__': unittest.main() From 891d4f8d5d6d11ab4273431996542a07429fd145 Mon Sep 17 00:00:00 2001 From: Bas Bloemsaat Date: Mon, 15 Jul 2024 21:48:20 +0200 Subject: [PATCH 2/7] also detect encoded newlines --- Lib/email/policy.py | 8 ++++---- Lib/test/test_email/test_policy.py | 4 ++++ .../2024-07-15-21-43-38.gh-issue-121650.A0FkCh.rst | 2 ++ 3 files changed, 10 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-07-15-21-43-38.gh-issue-121650.A0FkCh.rst diff --git a/Lib/email/policy.py b/Lib/email/policy.py index 9150956ce2d5ed..a289f14a7c38a9 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -140,11 +140,11 @@ def header_store_parse(self, name, value): """ if hasattr(value, 'name') and value.name.lower() == name.lower(): return (name, value) - if isinstance(value, str) and linesep_splitter.search(value): - # XXX this error message isn't quite right when we use splitlines - # (see issue 22233), but I'm not sure what should happen here. + if (isinstance(value, str) and + linesep_splitter.search(self.header_factory(name, value)) + ): raise ValueError("Header values may not contain linefeed " - "or carriage return characters") + "or carriage return characters") return (name, self.header_factory(name, value)) def header_fetch_parse(self, name, value): diff --git a/Lib/test/test_email/test_policy.py b/Lib/test/test_email/test_policy.py index 24a91d45d03d13..312c89309ffef2 100644 --- a/Lib/test/test_email/test_policy.py +++ b/Lib/test/test_email/test_policy.py @@ -402,5 +402,9 @@ def test_header_store_parse_rejects_newlines(self): instance.header_store_parse, 'Subject', 'te\nst') + self.assertRaises(ValueError, + instance.header_store_parse, + 'Subject', 'A 💩 subject=?UTF-8?Q?=0A?=Bcc: injected@example.com') + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Library/2024-07-15-21-43-38.gh-issue-121650.A0FkCh.rst b/Misc/NEWS.d/next/Library/2024-07-15-21-43-38.gh-issue-121650.A0FkCh.rst new file mode 100644 index 00000000000000..9db83d6709bd82 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-15-21-43-38.gh-issue-121650.A0FkCh.rst @@ -0,0 +1,2 @@ +Fixes newlines (and carriage returns) in email headers, both at the end of +the header, and encoded entities that are resolved to newlines and returns From 62ceb4089683a8182d590cd9e38ff8e8bc7a15b6 Mon Sep 17 00:00:00 2001 From: Bas Bloemsaat Date: Tue, 16 Jul 2024 09:36:43 +0200 Subject: [PATCH 3/7] Update Lib/email/policy.py Co-authored-by: Peter Bierma --- Lib/email/policy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/email/policy.py b/Lib/email/policy.py index a289f14a7c38a9..655b06bd85fb5d 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -144,7 +144,7 @@ def header_store_parse(self, name, value): linesep_splitter.search(self.header_factory(name, value)) ): raise ValueError("Header values may not contain linefeed " - "or carriage return characters") + "or carriage return characters") return (name, self.header_factory(name, value)) def header_fetch_parse(self, name, value): From aa4fd24dc7179d147e8e8cabdd4a50caf82f6be8 Mon Sep 17 00:00:00 2001 From: Bas Bloemsaat Date: Tue, 16 Jul 2024 09:37:00 +0200 Subject: [PATCH 4/7] Update Misc/NEWS.d/next/Library/2024-07-15-21-43-38.gh-issue-121650.A0FkCh.rst Co-authored-by: Peter Bierma --- .../next/Library/2024-07-15-21-43-38.gh-issue-121650.A0FkCh.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-07-15-21-43-38.gh-issue-121650.A0FkCh.rst b/Misc/NEWS.d/next/Library/2024-07-15-21-43-38.gh-issue-121650.A0FkCh.rst index 9db83d6709bd82..09f2468a4c2972 100644 --- a/Misc/NEWS.d/next/Library/2024-07-15-21-43-38.gh-issue-121650.A0FkCh.rst +++ b/Misc/NEWS.d/next/Library/2024-07-15-21-43-38.gh-issue-121650.A0FkCh.rst @@ -1,2 +1,2 @@ -Fixes newlines (and carriage returns) in email headers, both at the end of +Fixed newlines and carriage returns in :mod:`email` headers, both at the end of the header, and encoded entities that are resolved to newlines and returns From 23fdf74761707639c59003fc15d85bd5b40550a9 Mon Sep 17 00:00:00 2001 From: Bas Bloemsaat Date: Tue, 16 Jul 2024 10:20:52 +0200 Subject: [PATCH 5/7] also check parsed headers for newlines and returns --- Lib/email/policy.py | 12 +++++++----- Lib/test/test_email/test_policy.py | 20 +++++++++++++++++++- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/Lib/email/policy.py b/Lib/email/policy.py index 655b06bd85fb5d..27e9b5cfdda7c7 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -139,13 +139,15 @@ def header_store_parse(self, name, value): """ if hasattr(value, 'name') and value.name.lower() == name.lower(): - return (name, value) - if (isinstance(value, str) and - linesep_splitter.search(self.header_factory(name, value)) - ): + header_value = value + else: + header_value = self.header_factory(name, value) + + if linesep_splitter.search(str(header_value)): raise ValueError("Header values may not contain linefeed " "or carriage return characters") - return (name, self.header_factory(name, value)) + + return (name, header_value) def header_fetch_parse(self, name, value): """+ diff --git a/Lib/test/test_email/test_policy.py b/Lib/test/test_email/test_policy.py index 312c89309ffef2..f6142a74b03e9c 100644 --- a/Lib/test/test_email/test_policy.py +++ b/Lib/test/test_email/test_policy.py @@ -404,7 +404,25 @@ def test_header_store_parse_rejects_newlines(self): self.assertRaises(ValueError, instance.header_store_parse, - 'Subject', 'A 💩 subject=?UTF-8?Q?=0A?=Bcc: injected@example.com') + 'Subject', + 'A 💩 subject=?UTF-8?Q?=0A?=Bcc: injected@example.com') + + self.assertRaises(ValueError, + instance.header_store_parse, + 'Subject', + "Here's an =?UTF-8?Q?embedded_newline=0A?=") + + factory = headerregistry.HeaderRegistry() + h = factory('subject', 'te\nst') + self.assertRaises(ValueError, + instance.header_store_parse, + 'Subject', h) + + h = factory('subject', + "Here's an =?UTF-8?Q?embedded_newline=0A?=") + self.assertRaises(ValueError, + instance.header_store_parse, + 'Subject', h) if __name__ == '__main__': unittest.main() From d2109413c52c3f18b5ee8b15bd6247cec393911e Mon Sep 17 00:00:00 2001 From: Bas Bloemsaat Date: Tue, 16 Jul 2024 10:28:48 +0200 Subject: [PATCH 6/7] More specific update blurb --- .../Library/2024-07-15-21-43-38.gh-issue-121650.A0FkCh.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2024-07-15-21-43-38.gh-issue-121650.A0FkCh.rst b/Misc/NEWS.d/next/Library/2024-07-15-21-43-38.gh-issue-121650.A0FkCh.rst index 09f2468a4c2972..4a55b181b311ff 100644 --- a/Misc/NEWS.d/next/Library/2024-07-15-21-43-38.gh-issue-121650.A0FkCh.rst +++ b/Misc/NEWS.d/next/Library/2024-07-15-21-43-38.gh-issue-121650.A0FkCh.rst @@ -1,2 +1,3 @@ -Fixed newlines and carriage returns in :mod:`email` headers, both at the end of -the header, and encoded entities that are resolved to newlines and returns +Fix :mod:`email.policy` to properly disallow newlines and carriage returns +in email headers, both at the end of the header, and encoded entities that are +resolved to newlines and carriage returns. From 5a163b54833062dccc3db0019feb612af028ccea Mon Sep 17 00:00:00 2001 From: Bas Bloemsaat Date: Sat, 20 Jul 2024 00:03:56 +0200 Subject: [PATCH 7/7] check all headers for newlines --- Lib/email/policy.py | 10 +++++++++- Lib/test/test_email/test_policy.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/Lib/email/policy.py b/Lib/email/policy.py index 27e9b5cfdda7c7..fc9c6c07985fdb 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -9,6 +9,7 @@ from email.headerregistry import HeaderRegistry as HeaderRegistry from email.contentmanager import raw_data_manager from email.message import EmailMessage +from email.errors import NonPrintableDefect __all__ = [ 'Compat32', @@ -143,7 +144,14 @@ def header_store_parse(self, name, value): else: header_value = self.header_factory(name, value) - if linesep_splitter.search(str(header_value)): + np = [ + non_printable + for defect in header_value.defects + if isinstance(defect, NonPrintableDefect) + for non_printable in defect.non_printables + ] + + if linesep_splitter.search(str(header_value)) or '\n' in np or '\r' in np: raise ValueError("Header values may not contain linefeed " "or carriage return characters") diff --git a/Lib/test/test_email/test_policy.py b/Lib/test/test_email/test_policy.py index f6142a74b03e9c..bf4e71dc717692 100644 --- a/Lib/test/test_email/test_policy.py +++ b/Lib/test/test_email/test_policy.py @@ -424,5 +424,34 @@ def test_header_store_parse_rejects_newlines(self): instance.header_store_parse, 'Subject', h) + self.assertRaises(ValueError, + instance.header_store_parse, + 'From', + "External Sender =?UTF-8?Q?embedded_newline=0A?=Smuggled-Data: Bad") + + self.assertRaises(ValueError, + instance.header_store_parse, + 'Content-Type', + "text/html; charset=UTF-8=?UTF-8?Q?embedded_newline=0A?=Smuggled-Data: Bad") + + self.assertRaises(ValueError, + instance.header_store_parse, + 'Content-Transfer-Encoding', + "External Sender =?UTF-8?Q?embedded_newline=0A?=Smuggled-Data: Bad") + + self.assertRaises(ValueError, + instance.header_store_parse, + 'Message-Id', + "1212334141-321313=?UTF-8?Q?embedded_newline=0A?=Smuggled-Data: Bad") + + self.assertRaises(ValueError, + instance.header_store_parse, + 'content-disposition', + "inline=?UTF-8?Q?embedded_newline=0A?=Smuggled-Data: Bad") + + # date and mime headers don't have embedded newlines, nor errors + + + if __name__ == '__main__': unittest.main()