diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst index 345b64001c1ace1..5d03c08e7914d9c 100644 --- a/Doc/library/email.utils.rst +++ b/Doc/library/email.utils.rst @@ -65,6 +65,11 @@ of the new API. *email address* parts. Returns a tuple of that information, unless the parse fails, in which case a 2-tuple of ``('', '')`` is returned. + If *strict* is true, use a strict parser which rejects malformed inputs. + + .. versionchanged:: 3.13 + Add *strict* optional parameter and reject malformed inputs by default. + .. function:: formataddr(pair, charset='utf-8') @@ -82,12 +87,15 @@ of the new API. Added the *charset* option. -.. function:: getaddresses(fieldvalues) +.. function:: getaddresses(fieldvalues, *, strict=True) This method returns a list of 2-tuples of the form returned by ``parseaddr()``. *fieldvalues* is a sequence of header field values as might be returned by - :meth:`Message.get_all `. Here's a simple - example that gets all the recipients of a message:: + :meth:`Message.get_all `. + + If *strict* is true, use a strict parser which rejects malformed inputs. + + Here's a simple example that gets all the recipients of a message:: from email.utils import getaddresses @@ -97,6 +105,9 @@ of the new API. resent_ccs = msg.get_all('resent-cc', []) all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) + .. versionchanged:: 3.13 + Add *strict* optional parameter and reject malformed inputs by default. + .. function:: parsedate(date) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 804c7a88d947119..3ac527959a43921 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -157,6 +157,17 @@ doctest :attr:`doctest.TestResults.skipped` attributes. (Contributed by Victor Stinner in :gh:`108794`.) +email +----- + +* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return + ``('', '')`` 2-tuples in more situations where invalid email addresses are + encountered instead of potentially inaccurate values. Add optional *strict* + parameter to these two functions: use ``strict=False`` to get the old + behavior, accept malformed inputs. + (Contributed by Thomas Dwyer for :gh:`102988` to ameliorate CVE-2023-27043 + fix.) + io -- diff --git a/Lib/email/utils.py b/Lib/email/utils.py index a49a8fa986ce0cf..ddb93ac74ac0ff2 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -103,12 +103,70 @@ def formataddr(pair, charset='utf-8'): return address +def _pre_parse_validation(email_header_fields): + accepted_values = [] + for v in email_header_fields: + s = v.replace('\\(', '').replace('\\)', '') + if s.count('(') != s.count(')'): + v = "('', '')" + accepted_values.append(v) -def getaddresses(fieldvalues): - """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" - all = COMMASPACE.join(str(v) for v in fieldvalues) - a = _AddressList(all) - return a.addresslist + return accepted_values + + +def _post_parse_validation(parsed_email_header_tuples): + accepted_values = [] + # The parser would have parsed a correctly formatted domain-literal + # The existence of an [ after parsing indicates a parsing failure + for v in parsed_email_header_tuples: + if '[' in v[1]: + v = ('', '') + accepted_values.append(v) + + return accepted_values + + +realname_comma_re = re.compile(r'"[^"]*,[^"]*"') + +def getaddresses(fieldvalues, *, strict=True): + """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. + + When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in + its place. + + If the resulting list of parsed address is greater than number of + fieldvalues in the input list a parsing error has occurred, so a list + containing a single empty 2-tuple [('', '')] is returned in its place. + This is done to avoid invalid output. + + Malformed input: getaddresses(['alice@example.com ']) + Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')] + Safe output: [('', '')] + + If strict is true, use a strict parser which rejects malformed inputs. + """ + if strict: + fieldvalues = [str(v) for v in fieldvalues] + fieldvalues = _pre_parse_validation(fieldvalues) + all = COMMASPACE.join(v for v in fieldvalues) + a = _AddressList(all) + result = _post_parse_validation(a.addresslist) + + # When a comma is used in the Real Name part it is not a deliminator + # So strip those out before counting the commas + n = 0 + for v in fieldvalues: + v = realname_comma_re.sub('', v) + n += v.count(',') + 1 + + if len(result) != n: + return [('', '')] + + return result + else: + all = COMMASPACE.join(str(v) for v in fieldvalues) + a = _AddressList(all) + return a.addresslist def _format_timetuple_and_zone(timetuple, zone): @@ -207,17 +265,34 @@ def parsedate_to_datetime(data): tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) -def parseaddr(addr): +def parseaddr(addr, *, strict=True): """ Parse addr into its constituent realname and email address parts. Return a tuple of realname and email address, unless the parse fails, in which case return a 2-tuple of ('', ''). + + If strict is true, use a strict parser which rejects malformed inputs. """ - addrs = _AddressList(addr).addresslist - if not addrs: - return '', '' - return addrs[0] + if strict: + if isinstance(addr, list): + addr = addr[0] + + if not isinstance(addr, str): + return ('', '') + + addr = _pre_parse_validation([addr])[0] + addrs = _post_parse_validation(_AddressList(addr).addresslist) + + if not addrs or len(addrs) > 1: + return ('', '') + + return addrs[0] + else: + addrs = _AddressList(addr).addresslist + if not addrs: + return '', '' + return addrs[0] # rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3. diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 512464f87162cde..f248f5f52e0abf7 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -3320,32 +3320,101 @@ def test_getaddresses(self): [('Al Person', 'aperson@dom.ain'), ('Bud Person', 'bperson@dom.ain')]) - def test_getaddresses_comma_in_name(self): - """GH-106669 regression test.""" - self.assertEqual( - utils.getaddresses( - [ - '"Bud, Person" ', - 'aperson@dom.ain (Al Person)', - '"Mariusz Felisiak" ', - ] - ), - [ - ('Bud, Person', 'bperson@dom.ain'), - ('Al Person', 'aperson@dom.ain'), - ('Mariusz Felisiak', 'to@example.com'), - ], - ) + def test_parsing_errors(self): + """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056""" + alice = 'alice@example.org' + bob = 'bob@example.com' + empty = ('', '') + + # Test utils.getaddresses() and utils.parseaddr() on malformed email + # addresses: default behavior (strict=True) rejects malformed address, + # and strict=True which tolerates malformed address. + for invalid_separator, expected_non_strict in ( + ('(', [(f'<{bob}>', alice)]), + (')', [('', alice), empty, ('', bob)]), + ('<', [('', alice), empty, ('', bob), empty]), + ('>', [('', alice), empty, ('', bob)]), + ('[', [('', f'{alice}[<{bob}>]')]), + (']', [('', alice), empty, ('', bob)]), + ('@', [empty, empty, ('', bob)]), + (';', [('', alice), empty, ('', bob)]), + (':', [('', alice), ('', bob)]), + ('.', [('', alice + '.'), ('', bob)]), + ('"', [('', alice), ('', f'<{bob}>')]), + ): + address = f'{alice}{invalid_separator}<{bob}>' + with self.subTest(address=address): + self.assertEqual(utils.getaddresses([address]), + [empty]) + self.assertEqual(utils.getaddresses([address], strict=False), + expected_non_strict) + + self.assertEqual(utils.parseaddr([address]), + empty) + self.assertEqual(utils.parseaddr([address], strict=False), + ('', address)) + + # Comma (',') is treated differently depending on strict parameter. + # Comma without quotes. + address = f'{alice},<{bob}>' + self.assertEqual(utils.getaddresses([address]), + [('', alice), ('', bob)]) + self.assertEqual(utils.parseaddr([address]), + empty) + self.assertEqual(utils.parseaddr([address], strict=False), + ('', address)) + + # Comma with quotes. + address = '"Alice, alice@example.org" ' + expected_strict = ('Alice, alice@example.org', 'bob@example.com') + self.assertEqual(utils.getaddresses([address]), [expected_strict]) + self.assertEqual(utils.parseaddr([address]), expected_strict) + self.assertEqual(utils.parseaddr([address], strict=False), + ('', address)) def test_getaddresses_nasty(self): - eq = self.assertEqual - eq(utils.getaddresses(['foo: ;']), [('', '')]) - eq(utils.getaddresses( - ['[]*-- =~$']), - [('', ''), ('', ''), ('', '*--')]) - eq(utils.getaddresses( - ['foo: ;', '"Jason R. Mastaler" ']), - [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) + for addresses, expected in ( + (['"Sürname, Firstname" '], + [('Sürname, Firstname', 'to@example.com')]), + + (['foo: ;'], + [('', '')]), + + (['foo: ;', '"Jason R. Mastaler" '], + [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]), + + ([r'Pete(A nice \) chap) '], + [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]), + + (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'], + [('', '')]), + + (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'], + [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]), + + (['John Doe '], + [('John Doe (comment)', 'jdoe@machine.example')]), + + (['"Mary Smith: Personal Account" '], + [('Mary Smith: Personal Account', 'smith@home.example')]), + + (['Undisclosed recipients:;'], + [('', '')]), + + ([r', "Giant; \"Big\" Box" '], + [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]), + ): + with self.subTest(addresses=addresses): + self.assertEqual(utils.getaddresses(addresses), + expected) + self.assertEqual(utils.getaddresses(addresses, strict=False), + expected) + + addresses = ['[]*-- =~$'] + self.assertEqual(utils.getaddresses(addresses), + [('', '')]) + self.assertEqual(utils.getaddresses(addresses, strict=False), + [('', ''), ('', ''), ('', '*--')]) def test_getaddresses_embedded_comment(self): """Test proper handling of a nested comment""" diff --git a/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst new file mode 100644 index 000000000000000..8a8d3351998e66d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst @@ -0,0 +1,6 @@ +:func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now +return ``('', '')`` 2-tuples in more situations where invalid email +addresses are encountered instead of potentially inaccurate values. Add +optional *strict* parameter to these two functions: use ``strict=False`` to +get the old behavior, accept malformed inputs. Patch by Thomas Dwyer to +ameliorate CVE-2023-27043 fix.