Changes as per review

maxking · maxking · commit f1015d88ee2b · 2019-05-22T21:09:23.000-07:00
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
@@ -75,7 +75,6 @@
 from email import _encoded_words as _ew
 from email import errors
 from email import utils
-from email.header import ecre as rfc2047_matcher
 #
 # Useful constants and functions
 #
@@ -96,6 +95,18 @@
 def quote_string(value):
     return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
 
+# Match an RFC 2047 encoded word, which looks like =?utf-8?q?someword?=
+rfc2047_matcher = re.compile(r'''
+   =\?            # literal =?
+   [^?]*          # charset
+   \?             # literal ?
+   [qQbB]         # literal 'q' or 'b', case insensitive
+   \?             # literal ?
+  .*?             # encoded word
+  \?=             # literal ?=
+''', re.VERBOSE | re.MULTILINE)
+
+
 #
 # TokenList and its subclasses
 #
@@ -1049,8 +1060,8 @@ def get_encoded_word(value):
         _validate_xtext(vtext)
         ew.append(vtext)
         text = ''.join(remainder)
-    # Encoded words should be followed by a LWS.
-    if value and value[0] != ' ':
+    # Encoded words should be followed by whitespace (a WSP character).
+    if value and value[0] not in WSP:
         ew.defects.append(errors.InvalidHeaderDefect(
             "missing trailing whitespace after encoded-word"))
     return ew, value
@@ -1106,7 +1117,8 @@ def get_unstructured(value):
                 continue
         tok, *remainder = _wsp_splitter(value, 1)
         # Split in the middle of an atom if there is a rfc2047 encoded word
-        # which does not have WS on both sides.
+        # which does not have WSP on both sides. The defect will be registered
+        # the next time through the loop.
         if rfc2047_matcher.search(tok):
             tok, *remainder = value.partition('=?')
         vtext = ValueTerminal(tok, 'vtext')
diff --git a/Misc/NEWS.d/next/Library/2019-05-19-10-48-46.bpo-21315.PgXVqF.rst b/Misc/NEWS.d/next/Library/2019-05-19-10-48-46.bpo-21315.PgXVqF.rst
@@ -1,3 +1,4 @@
-Email headers containing 2047 encoded words with no leading whitespace are
-parsed correctly. Also, missing trailing whitespaces now register a defect
-instead of silently ignoring.
+Email headers containing RFC 2047 encoded words are now parsed even when the
+surrounding whitespace is missing, and a defect is registered. Missing
+trailing whitespace after an encoded word is also registered as a defect.
+