|
75 | 75 | from email import _encoded_words as _ew |
76 | 76 | from email import errors |
77 | 77 | from email import utils |
78 | | -from email.header import ecre as rfc2047_matcher |
79 | 78 | # |
80 | 79 | # Useful constants and functions |
81 | 80 | # |
|
96 | 95 | def quote_string(value): # Return str(value) wrapped in double quotes, backslash-escaping each backslash and double quote. |
97 | 96 | return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' |
98 | 97 |
|
| 98 | +# Match an RFC 2047 encoded word, which looks like =?utf-8?q?someword?= |
| 99 | +rfc2047_matcher = re.compile(r''' |
| 100 | + =\? # literal =? |
| 101 | + [^?]* # charset |
| 102 | + \? # literal ? |
| 103 | + [qQbB] # literal 'q' or 'b', case insensitive |
| 104 | + \? # literal ? |
| 105 | + .*? # encoded word |
| 106 | + \?= # literal ?= |
| 107 | +''', re.VERBOSE | re.MULTILINE) |
| 108 | + |
| 109 | + |
99 | 110 | # |
100 | 111 | # TokenList and its subclasses |
101 | 112 | # |
@@ -1049,8 +1060,8 @@ def get_encoded_word(value): |
1049 | 1060 | _validate_xtext(vtext) |
1050 | 1061 | ew.append(vtext) |
1051 | 1062 | text = ''.join(remainder) |
1052 | | - # Encoded words should be followed by a LWS. |
1053 | | - if value and value[0] != ' ': |
| 1063 | + # Encoded words should be followed by WSP. |
| 1064 | + if value and value[0] not in WSP: |
1054 | 1065 | ew.defects.append(errors.InvalidHeaderDefect( |
1055 | 1066 | "missing trailing whitespace after encoded-word")) |
1056 | 1067 | return ew, value |
@@ -1106,7 +1117,8 @@ def get_unstructured(value): |
1106 | 1117 | continue |
1107 | 1118 | tok, *remainder = _wsp_splitter(value, 1) |
1108 | 1119 | # Split in the middle of an atom if there is a rfc2047 encoded word |
1109 | | - # which does not have WS on both sides. |
| 1120 | + # which does not have WSP on both sides. The defect will be registered |
| 1121 | + # the next time through the loop. |
1110 | 1122 | if rfc2047_matcher.search(tok): |
1111 | 1123 | tok, *remainder = value.partition('=?') |
1112 | 1124 | vtext = ValueTerminal(tok, 'vtext') |
|
0 commit comments