10
10
from . import treewalkers , _utils
11
11
from xml .sax .saxutils import escape
12
12
13
- spaceCharacters = "" .join (spaceCharacters )
14
-
15
- quoteAttributeSpecChars = spaceCharacters + "\" '=<>`"
16
- quoteAttributeSpec = re .compile ("[" + quoteAttributeSpecChars + "]" )
17
- quoteAttributeLegacy = re .compile ("[" + quoteAttributeSpecChars +
18
- "\x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \t \n "
19
- "\x0b \x0c \r \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 "
20
- "\x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e \x1f "
21
- "\x20 \x2f \x60 \xa0 \u1680 \u180e \u180f \u2000 "
22
- "\u2001 \u2002 \u2003 \u2004 \u2005 \u2006 \u2007 "
23
- "\u2008 \u2009 \u200a \u2028 \u2029 \u202f \u205f "
24
- "\u3000 ]" )
25
-
26
-
27
- encode_entity_map = {}
28
- is_ucs4 = len ("\U0010FFFF " ) == 1
13
+ _quoteAttributeSpecChars = "" .join (spaceCharacters ) + "\" '=<>`"
14
+ _quoteAttributeSpec = re .compile ("[" + _quoteAttributeSpecChars + "]" )
15
+ _quoteAttributeLegacy = re .compile ("[" + _quoteAttributeSpecChars +
16
+ "\x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \t \n "
17
+ "\x0b \x0c \r \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 "
18
+ "\x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e \x1f "
19
+ "\x20 \x2f \x60 \xa0 \u1680 \u180e \u180f \u2000 "
20
+ "\u2001 \u2002 \u2003 \u2004 \u2005 \u2006 \u2007 "
21
+ "\u2008 \u2009 \u200a \u2028 \u2029 \u202f \u205f "
22
+ "\u3000 ]" )
23
+
24
+
25
+ _encode_entity_map = {}
26
+ _is_ucs4 = len ("\U0010FFFF " ) == 1
29
27
for k , v in list (entities .items ()):
30
28
# skip multi-character entities
31
- if ((is_ucs4 and len (v ) > 1 ) or
32
- (not is_ucs4 and len (v ) > 2 )):
29
+ if ((_is_ucs4 and len (v ) > 1 ) or
30
+ (not _is_ucs4 and len (v ) > 2 )):
33
31
continue
34
32
if v != "&" :
35
33
if len (v ) == 2 :
36
34
v = _utils .surrogatePairToCodepoint (v )
37
35
else :
38
36
v = ord (v )
39
- if v not in encode_entity_map or k .islower ():
37
+ if v not in _encode_entity_map or k .islower ():
40
38
# prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
41
- encode_entity_map [v ] = k
39
+ _encode_entity_map [v ] = k
42
40
43
41
44
42
def htmlentityreplace_errors (exc ):
@@ -58,7 +56,7 @@ def htmlentityreplace_errors(exc):
58
56
codepoint = ord (c )
59
57
codepoints .append (codepoint )
60
58
for cp in codepoints :
61
- e = encode_entity_map .get (cp )
59
+ e = _encode_entity_map .get (cp )
62
60
if e :
63
61
res .append ("&" )
64
62
res .append (e )
@@ -258,9 +256,9 @@ def serialize(self, treewalker, encoding=None):
258
256
if self .quote_attr_values == "always" or len (v ) == 0 :
259
257
quote_attr = True
260
258
elif self .quote_attr_values == "spec" :
261
- quote_attr = quoteAttributeSpec .search (v ) is not None
259
+ quote_attr = _quoteAttributeSpec .search (v ) is not None
262
260
elif self .quote_attr_values == "legacy" :
263
- quote_attr = quoteAttributeLegacy .search (v ) is not None
261
+ quote_attr = _quoteAttributeLegacy .search (v ) is not None
264
262
else :
265
263
raise ValueError ("quote_attr_values must be one of: "
266
264
"'always', 'spec', or 'legacy'" )
0 commit comments