9
9
import re
10
10
import sys
11
11
12
# Python 2/3/MicroPython compatibility:
#   - iterating through bytes is different, so bytes_cons is chosen per
#     interpreter major version
#   - codepoint2name from html.entities is hard-coded in the table that
#     follows, so no version-specific import is needed for it
if sys.version_info[0] == 2:

    def bytes_cons(val, enc=None):
        """Return val as a bytearray; enc is accepted but ignored.

        The enc parameter exists only so that callers can use the same
        two-argument call form as the Python 3 bytes constructor.
        """
        return bytearray(val)

elif sys.version_info[0] == 3:  # Also handles MicroPython
    bytes_cons = bytes
19
+
20
+ # fmt: off
21
+ codepoint2name = {
22
+ 198 : "AElig" , 193 : "Aacute" , 194 : "Acirc" , 192 : "Agrave" , 913 : "Alpha" , 197 : "Aring" , 195 : "Atilde" ,
23
+ 196 : "Auml" , 914 : "Beta" , 199 : "Ccedil" , 935 : "Chi" , 8225 : "Dagger" , 916 : "Delta" , 208 : "ETH" ,
24
+ 201 : "Eacute" , 202 : "Ecirc" , 200 : "Egrave" , 917 : "Epsilon" , 919 : "Eta" , 203 : "Euml" , 915 : "Gamma" ,
25
+ 205 : "Iacute" , 206 : "Icirc" , 204 : "Igrave" , 921 : "Iota" , 207 : "Iuml" , 922 : "Kappa" , 923 : "Lambda" ,
26
+ 924 : "Mu" , 209 : "Ntilde" , 925 : "Nu" , 338 : "OElig" , 211 : "Oacute" , 212 : "Ocirc" , 210 : "Ograve" ,
27
+ 937 : "Omega" , 927 : "Omicron" , 216 : "Oslash" , 213 : "Otilde" , 214 : "Ouml" , 934 : "Phi" , 928 : "Pi" ,
28
+ 8243 : "Prime" , 936 : "Psi" , 929 : "Rho" , 352 : "Scaron" , 931 : "Sigma" , 222 : "THORN" , 932 : "Tau" ,
29
+ 920 : "Theta" , 218 : "Uacute" , 219 : "Ucirc" , 217 : "Ugrave" , 933 : "Upsilon" , 220 : "Uuml" , 926 : "Xi" ,
30
+ 221 : "Yacute" , 376 : "Yuml" , 918 : "Zeta" , 225 : "aacute" , 226 : "acirc" , 180 : "acute" , 230 : "aelig" ,
31
+ 224 : "agrave" , 8501 : "alefsym" , 945 : "alpha" , 38 : "amp" , 8743 : "and" , 8736 : "ang" , 229 : "aring" ,
32
+ 8776 : "asymp" , 227 : "atilde" , 228 : "auml" , 8222 : "bdquo" , 946 : "beta" , 166 : "brvbar" , 8226 : "bull" ,
33
+ 8745 : "cap" , 231 : "ccedil" , 184 : "cedil" , 162 : "cent" , 967 : "chi" , 710 : "circ" , 9827 : "clubs" ,
34
+ 8773 : "cong" , 169 : "copy" , 8629 : "crarr" , 8746 : "cup" , 164 : "curren" , 8659 : "dArr" , 8224 : "dagger" ,
35
+ 8595 : "darr" , 176 : "deg" , 948 : "delta" , 9830 : "diams" , 247 : "divide" , 233 : "eacute" , 234 : "ecirc" ,
36
+ 232 : "egrave" , 8709 : "empty" , 8195 : "emsp" , 8194 : "ensp" , 949 : "epsilon" , 8801 : "equiv" , 951 : "eta" ,
37
+ 240 : "eth" , 235 : "euml" , 8364 : "euro" , 8707 : "exist" , 402 : "fnof" , 8704 : "forall" , 189 : "frac12" ,
38
+ 188 : "frac14" , 190 : "frac34" , 8260 : "frasl" , 947 : "gamma" , 8805 : "ge" , 62 : "gt" , 8660 : "hArr" ,
39
+ 8596 : "harr" , 9829 : "hearts" , 8230 : "hellip" , 237 : "iacute" , 238 : "icirc" , 161 : "iexcl" , 236 : "igrave" ,
40
+ 8465 : "image" , 8734 : "infin" , 8747 : "int" , 953 : "iota" , 191 : "iquest" , 8712 : "isin" , 239 : "iuml" ,
41
+ 954 : "kappa" , 8656 : "lArr" , 955 : "lambda" , 9001 : "lang" , 171 : "laquo" , 8592 : "larr" , 8968 : "lceil" ,
42
+ 8220 : "ldquo" , 8804 : "le" , 8970 : "lfloor" , 8727 : "lowast" , 9674 : "loz" , 8206 : "lrm" , 8249 : "lsaquo" ,
43
+ 8216 : "lsquo" , 60 : "lt" , 175 : "macr" , 8212 : "mdash" , 181 : "micro" , 183 : "middot" , 8722 : "minus" ,
44
+ 956 : "mu" , 8711 : "nabla" , 160 : "nbsp" , 8211 : "ndash" , 8800 : "ne" , 8715 : "ni" , 172 : "not" , 8713 : "notin" ,
45
+ 8836 : "nsub" , 241 : "ntilde" , 957 : "nu" , 243 : "oacute" , 244 : "ocirc" , 339 : "oelig" , 242 : "ograve" ,
46
+ 8254 : "oline" , 969 : "omega" , 959 : "omicron" , 8853 : "oplus" , 8744 : "or" , 170 : "ordf" , 186 : "ordm" ,
47
+ 248 : "oslash" , 245 : "otilde" , 8855 : "otimes" , 246 : "ouml" , 182 : "para" , 8706 : "part" , 8240 : "permil" ,
48
+ 8869 : "perp" , 966 : "phi" , 960 : "pi" , 982 : "piv" , 177 : "plusmn" , 163 : "pound" , 8242 : "prime" ,
49
+ 8719 : "prod" , 8733 : "prop" , 968 : "psi" , 34 : "quot" , 8658 : "rArr" , 8730 : "radic" , 9002 : "rang" ,
50
+ 187 : "raquo" , 8594 : "rarr" , 8969 : "rceil" , 8221 : "rdquo" , 8476 : "real" , 174 : "reg" , 8971 : "rfloor" ,
51
+ 961 : "rho" , 8207 : "rlm" , 8250 : "rsaquo" , 8217 : "rsquo" , 8218 : "sbquo" , 353 : "scaron" , 8901 : "sdot" ,
52
+ 167 : "sect" , 173 : "shy" , 963 : "sigma" , 962 : "sigmaf" , 8764 : "sim" , 9824 : "spades" , 8834 : "sub" ,
53
+ 8838 : "sube" , 8721 : "sum" , 8835 : "sup" , 185 : "sup1" , 178 : "sup2" , 179 : "sup3" , 8839 : "supe" ,
54
+ 223 : "szlig" , 964 : "tau" , 8756 : "there4" , 952 : "theta" , 977 : "thetasym" , 8201 : "thinsp" , 254 : "thorn" ,
55
+ 732 : "tilde" , 215 : "times" , 8482 : "trade" , 8657 : "uArr" , 250 : "uacute" , 8593 : "uarr" , 251 : "ucirc" ,
56
+ 249 : "ugrave" , 168 : "uml" , 978 : "upsih" , 965 : "upsilon" , 252 : "uuml" , 8472 : "weierp" , 958 : "xi" ,
57
+ 253 : "yacute" , 165 : "yen" , 255 : "yuml" , 950 : "zeta" , 8205 : "zwj" , 8204 : "zwnj"
58
+ }
59
+ # fmt: on
23
60
# end compatibility code
24
61
25
62
codepoint2name [ord ("-" )] = "hyphen"
295
332
"<genexpr>" ,
296
333
}
297
334
335
+ # Matches a single character that needs no escaping (alphanumeric or underscore)
336
+ RE_NO_ESCAPE = re .compile (r"^[a-zA-Z0-9_]$" )
337
+
298
338
299
339
# this must match the equivalent function in qstr.c
300
340
def compute_hash (qstr , bytes_hash ):
@@ -307,15 +347,17 @@ def compute_hash(qstr, bytes_hash):
307
347
308
348
309
349
def qstr_escape(qst):
    """Return qst with every non-identifier character escaped.

    Characters accepted by RE_NO_ESCAPE are copied through unchanged.
    Any other character becomes "_<name>_", where <name> is its entry in
    codepoint2name when one exists, or otherwise its code point formatted
    as "0x" followed by at least two lowercase hex digits.
    """

    def esc_char(ch):
        # Identifier-safe characters pass through untouched.
        if RE_NO_ESCAPE.match(ch):
            return ch
        codepoint = ord(ch)
        # Prefer the symbolic entity name; fall back to the hex code point.
        if codepoint in codepoint2name:
            label = codepoint2name[codepoint]
        else:
            label = "0x%02x" % codepoint
        return "_%s_" % label

    return "".join([esc_char(ch) for ch in qst])
319
361
320
362
321
363
static_qstr_list_ident = list (map (qstr_escape , static_qstr_list ))
0 commit comments