|
15 | 15 | # define USE_UNICODE_WCHAR_CACHE 1
|
16 | 16 | #endif /* USE_UNICODE_WCHAR_CACHE */
|
17 | 17 |
|
18 |
| -/* Since splitting on whitespace is an important use case, and |
19 |
| - whitespace in most situations is solely ASCII whitespace, we |
20 |
| - optimize for the common case by using a quick look-up table |
21 |
| - _Py_ascii_whitespace (see below) with an inlined check. |
22 |
| -
|
23 |
| - */ |
24 |
| -#define Py_UNICODE_ISSPACE(ch) \ |
25 |
| - ((Py_UCS4)(ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch)) |
26 |
| - |
27 |
| -#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch) |
28 |
| -#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch) |
29 |
| -#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch) |
30 |
| -#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch) |
31 |
| - |
32 |
| -#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch) |
33 |
| -#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch) |
34 |
| -#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch) |
35 |
| - |
36 |
| -#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch) |
37 |
| -#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch) |
38 |
| -#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch) |
39 |
| -#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch) |
| 18 | +// Static inline functions to work with surrogates |
| 19 | +static inline int Py_UNICODE_IS_SURROGATE(Py_UCS4 ch) { |
| 20 | + return (0xD800 <= ch && ch <= 0xDFFF); |
| 21 | +} |
| 22 | +static inline int Py_UNICODE_IS_HIGH_SURROGATE(Py_UCS4 ch) { |
| 23 | + return (0xD800 <= ch && ch <= 0xDBFF); |
| 24 | +} |
| 25 | +static inline int Py_UNICODE_IS_LOW_SURROGATE(Py_UCS4 ch) { |
| 26 | + return (0xDC00 <= ch && ch <= 0xDFFF); |
| 27 | +} |
40 | 28 |
|
41 |
| -#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch) |
42 |
| -#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch) |
43 |
| -#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch) |
| 29 | +// Join two surrogate characters and return a single Py_UCS4 value. |
| 30 | +static inline Py_UCS4 Py_UNICODE_JOIN_SURROGATES(Py_UCS4 high, Py_UCS4 low) { |
| 31 | + return 0x10000 + (((high & 0x03FF) << 10) | (low & 0x03FF)); |
| 32 | +} |
44 | 33 |
|
45 |
| -#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch) |
| 34 | +// High surrogate = top 10 bits added to D800 |
| 35 | +static inline Py_UCS4 Py_UNICODE_HIGH_SURROGATE(Py_UCS4 ch) { |
| 36 | + return (0xD800 - (0x10000 >> 10) + (ch >> 10)); |
| 37 | +} |
46 | 38 |
|
47 |
| -#define Py_UNICODE_ISALNUM(ch) \ |
48 |
| - (Py_UNICODE_ISALPHA(ch) || \ |
49 |
| - Py_UNICODE_ISDECIMAL(ch) || \ |
50 |
| - Py_UNICODE_ISDIGIT(ch) || \ |
51 |
| - Py_UNICODE_ISNUMERIC(ch)) |
52 |
| - |
53 |
| -/* macros to work with surrogates */ |
54 |
| -#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF) |
55 |
| -#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF) |
56 |
| -#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF) |
57 |
| -/* Join two surrogate characters and return a single Py_UCS4 value. */ |
58 |
| -#define Py_UNICODE_JOIN_SURROGATES(high, low) \ |
59 |
| - (((((Py_UCS4)(high) & 0x03FF) << 10) | \ |
60 |
| - ((Py_UCS4)(low) & 0x03FF)) + 0x10000) |
61 |
| -/* high surrogate = top 10 bits added to D800 */ |
62 |
| -#define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10)) |
63 |
| -/* low surrogate = bottom 10 bits added to DC00 */ |
64 |
| -#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF)) |
| 39 | +// Low surrogate = bottom 10 bits added to DC00 |
| 40 | +static inline Py_UCS4 Py_UNICODE_LOW_SURROGATE(Py_UCS4 ch) { |
| 41 | + return (0xDC00 + (ch & 0x3FF)); |
| 42 | +} |
65 | 43 |
|
66 | 44 | /* --- Unicode Type ------------------------------------------------------- */
|
67 | 45 |
|
@@ -1013,10 +991,6 @@ PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
|
1013 | 991 |
|
1014 | 992 | /* === Characters Type APIs =============================================== */
|
1015 | 993 |
|
1016 |
| -/* Helper array used by Py_UNICODE_ISSPACE(). */ |
1017 |
| - |
1018 |
| -PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[]; |
1019 |
| - |
1020 | 994 | /* These should not be used directly. Use the Py_UNICODE_IS* and
|
1021 | 995 | Py_UNICODE_TO* macros instead.
|
1022 | 996 |
|
@@ -1124,6 +1098,50 @@ PyAPI_FUNC(int) _PyUnicode_IsAlpha(
|
1124 | 1098 | Py_UCS4 ch /* Unicode character */
|
1125 | 1099 | );
|
1126 | 1100 |
|
| 1101 | +// Helper array used by Py_UNICODE_ISSPACE(). |
| 1102 | +PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[]; |
| 1103 | + |
| 1104 | +// Since splitting on whitespace is an important use case, and |
| 1105 | +// whitespace in most situations is solely ASCII whitespace, we |
| 1106 | +// optimize for the common case by using a quick look-up table |
| 1107 | +// _Py_ascii_whitespace (see below) with an inlined check. |
| 1108 | +static inline int Py_UNICODE_ISSPACE(Py_UCS4 ch) { |
| 1109 | + if (ch < 128) { |
| 1110 | + return _Py_ascii_whitespace[ch]; |
| 1111 | + } |
| 1112 | + return _PyUnicode_IsWhitespace(ch); |
| 1113 | +} |
| 1114 | + |
| 1115 | +#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch) |
| 1116 | +#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch) |
| 1117 | +#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch) |
| 1118 | +#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch) |
| 1119 | + |
| 1120 | +#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch) |
| 1121 | +#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch) |
| 1122 | +#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch) |
| 1123 | + |
| 1124 | +#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch) |
| 1125 | +#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch) |
| 1126 | +#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch) |
| 1127 | +#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch) |
| 1128 | + |
| 1129 | +#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch) |
| 1130 | +#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch) |
| 1131 | +#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch) |
| 1132 | + |
| 1133 | +#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch) |
| 1134 | + |
| 1135 | +static inline int Py_UNICODE_ISALNUM(Py_UCS4 ch) { |
| 1136 | + return (Py_UNICODE_ISALPHA(ch) |
| 1137 | + || Py_UNICODE_ISDECIMAL(ch) |
| 1138 | + || Py_UNICODE_ISDIGIT(ch) |
| 1139 | + || Py_UNICODE_ISNUMERIC(ch)); |
| 1140 | +} |
| 1141 | + |
| 1142 | + |
| 1143 | +/* === Misc functions ===================================================== */ |
| 1144 | + |
1127 | 1145 | PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);
|
1128 | 1146 |
|
1129 | 1147 | /* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/
|
|
0 commit comments