From 24bf1dd86889b943c8a4296e03bdaaca075aa1bc Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Sun, 16 Apr 2023 21:49:33 +0200 Subject: [PATCH 1/3] Add codec deinit callback --- Modules/cjkcodecs/_codecs_hk.c | 5 +++++ Modules/cjkcodecs/_codecs_iso2022.c | 1 + Modules/cjkcodecs/_codecs_jp.c | 2 +- Modules/cjkcodecs/cjkcodecs.h | 13 ++++++++++--- Modules/cjkcodecs/multibytecodec.h | 2 ++ 5 files changed, 19 insertions(+), 4 deletions(-) diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c index 43593b873733e6..ad77db5a7f9f95 100644 --- a/Modules/cjkcodecs/_codecs_hk.c +++ b/Modules/cjkcodecs/_codecs_hk.c @@ -26,6 +26,11 @@ CODEC_INIT(big5hkscs) return 0; } +CODEC_DEINIT(big5hkscs) +{ + return 0; +} + /* * There are four possible pair unicode -> big5hkscs maps as in HKSCS 2004: * U+00CA U+0304 -> 8862 (U+00CA alone is mapped to 8866) diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c index cf34752e16a527..c88efb031f3eec 100644 --- a/Modules/cjkcodecs/_codecs_iso2022.c +++ b/Modules/cjkcodecs/_codecs_iso2022.c @@ -1128,6 +1128,7 @@ NEXT_CODEC = (MultibyteCodec){ \ "iso2022_" #variation, \ &iso2022_##variation##_config, \ iso2022_codec_init, \ + NULL, \ _STATEFUL_METHODS(iso2022) \ }; diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c index 7a8b78a23592ea..92272f4d693443 100644 --- a/Modules/cjkcodecs/_codecs_jp.c +++ b/Modules/cjkcodecs/_codecs_jp.c @@ -748,7 +748,7 @@ BEGIN_MAPPINGS_LIST(11) END_MAPPINGS_LIST #define CODEC_CUSTOM(NAME, N, METH) \ - NEXT_CODEC = (MultibyteCodec){NAME, (void *)N, NULL, _STATELESS_METHODS(METH)}; + NEXT_CODEC = (MultibyteCodec){NAME, (void *)N, NULL, NULL, _STATELESS_METHODS(METH)}; BEGIN_CODECS_LIST(7) CODEC_STATELESS(shift_jis) diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h index 1b0355310eddab..7eb8e654de891b 100644 --- a/Modules/cjkcodecs/cjkcodecs.h +++ b/Modules/cjkcodecs/cjkcodecs.h @@ -78,6 +78,9 @@ 
get_module_state(PyObject *mod) #define CODEC_INIT(encoding) \ static int encoding##_codec_init(const void *config) +#define CODEC_DEINIT(encoding) \ + static int encoding##_codec_deinit(const void *config) + #define ENCODER_INIT(encoding) \ static int encoding##_encode_init( \ MultibyteCodec_State *state, const void *config) @@ -265,11 +268,15 @@ add_codecs(cjkcodecs_module_state *st) \ st->codec_list[idx++] #define CODEC_STATEFUL(enc) \ - NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, _STATEFUL_METHODS(enc)}; + NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, NULL, \ + _STATEFUL_METHODS(enc)}; #define CODEC_STATELESS(enc) \ - NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, _STATELESS_METHODS(enc)}; + NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, NULL, \ + _STATELESS_METHODS(enc)}; #define CODEC_STATELESS_WINIT(enc) \ - NEXT_CODEC = (MultibyteCodec){#enc, NULL, enc##_codec_init, _STATELESS_METHODS(enc)}; + NEXT_CODEC = (MultibyteCodec){#enc, NULL, \ + enc##_codec_init, enc##_codec_deinit, \ + _STATELESS_METHODS(enc)}; #define END_CODECS_LIST \ assert(st->num_codecs == idx); \ diff --git a/Modules/cjkcodecs/multibytecodec.h b/Modules/cjkcodecs/multibytecodec.h index 327cb51129d945..afddf4df87ddeb 100644 --- a/Modules/cjkcodecs/multibytecodec.h +++ b/Modules/cjkcodecs/multibytecodec.h @@ -28,6 +28,7 @@ typedef struct { } MultibyteCodec_State; typedef int (*mbcodec_init)(const void *config); +typedef int (*mbcodec_deinit)(const void *config); typedef Py_ssize_t (*mbencode_func)(MultibyteCodec_State *state, const void *config, int kind, const void *data, @@ -52,6 +53,7 @@ typedef struct { const char *encoding; const void *config; mbcodec_init codecinit; + mbcodec_deinit codecdeinit; mbencode_func encode; mbencodeinit_func encinit; mbencodereset_func encreset; From e1713e91aee4f974f63dd613e4c3934de9662d81 Mon Sep 17 00:00:00 2001 From: "Erlend E. 
Aasland" Date: Sun, 16 Apr 2023 22:04:46 +0200 Subject: [PATCH 2/3] Deinit codecs at cjk module free --- Modules/cjkcodecs/cjkcodecs.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h index 7eb8e654de891b..85f035183c0ccf 100644 --- a/Modules/cjkcodecs/cjkcodecs.h +++ b/Modules/cjkcodecs/cjkcodecs.h @@ -474,6 +474,23 @@ importmap(const char *modname, const char *symbol, } #endif +static struct PyModuleDef _cjk_module; + +static void +deinit_codecs(PyObject *mod) +{ + cjkcodecs_module_state *st = get_module_state(mod); + for (int i = 0; i < st->num_codecs; i++) { + const MultibyteCodec *codec = &st->codec_list[i]; + if (codec && codec->codecdeinit) { + fprintf(stderr, "- deinit[%d]: %s\n", i, codec->encoding); + if (codec->codecdeinit(codec->config) < 0) { + PyErr_WriteUnraisable(mod); + } + } + } +} + static int _cjk_exec(PyObject *module) { @@ -483,6 +500,8 @@ _cjk_exec(PyObject *module) static void _cjk_free(void *mod) { + deinit_codecs(mod); + cjkcodecs_module_state *st = get_module_state((PyObject *)mod); PyMem_Free(st->mapping_list); PyMem_Free(st->codec_list); From 5b8324dfb059da887090ea061f02b85911b60935 Mon Sep 17 00:00:00 2001 From: "Erlend E. 
Aasland" Date: Sun, 16 Apr 2023 17:30:16 -0500 Subject: [PATCH 3/3] Proof of concept: put big5 map pointers on heap --- Modules/cjkcodecs/_codecs_hk.c | 33 +++++++++++++++------ Modules/cjkcodecs/cjkcodecs.h | 9 ++++-- Tools/c-analyzer/cpython/globals-to-fix.tsv | 3 -- 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c index ad77db5a7f9f95..f13c860e71f677 100644 --- a/Modules/cjkcodecs/_codecs_hk.c +++ b/Modules/cjkcodecs/_codecs_hk.c @@ -13,21 +13,28 @@ * BIG5HKSCS codec */ -static const encode_map *big5_encmap = NULL; -static const decode_map *big5_decmap = NULL; +typedef struct { + const encode_map *big5_encmap; + const decode_map *big5_decmap; + int initialized; +} big5_state; CODEC_INIT(big5hkscs) { - static int initialized = 0; - - if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap)) + big5_state *st = (big5_state *)config; + if (st->initialized) { + return 0; + } + if (IMPORT_MAP(tw, big5, &st->big5_encmap, &st->big5_decmap)) { return -1; - initialized = 1; + } + st->initialized = 1; return 0; } CODEC_DEINIT(big5hkscs) { + PyMem_Free((void *)config); return 0; } @@ -58,6 +65,7 @@ ENCODER(big5hkscs) insize = 1; REQUIRE_OUTBUF(2); + big5_state *st = (big5_state *)config; if (c < 0x10000) { if (TRYMAP_ENC(big5hkscs_bmp, code, c)) { if (code == MULTIC) { @@ -86,7 +94,7 @@ ENCODER(big5hkscs) } } } - else if (TRYMAP_ENC(big5, code, c)) + else if (TRYMAP_ENC_PTR(st->big5_encmap, code, c)) ; else return 1; @@ -127,7 +135,8 @@ DECODER(big5hkscs) REQUIRE_INBUF(2); if (0xc6 > c || c > 0xc8 || (c < 0xc7 && INBYTE2 < 0xa1)) { - if (TRYMAP_DEC(big5, decoded, c, INBYTE2)) { + big5_state *st = (big5_state *)config; + if (TRYMAP_DEC_PTR(st->big5_decmap, decoded, c, INBYTE2)) { OUTCHAR(decoded); NEXT_IN(2); continue; @@ -189,7 +198,13 @@ BEGIN_MAPPINGS_LIST(3) END_MAPPINGS_LIST BEGIN_CODECS_LIST(1) - CODEC_STATELESS_WINIT(big5hkscs) +{ + big5_state *config = PyMem_Calloc(1, 
sizeof(big5_state)); + if (config == NULL) { + return -1; + } + CODEC_STATELESS_WINIT(big5hkscs, config) +} END_CODECS_LIST I_AM_A_MODULE_FOR(hk) diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h index 85f035183c0ccf..901c1e1860981c 100644 --- a/Modules/cjkcodecs/cjkcodecs.h +++ b/Modules/cjkcodecs/cjkcodecs.h @@ -208,6 +208,8 @@ get_module_state(PyObject *mod) (m)->bottom]) != NOCHAR) #define TRYMAP_ENC(charset, assi, uni) \ _TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff) +#define TRYMAP_ENC_PTR(map, assi, uni) \ + _TRYMAP_ENC(&(map)[(uni) >> 8], assi, (uni) & 0xff) #define _TRYMAP_DEC(m, assi, val) \ ((m)->map != NULL && \ @@ -216,6 +218,8 @@ get_module_state(PyObject *mod) ((assi) = (m)->map[(val) - (m)->bottom]) != UNIINV) #define TRYMAP_DEC(charset, assi, c1, c2) \ _TRYMAP_DEC(&charset##_decmap[c1], assi, c2) +#define TRYMAP_DEC_PTR(map, assi, c1, c2) \ + _TRYMAP_DEC(&(map)[c1], assi, c2) #define BEGIN_MAPPINGS_LIST(NUM) \ static int \ @@ -273,8 +277,8 @@ add_codecs(cjkcodecs_module_state *st) \ #define CODEC_STATELESS(enc) \ NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, NULL, \ _STATELESS_METHODS(enc)}; -#define CODEC_STATELESS_WINIT(enc) \ - NEXT_CODEC = (MultibyteCodec){#enc, NULL, \ +#define CODEC_STATELESS_WINIT(enc, config) \ + NEXT_CODEC = (MultibyteCodec){#enc, config, \ enc##_codec_init, enc##_codec_deinit, \ _STATELESS_METHODS(enc)}; @@ -483,7 +487,6 @@ deinit_codecs(PyObject *mod) for (int i = 0; i < st->num_codecs; i++) { const MultibyteCodec *codec = &st->codec_list[i]; if (codec && codec->codecdeinit) { - fprintf(stderr, "- deinit[%d]: %s\n", i, codec->encoding); if (codec->codecdeinit(codec->config) < 0) { PyErr_WriteUnraisable(mod); } diff --git a/Tools/c-analyzer/cpython/globals-to-fix.tsv b/Tools/c-analyzer/cpython/globals-to-fix.tsv index 849fd5d9a1e8d5..bb7e9904fdd0c5 100644 --- a/Tools/c-analyzer/cpython/globals-to-fix.tsv +++ b/Tools/c-analyzer/cpython/globals-to-fix.tsv @@ -485,9 +485,6 @@ 
Modules/_decimal/_decimal.c - _py_float_abs - Modules/_decimal/_decimal.c - _py_long_bit_length - Modules/_decimal/_decimal.c - _py_float_as_integer_ratio - Modules/_elementtree.c - expat_capi - -Modules/cjkcodecs/_codecs_hk.c - big5_encmap - -Modules/cjkcodecs/_codecs_hk.c - big5_decmap - -Modules/cjkcodecs/_codecs_hk.c big5hkscs_codec_init initialized - Modules/cjkcodecs/_codecs_iso2022.c - cp949_encmap - Modules/cjkcodecs/_codecs_iso2022.c - ksx1001_decmap - Modules/cjkcodecs/_codecs_iso2022.c - jisxcommon_encmap -