Skip to content

gh-103583: Isolate CJK modules, alternative 1 #103868

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 29 additions & 9 deletions Modules/cjkcodecs/_codecs_hk.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,28 @@
* BIG5HKSCS codec
*/

static const encode_map *big5_encmap = NULL;
static const decode_map *big5_decmap = NULL;
/*
 * State carried by the big5hkscs codec through its `config` pointer.
 *
 * The two map pointers are filled in by the codec init hook via
 * IMPORT_MAP(tw, big5, ...); `initialized` is set to 1 once that import
 * has succeeded so that later init calls return early.
 */
typedef struct {
/* Big5 encode map, used by the encoder through TRYMAP_ENC_PTR. */
const encode_map *big5_encmap;
/* Big5 decode map, used by the decoder through TRYMAP_DEC_PTR. */
const decode_map *big5_decmap;
/* Non-zero after the maps have been imported successfully. */
int initialized;
} big5_state;

CODEC_INIT(big5hkscs)
{
static int initialized = 0;

if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap))
big5_state *st = (big5_state *)config;
if (st->initialized) {
return 0;
}
if (IMPORT_MAP(tw, big5, &st->big5_encmap, &st->big5_decmap)) {
return -1;
initialized = 1;
}
st->initialized = 1;
return 0;
}

CODEC_DEINIT(big5hkscs)
{
    /*
     * Teardown hook for the big5hkscs codec: release the memory that the
     * codec's `config` pointer refers to.  The hook receives the pointer
     * as `const void *`, so the qualifier is dropped before freeing.
     * Always reports success.
     */
    void *mem = (void *)config;

    PyMem_Free(mem);
    return 0;
}

Expand Down Expand Up @@ -53,6 +65,7 @@ ENCODER(big5hkscs)
insize = 1;
REQUIRE_OUTBUF(2);

big5_state *st = (big5_state *)config;
if (c < 0x10000) {
if (TRYMAP_ENC(big5hkscs_bmp, code, c)) {
if (code == MULTIC) {
Expand Down Expand Up @@ -81,7 +94,7 @@ ENCODER(big5hkscs)
}
}
}
else if (TRYMAP_ENC(big5, code, c))
else if (TRYMAP_ENC_PTR(st->big5_encmap, code, c))
;
else
return 1;
Expand Down Expand Up @@ -122,7 +135,8 @@ DECODER(big5hkscs)
REQUIRE_INBUF(2);

if (0xc6 > c || c > 0xc8 || (c < 0xc7 && INBYTE2 < 0xa1)) {
if (TRYMAP_DEC(big5, decoded, c, INBYTE2)) {
big5_state *st = (big5_state *)config;
if (TRYMAP_DEC_PTR(st->big5_decmap, decoded, c, INBYTE2)) {
OUTCHAR(decoded);
NEXT_IN(2);
continue;
Expand Down Expand Up @@ -184,7 +198,13 @@ BEGIN_MAPPINGS_LIST(3)
END_MAPPINGS_LIST

BEGIN_CODECS_LIST(1)
CODEC_STATELESS_WINIT(big5hkscs)
{
big5_state *config = PyMem_Calloc(1, sizeof(big5_state));
if (config == NULL) {
return -1;
}
CODEC_STATELESS_WINIT(big5hkscs, config)
}
END_CODECS_LIST

I_AM_A_MODULE_FOR(hk)
1 change: 1 addition & 0 deletions Modules/cjkcodecs/_codecs_iso2022.c
Original file line number Diff line number Diff line change
Expand Up @@ -1128,6 +1128,7 @@ NEXT_CODEC = (MultibyteCodec){ \
"iso2022_" #variation, \
&iso2022_##variation##_config, \
iso2022_codec_init, \
NULL, \
_STATEFUL_METHODS(iso2022) \
};

Expand Down
2 changes: 1 addition & 1 deletion Modules/cjkcodecs/_codecs_jp.c
Original file line number Diff line number Diff line change
Expand Up @@ -748,7 +748,7 @@ BEGIN_MAPPINGS_LIST(11)
END_MAPPINGS_LIST

#define CODEC_CUSTOM(NAME, N, METH) \
NEXT_CODEC = (MultibyteCodec){NAME, (void *)N, NULL, _STATELESS_METHODS(METH)};
NEXT_CODEC = (MultibyteCodec){NAME, (void *)N, NULL, NULL, _STATELESS_METHODS(METH)};

BEGIN_CODECS_LIST(7)
CODEC_STATELESS(shift_jis)
Expand Down
37 changes: 33 additions & 4 deletions Modules/cjkcodecs/cjkcodecs.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ get_module_state(PyObject *mod)
#define CODEC_INIT(encoding) \
static int encoding##_codec_init(const void *config)

/*
 * Opens the definition of a codec teardown hook.  Expands to the
 * signature of the static function `<encoding>_codec_deinit`, which takes
 * the codec's configuration as `const void *config` and returns an int;
 * the caller supplies the function body right after the macro.
 */
#define CODEC_DEINIT(encoding) \
static int encoding##_codec_deinit(const void *config)

#define ENCODER_INIT(encoding) \
static int encoding##_encode_init( \
MultibyteCodec_State *state, const void *config)
Expand Down Expand Up @@ -205,6 +208,8 @@ get_module_state(PyObject *mod)
(m)->bottom]) != NOCHAR)
#define TRYMAP_ENC(charset, assi, uni) \
_TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff)
/*
 * Variant of TRYMAP_ENC that takes the encode map as an explicit pointer
 * expression (`map`) instead of pasting together a `<charset>_encmap`
 * identifier.  The map is indexed by the high bits of `uni` (uni >> 8) and
 * the low byte (uni & 0xff) is handed to _TRYMAP_ENC as the lookup value.
 * Note: `uni` appears twice in the expansion, so it must not have side
 * effects.
 */
#define TRYMAP_ENC_PTR(map, assi, uni) \
_TRYMAP_ENC(&(map)[(uni) >> 8], assi, (uni) & 0xff)

#define _TRYMAP_DEC(m, assi, val) \
((m)->map != NULL && \
Expand All @@ -213,6 +218,8 @@ get_module_state(PyObject *mod)
((assi) = (m)->map[(val) - (m)->bottom]) != UNIINV)
#define TRYMAP_DEC(charset, assi, c1, c2) \
_TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
/*
 * Variant of TRYMAP_DEC that takes the decode map as an explicit pointer
 * expression (`map`) instead of pasting together a `<charset>_decmap`
 * identifier.  The map is indexed by the first byte `c1`, and the second
 * byte `c2` is handed to _TRYMAP_DEC as the lookup value.
 */
#define TRYMAP_DEC_PTR(map, assi, c1, c2) \
_TRYMAP_DEC(&(map)[c1], assi, c2)

#define BEGIN_MAPPINGS_LIST(NUM) \
static int \
Expand Down Expand Up @@ -265,11 +272,15 @@ add_codecs(cjkcodecs_module_state *st) \
st->codec_list[idx++]

#define CODEC_STATEFUL(enc) \
NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, _STATEFUL_METHODS(enc)};
NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, NULL, \
_STATEFUL_METHODS(enc)};
#define CODEC_STATELESS(enc) \
NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, _STATELESS_METHODS(enc)};
#define CODEC_STATELESS_WINIT(enc) \
NEXT_CODEC = (MultibyteCodec){#enc, NULL, enc##_codec_init, _STATELESS_METHODS(enc)};
NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, NULL, \
_STATELESS_METHODS(enc)};
#define CODEC_STATELESS_WINIT(enc, config) \
NEXT_CODEC = (MultibyteCodec){#enc, config, \
enc##_codec_init, enc##_codec_deinit, \
_STATELESS_METHODS(enc)};

#define END_CODECS_LIST \
assert(st->num_codecs == idx); \
Expand Down Expand Up @@ -467,6 +478,22 @@ importmap(const char *modname, const char *symbol,
}
#endif

static struct PyModuleDef _cjk_module;

/*
 * Run the optional teardown hook of every codec registered in the module
 * state of `mod`.
 *
 * For each entry of st->codec_list that has a non-NULL `codecdeinit`, the
 * hook is called with that codec's `config` pointer.  A negative return
 * value is reported through PyErr_WriteUnraisable(mod); teardown then
 * continues with the remaining codecs.
 */
static void
deinit_codecs(PyObject *mod)
{
    cjkcodecs_module_state *st = get_module_state(mod);

    /*
     * Without a codec list there is nothing to tear down.  Checking the
     * list itself (rather than the address of each element, which is only
     * NULL for index 0 of a NULL list) keeps the loop from dereferencing
     * offsets from a NULL pointer when num_codecs is non-zero.
     * NOTE(review): presumably reachable when module setup fails before
     * the list is allocated -- confirm against add_codecs().
     */
    if (st->codec_list == NULL) {
        return;
    }

    for (int i = 0; i < st->num_codecs; i++) {
        const MultibyteCodec *codec = &st->codec_list[i];

        if (codec->codecdeinit == NULL) {
            /* This codec registered no teardown hook. */
            continue;
        }
        if (codec->codecdeinit(codec->config) < 0) {
            PyErr_WriteUnraisable(mod);
        }
    }
}

static int
_cjk_exec(PyObject *module)
{
Expand All @@ -476,6 +503,8 @@ _cjk_exec(PyObject *module)
static void
_cjk_free(void *mod)
{
deinit_codecs(mod);

cjkcodecs_module_state *st = get_module_state((PyObject *)mod);
PyMem_Free(st->mapping_list);
PyMem_Free(st->codec_list);
Expand Down
2 changes: 2 additions & 0 deletions Modules/cjkcodecs/multibytecodec.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ typedef struct {
} MultibyteCodec_State;

typedef int (*mbcodec_init)(const void *config);
/* Codec teardown hook: receives the codec's config pointer and returns an
 * int status (deinit_codecs() treats a negative value as a failure). */
typedef int (*mbcodec_deinit)(const void *config);
typedef Py_ssize_t (*mbencode_func)(MultibyteCodec_State *state,
const void *config,
int kind, const void *data,
Expand All @@ -52,6 +53,7 @@ typedef struct {
const char *encoding;
const void *config;
mbcodec_init codecinit;
mbcodec_init codecdeinit;
mbencode_func encode;
mbencodeinit_func encinit;
mbencodereset_func encreset;
Expand Down
3 changes: 0 additions & 3 deletions Tools/c-analyzer/cpython/globals-to-fix.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -485,9 +485,6 @@ Modules/_decimal/_decimal.c - _py_float_abs -
Modules/_decimal/_decimal.c - _py_long_bit_length -
Modules/_decimal/_decimal.c - _py_float_as_integer_ratio -
Modules/_elementtree.c - expat_capi -
Modules/cjkcodecs/_codecs_hk.c - big5_encmap -
Modules/cjkcodecs/_codecs_hk.c - big5_decmap -
Modules/cjkcodecs/_codecs_hk.c big5hkscs_codec_init initialized -
Modules/cjkcodecs/_codecs_iso2022.c - cp949_encmap -
Modules/cjkcodecs/_codecs_iso2022.c - ksx1001_decmap -
Modules/cjkcodecs/_codecs_iso2022.c - jisxcommon_encmap -
Expand Down