From 4644f7160339dea615cdc3ad6027105bb1d00ba7 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 26 Dec 2023 22:58:39 +0900 Subject: [PATCH 1/5] gh-111971: Make _PyUnicode_FromId thread-safe in --disable-gil --- Include/cpython/object.h | 6 +++++- Objects/unicodeobject.c | 11 +++++++---- Programs/_testembed.c | 1 + 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 762e8a3b86ee1e..8dac44bb1e2d48 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -39,13 +39,17 @@ typedef struct _Py_Identifier { // Index in PyInterpreterState.unicode.ids.array. It is process-wide // unique and must be initialized to -1. Py_ssize_t index; + // Hidden PyMutex struct for non free-threaded build. + struct { + uint8_t v; + } mutex; } _Py_Identifier; #ifndef Py_BUILD_CORE // For now we are keeping _Py_IDENTIFIER for continued use // in non-builtin extensions (and naughty PyPI modules). -#define _Py_static_string_init(value) { .string = (value), .index = -1 } +#define _Py_static_string_init(value) { .string = (value), .index = -1, mutex = 0 } #define _Py_static_string(varname, value) static _Py_Identifier varname = _Py_static_string_init(value) #define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ad87206b2a8200..3d044c6a1c9c89 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1897,6 +1897,7 @@ PyUnicode_FromString(const char *u) PyObject * _PyUnicode_FromId(_Py_Identifier *id) { + PyMutex_Lock((PyMutex *)&id->mutex); PyInterpreterState *interp = _PyInterpreterState_GET(); struct _Py_unicode_ids *ids = &interp->unicode.ids; @@ -1923,14 +1924,14 @@ _PyUnicode_FromId(_Py_Identifier *id) obj = ids->array[index]; if (obj) { // Return a borrowed reference - return obj; + goto end; } } obj = PyUnicode_DecodeUTF8Stateful(id->string, strlen(id->string), NULL, NULL); if (!obj) { - return NULL; + goto end; } PyUnicode_InternInPlace(&obj); @@ -1941,7 +1942,8 @@ _PyUnicode_FromId(_Py_Identifier *id) PyObject **new_array = PyMem_Realloc(ids->array, new_size * item_size); if (new_array == NULL) { PyErr_NoMemory(); - return NULL; + obj = NULL; + goto end; } memset(&new_array[ids->size], 0, (new_size - ids->size) * item_size); ids->array = new_array; @@ -1951,11 +1953,12 @@ _PyUnicode_FromId(_Py_Identifier *id) // The array stores a strong reference ids->array[index] = obj; +end: + PyMutex_Unlock((PyMutex *)&id->mutex); // Return a borrowed reference return obj; } - static void unicode_clear_identifiers(struct _Py_unicode_state *state) { diff --git a/Programs/_testembed.c b/Programs/_testembed.c index 30998bf80f9ce4..fa734356646c2e 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -1975,6 +1975,7 @@ static int test_unicode_id_init(void) static _Py_Identifier PyId_test_unicode_id_init = { .string = "test_unicode_id_init", .index = -1, + .mutex = 0, }; // Initialize Python once without using the identifier From 175ca6b813824e8c3ac5c54c56a68bbaac136396 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 26 Dec 2023 23:55:22 +0900 Subject: [PATCH 2/5] Compiler warn --- Include/cpython/object.h | 2 +- Programs/_testembed.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 8dac44bb1e2d48..c015eec306a3e7 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -49,7 +49,7 @@ typedef struct _Py_Identifier { // For now we are keeping _Py_IDENTIFIER for continued use // in non-builtin extensions (and naughty PyPI modules). -#define _Py_static_string_init(value) { .string = (value), .index = -1, mutex = 0 } +#define _Py_static_string_init(value) { .string = (value), .index = -1, mutex = {0} } #define _Py_static_string(varname, value) static _Py_Identifier varname = _Py_static_string_init(value) #define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname) diff --git a/Programs/_testembed.c b/Programs/_testembed.c index fa734356646c2e..d94ebb7b3623ad 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -1975,7 +1975,7 @@ static int test_unicode_id_init(void) static _Py_Identifier PyId_test_unicode_id_init = { .string = "test_unicode_id_init", .index = -1, - .mutex = 0, + .mutex = {0}, }; // Initialize Python once without using the identifier From 9f0cb0c079d062f69de37e92b4c8aded201ab735 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 27 Dec 2023 01:22:19 +0900 Subject: [PATCH 3/5] Address code review --- Include/cpython/object.h | 2 +- Programs/_testembed.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Include/cpython/object.h b/Include/cpython/object.h index c015eec306a3e7..2c433925453c6f 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -49,7 +49,7 @@ typedef struct _Py_Identifier { // For now we are keeping _Py_IDENTIFIER for continued use // in non-builtin extensions (and naughty PyPI modules). -#define _Py_static_string_init(value) { .string = (value), .index = -1, mutex = {0} } +#define _Py_static_string_init(value) { .string = (value), .index = -1, } #define _Py_static_string(varname, value) static _Py_Identifier varname = _Py_static_string_init(value) #define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname) diff --git a/Programs/_testembed.c b/Programs/_testembed.c index d94ebb7b3623ad..30998bf80f9ce4 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -1975,7 +1975,6 @@ static int test_unicode_id_init(void) static _Py_Identifier PyId_test_unicode_id_init = { .string = "test_unicode_id_init", .index = -1, - .mutex = {0}, }; // Initialize Python once without using the identifier From 7f97129940fbdcad160f352f4dfedc7a7b3dce2f Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 27 Dec 2023 01:22:59 +0900 Subject: [PATCH 4/5] nit --- Include/cpython/object.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 2c433925453c6f..d6482f4bc689a1 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -49,7 +49,7 @@ typedef struct _Py_Identifier { // For now we are keeping _Py_IDENTIFIER for continued use // in non-builtin extensions (and naughty PyPI modules). -#define _Py_static_string_init(value) { .string = (value), .index = -1, } +#define _Py_static_string_init(value) { .string = (value), .index = -1 } #define _Py_static_string(varname, value) static _Py_Identifier varname = _Py_static_string_init(value) #define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname) From 01417962c50b17b5917bfda0136b4d612febbaee Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 27 Dec 2023 01:25:17 +0900 Subject: [PATCH 5/5] nit --- Objects/unicodeobject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 3d044c6a1c9c89..4b03cc3f4da5fa 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1959,6 +1959,7 @@ _PyUnicode_FromId(_Py_Identifier *id) return obj; } + static void unicode_clear_identifiers(struct _Py_unicode_state *state) {