-
-
Notifications
You must be signed in to change notification settings - Fork 31.8k
Data race in compile_template
in sre.c
#129983
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Comments
This fixes the issue. Let me know if you want me to send up a PR. Alternatively, it can be done with a mutex (but that is uglier). $ git diff main
diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c
index 0d8d4843d33..64ef6b202e5 100644
--- a/Modules/_sre/sre.c
+++ b/Modules/_sre/sre.c
@@ -1167,13 +1167,21 @@ compile_template(_sremodulestate *module_state,
PatternObject *pattern, PyObject *template)
{
/* delegate to Python code */
- PyObject *func = module_state->compile_template;
+ PyObject *func = FT_ATOMIC_LOAD_PTR_RELAXED(module_state->compile_template);
if (func == NULL) {
func = PyImport_ImportModuleAttrString("re", "_compile_template");
if (func == NULL) {
return NULL;
}
+#ifdef Py_GIL_DISABLED
+ PyObject *other_func = NULL;
+ if (!_Py_atomic_compare_exchange_ptr(&module_state->compile_template, &other_func, func)) {
+ Py_DECREF(func);
+ func = other_func;
+ }
+#else
Py_XSETREF(module_state->compile_template, func);
+#endif
}
PyObject *args[] = {(PyObject *)pattern, template}; Reproducer: import re
import threading
def test(b):
    """Wait on barrier *b*, then perform one ``re.sub`` call.

    Worker body for the reproducer: every thread blocks on the shared
    barrier so that they are all released together and reach ``re.sub``
    at about the same time.  The substitution result is discarded; only
    the concurrent call itself matters.
    """
    b.wait()
    re.sub(r"(\d+)", r"\1kg", "Weight: 50")
def check(funcs, *args):
    """Run every callable in *funcs* concurrently, one thread per callable.

    Each callable is invoked as ``func(barrier, *args)``, where *barrier* is
    a ``threading.Barrier`` sized to ``len(funcs)`` so the workers can
    synchronise their start.  Returns ``None`` after all threads have been
    joined.
    """
    if not funcs:
        # Nothing to run; a Barrier needs at least one party.
        return
    barrier = threading.Barrier(len(funcs))
    thrds = []
    for func in funcs:
        thrd = threading.Thread(target=func, args=(barrier, *args))
        thrds.append(thrd)
        # Start immediately; the barrier holds workers until all have started.
        thrd.start()
    for thrd in thrds:
        thrd.join()
if __name__ == "__main__":
while True:
check([test] * 40) Also, while testing this I ran into this error exactly once and no more, though I don't think it has anything to do with this particular issue or fix:
|
Thanks - please send a PR! |
I think the much simpler solution would be to initialize |
The code in $ git diff main
diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c
index 0d8d4843d33..c2b1be957f5 100644
--- a/Modules/_sre/sre.c
+++ b/Modules/_sre/sre.c
@@ -376,7 +376,7 @@ typedef struct {
PyTypeObject *Match_Type;
PyTypeObject *Scanner_Type;
PyTypeObject *Template_Type;
- PyObject *compile_template; // reference to re._compile_template
+ PyObject *re_module;
} _sremodulestate;
static _sremodulestate *
@@ -1167,17 +1167,13 @@ compile_template(_sremodulestate *module_state,
PatternObject *pattern, PyObject *template)
{
/* delegate to Python code */
- PyObject *func = module_state->compile_template;
+ PyObject *func = PyObject_GetAttrString(module_state->re_module, "_compile_template");
if (func == NULL) {
- func = PyImport_ImportModuleAttrString("re", "_compile_template");
- if (func == NULL) {
- return NULL;
- }
- Py_XSETREF(module_state->compile_template, func);
+ return NULL;
}
-
PyObject *args[] = {(PyObject *)pattern, template};
PyObject *result = PyObject_Vectorcall(func, args, 2, NULL);
+ Py_DECREF(func);
if (result == NULL && PyErr_ExceptionMatches(PyExc_TypeError)) {
/* If the replacement string is unhashable (e.g. bytearray),
@@ -3342,7 +3338,7 @@ sre_traverse(PyObject *module, visitproc visit, void *arg)
Py_VISIT(state->Match_Type);
Py_VISIT(state->Scanner_Type);
Py_VISIT(state->Template_Type);
- Py_VISIT(state->compile_template);
+ Py_VISIT(state->re_module);
return 0;
}
@@ -3356,7 +3352,7 @@ sre_clear(PyObject *module)
Py_CLEAR(state->Match_Type);
Py_CLEAR(state->Scanner_Type);
Py_CLEAR(state->Template_Type);
- Py_CLEAR(state->compile_template);
+ Py_CLEAR(state->re_module);
return 0;
}
@@ -3409,6 +3405,12 @@ sre_exec(PyObject *m)
goto error;
}
+ PyObject *re = PyImport_ImportModule("re");
+ if (re == NULL) {
+ goto error; |
Ah, we had a similar issue in asyncio as well with a circular import; it was solved by delaying the import of the C implementation until later. We can probably do something similar here, like in cpython/Modules/_asynciomodule.c Line 4435 in 247b50d
|
Although I generally prefer initializing module state in the exec function, I'm hesitant to delay or reorder the imports here and would rather just use the atomics. |
The following seems to fix it: From e1ef31d9417fe9247603fdc66f2acc5cecfbb074 Mon Sep 17 00:00:00 2001
From: Kumar Aditya <kumaraditya@python.org>
Date: Tue, 11 Feb 2025 15:55:55 +0000
Subject: [PATCH] fix re
---
Lib/re/__init__.py | 16 +++++++++-------
Modules/_sre/sre.c | 14 +++++++-------
2 files changed, 16 insertions(+), 14 deletions(-)
diff --git a/Lib/re/__init__.py b/Lib/re/__init__.py
index 7e8abbf6ffe..9773c643065 100644
--- a/Lib/re/__init__.py
+++ b/Lib/re/__init__.py
@@ -123,8 +123,16 @@
"""
import enum
-from . import _compiler, _parser
import functools
+
+_MAXCACHE = 512
+@functools.lru_cache(_MAXCACHE)
+def _compile_template(pattern, repl):
+ # internal: compile replacement pattern
+ return _sre.template(pattern, _parser.parse_template(repl, pattern))
+
+
+from . import _compiler, _parser
import _sre
@@ -323,7 +331,6 @@ def escape(pattern):
# _cache uses the LRU policy which has better hit rate.
_cache = {} # LRU
_cache2 = {} # FIFO
-_MAXCACHE = 512
_MAXCACHE2 = 256
assert _MAXCACHE2 < _MAXCACHE
@@ -371,11 +378,6 @@ def _compile(pattern, flags):
_cache2[key] = p
return p
-@functools.lru_cache(_MAXCACHE)
-def _compile_template(pattern, repl):
- # internal: compile replacement pattern
- return _sre.template(pattern, _parser.parse_template(repl, pattern))
-
# register myself for pickling
import copyreg
diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c
index 0d8d4843d33..a2b5f82d8b9 100644
--- a/Modules/_sre/sre.c
+++ b/Modules/_sre/sre.c
@@ -1168,13 +1168,7 @@ compile_template(_sremodulestate *module_state,
{
/* delegate to Python code */
PyObject *func = module_state->compile_template;
- if (func == NULL) {
- func = PyImport_ImportModuleAttrString("re", "_compile_template");
- if (func == NULL) {
- return NULL;
- }
- Py_XSETREF(module_state->compile_template, func);
- }
+ assert(func != NULL);
PyObject *args[] = {(PyObject *)pattern, template};
PyObject *result = PyObject_Vectorcall(func, args, 2, NULL);
@@ -3409,6 +3403,12 @@ sre_exec(PyObject *m)
goto error;
}
+ state->compile_template = PyImport_ImportModuleAttrString("re", "_compile_template");
+
+ if (state->compile_template == NULL) {
+ goto error;
+ }
+
return 0;
It seems that we just need to import _sre after defining _compile_template to avoid the circular import. |
A bit fragile? |
I'd be a lot more confident backporting the atomic change to 3.13 than reordering functions and imports in |
Bug report
Concurrent accesses to
module_state->compile_template
in the free-threaded build: cpython/Modules/_sre/sre.c
Lines 1169 to 1177 in 1feaecc
Linked PRs
The text was updated successfully, but these errors were encountered: