Skip to content

Commit d8bf7d9

Browse files
committed
gh-124785: re-work fix so tracerefs test passes
1 parent 626d706 commit d8bf7d9

File tree

3 files changed

+99
-12
lines changed

3 files changed

+99
-12
lines changed

Include/internal/pycore_global_objects.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ struct _Py_static_objects {
6565

6666
struct _Py_interp_cached_objects {
6767
PyObject *interned_strings;
68+
PyObject *interned_strings_legacy;
6869

6970
/* AST */
7071
PyObject *str_replace_inf;

Lib/test/test_sys.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,21 +1076,33 @@ def test_getallocatedblocks(self):
10761076
# about the underlying implementation: the function might
10771077
# return 0 or something greater.
10781078
self.assertGreaterEqual(a, 0)
1079-
try:
1080-
# While we could imagine a Python session where the number of
1081-
# multiple buffer objects would exceed the sharing of references,
1082-
# it is unlikely to happen in a normal test run.
1083-
self.assertLess(a, sys.gettotalrefcount())
1084-
except AttributeError:
1085-
# gettotalrefcount() not available
1086-
pass
10871079
gc.collect()
10881080
b = sys.getallocatedblocks()
10891081
self.assertLessEqual(b, a)
10901082
gc.collect()
10911083
c = sys.getallocatedblocks()
10921084
self.assertIn(c, range(b - 50, b + 50))
10931085

1086+
@unittest.skipUnless(hasattr(sys, "getallocatedblocks"),
1087+
"sys.getallocatedblocks unavailable on this build")
1088+
def test_getallocatedblocks_refcount(self):
1089+
# While we could imagine a Python session where the number of multiple
1090+
# buffer objects would exceed the sharing of references, it is unlikely
1091+
# to happen given that we run this in a subinterpreter.
1092+
code = """if 1:
1093+
import sys
1094+
num_blocks = sys.getallocatedblocks()
1095+
try:
1096+
total_refcnt = sys.gettotalrefcount()
1097+
except AttributeError:
1098+
pass
1099+
else:
1100+
if num_blocks > total_refcnt:
1101+
raise AssertionError('allocated blocks exceeds total refcnt')
1102+
"""
1103+
self.assertEqual(support.run_in_subinterp(code), 0,
1104+
'subinterp code failure, check stderr.')
1105+
10941106
def test_is_gil_enabled(self):
10951107
if support.Py_GIL_DISABLED:
10961108
self.assertIs(type(sys._is_gil_enabled()), bool)

Objects/unicodeobject.c

Lines changed: 78 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -291,12 +291,18 @@ hashtable_unicode_compare(const void *key1, const void *key2)
291291
It's not safe to deallocate those strings until all interpreters that
292292
potentially use them are freed. By storing them in the main interpreter, we
293293
ensure they get freed after all other interpreters are freed.
294+
295+
Subtle detail: it's only required to share the interned string dict in the
296+
case that those kinds of legacy modules are actually imported. However, we
297+
can't wait until the import happens, so we share if those kinds of modules are
298+
allowed (the Py_RTFLAGS_MULTI_INTERP_EXTENSIONS flag is not set).
294299
*/
295300
static bool
296301
has_shared_intern_dict(PyInterpreterState *interp)
297302
{
298303
PyInterpreterState *main_interp = _PyInterpreterState_Main();
299-
return interp != main_interp && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC;
304+
return (interp != main_interp &&
305+
!(interp->feature_flags & Py_RTFLAGS_MULTI_INTERP_EXTENSIONS));
300306
}
301307

302308
static int
@@ -305,8 +311,20 @@ init_interned_dict(PyInterpreterState *interp)
305311
assert(get_interned_dict(interp) == NULL);
306312
PyObject *interned;
307313
if (has_shared_intern_dict(interp)) {
308-
interned = get_interned_dict(_PyInterpreterState_Main());
309-
Py_INCREF(interned);
314+
PyInterpreterState *main = _PyInterpreterState_Main();
315+
interned = _Py_INTERP_CACHED_OBJECT(main, interned_strings_legacy);
316+
if (interned == NULL) {
317+
// allocate for main interpreter. We share obmalloc in this case
318+
// and we use a separate dict because it's cleaner to ensure these
319+
// objects don't show up in the main interpreter (which they could
320+
// if we use interned_strings). They will be shared by all
321+
// subinterpreters that allow legacy single-phase init modules.
322+
interned = PyDict_New();
323+
if (interned == NULL) {
324+
return -1;
325+
}
326+
_Py_INTERP_CACHED_OBJECT(main, interned_strings_legacy) = interned;
327+
}
310328
}
311329
else {
312330
interned = PyDict_New();
@@ -318,6 +336,61 @@ init_interned_dict(PyInterpreterState *interp)
318336
return 0;
319337
}
320338

339+
/* Clean the dict of interned strings that is used by subinterpreters that
340+
* allow basic single-phase extension modules (has_shared_intern_dict() is
341+
* true). For those, they all share the interned_strings_legacy dict that's
342+
* owned by the main interpreter. Only the main interpreter does cleanup on
343+
* it. See GH-116510.
344+
*/
345+
static void
346+
clear_interned_dict_legacy(PyInterpreterState *interp)
347+
{
348+
PyObject *interned = _Py_INTERP_CACHED_OBJECT(interp,
349+
interned_strings_legacy);
350+
if (interned == NULL) {
351+
return;
352+
}
353+
// This is similar but slightly different logic compared with
354+
// _PyUnicode_ClearInterned(). These are strings created by
355+
// subinterpreters but stored in a dict owned by the main interpreter.
356+
// Immortalization loses the true reference count and so we need to ensure
357+
// all those subinterpreters have exited before cleaning these strings up.
358+
Py_ssize_t pos = 0;
359+
PyObject *s, *ignored_value;
360+
while (PyDict_Next(interned, &pos, &s, &ignored_value)) {
361+
assert(PyUnicode_IS_READY(s));
362+
#ifdef Py_TRACE_REFS
363+
_Py_AddToAllObjects(s);
364+
#endif
365+
switch (PyUnicode_CHECK_INTERNED(s)) {
366+
case SSTATE_INTERNED_IMMORTAL:
367+
case SSTATE_INTERNED_IMMORTAL_STATIC:
368+
_Py_SetMortal(s, 2);
369+
#ifdef Py_REF_DEBUG
370+
/* let's be pedantic with the ref total */
371+
_Py_IncRefTotal(_PyThreadState_GET());
372+
_Py_IncRefTotal(_PyThreadState_GET());
373+
#endif
374+
break;
375+
case SSTATE_INTERNED_MORTAL:
376+
Py_SET_REFCNT(s, Py_REFCNT(s) + 2);
377+
#ifdef Py_REF_DEBUG
378+
/* let's be pedantic with the ref total */
379+
_Py_IncRefTotal(_PyThreadState_GET());
380+
_Py_IncRefTotal(_PyThreadState_GET());
381+
#endif
382+
break;
383+
case SSTATE_NOT_INTERNED:
384+
/* fall through */
385+
default:
386+
Py_UNREACHABLE();
387+
}
388+
_PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED;
389+
}
390+
PyDict_Clear(interned);
391+
_Py_INTERP_CACHED_OBJECT(interp, interned_strings_legacy) = NULL;
392+
}
393+
321394
static void
322395
clear_interned_dict(PyInterpreterState *interp)
323396
{
@@ -326,8 +399,9 @@ clear_interned_dict(PyInterpreterState *interp)
326399
if (!has_shared_intern_dict(interp)) {
327400
// only clear if the dict belongs to this interpreter
328401
PyDict_Clear(interned);
402+
Py_DECREF(interned);
403+
clear_interned_dict_legacy(interp);
329404
}
330-
Py_DECREF(interned);
331405
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL;
332406
}
333407
}

0 commit comments

Comments
 (0)