From d2f0afd7742a5078ebba0e86018dd9681f8f1257 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Tue, 29 Aug 2023 00:17:25 -0700 Subject: [PATCH] GH-106176, GH-104702: Fix reference leak when importing across multiple threads (GH-108497) (cherry picked from commit 5f85b443f7119e1c68a15fc9a342655e544d2852) Co-authored-by: Brett Cannon --- Lib/importlib/_bootstrap.py | 115 ++++++++++++++++-- ...-08-25-14-51-06.gh-issue-106176.D1EA2a.rst | 4 + 2 files changed, 107 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-08-25-14-51-06.gh-issue-106176.D1EA2a.rst diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py index c48fd506a0e4eb..ec2e56f6ea9ca1 100644 --- a/Lib/importlib/_bootstrap.py +++ b/Lib/importlib/_bootstrap.py @@ -51,18 +51,106 @@ def _new_module(name): # Module-level locking ######################################################## -# A dict mapping module names to weakrefs of _ModuleLock instances -# Dictionary protected by the global import lock +# For a list that can have a weakref to it. +class _List(list): + pass + + +# Copied from weakref.py with some simplifications and modifications unique to +# bootstrapping importlib. Many methods were simply deleting for simplicity, so if they +# are needed in the future they may work if simply copied back in. +class _WeakValueDictionary: + + def __init__(self): + self_weakref = _weakref.ref(self) + + # Inlined to avoid issues with inheriting from _weakref.ref before _weakref is + # set by _setup(). Since there's only one instance of this class, this is + # not expensive. + class KeyedRef(_weakref.ref): + + __slots__ = "key", + + def __new__(type, ob, key): + self = super().__new__(type, ob, type.remove) + self.key = key + return self + + def __init__(self, ob, key): + super().__init__(ob, self.remove) + + @staticmethod + def remove(wr): + nonlocal self_weakref + + self = self_weakref() + if self is not None: + if self._iterating: + self._pending_removals.append(wr.key) + else: + _weakref._remove_dead_weakref(self.data, wr.key) + + self._KeyedRef = KeyedRef + self.clear() + + def clear(self): + self._pending_removals = [] + self._iterating = set() + self.data = {} + + def _commit_removals(self): + pop = self._pending_removals.pop + d = self.data + while True: + try: + key = pop() + except IndexError: + return + _weakref._remove_dead_weakref(d, key) + + def get(self, key, default=None): + if self._pending_removals: + self._commit_removals() + try: + wr = self.data[key] + except KeyError: + return default + else: + if (o := wr()) is None: + return default + else: + return o + + def setdefault(self, key, default=None): + try: + o = self.data[key]() + except KeyError: + o = None + if o is None: + if self._pending_removals: + self._commit_removals() + self.data[key] = self._KeyedRef(default, key) + return default + else: + return o + + +# A dict mapping module names to weakrefs of _ModuleLock instances. +# Dictionary protected by the global import lock. _module_locks = {} -# A dict mapping thread IDs to lists of _ModuleLock instances. This maps a -# thread to the module locks it is blocking on acquiring. The values are -# lists because a single thread could perform a re-entrant import and be "in -# the process" of blocking on locks for more than one module. A thread can -# be "in the process" because a thread cannot actually block on acquiring -# more than one lock but it can have set up bookkeeping that reflects that -# it intends to block on acquiring more than one lock. -_blocking_on = {} +# A dict mapping thread IDs to weakref'ed lists of _ModuleLock instances. +# This maps a thread to the module locks it is blocking on acquiring. The +# values are lists because a single thread could perform a re-entrant import +# and be "in the process" of blocking on locks for more than one module. A +# thread can be "in the process" because a thread cannot actually block on +# acquiring more than one lock but it can have set up bookkeeping that reflects +# that it intends to block on acquiring more than one lock. +# +# The dictionary uses a WeakValueDictionary to avoid keeping unnecessary +# lists around, regardless of GC runs. This way there's no memory leak if +# the list is no longer needed (GH-106176). +_blocking_on = None class _BlockingOnManager: @@ -79,7 +167,7 @@ def __enter__(self): # re-entrant (i.e., a single thread may take it more than once) so it # wouldn't help us be correct in the face of re-entrancy either. - self.blocked_on = _blocking_on.setdefault(self.thread_id, []) + self.blocked_on = _blocking_on.setdefault(self.thread_id, _List()) self.blocked_on.append(self.lock) def __exit__(self, *args, **kwargs): @@ -1409,7 +1497,7 @@ def _setup(sys_module, _imp_module): modules, those two modules must be explicitly passed in. """ - global _imp, sys + global _imp, sys, _blocking_on _imp = _imp_module sys = sys_module @@ -1437,6 +1525,9 @@ def _setup(sys_module, _imp_module): builtin_module = sys.modules[builtin_name] setattr(self_module, builtin_name, builtin_module) + # Instantiation requires _weakref to have been set. + _blocking_on = _WeakValueDictionary() + def _install(sys_module, _imp_module): """Install importers for builtin and frozen modules""" diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-08-25-14-51-06.gh-issue-106176.D1EA2a.rst b/Misc/NEWS.d/next/Core and Builtins/2023-08-25-14-51-06.gh-issue-106176.D1EA2a.rst new file mode 100644 index 00000000000000..7f63d1086ea39e --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-08-25-14-51-06.gh-issue-106176.D1EA2a.rst @@ -0,0 +1,4 @@ +Use a ``WeakValueDictionary`` to track the lists containing the modules each +thread is currently importing. This helps avoid a reference leak from +keeping the list around longer than necessary. Weakrefs are used as GC can't +interrupt the cleanup.