Skip to content

Commit

Permalink
pythonGH-106176, pythonGH-104702: Fix reference leak when importing a…
Browse files Browse the repository at this point in the history
…cross multiple threads (pythonGH-108497)

(cherry picked from commit 5f85b44)

Co-authored-by: Brett Cannon <brett@python.org>
  • Loading branch information
brettcannon authored and miss-islington committed Aug 29, 2023
1 parent 7c7b2bf commit d2f0afd
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 12 deletions.
115 changes: 103 additions & 12 deletions Lib/importlib/_bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,106 @@ def _new_module(name):

# Module-level locking ########################################################

# A dict mapping module names to weakrefs of _ModuleLock instances
# Dictionary protected by the global import lock
# For a list that can have a weakref to it.
class _List(list):
pass


# Copied from weakref.py with some simplifications and modifications unique to
# bootstrapping importlib. Many methods were simply deleting for simplicity, so if they
# are needed in the future they may work if simply copied back in.
class _WeakValueDictionary:

def __init__(self):
self_weakref = _weakref.ref(self)

# Inlined to avoid issues with inheriting from _weakref.ref before _weakref is
# set by _setup(). Since there's only one instance of this class, this is
# not expensive.
class KeyedRef(_weakref.ref):

__slots__ = "key",

def __new__(type, ob, key):
self = super().__new__(type, ob, type.remove)
self.key = key
return self

def __init__(self, ob, key):
super().__init__(ob, self.remove)

@staticmethod
def remove(wr):
nonlocal self_weakref

self = self_weakref()
if self is not None:
if self._iterating:
self._pending_removals.append(wr.key)
else:
_weakref._remove_dead_weakref(self.data, wr.key)

self._KeyedRef = KeyedRef
self.clear()

def clear(self):
self._pending_removals = []
self._iterating = set()
self.data = {}

def _commit_removals(self):
pop = self._pending_removals.pop
d = self.data
while True:
try:
key = pop()
except IndexError:
return
_weakref._remove_dead_weakref(d, key)

def get(self, key, default=None):
if self._pending_removals:
self._commit_removals()
try:
wr = self.data[key]
except KeyError:
return default
else:
if (o := wr()) is None:
return default
else:
return o

def setdefault(self, key, default=None):
try:
o = self.data[key]()
except KeyError:
o = None
if o is None:
if self._pending_removals:
self._commit_removals()
self.data[key] = self._KeyedRef(default, key)
return default
else:
return o


# A dict mapping module names to weakrefs of _ModuleLock instances.
# Dictionary protected by the global import lock.
_module_locks = {}

# A dict mapping thread IDs to lists of _ModuleLock instances. This maps a
# thread to the module locks it is blocking on acquiring. The values are
# lists because a single thread could perform a re-entrant import and be "in
# the process" of blocking on locks for more than one module. A thread can
# be "in the process" because a thread cannot actually block on acquiring
# more than one lock but it can have set up bookkeeping that reflects that
# it intends to block on acquiring more than one lock.
_blocking_on = {}
# A dict mapping thread IDs to weakref'ed lists of _ModuleLock instances.
# This maps a thread to the module locks it is blocking on acquiring. The
# values are lists because a single thread could perform a re-entrant import
# and be "in the process" of blocking on locks for more than one module. A
# thread can be "in the process" because a thread cannot actually block on
# acquiring more than one lock but it can have set up bookkeeping that reflects
# that it intends to block on acquiring more than one lock.
#
# The dictionary uses a WeakValueDictionary to avoid keeping unnecessary
# lists around, regardless of GC runs. This way there's no memory leak if
# the list is no longer needed (GH-106176).
_blocking_on = None


class _BlockingOnManager:
Expand All @@ -79,7 +167,7 @@ def __enter__(self):
# re-entrant (i.e., a single thread may take it more than once) so it
# wouldn't help us be correct in the face of re-entrancy either.

self.blocked_on = _blocking_on.setdefault(self.thread_id, [])
self.blocked_on = _blocking_on.setdefault(self.thread_id, _List())
self.blocked_on.append(self.lock)

def __exit__(self, *args, **kwargs):
Expand Down Expand Up @@ -1409,7 +1497,7 @@ def _setup(sys_module, _imp_module):
modules, those two modules must be explicitly passed in.
"""
global _imp, sys
global _imp, sys, _blocking_on
_imp = _imp_module
sys = sys_module

Expand Down Expand Up @@ -1437,6 +1525,9 @@ def _setup(sys_module, _imp_module):
builtin_module = sys.modules[builtin_name]
setattr(self_module, builtin_name, builtin_module)

# Instantiation requires _weakref to have been set.
_blocking_on = _WeakValueDictionary()


def _install(sys_module, _imp_module):
"""Install importers for builtin and frozen modules"""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Use a ``WeakValueDictionary`` to track the lists containing the modules each
thread is currently importing. This helps avoid a reference leak from
keeping the list around longer than necessary. Weakrefs are used as GC can't
interrupt the cleanup.

0 comments on commit d2f0afd

Please sign in to comment.