Skip to content

Commit c942fae

Browse files
[3.12] GH-106176, GH-104702: Fix reference leak when importing across multiple threads (GH-108497) (#108612)
GH-106176, GH-104702: Fix reference leak when importing across multiple threads (GH-108497) (cherry picked from commit 5f85b44) Co-authored-by: Brett Cannon <brett@python.org>
1 parent ae9bbd1 commit c942fae

File tree

2 files changed

+107
-12
lines changed

2 files changed

+107
-12
lines changed

Lib/importlib/_bootstrap.py

+103-12
Original file line numberDiff line numberDiff line change
@@ -51,18 +51,106 @@ def _new_module(name):
5151

5252
# Module-level locking ########################################################
5353

54-
# A dict mapping module names to weakrefs of _ModuleLock instances
55-
# Dictionary protected by the global import lock
54+
# For a list that can have a weakref to it.
55+
class _List(list):
56+
pass
57+
58+
59+
# Copied from weakref.py with some simplifications and modifications unique to
60+
# bootstrapping importlib. Many methods were simply deleting for simplicity, so if they
61+
# are needed in the future they may work if simply copied back in.
62+
class _WeakValueDictionary:
63+
64+
def __init__(self):
65+
self_weakref = _weakref.ref(self)
66+
67+
# Inlined to avoid issues with inheriting from _weakref.ref before _weakref is
68+
# set by _setup(). Since there's only one instance of this class, this is
69+
# not expensive.
70+
class KeyedRef(_weakref.ref):
71+
72+
__slots__ = "key",
73+
74+
def __new__(type, ob, key):
75+
self = super().__new__(type, ob, type.remove)
76+
self.key = key
77+
return self
78+
79+
def __init__(self, ob, key):
80+
super().__init__(ob, self.remove)
81+
82+
@staticmethod
83+
def remove(wr):
84+
nonlocal self_weakref
85+
86+
self = self_weakref()
87+
if self is not None:
88+
if self._iterating:
89+
self._pending_removals.append(wr.key)
90+
else:
91+
_weakref._remove_dead_weakref(self.data, wr.key)
92+
93+
self._KeyedRef = KeyedRef
94+
self.clear()
95+
96+
def clear(self):
97+
self._pending_removals = []
98+
self._iterating = set()
99+
self.data = {}
100+
101+
def _commit_removals(self):
102+
pop = self._pending_removals.pop
103+
d = self.data
104+
while True:
105+
try:
106+
key = pop()
107+
except IndexError:
108+
return
109+
_weakref._remove_dead_weakref(d, key)
110+
111+
def get(self, key, default=None):
112+
if self._pending_removals:
113+
self._commit_removals()
114+
try:
115+
wr = self.data[key]
116+
except KeyError:
117+
return default
118+
else:
119+
if (o := wr()) is None:
120+
return default
121+
else:
122+
return o
123+
124+
def setdefault(self, key, default=None):
125+
try:
126+
o = self.data[key]()
127+
except KeyError:
128+
o = None
129+
if o is None:
130+
if self._pending_removals:
131+
self._commit_removals()
132+
self.data[key] = self._KeyedRef(default, key)
133+
return default
134+
else:
135+
return o
136+
137+
138+
# A dict mapping module names to weakrefs of _ModuleLock instances.
139+
# Dictionary protected by the global import lock.
56140
_module_locks = {}
57141

58-
# A dict mapping thread IDs to lists of _ModuleLock instances. This maps a
59-
# thread to the module locks it is blocking on acquiring. The values are
60-
# lists because a single thread could perform a re-entrant import and be "in
61-
# the process" of blocking on locks for more than one module. A thread can
62-
# be "in the process" because a thread cannot actually block on acquiring
63-
# more than one lock but it can have set up bookkeeping that reflects that
64-
# it intends to block on acquiring more than one lock.
65-
_blocking_on = {}
142+
# A dict mapping thread IDs to weakref'ed lists of _ModuleLock instances.
143+
# This maps a thread to the module locks it is blocking on acquiring. The
144+
# values are lists because a single thread could perform a re-entrant import
145+
# and be "in the process" of blocking on locks for more than one module. A
146+
# thread can be "in the process" because a thread cannot actually block on
147+
# acquiring more than one lock but it can have set up bookkeeping that reflects
148+
# that it intends to block on acquiring more than one lock.
149+
#
150+
# The dictionary uses a WeakValueDictionary to avoid keeping unnecessary
151+
# lists around, regardless of GC runs. This way there's no memory leak if
152+
# the list is no longer needed (GH-106176).
153+
_blocking_on = None
66154

67155

68156
class _BlockingOnManager:
@@ -79,7 +167,7 @@ def __enter__(self):
79167
# re-entrant (i.e., a single thread may take it more than once) so it
80168
# wouldn't help us be correct in the face of re-entrancy either.
81169

82-
self.blocked_on = _blocking_on.setdefault(self.thread_id, [])
170+
self.blocked_on = _blocking_on.setdefault(self.thread_id, _List())
83171
self.blocked_on.append(self.lock)
84172

85173
def __exit__(self, *args, **kwargs):
@@ -1409,7 +1497,7 @@ def _setup(sys_module, _imp_module):
14091497
modules, those two modules must be explicitly passed in.
14101498
14111499
"""
1412-
global _imp, sys
1500+
global _imp, sys, _blocking_on
14131501
_imp = _imp_module
14141502
sys = sys_module
14151503

@@ -1437,6 +1525,9 @@ def _setup(sys_module, _imp_module):
14371525
builtin_module = sys.modules[builtin_name]
14381526
setattr(self_module, builtin_name, builtin_module)
14391527

1528+
# Instantiation requires _weakref to have been set.
1529+
_blocking_on = _WeakValueDictionary()
1530+
14401531

14411532
def _install(sys_module, _imp_module):
14421533
"""Install importers for builtin and frozen modules"""
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Use a ``WeakValueDictionary`` to track the lists containing the modules each
2+
thread is currently importing. This helps avoid a reference leak from
3+
keeping the list around longer than necessary. Weakrefs are used as GC can't
4+
interrupt the cleanup.

0 commit comments

Comments
 (0)