From d57f4ebba38dc6a360582575de966b148d4e5113 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Tue, 30 Jan 2024 22:30:11 -0500 Subject: [PATCH 1/9] gh-114763: Protect lazy loading modules from attribute access race --- Lib/importlib/util.py | 77 ++++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 30 deletions(-) diff --git a/Lib/importlib/util.py b/Lib/importlib/util.py index 3ad71d31c2f438..fe1a3110920d79 100644 --- a/Lib/importlib/util.py +++ b/Lib/importlib/util.py @@ -171,36 +171,49 @@ class _LazyModule(types.ModuleType): def __getattribute__(self, attr): """Trigger the load of the module and return the attribute.""" - # All module metadata must be garnered from __spec__ in order to avoid - # using mutated values. - # Stop triggering this method. - self.__class__ = types.ModuleType - # Get the original name to make sure no object substitution occurred - # in sys.modules. - original_name = self.__spec__.name - # Figure out exactly what attributes were mutated between the creation - # of the module and now. - attrs_then = self.__spec__.loader_state['__dict__'] - attrs_now = self.__dict__ - attrs_updated = {} - for key, value in attrs_now.items(): - # Code that set the attribute may have kept a reference to the - # assigned object, making identity more important than equality. - if key not in attrs_then: - attrs_updated[key] = value - elif id(attrs_now[key]) != id(attrs_then[key]): - attrs_updated[key] = value - self.__spec__.loader.exec_module(self) - # If exec_module() was used directly there is no guarantee the module - # object was put into sys.modules. - if original_name in sys.modules: - if id(self) != id(sys.modules[original_name]): - raise ValueError(f"module object for {original_name!r} " - "substituted in sys.modules during a lazy " - "load") - # Update after loading since that's what would happen in an eager - # loading situation. - self.__dict__.update(attrs_updated) + __spec__ = object.__getattribute__(self, '__spec__') + loader_state = __spec__.loader_state + with loader_state['lock']: + if object.__getattribute__(self, '__class__') is _LazyModule: + # exec_module() will access dunder attributes, so we use a reentrant + # lock and an event to prevent infinite recursion. + if loader_state['is_loading'].is_set() and attr[:2] == attr[-2:] == '__': + return object.__getattribute__(self, attr) + loader_state['is_loading'].set() + + __dict__ = object.__getattribute__(self, '__dict__') + + # All module metadata must be garnered from __spec__ in order to avoid + # using mutated values. + # Get the original name to make sure no object substitution occurred + # in sys.modules. + original_name = __spec__.name + # Figure out exactly what attributes were mutated between the creation + # of the module and now. + attrs_then = loader_state['__dict__'] + attrs_now = __dict__ + attrs_updated = {} + for key, value in attrs_now.items(): + # Code that set the attribute may have kept a reference to the + # assigned object, making identity more important than equality. + if key not in attrs_then: + attrs_updated[key] = value + elif id(attrs_now[key]) != id(attrs_then[key]): + attrs_updated[key] = value + __spec__.loader.exec_module(self) + # If exec_module() was used directly there is no guarantee the module + # object was put into sys.modules. + if original_name in sys.modules: + if id(self) != id(sys.modules[original_name]): + raise ValueError(f"module object for {original_name!r} " + "substituted in sys.modules during a lazy " + "load") + # Update after loading since that's what would happen in an eager + # loading situation. + __dict__.update(attrs_updated) + # Finally, stop triggering this method. + self.__class__ = types.ModuleType + return getattr(self, attr) def __delattr__(self, attr): @@ -235,6 +248,8 @@ def create_module(self, spec): def exec_module(self, module): """Make the module load lazily.""" + import threading + module.__spec__.loader = self.loader module.__loader__ = self.loader # Don't need to worry about deep-copying as trying to set an attribute @@ -244,5 +259,7 @@ def exec_module(self, module): loader_state = {} loader_state['__dict__'] = module.__dict__.copy() loader_state['__class__'] = module.__class__ + loader_state['lock'] = threading.RLock() + loader_state['is_loading'] = threading.Event() module.__spec__.loader_state = loader_state module.__class__ = _LazyModule From 3db717c7bbf0f3a6c013bf704718ee28a6edd275 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Tue, 30 Jan 2024 23:07:33 -0500 Subject: [PATCH 2/9] TEST: Reproduce gh-114763 --- Lib/test/test_importlib/test_lazy.py | 40 ++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_importlib/test_lazy.py b/Lib/test/test_importlib/test_lazy.py index cc993f333e355a..49190d293747da 100644 --- a/Lib/test/test_importlib/test_lazy.py +++ b/Lib/test/test_importlib/test_lazy.py @@ -2,6 +2,8 @@ from importlib import abc from importlib import util import sys +import time +import threading import types import unittest @@ -40,6 +42,7 @@ class TestingImporter(abc.MetaPathFinder, abc.Loader): module_name = 'lazy_loader_test' mutated_name = 'changed' loaded = None + load_count = 0 source_code = 'attr = 42; __name__ = {!r}'.format(mutated_name) def find_spec(self, name, path, target=None): @@ -48,8 +51,10 @@ def find_spec(self, name, path, target=None): return util.spec_from_loader(name, util.LazyLoader(self)) def exec_module(self, module): + time.sleep(0.01) # Simulate a slow load. exec(self.source_code, module.__dict__) self.loaded = module + self.load_count += 1 class LazyLoaderTests(unittest.TestCase): @@ -59,8 +64,9 @@ def test_init(self): # Classes that don't define exec_module() trigger TypeError. util.LazyLoader(object) - def new_module(self, source_code=None): - loader = TestingImporter() + def new_module(self, source_code=None, loader=None): + if loader is None: + loader = TestingImporter() if source_code is not None: loader.source_code = source_code spec = util.spec_from_loader(TestingImporter.module_name, @@ -140,6 +146,36 @@ def test_module_already_in_sys(self): # Force the load; just care that no exception is raised. module.__name__ + def test_module_load_race(self): + with test_util.uncache(TestingImporter.module_name): + loader = TestingImporter() + module = self.new_module(loader=loader) + assert loader.load_count == 0 + + class RaisingThread(threading.Thread): + exc = None + def run(self): + try: + super().run() + except Exception as exc: + self.exc = exc + + def access_module(): + return module.attr + + threads = [] + for _ in range(2): + threads.append(thread := RaisingThread(target=access_module)) + thread.start() + + # Races could cause errors + for thread in threads: + thread.join() + assert thread.exc is None + + # Or multiple load attempts + assert loader.load_count == 1 + if __name__ == '__main__': unittest.main() From 5424d0c1c8be0a3ff296ed397b92e3412b098d0e Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Tue, 30 Jan 2024 23:28:46 -0500 Subject: [PATCH 3/9] Add new blurb --- .../Library/2024-01-30-23-28-29.gh-issue-114763.BRjKkg.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-01-30-23-28-29.gh-issue-114763.BRjKkg.rst diff --git a/Misc/NEWS.d/next/Library/2024-01-30-23-28-29.gh-issue-114763.BRjKkg.rst b/Misc/NEWS.d/next/Library/2024-01-30-23-28-29.gh-issue-114763.BRjKkg.rst new file mode 100644 index 00000000000000..e8bdb83dde61fb --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-30-23-28-29.gh-issue-114763.BRjKkg.rst @@ -0,0 +1,3 @@ +Protect modules loaded with :class:`importlib.util.LazyLoader` from race +conditions when multiple threads try to access attributes before the loading +is complete. From 96c08c5ad9e63f9f56941ebdfa8afa7296f2e060 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Wed, 31 Jan 2024 19:22:45 -0500 Subject: [PATCH 4/9] Improve comments around the branching --- Lib/importlib/util.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Lib/importlib/util.py b/Lib/importlib/util.py index fe1a3110920d79..8492580ec9a343 100644 --- a/Lib/importlib/util.py +++ b/Lib/importlib/util.py @@ -174,9 +174,14 @@ def __getattribute__(self, attr): __spec__ = object.__getattribute__(self, '__spec__') loader_state = __spec__.loader_state with loader_state['lock']: + # Only the first thread to get the lock should trigger the load + # and reset the module's class. The rest can now getattr(). if object.__getattribute__(self, '__class__') is _LazyModule: - # exec_module() will access dunder attributes, so we use a reentrant - # lock and an event to prevent infinite recursion. + # The first thread comes here multiple times as it descends the + # call stack. The first time, it sets is_loading and triggers + # exec_module(), which will access module.__dict__, module.__name__, + # and/or module.__spec__, reentering this method. These accesses + # need to be allowed to proceed without triggering the load again. if loader_state['is_loading'].is_set() and attr[:2] == attr[-2:] == '__': return object.__getattribute__(self, attr) loader_state['is_loading'].set() From 3642ddca0ecc7bd8ef40e3b864f883eba8614716 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Wed, 31 Jan 2024 19:26:23 -0500 Subject: [PATCH 5/9] TEST: Use TestCase.assert* methods --- Lib/test/test_importlib/test_lazy.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_importlib/test_lazy.py b/Lib/test/test_importlib/test_lazy.py index 49190d293747da..b672298d8f4265 100644 --- a/Lib/test/test_importlib/test_lazy.py +++ b/Lib/test/test_importlib/test_lazy.py @@ -150,7 +150,7 @@ def test_module_load_race(self): with test_util.uncache(TestingImporter.module_name): loader = TestingImporter() module = self.new_module(loader=loader) - assert loader.load_count == 0 + self.assertEqual(loader.load_count, 0) class RaisingThread(threading.Thread): exc = None @@ -171,10 +171,10 @@ def access_module(): # Races could cause errors for thread in threads: thread.join() - assert thread.exc is None + self.assertIsNone(thread.exc) # Or multiple load attempts - assert loader.load_count == 1 + self.assertEqual(loader.load_count, 1) if __name__ == '__main__': From 6accf0c07546fbb65559384b3140b91c52c621be Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Fri, 16 Feb 2024 19:32:43 -0500 Subject: [PATCH 6/9] Apply suggestions from code review Co-authored-by: Brett Cannon --- Lib/importlib/util.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/importlib/util.py b/Lib/importlib/util.py index 8492580ec9a343..f512b1dd88b49e 100644 --- a/Lib/importlib/util.py +++ b/Lib/importlib/util.py @@ -182,13 +182,13 @@ def __getattribute__(self, attr): # exec_module(), which will access module.__dict__, module.__name__, # and/or module.__spec__, reentering this method. These accesses # need to be allowed to proceed without triggering the load again. - if loader_state['is_loading'].is_set() and attr[:2] == attr[-2:] == '__': + if loader_state['is_loading'].is_set() and attr.startswith('__') and attr.endswith('__'): return object.__getattribute__(self, attr) loader_state['is_loading'].set() __dict__ = object.__getattribute__(self, '__dict__') - # All module metadata must be garnered from __spec__ in order to avoid + # All module metadata must be gathered from __spec__ in order to avoid # using mutated values. # Get the original name to make sure no object substitution occurred # in sys.modules. @@ -199,7 +199,7 @@ def __getattribute__(self, attr): attrs_now = __dict__ attrs_updated = {} for key, value in attrs_now.items(): - # Code that set the attribute may have kept a reference to the + # Code that set an attribute may have kept a reference to the # assigned object, making identity more important than equality. if key not in attrs_then: attrs_updated[key] = value From bb2292f66f7523e1573a5fb5d1d835c253155375 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Fri, 16 Feb 2024 19:38:44 -0500 Subject: [PATCH 7/9] TEST: Require working threading for test_module_load_race --- Lib/test/test_importlib/test_lazy.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/test/test_importlib/test_lazy.py b/Lib/test/test_importlib/test_lazy.py index b672298d8f4265..38ab21907b58d9 100644 --- a/Lib/test/test_importlib/test_lazy.py +++ b/Lib/test/test_importlib/test_lazy.py @@ -7,6 +7,7 @@ import types import unittest +from test.support import threading_helper from test.test_importlib import util as test_util @@ -146,6 +147,7 @@ def test_module_already_in_sys(self): # Force the load; just care that no exception is raised. module.__name__ + @threading_helper.requires_working_threading() def test_module_load_race(self): with test_util.uncache(TestingImporter.module_name): loader = TestingImporter() From 57fe084717a74ae5bb861385ce2ebfbe318720aa Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Fri, 16 Feb 2024 19:39:55 -0500 Subject: [PATCH 8/9] Move threading import to module level of importlib.util --- Lib/importlib/util.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/importlib/util.py b/Lib/importlib/util.py index f512b1dd88b49e..f3cf3b30ed9a3a 100644 --- a/Lib/importlib/util.py +++ b/Lib/importlib/util.py @@ -13,6 +13,7 @@ import _imp import sys +import threading import types @@ -253,8 +254,6 @@ def create_module(self, spec): def exec_module(self, module): """Make the module load lazily.""" - import threading - module.__spec__.loader = self.loader module.__loader__ = self.loader # Don't need to worry about deep-copying as trying to set an attribute From 40383a049de3d72382adc1481fce9fcb32465bea Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Sat, 17 Feb 2024 10:15:12 -0500 Subject: [PATCH 9/9] Make is_loading a bool instead of Event --- Lib/importlib/util.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/importlib/util.py b/Lib/importlib/util.py index f3cf3b30ed9a3a..ff4f12fb1af70c 100644 --- a/Lib/importlib/util.py +++ b/Lib/importlib/util.py @@ -183,9 +183,9 @@ def __getattribute__(self, attr): # exec_module(), which will access module.__dict__, module.__name__, # and/or module.__spec__, reentering this method. These accesses # need to be allowed to proceed without triggering the load again. - if loader_state['is_loading'].is_set() and attr.startswith('__') and attr.endswith('__'): + if loader_state['is_loading'] and attr.startswith('__') and attr.endswith('__'): return object.__getattribute__(self, attr) - loader_state['is_loading'].set() + loader_state['is_loading'] = True __dict__ = object.__getattribute__(self, '__dict__') @@ -264,6 +264,6 @@ def exec_module(self, module): loader_state['__dict__'] = module.__dict__.copy() loader_state['__class__'] = module.__class__ loader_state['lock'] = threading.RLock() - loader_state['is_loading'] = threading.Event() + loader_state['is_loading'] = False module.__spec__.loader_state = loader_state module.__class__ = _LazyModule