From 69649c1a0114fb7c1fb78c395d5e5b87934d2546 Mon Sep 17 00:00:00 2001 From: "Brian Wickman (Twitter)" Date: Tue, 18 Mar 2014 16:43:18 -0700 Subject: [PATCH] Improve scrubbing of namespace packages Problem: since we can't force python -S using /usr/bin/env, we can't prevent importing site. But 'import site' populates sys.modules with placeholder modules containing .__path__ attributes with site-packages baked in. This means even if we scrub sys.path and sys.path_importer_cache, it's still possible for __import__ to pick up things from site-packages. This scrubs sys.modules as well. Reviewed at https://rbcommons.com/s/twitter/r/116/ (sapling split of a11f3bc4d0fa25aebf803ce2b511e7bff45f83f5) --- src/python/twitter/common/python/common.py | 91 ++++++++-------------- src/python/twitter/common/python/pex.py | 76 ++++++++++++++---- 2 files changed, 93 insertions(+), 74 deletions(-) diff --git a/src/python/twitter/common/python/common.py b/src/python/twitter/common/python/common.py index 485a3d3f5..67f2230b0 100644 --- a/src/python/twitter/common/python/common.py +++ b/src/python/twitter/common/python/common.py @@ -27,29 +27,31 @@ import zipfile -_MKDTEMP_CLEANER = None -_MKDTEMP_DIRS = defaultdict(set) -_MKDTEMP_LOCK = threading.RLock() +# See http://stackoverflow.com/questions/2572172/referencing-other-modules-in-atexit +class MktempTeardownRegistry(object): + def __init__(self): + self._registry = defaultdict(set) + self._getpid = os.getpid + self._lock = threading.RLock() + self._exists = os.path.exists + self._rmtree = shutil.rmtree + atexit.register(self.teardown) + def __del__(self): + self.teardown() -def _mkdtemp_atexit_cleaner(): - for td in _MKDTEMP_DIRS.pop(os.getpid(), []): - safe_rmtree(td) + def register(self, path): + with self._lock: + self._registry[self._getpid()].add(path) + return path + def teardown(self): + for td in self._registry.pop(self._getpid(), []): + if self._exists(td): + self._rmtree(td) -def _mkdtemp_unregister_cleaner(): - global _MKDTEMP_CLEANER - _MKDTEMP_CLEANER = None - -def _mkdtemp_register_cleaner(cleaner): - global _MKDTEMP_CLEANER - if not cleaner: - return - assert callable(cleaner) - if _MKDTEMP_CLEANER is None: - atexit.register(cleaner) - _MKDTEMP_CLEANER = cleaner +_MKDTEMP_SINGLETON = MktempTeardownRegistry() @contextlib.contextmanager @@ -61,32 +63,20 @@ def open_zip(path, *args, **kwargs): yield zip -def safe_mkdtemp(cleaner=_mkdtemp_atexit_cleaner, **kw): +def safe_mkdtemp(**kw): """ Given the parameters to standard tempfile.mkdtemp, create a temporary directory that is cleaned up on process exit. """ # proper lock sanitation on fork [issue 6721] would be desirable here. - with _MKDTEMP_LOCK: - return register_rmtree(tempfile.mkdtemp(**kw), cleaner=cleaner) + return _MKDTEMP_SINGLETON.register(tempfile.mkdtemp(**kw)) -def register_rmtree(directory, cleaner=_mkdtemp_atexit_cleaner): +def register_rmtree(directory): """ Register an existing directory to be cleaned up at process exit. """ - with _MKDTEMP_LOCK: - _mkdtemp_register_cleaner(cleaner) - _MKDTEMP_DIRS[os.getpid()].add(directory) - return directory - - -def safe_rmtree(directory): - """ - Delete a directory if it's present. If it's not present, no-op. - """ - if os.path.exists(directory): - shutil.rmtree(directory, True) + return _MKDTEMP_SINGLETON.register(directory) def safe_mkdir(directory, clean=False): @@ -123,6 +113,14 @@ def safe_delete(filename): raise +def safe_rmtree(directory): + """ + Delete a directory if it's present. If it's not present, no-op. + """ + if os.path.exists(directory): + shutil.rmtree(directory, True) + + def chmod_plus_x(path): """ Equivalent of unix `chmod a+x path` @@ -168,31 +166,6 @@ def touch(file, times=None): os.utime(file, times) -@contextlib.contextmanager -def mutable_sys(): - """ - A with-context that does backup/restore of sys.argv, sys.path and - sys.stderr/stdout/stdin following execution. - """ - SAVED_ATTRIBUTES = [ - 'stdin', 'stdout', 'stderr', - 'argv', 'path', 'path_importer_cache', 'path_hooks', - 'modules', '__egginsert' - ] - - _sys_backup = dict((key, getattr(sys, key)) for key in SAVED_ATTRIBUTES if hasattr(sys, key)) - _sys_delete = set(filter(lambda key: not hasattr(sys, key), SAVED_ATTRIBUTES)) - - try: - yield sys - finally: - for attribute in _sys_backup: - setattr(sys, attribute, _sys_backup[attribute]) - for attribute in _sys_delete: - if hasattr(sys, attribute): - delattr(sys, attribute) - - class Chroot(object): """ A chroot of files overlayed from one directory to another directory. diff --git a/src/python/twitter/common/python/pex.py b/src/python/twitter/common/python/pex.py index 523271689..faf050431 100644 --- a/src/python/twitter/common/python/pex.py +++ b/src/python/twitter/common/python/pex.py @@ -6,7 +6,7 @@ from site import USER_SITE import sys -from .common import mutable_sys, safe_mkdir +from .common import safe_mkdir from .compatibility import exec_function from .environment import PEXEnvironment from .interpreter import PythonInterpreter @@ -87,19 +87,20 @@ def _site_libs(cls): sysconfig.get_python_lib(plat_specific=True)]) @classmethod - def minimum_path(cls): - """ - Return as a tuple the emulated sys.path and sys.path_importer_cache of - a bare python installation, a la python -S. - """ - site_libs = set(cls._site_libs()) - for site_lib in site_libs: - TRACER.log('Found site-library: %s' % site_lib) - for extras_path in cls._extras_paths(): - TRACER.log('Found site extra: %s' % extras_path) - site_libs.add(extras_path) - site_libs = set(os.path.normpath(path) for path in site_libs) + def minimum_sys_modules(cls, site_libs): + new_modules = {} + for module_name, module in sys.modules.items(): + if any(path.startswith(site_lib) for path in getattr(module, '__path__', ()) + for site_lib in site_libs): + TRACER.log('Scrubbing %s from sys.modules' % module) + else: + new_modules[module_name] = module + + return new_modules + + @classmethod + def minimum_sys_path(cls, site_libs): site_distributions = OrderedSet() for path_element in sys.path: if any(path_element.startswith(site_lib) for site_lib in site_libs): @@ -122,9 +123,30 @@ def minimum_path(cls): if key not in scrub_from_importer_cache) return scrubbed_sys_path, scrubbed_importer_cache + @classmethod + def minimum_sys(cls): + """Return the minimum sys necessary to run this interpreter, a la python -S. + + :returns: (sys.path, sys.path_importer_cache, sys.modules) tuple of a + bare python installation. + """ + site_libs = set(cls._site_libs()) + for site_lib in site_libs: + TRACER.log('Found site-library: %s' % site_lib) + for extras_path in cls._extras_paths(): + TRACER.log('Found site extra: %s' % extras_path) + site_libs.add(extras_path) + site_libs = set(os.path.normpath(path) for path in site_libs) + + sys_modules = cls.minimum_sys_modules(site_libs) + sys_path, sys_path_importer_cache = cls.minimum_sys_path(site_libs) + + return sys_path, sys_path_importer_cache, sys_modules + @classmethod @contextmanager def patch_pkg_resources(cls, working_set): + """Patch pkg_resources given a new working set.""" def patch(working_set): pkg_resources.working_set = working_set pkg_resources.require = working_set.require @@ -139,10 +161,34 @@ def patch(working_set): finally: patch(old_working_set) + @classmethod + @contextmanager + def patch_sys(cls): + """Patch sys with all site scrubbed.""" + def patch_dict(old_value, new_value): + old_value.clear() + old_value.update(new_value) + + def patch_all(path, path_importer_cache, modules): + sys.path[:] = path + patch_dict(sys.path_importer_cache, path_importer_cache) + patch_dict(sys.modules, modules) + + old_sys_path, old_sys_path_importer_cache, old_sys_modules = ( + sys.path[:], sys.path_importer_cache.copy(), sys.modules.copy()) + new_sys_path, new_sys_path_importer_cache, new_sys_modules = cls.minimum_sys() + + patch_all(new_sys_path, new_sys_path_importer_cache, new_sys_modules) + + try: + yield + finally: + patch_all(old_sys_path, old_sys_path_importer_cache, old_sys_modules) + def execute(self, args=()): entry_point = self.entry() - with mutable_sys(): - sys.path, sys.path_importer_cache = self.minimum_path() + + with self.patch_sys(): working_set = self._env.activate() if 'PEX_COVERAGE' in os.environ: PEX.start_coverage()