diff --git a/src/python/pants/backend/python/subsystems/pex_build_util.py b/src/python/pants/backend/python/subsystems/pex_build_util.py index 141a6d06c59b..0c77f3945baa 100644 --- a/src/python/pants/backend/python/subsystems/pex_build_util.py +++ b/src/python/pants/backend/python/subsystems/pex_build_util.py @@ -34,51 +34,6 @@ from pants.util.contextutil import temporary_file -# NB: Keep this entire class definition in sync between -# `pants.backend.python.subsystems.pex_build_util.IPEXBuilder` and the contents of _IPEX_PREAMBLE! -class IPEXBuilder(PEXBuilder): - "Custom PEXBuilder subclass to expose the 'wheel' and 'vendor' packages." - - def _prepare_bootstrap(self): - """Override of PEXBuilder._prepare_bootstrap(self) in pex/pex_builder.py in the pex source. - - Specifically, https://github.com/pantsbuild/pex/blob/e5317cbc94a49d5129144806ce4117ccd926fb6a/pex/pex_builder.py#L390-L412. - """ - # TODO: Expose 'wheel' and 'vendor' in upstream pex with these changes! - from pex import vendor - from pex.pex_builder import BOOTSTRAP_DIR, LEGACY_BOOSTRAP_PKG - from pex.third_party.pkg_resources import DefaultProvider, ZipProvider, get_provider - - try: - vendor.vendor_runtime(chroot=self._chroot, - dest_basedir=BOOTSTRAP_DIR, - label='bootstrap', - # Below -- original comment from the pex source: - # NB: We use wheel here in the builder, but that's only at build-time. - root_module_names=['pkg_resources', 'wheel']) - except ValueError: - pass - - source_name = 'pex' - provider = get_provider(source_name) - if not isinstance(provider, DefaultProvider): - mod = __import__(source_name, fromlist=['ignore']) - provider = ZipProvider(mod) - # TODO: We add 'vendor' here. It's unclear if that's necessary, but we probably want to instead - # make this change in upstream pex. - for package in ('', 'third_party', 'vendor'): - for fn in provider.resource_listdir(package): - if fn.endswith('.py'): - rel_path = os.path.join(package, fn) - self._chroot.write(provider.get_resource_string(source_name, rel_path), - os.path.join(BOOTSTRAP_DIR, source_name, rel_path), - 'bootstrap') - - # Setup a re-director package to support the legacy pex runtime `_pex` APIs through a - # VendorImporter. - self._chroot.touch(os.path.join(BOOTSTRAP_DIR, LEGACY_BOOSTRAP_PKG, '__init__.py'), 'bootstrap') - - # This bootstrapping technique for lazily loading requirements comes from the pex project test case: # https://github.com/pantsbuild/pex/commit/66aa5ef69e8b65004a548b44edc0b81dfa8ec736#diff-b5b57c9d4fc663f59ecaedb851c13bd8R23 _IPEX_PREAMBLE = """\ @@ -98,51 +53,6 @@ def _prepare_bootstrap(self): from pex.variables import ENV -# NB: Keep this entire class definition in sync between -# `pants.backend.python.subsystems.pex_build_util.IPEXBuilder` and the contents of _IPEX_PREAMBLE! -class IPEXBuilder(PEXBuilder): - "Custom PEXBuilder subclass to expose the 'wheel' and 'vendor' packages." - - def _prepare_bootstrap(self): - \"""Override of PEXBuilder._prepare_bootstrap(self) in pex/pex_builder.py in the pex source. - - Specifically, https://github.com/pantsbuild/pex/blob/e5317cbc94a49d5129144806ce4117ccd926fb6a/pex/pex_builder.py#L390-L412. - \""" - # TODO: Expose 'wheel' and 'vendor' in upstream pex with these changes! - from pex import vendor - from pex.pex_builder import BOOTSTRAP_DIR, LEGACY_BOOSTRAP_PKG - from pex.third_party.pkg_resources import DefaultProvider, ZipProvider, get_provider - - try: - vendor.vendor_runtime(chroot=self._chroot, - dest_basedir=BOOTSTRAP_DIR, - label='bootstrap', - # Below -- original comment from the pex source: - # NB: We use wheel here in the builder, but that's only at build-time. - root_module_names=['pkg_resources', 'wheel']) - except ValueError: - pass - - source_name = 'pex' - provider = get_provider(source_name) - if not isinstance(provider, DefaultProvider): - mod = __import__(source_name, fromlist=['ignore']) - provider = ZipProvider(mod) - # TODO: We add 'vendor' here. It's unclear if that's necessary, but we probably want to instead - # make this change in upstream pex. - for package in ('', 'third_party', 'vendor'): - for fn in provider.resource_listdir(package): - if fn.endswith('.py'): - rel_path = os.path.join(package, fn) - self._chroot.write(provider.get_resource_string(source_name, rel_path), - os.path.join(BOOTSTRAP_DIR, source_name, rel_path), - 'bootstrap') - - # Setup a re-director package to support the legacy pex runtime `_pex` APIs through a - # VendorImporter. - self._chroot.touch(os.path.join(BOOTSTRAP_DIR, LEGACY_BOOSTRAP_PKG, '__init__.py'), 'bootstrap') - - self = sys.argv[0] filename_base, ext = tuple(os.path.splitext(self)) if ext == '.pex': @@ -159,7 +69,7 @@ def _prepare_bootstrap(self): with open_zip(self) as zf: # Populate the pex with the pinned requirements and distribution names & hashes. orig_pex_info = PexInfo.from_json(zf.read('IPEX-INFO')) - ipex_builder = IPEXBuilder(pex_info=orig_pex_info) + ipex_builder = PEXBuilder(pex_info=orig_pex_info) # Populate the pex with the needed code. ptex_info = json.loads(zf.read('PTEX-INFO').decode('utf-8')) @@ -172,22 +82,6 @@ def _prepare_bootstrap(self): if plat.platform not in transitive_requirements_by_platform: raise ValueError("Current platform {} was not found in the 'transitive_requirements' field in PTEX-INFO:\\n{}".format(plat, json.dumps(ptex_info, indent=4))) - # Hack the vendor specs so that they point to existing directories so that they can be copied to - # the IPEXBuilder chroot. - setuptools_spec = vendor.VendorSpec.create('setuptools==40.6.2') - wheel_spec = vendor.VendorSpec.create('wheel==0.31.1') - vendored_specs = [setuptools_spec, wheel_spec] - - for spec in vendored_specs: - if os.path.isdir(spec.target_dir): - raise ValueError("Vendored specs are expected to be within the zipped pex, but {} from {} is a real directory!".format(spec.target_dir, spec)) - files_in_dir = [name for name in zf.namelist() - if name.startswith(os.path.join(BOOTSTRAP_DIR, spec.relpath))] - zf.extractall(code_root, files_in_dir) - spec.ROOT = os.path.join(code_root, BOOTSTRAP_DIR) - # NB: Monkey-patch the pex.vendor.iter_vendor_specs() method! - vendor.iter_vendor_specs = lambda: vendored_specs - # Perform a fully pinned intransitive resolve to hydrate the install cache (not the # pex!). resolver_settings = ptex_info['resolver_settings'] @@ -322,12 +216,6 @@ def register_options(cls, register): register('--setuptools-version', advanced=True, default='40.6.3', fingerprint=True, help='The setuptools version to include in the pex if namespace packages need to be ' 'injected.') - register('--generate-ipex', type=bool, default=False, fingerprint=True, - help='Whether to generate a .ipex file, which will "hydrate" its dependencies when ' - 'it is first executed, rather than at build time (the normal pex behavior). ' - 'This option can reduce the size of a shipped pex file by over 100x for common' - 'deps such as tensorflow, but it does require access to a pypi-esque index ' - 'when first executed.') @classmethod def subsystem_dependencies(cls): @@ -341,12 +229,10 @@ def generate_ipex(self) -> bool: return cast(bool, self.get_options().generate_ipex) @classmethod - def create(cls, builder, log=None, parent_optionable=None): - if parent_optionable is None: - options = cls.global_instance().get_options() - else: - options = cls.scoped_instance(parent_optionable).get_options() + def create(cls, builder, log=None, generate_ipex=None, pex_requirement=None): + options = cls.global_instance().get_options() setuptools_requirement = f'setuptools=={options.setuptools_version}' + pex_requirement = f'pex=={pex_requirement}' if pex_requirement else 'pex' log = log or logging.getLogger(__name__) @@ -354,14 +240,16 @@ def create(cls, builder, log=None, parent_optionable=None): python_repos_subsystem=PythonRepos.global_instance(), python_setup_subsystem=PythonSetup.global_instance(), setuptools_requirement=PythonRequirement(setuptools_requirement), + pex_requirement=PythonRequirement(pex_requirement), log=log, - generate_ipex=options.generate_ipex) + generate_ipex=bool(generate_ipex)) def __init__(self, builder: PEXBuilder, python_repos_subsystem: PythonRepos, python_setup_subsystem: PythonSetup, setuptools_requirement: PythonRequirement, + pex_requirement: PythonRequirement, log: Any, generate_ipex: bool = False): assert log is not None @@ -370,6 +258,7 @@ def __init__(self, self._python_repos_subsystem = python_repos_subsystem self._python_setup_subsystem = python_setup_subsystem self._setuptools_requirement = setuptools_requirement + self._pex_requirement = pex_requirement self._log = log self._distributions: Dict[str, Distribution] = {} @@ -454,12 +343,8 @@ def add_resolved_requirements(self, reqs, platforms=None): for platform, dists in distributions.items(): for dist in dists: if dist.location not in locations: - if self.generate_ipex: - # The requirements are added to the ipex in self._resolve_distributions_by_platform. - self._log.debug(f' *Avoiding* dumping ipex distribution: .../{os.path.basename(dist.location)}') - else: - self._log.debug(f' Dumping distribution: .../{os.path.basename(dist.location)}') - self.add_distribution(dist) + self._log.debug(f' Dumping distribution: .../{os.path.basename(dist.location)}') + self.add_distribution(dist) locations.add(dist.location) def _resolve_multi(self, interpreter, requirements, platforms, find_links) -> Dict[str, List[Distribution]]: @@ -548,18 +433,26 @@ def _shuffle_original_build_info_into_ipex(self): See this test case as background: https://github.com/pantsbuild/pex/commit/66aa5ef69e8b65004a548b44edc0b81dfa8ec736#diff-b5b57c9d4fc663f59ecaedb851c13bd8R23 """ + ipex_info = self._builder.info.copy() + chroot = self._builder.chroot() + # Replace the original builder with a new one, and just pull files from the old chroot. + self._builder = PEXBuilder(interpreter=self._builder.interpreter) + # If the dependencies are very large, this flag is indicated to be useful in RAM-constrained - # environments. This will be set in both the PEX-INFO and the IPEX-INFO in the produced .ipex - # file. + # environments. self._builder.info.always_write_cache = True # PTEX-INFO: Contains resolver settings and source files to copy over into the dehydrated .ipex # file. - chroot = self._builder.chroot() code = [ f for f in chroot.get('source') | chroot.get('resource') if f not in ['__main__.py', PexInfo.PATH] ] + all_code = [] + for label in ['source', 'resource']: + for src in chroot.get(label): + all_code.append(src) + self._builder.chroot().copy(os.path.join(chroot.path(), src), src, label=label) python_setup = self._python_setup_subsystem python_repos = self._python_repos_subsystem @@ -587,7 +480,6 @@ def _shuffle_original_build_info_into_ipex(self): # IPEX-INFO: The original PEX-INFO, which should be the PEX-INFO in the hydrated .pex file that # is generated when the .ipex is first executed. - ipex_info = self._builder.info.copy() with temporary_file(permissions=0o644) as ipex_info_file: ipex_info_file.write(ipex_info.dump().encode()) ipex_info_file.flush() @@ -605,11 +497,18 @@ def _shuffle_original_build_info_into_ipex(self): # There is an assertion checking this in _ptex_launcher.py due to the use of .zip_unsafe_cache. self._builder.info.zip_safe = False - # The PEX-INFO we generate shouldn't have any requirements, or they will fail to bootstrap - # because they were unable to find those distributions. Instead, the .pex file produced when the - # .ipex is first executed will read and resolve all those requirements from the IPEX-INFO. - self._builder.requirements = [] - self._builder.info._requirements = set() + + # The PEX-INFO we generate shouldn't have any requirements (except pex itself), or they will + # fail to bootstrap because they were unable to find those distributions. Instead, the .pex file + # produced when the .ipex is first executed will read and resolve all those requirements from + # the IPEX-INFO. + pex_dist = self._distributions.get('pex') + if pex_dist: + pex_reqs = pex_dist.requires() + if not pex_dist: + # If pex was not already resolved to a dist, add it. + pex_reqs = [self._pex_requirement] + self.add_resolved_requirements(pex_reqs) def freeze(self) -> None: if self._frozen: diff --git a/src/python/pants/backend/python/tasks/python_binary_create.py b/src/python/pants/backend/python/tasks/python_binary_create.py index a8d17e0f470a..5f300d7fb245 100644 --- a/src/python/pants/backend/python/tasks/python_binary_create.py +++ b/src/python/pants/backend/python/tasks/python_binary_create.py @@ -8,7 +8,6 @@ from pex.pex_info import PexInfo from pants.backend.python.subsystems.pex_build_util import ( - IPEXBuilder, PexBuilderWrapper, has_python_requirements, has_python_sources, @@ -39,11 +38,21 @@ def register_options(cls, register): "created and the command line used to create it. This information may be helpful to you, but means " "that the generated PEX will not be reproducible; that is, future runs of `./pants binary` will not " "create the same byte-for-byte identical .pex files.") + # TODO: make an analogy to cls.register_jvm_tool that can be overridden for python subsystems + # by a python_requirement_library() target, not just via pants.ini! + register('--ipex-injected-pex-version', advanced=True, default='1.6.12', fingerprint=True, + help='The pex version to include in the generated .ipex if --generate-ipex is on.') + register('--generate-ipex', type=bool, default=False, fingerprint=True, + help='Whether to generate a .ipex file, which will "hydrate" its dependencies when ' + 'it is first executed, rather than at build time (the normal pex behavior). ' + 'This option can reduce the size of a shipped pex file by over 100x for common' + 'deps such as tensorflow, but it does require access to a pypi-esque index ' + 'when first executed.') @classmethod def subsystem_dependencies(cls): return super().subsystem_dependencies() + ( - PexBuilderWrapper.Factory.scoped(cls), + PexBuilderWrapper.Factory, PythonNativeCode.scoped(cls), ) @@ -80,7 +89,7 @@ def __init__(self, *args, **kwargs): @property def _generate_ipex(self) -> bool: - return PexBuilderWrapper.Factory.scoped_instance(self).generate_ipex + return self.get_options().generate_ipex def _get_output_pex_filename(self, target_name): # If generating a dehydrated "ipex" file, name it appropriately. Note that the the ipex will @@ -140,11 +149,12 @@ def _create_binary(self, binary_tgt, results_dir): pex_info = binary_tgt.pexinfo.copy() pex_info.build_properties = build_properties - builder_class = IPEXBuilder if self._generate_ipex else PEXBuilder pex_builder = PexBuilderWrapper.Factory.create( - builder=builder_class(path=tmpdir, interpreter=interpreter, pex_info=pex_info, copy=True), + builder=PEXBuilder(path=tmpdir, interpreter=interpreter, pex_info=pex_info, copy=True), log=self.context.log, - parent_optionable=self) + generate_ipex=self.get_options().generate_ipex, + pex_requirement=self.get_options().ipex_injected_pex_version, + ) if binary_tgt.shebang: self.context.log.info('Found Python binary target {} with customized shebang, using it: {}' diff --git a/tests/python/pants_test/backend/python/tasks/test_python_binary_create.py b/tests/python/pants_test/backend/python/tasks/test_python_binary_create.py index ced2ba566818..611ceabea0f8 100644 --- a/tests/python/pants_test/backend/python/tasks/test_python_binary_create.py +++ b/tests/python/pants_test/backend/python/tasks/test_python_binary_create.py @@ -126,9 +126,12 @@ def test_generate_ipex_ansicolors(self): ':lib', ]) + self.set_options(generate_ipex=True) dist_dir = os.path.join(self.build_root, 'dist') self._assert_pex(binary, - options={'pex-builder-wrapper': dict(generate_ipex=True)}, + options={ + 'binary.py': dict(generate_ipex=True), + }, expected_output=dedent(f"""\ Hydrating {dist_dir}/bin.ipex to {dist_dir}/bin.pex {blue('i just lazy-loaded the ansicolors dependency!')}