Skip to content

Commit

Permalink
remove a ton of hackery in the ptex launcher
Browse files Browse the repository at this point in the history
  • Loading branch information
cosmicexplorer committed Dec 12, 2019
1 parent 4cce241 commit 660d253
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 141 deletions.
167 changes: 33 additions & 134 deletions src/python/pants/backend/python/subsystems/pex_build_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,51 +34,6 @@
from pants.util.contextutil import temporary_file


# NB: Keep this entire class definition in sync between
# `pants.backend.python.subsystems.pex_build_util.IPEXBuilder` and the contents of _IPEX_PREAMBLE!
class IPEXBuilder(PEXBuilder):
"Custom PEXBuilder subclass to expose the 'wheel' and 'vendor' packages."

def _prepare_bootstrap(self):
"""Override of PEXBuilder._prepare_bootstrap(self) in pex/pex_builder.py in the pex source.
Specifically, https://github.com/pantsbuild/pex/blob/e5317cbc94a49d5129144806ce4117ccd926fb6a/pex/pex_builder.py#L390-L412.
"""
# TODO: Expose 'wheel' and 'vendor' in upstream pex with these changes!
from pex import vendor
from pex.pex_builder import BOOTSTRAP_DIR, LEGACY_BOOSTRAP_PKG
from pex.third_party.pkg_resources import DefaultProvider, ZipProvider, get_provider

try:
vendor.vendor_runtime(chroot=self._chroot,
dest_basedir=BOOTSTRAP_DIR,
label='bootstrap',
# Below -- original comment from the pex source:
# NB: We use wheel here in the builder, but that's only at build-time.
root_module_names=['pkg_resources', 'wheel'])
except ValueError:
pass

source_name = 'pex'
provider = get_provider(source_name)
if not isinstance(provider, DefaultProvider):
mod = __import__(source_name, fromlist=['ignore'])
provider = ZipProvider(mod)
# TODO: We add 'vendor' here. It's unclear if that's necessary, but we probably want to instead
# make this change in upstream pex.
for package in ('', 'third_party', 'vendor'):
for fn in provider.resource_listdir(package):
if fn.endswith('.py'):
rel_path = os.path.join(package, fn)
self._chroot.write(provider.get_resource_string(source_name, rel_path),
os.path.join(BOOTSTRAP_DIR, source_name, rel_path),
'bootstrap')

# Setup a re-director package to support the legacy pex runtime `_pex` APIs through a
# VendorImporter.
self._chroot.touch(os.path.join(BOOTSTRAP_DIR, LEGACY_BOOSTRAP_PKG, '__init__.py'), 'bootstrap')


# This bootstrapping technique for lazily loading requirements comes from the pex project test case:
# https://github.com/pantsbuild/pex/commit/66aa5ef69e8b65004a548b44edc0b81dfa8ec736#diff-b5b57c9d4fc663f59ecaedb851c13bd8R23
_IPEX_PREAMBLE = """\
Expand All @@ -98,51 +53,6 @@ def _prepare_bootstrap(self):
from pex.variables import ENV
# NB: Keep this entire class definition in sync between
# `pants.backend.python.subsystems.pex_build_util.IPEXBuilder` and the contents of _IPEX_PREAMBLE!
class IPEXBuilder(PEXBuilder):
"Custom PEXBuilder subclass to expose the 'wheel' and 'vendor' packages."
def _prepare_bootstrap(self):
\"""Override of PEXBuilder._prepare_bootstrap(self) in pex/pex_builder.py in the pex source.
Specifically, https://github.com/pantsbuild/pex/blob/e5317cbc94a49d5129144806ce4117ccd926fb6a/pex/pex_builder.py#L390-L412.
\"""
# TODO: Expose 'wheel' and 'vendor' in upstream pex with these changes!
from pex import vendor
from pex.pex_builder import BOOTSTRAP_DIR, LEGACY_BOOSTRAP_PKG
from pex.third_party.pkg_resources import DefaultProvider, ZipProvider, get_provider
try:
vendor.vendor_runtime(chroot=self._chroot,
dest_basedir=BOOTSTRAP_DIR,
label='bootstrap',
# Below -- original comment from the pex source:
# NB: We use wheel here in the builder, but that's only at build-time.
root_module_names=['pkg_resources', 'wheel'])
except ValueError:
pass
source_name = 'pex'
provider = get_provider(source_name)
if not isinstance(provider, DefaultProvider):
mod = __import__(source_name, fromlist=['ignore'])
provider = ZipProvider(mod)
# TODO: We add 'vendor' here. It's unclear if that's necessary, but we probably want to instead
# make this change in upstream pex.
for package in ('', 'third_party', 'vendor'):
for fn in provider.resource_listdir(package):
if fn.endswith('.py'):
rel_path = os.path.join(package, fn)
self._chroot.write(provider.get_resource_string(source_name, rel_path),
os.path.join(BOOTSTRAP_DIR, source_name, rel_path),
'bootstrap')
# Setup a re-director package to support the legacy pex runtime `_pex` APIs through a
# VendorImporter.
self._chroot.touch(os.path.join(BOOTSTRAP_DIR, LEGACY_BOOSTRAP_PKG, '__init__.py'), 'bootstrap')
self = sys.argv[0]
filename_base, ext = tuple(os.path.splitext(self))
if ext == '.pex':
Expand All @@ -159,7 +69,7 @@ def _prepare_bootstrap(self):
with open_zip(self) as zf:
# Populate the pex with the pinned requirements and distribution names & hashes.
orig_pex_info = PexInfo.from_json(zf.read('IPEX-INFO'))
ipex_builder = IPEXBuilder(pex_info=orig_pex_info)
ipex_builder = PEXBuilder(pex_info=orig_pex_info)
# Populate the pex with the needed code.
ptex_info = json.loads(zf.read('PTEX-INFO').decode('utf-8'))
Expand All @@ -172,22 +82,6 @@ def _prepare_bootstrap(self):
if plat.platform not in transitive_requirements_by_platform:
raise ValueError("Current platform {} was not found in the 'transitive_requirements' field in PTEX-INFO:\\n{}".format(plat, json.dumps(ptex_info, indent=4)))
# Hack the vendor specs so that they point to existing directories so that they can be copied to
# the IPEXBuilder chroot.
setuptools_spec = vendor.VendorSpec.create('setuptools==40.6.2')
wheel_spec = vendor.VendorSpec.create('wheel==0.31.1')
vendored_specs = [setuptools_spec, wheel_spec]
for spec in vendored_specs:
if os.path.isdir(spec.target_dir):
raise ValueError("Vendored specs are expected to be within the zipped pex, but {} from {} is a real directory!".format(spec.target_dir, spec))
files_in_dir = [name for name in zf.namelist()
if name.startswith(os.path.join(BOOTSTRAP_DIR, spec.relpath))]
zf.extractall(code_root, files_in_dir)
spec.ROOT = os.path.join(code_root, BOOTSTRAP_DIR)
# NB: Monkey-patch the pex.vendor.iter_vendor_specs() method!
vendor.iter_vendor_specs = lambda: vendored_specs
# Perform a fully pinned intransitive resolve to hydrate the install cache (not the
# pex!).
resolver_settings = ptex_info['resolver_settings']
Expand Down Expand Up @@ -322,12 +216,6 @@ def register_options(cls, register):
register('--setuptools-version', advanced=True, default='40.6.3', fingerprint=True,
help='The setuptools version to include in the pex if namespace packages need to be '
'injected.')
register('--generate-ipex', type=bool, default=False, fingerprint=True,
help='Whether to generate a .ipex file, which will "hydrate" its dependencies when '
'it is first executed, rather than at build time (the normal pex behavior). '
'This option can reduce the size of a shipped pex file by over 100x for common'
'deps such as tensorflow, but it does require access to a pypi-esque index '
'when first executed.')

@classmethod
def subsystem_dependencies(cls):
Expand All @@ -341,27 +229,27 @@ def generate_ipex(self) -> bool:
return cast(bool, self.get_options().generate_ipex)

@classmethod
def create(cls, builder, log=None, parent_optionable=None):
if parent_optionable is None:
options = cls.global_instance().get_options()
else:
options = cls.scoped_instance(parent_optionable).get_options()
def create(cls, builder, log=None, generate_ipex=None, pex_requirement=None):
options = cls.global_instance().get_options()
setuptools_requirement = f'setuptools=={options.setuptools_version}'
pex_requirement = f'pex=={pex_requirement}' if pex_requirement else 'pex'

log = log or logging.getLogger(__name__)

return PexBuilderWrapper(builder=builder,
python_repos_subsystem=PythonRepos.global_instance(),
python_setup_subsystem=PythonSetup.global_instance(),
setuptools_requirement=PythonRequirement(setuptools_requirement),
pex_requirement=PythonRequirement(pex_requirement),
log=log,
generate_ipex=options.generate_ipex)
generate_ipex=bool(generate_ipex))

def __init__(self,
builder: PEXBuilder,
python_repos_subsystem: PythonRepos,
python_setup_subsystem: PythonSetup,
setuptools_requirement: PythonRequirement,
pex_requirement: PythonRequirement,
log: Any,
generate_ipex: bool = False):
assert log is not None
Expand All @@ -370,6 +258,7 @@ def __init__(self,
self._python_repos_subsystem = python_repos_subsystem
self._python_setup_subsystem = python_setup_subsystem
self._setuptools_requirement = setuptools_requirement
self._pex_requirement = pex_requirement
self._log = log

self._distributions: Dict[str, Distribution] = {}
Expand Down Expand Up @@ -454,12 +343,8 @@ def add_resolved_requirements(self, reqs, platforms=None):
for platform, dists in distributions.items():
for dist in dists:
if dist.location not in locations:
if self.generate_ipex:
# The requirements are added to the ipex in self._resolve_distributions_by_platform.
self._log.debug(f' *Avoiding* dumping ipex distribution: .../{os.path.basename(dist.location)}')
else:
self._log.debug(f' Dumping distribution: .../{os.path.basename(dist.location)}')
self.add_distribution(dist)
self._log.debug(f' Dumping distribution: .../{os.path.basename(dist.location)}')
self.add_distribution(dist)
locations.add(dist.location)

def _resolve_multi(self, interpreter, requirements, platforms, find_links) -> Dict[str, List[Distribution]]:
Expand Down Expand Up @@ -548,18 +433,26 @@ def _shuffle_original_build_info_into_ipex(self):
See this test case as background:
https://github.com/pantsbuild/pex/commit/66aa5ef69e8b65004a548b44edc0b81dfa8ec736#diff-b5b57c9d4fc663f59ecaedb851c13bd8R23
"""
ipex_info = self._builder.info.copy()
chroot = self._builder.chroot()
# Replace the original builder with a new one, and just pull files from the old chroot.
self._builder = PEXBuilder(interpreter=self._builder.interpreter)

# If the dependencies are very large, this flag is indicated to be useful in RAM-constrained
# environments. This will be set in both the PEX-INFO and the IPEX-INFO in the produced .ipex
# file.
# environments.
self._builder.info.always_write_cache = True

# PTEX-INFO: Contains resolver settings and source files to copy over into the dehydrated .ipex
# file.
chroot = self._builder.chroot()
code = [
f for f in chroot.get('source') | chroot.get('resource')
if f not in ['__main__.py', PexInfo.PATH]
]
all_code = []
for label in ['source', 'resource']:
for src in chroot.get(label):
all_code.append(src)
self._builder.chroot().copy(os.path.join(chroot.path(), src), src, label=label)

python_setup = self._python_setup_subsystem
python_repos = self._python_repos_subsystem
Expand Down Expand Up @@ -587,7 +480,6 @@ def _shuffle_original_build_info_into_ipex(self):

# IPEX-INFO: The original PEX-INFO, which should be the PEX-INFO in the hydrated .pex file that
# is generated when the .ipex is first executed.
ipex_info = self._builder.info.copy()
with temporary_file(permissions=0o644) as ipex_info_file:
ipex_info_file.write(ipex_info.dump().encode())
ipex_info_file.flush()
Expand All @@ -605,11 +497,18 @@ def _shuffle_original_build_info_into_ipex(self):

# There is an assertion checking this in _ptex_launcher.py due to the use of .zip_unsafe_cache.
self._builder.info.zip_safe = False
# The PEX-INFO we generate shouldn't have any requirements, or they will fail to bootstrap
# because they were unable to find those distributions. Instead, the .pex file produced when the
# .ipex is first executed will read and resolve all those requirements from the IPEX-INFO.
self._builder.requirements = []
self._builder.info._requirements = set()

# The PEX-INFO we generate shouldn't have any requirements (except pex itself), or they will
# fail to bootstrap because they were unable to find those distributions. Instead, the .pex file
# produced when the .ipex is first executed will read and resolve all those requirements from
# the IPEX-INFO.
pex_dist = self._distributions.get('pex')
if pex_dist:
pex_reqs = pex_dist.requires()
if not pex_dist:
# If pex was not already resolved to a dist, add it.
pex_reqs = [self._pex_requirement]
self.add_resolved_requirements(pex_reqs)

def freeze(self) -> None:
if self._frozen:
Expand Down
22 changes: 16 additions & 6 deletions src/python/pants/backend/python/tasks/python_binary_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from pex.pex_info import PexInfo

from pants.backend.python.subsystems.pex_build_util import (
IPEXBuilder,
PexBuilderWrapper,
has_python_requirements,
has_python_sources,
Expand Down Expand Up @@ -39,11 +38,21 @@ def register_options(cls, register):
"created and the command line used to create it. This information may be helpful to you, but means "
"that the generated PEX will not be reproducible; that is, future runs of `./pants binary` will not "
"create the same byte-for-byte identical .pex files.")
# TODO: make an analogy to cls.register_jvm_tool that can be overridden for python subsystems
# by a python_requirement_library() target, not just via pants.ini!
register('--ipex-injected-pex-version', advanced=True, default='1.6.12', fingerprint=True,
help='The pex version to include in the generated .ipex if --generate-ipex is on.')
register('--generate-ipex', type=bool, default=False, fingerprint=True,
help='Whether to generate a .ipex file, which will "hydrate" its dependencies when '
'it is first executed, rather than at build time (the normal pex behavior). '
'This option can reduce the size of a shipped pex file by over 100x for common'
'deps such as tensorflow, but it does require access to a pypi-esque index '
'when first executed.')

@classmethod
def subsystem_dependencies(cls):
return super().subsystem_dependencies() + (
PexBuilderWrapper.Factory.scoped(cls),
PexBuilderWrapper.Factory,
PythonNativeCode.scoped(cls),
)

Expand Down Expand Up @@ -80,7 +89,7 @@ def __init__(self, *args, **kwargs):

@property
def _generate_ipex(self) -> bool:
return PexBuilderWrapper.Factory.scoped_instance(self).generate_ipex
return self.get_options().generate_ipex

def _get_output_pex_filename(self, target_name):
# If generating a dehydrated "ipex" file, name it appropriately. Note that the the ipex will
Expand Down Expand Up @@ -140,11 +149,12 @@ def _create_binary(self, binary_tgt, results_dir):
pex_info = binary_tgt.pexinfo.copy()
pex_info.build_properties = build_properties

builder_class = IPEXBuilder if self._generate_ipex else PEXBuilder
pex_builder = PexBuilderWrapper.Factory.create(
builder=builder_class(path=tmpdir, interpreter=interpreter, pex_info=pex_info, copy=True),
builder=PEXBuilder(path=tmpdir, interpreter=interpreter, pex_info=pex_info, copy=True),
log=self.context.log,
parent_optionable=self)
generate_ipex=self.get_options().generate_ipex,
pex_requirement=self.get_options().ipex_injected_pex_version,
)

if binary_tgt.shebang:
self.context.log.info('Found Python binary target {} with customized shebang, using it: {}'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,12 @@ def test_generate_ipex_ansicolors(self):
':lib',
])

self.set_options(generate_ipex=True)
dist_dir = os.path.join(self.build_root, 'dist')
self._assert_pex(binary,
options={'pex-builder-wrapper': dict(generate_ipex=True)},
options={
'binary.py': dict(generate_ipex=True),
},
expected_output=dedent(f"""\
Hydrating {dist_dir}/bin.ipex to {dist_dir}/bin.pex
{blue('i just lazy-loaded the ansicolors dependency!')}
Expand Down

0 comments on commit 660d253

Please sign in to comment.