Skip to content

Commit

Permalink
first implement v1 of robot impl (pantsbuild#8793)
Browse files Browse the repository at this point in the history
See pex-tool/pex#789 for a description of the issue, and https://docs.google.com/document/d/1B_g0Ofs8aQsJtrePPR1PCtSAKgBG1o59AhS_NwfFnbI/edit for a google doc with pros and cons of different approaches.

@jsirois was extremely helpful throughout the development of this feature, and pex-tool/pex#819 and pex-tool/pex#821 in pex `2.0.3` will help to optimize several other aspects of this process when we can unrevert pantsbuild#8787.

**Note:** `src/python/pants/backend/python/subsystems/pex_build_util.py` was removed in this PR, along with all floating references to it.

With `--binary-py-generate-ipex`, a `.ipex` file will be created when `./pants binary` is run against a `python_binary()` target. This `.ipex` archive will create a `.pex` file and run it when first executed. The `.ipex` archive contains:
- in `IPEX-INFO`: the source files to inject into the resulting `.pex`, and pypi indices to resolve requirements from.
- in `BOOTSTRAP-PEX-INFO`: the `PEX-INFO` of the pex file that *would* have been generated if `--generate-ipex` was False.
- in `ipex.py`: A bootstrap script which will generate a `.pex` file when the `.ipex` file is first executed.

For a `.ipex` file which hydrates the `tensorflow==1.14.0` dependency when it is first run, this translates to a >100x decrease in file size:
```bash
X> ls dist
total 145M
-rwxr-xr-x 1 dmcclanahan staff 267k Dec 10 21:11 dehydrated.ipex*
-rwxr-xr-x 1 dmcclanahan staff 134M Dec 10 21:11 dehydrated.pex*
```
  • Loading branch information
cosmicexplorer committed Mar 31, 2020
1 parent 9145352 commit 1776dea
Show file tree
Hide file tree
Showing 38 changed files with 1,143 additions and 555 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from packaging import version
from pants.backend.python.interpreter_cache import PythonInterpreterCache
from pants.backend.python.python_requirement import PythonRequirement
from pants.backend.python.subsystems.pex_build_util import PexBuilderWrapper
from pants.python.pex_build_util import PexBuilderWrapper
from pants.backend.python.subsystems.python_setup import PythonSetup
from pants.backend.python.targets.python_requirement_library import PythonRequirementLibrary
from pants.backend.python.targets.python_target import PythonTarget
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@
import pkgutil

from pants.backend.python.interpreter_cache import PythonInterpreterCache
from pants.backend.python.subsystems.pex_build_util import (PexBuilderWrapper,
has_python_requirements,
has_python_sources)
from pants.backend.python.targets.python_binary import PythonBinary
from pants.backend.python.targets.python_library import PythonLibrary
from pants.backend.python.targets.python_target import PythonTarget
Expand All @@ -17,6 +14,11 @@
from pants.base.exceptions import TaskError
from pants.base.generator import Generator, TemplateData
from pants.base.workunit import WorkUnit, WorkUnitLabel
from pants.python.pex_build_util import (
PexBuilderWrapper,
has_python_requirements,
has_python_sources,
)
from pants.task.lint_task_mixin import LintTaskMixin
from pants.util.dirutil import safe_concurrent_creation, safe_mkdir
from pants.util.memo import memoized_property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from pants.backend.python.targets.python_library import PythonLibrary
from pants.base.build_environment import get_buildroot
from pants.base.exceptions import TaskError
from pants.python.python_repos import PythonRepos
from pants.backend.python.subsystems.python_repos import PythonRepos
from pants.util.contextutil import environment_as
from pants.util.dirutil import safe_mkdtemp, safe_rmtree
from pants_test.backend.python.tasks.python_task_test_base import PythonTaskTestBase
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from textwrap import dedent

from pants.backend.python.subsystems.python_setup import PythonSetup
from pants.python.python_repos import PythonRepos
from pants.backend.python.subsystems.python_repos import PythonRepos
from pants_test.backend.python.tasks.python_task_test_base import PythonTaskTestBase

from pants.contrib.python.checks.tasks.python_eval import PythonEval
Expand Down
5 changes: 5 additions & 0 deletions examples/3rdparty/python/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,8 @@ unpacked_whls(
],
within_data_subdir='purelib/tensorflow',
)

# Target covering every file matched by the recursive `**/*` glob.
files(
name='examples_python_3rdparty',
sources=['**/*'],
)
18 changes: 18 additions & 0 deletions examples/src/python/example/tensorflow_custom_op/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,21 @@ ctypes_compatible_cpp_library(
],
ctypes_native_library=native_artifact(lib_name='tensorflow-zero-out-operator'),
)


# Binary built from show_tf_version.py. It depends on the tensorflow 3rdparty target and is
# restricted to CPython >=3.6,<4 interpreters.
python_binary(
name='show-tf-version',
source='show_tf_version.py',
dependencies=[
'examples/3rdparty/python:tensorflow',
],
compatibility=['CPython>=3.6,<4'],
)

# Target covering every file matched by the recursive `**/*` glob; it also depends on the
# `examples_python_3rdparty` files target.
files(
name='show-tf-version-files',
sources=['**/*'],
dependencies=[
'examples/3rdparty/python:examples_python_3rdparty',
],
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Copyright 2019 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import tensorflow as tf

# Print the version string of the tensorflow module imported above.
print(f"tf version: {tf.__version__}")
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from pants.backend.codegen.grpcio.python.grpcio_prep import GrpcioPrep
from pants.backend.codegen.grpcio.python.python_grpcio_library import PythonGrpcioLibrary
from pants.backend.python.subsystems.pex_build_util import identify_missing_init_files
from pants.python.pex_build_util import identify_missing_init_files
from pants.backend.python.targets.python_library import PythonLibrary
from pants.base.build_environment import get_buildroot
from pants.base.exceptions import TaskError
Expand Down
2 changes: 1 addition & 1 deletion src/python/pants/backend/project_info/tasks/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from pants.backend.jvm.tasks.ivy_task_mixin import IvyTaskMixin
from pants.backend.project_info.tasks.export_version import DEFAULT_EXPORT_VERSION
from pants.backend.python.interpreter_cache import PythonInterpreterCache
from pants.backend.python.subsystems.pex_build_util import has_python_requirements
from pants.backend.python.targets.python_requirement_library import PythonRequirementLibrary
from pants.backend.python.targets.python_target import PythonTarget
from pants.backend.python.targets.python_tests import PythonTests
Expand All @@ -34,6 +33,7 @@
from pants.java.distribution.distribution import DistributionLocator
from pants.java.executor import SubprocessExecutor
from pants.java.jar.jar_dependency_utils import M2Coordinate
from pants.python.pex_build_util import has_python_requirements
from pants.task.console_task import ConsoleTask
from pants.util.memo import memoized_property

Expand Down
7 changes: 6 additions & 1 deletion src/python/pants/backend/python/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,15 @@
from pants.build_graph.build_file_aliases import BuildFileAliases
from pants.build_graph.resources import Resources
from pants.goal.task_registrar import TaskRegistrar as task
from pants.python.pex_build_util import PexBuilderWrapper


def global_subsystems():
return python_native_code.PythonNativeCode, subprocess_environment.SubprocessEnvironment
return {
python_native_code.PythonNativeCode,
subprocess_environment.SubprocessEnvironment,
PexBuilderWrapper.Factory,
}


def build_file_aliases():
Expand Down
1 change: 1 addition & 0 deletions src/python/pants/backend/python/rules/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ python_library(
'3rdparty/python:dataclasses',
'3rdparty/python:setuptools',
'src/python/pants/backend/python/subsystems',
'src/python/pants/backend/python/subsystems/ipex',
'src/python/pants/build_graph',
'src/python/pants/engine/legacy:graph',
'src/python/pants/engine:build_files',
Expand Down
2 changes: 1 addition & 1 deletion src/python/pants/backend/python/rules/inject_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from dataclasses import dataclass

from pants.backend.python.subsystems.pex_build_util import identify_missing_init_files
from pants.python.pex_build_util import identify_missing_init_files
from pants.engine.fs import EMPTY_DIRECTORY_DIGEST, Digest, Snapshot
from pants.engine.isolated_process import ExecuteProcessRequest, ExecuteProcessResult
from pants.engine.rules import rule
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pex.pex_builder import PEXBuilder
from pex.pex_info import PexInfo

from pants.backend.python.subsystems.pex_build_util import PexBuilderWrapper
from pants.python.pex_build_util import PexBuilderWrapper
from pants.subsystem.subsystem import Subsystem
from pants.util.dirutil import is_executable, safe_concurrent_creation

Expand Down
7 changes: 7 additions & 0 deletions src/python/pants/backend/python/subsystems/ipex/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Copyright 2019 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

# NB: This target is written into an .ipex file as the main script, and must not have any
# dependencies on any other python code! .ipex files should always contain the pex and setuptools
# requirements needed to run the main script!
python_library()
131 changes: 131 additions & 0 deletions src/python/pants/backend/python/subsystems/ipex/ipex_launcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# Copyright 2019 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

"""Entrypoint script for a "dehydrated" .ipex file generated with --generate-ipex.
This script will "hydrate" a normal .pex file in the same directory, then execute it.
"""

import json
import os
import sys
import tempfile

from pex import resolver
from pex.common import open_zip
from pex.fetcher import Fetcher, PyPIFetcher
from pex.interpreter import PythonInterpreter
from pex.pex_builder import PEXBuilder
from pex.pex_info import PexInfo
from pkg_resources import Requirement


APP_CODE_PREFIX = 'user_files/'


def _strip_app_code_prefix(path):
if not path.startswith(APP_CODE_PREFIX):
raise ValueError("Path {path} in IPEX-INFO did not begin with '{APP_CODE_PREFIX}'."
.format(path=path, APP_CODE_PREFIX=APP_CODE_PREFIX))
return path[len(APP_CODE_PREFIX):]


def _log(message):
sys.stderr.write(message + '\n')


def _sanitize_requirements(requirements):
    """Drop inapplicable and duplicate requirements, keeping first-seen order.

    Duplicate keys such as setuptools or pex may be injected multiple times into the resulting ipex
    when it is first executed; only the first requirement seen for each project name is kept.
    Requirements whose environment marker evaluates to false are skipped.

    :param requirements: An iterable of requirement strings.
    :returns: A list of normalized requirement strings, one per project name.
    """
    ordered_names = []
    requirement_by_name = {}

    for raw_requirement in requirements:
        parsed = Requirement(raw_requirement)
        marker = parsed.marker
        if marker and not marker.evaluate():
            # This requirement does not apply to the current environment.
            continue
        name = parsed.name
        if name in requirement_by_name:
            # A requirement for this project was already recorded; the first one wins.
            continue
        ordered_names.append(name)
        requirement_by_name[name] = str(parsed)

    return [requirement_by_name[name] for name in ordered_names]


def modify_pex_info(pex_info, **kwargs):
    """Return a new PexInfo equal to `pex_info` with the given top-level fields overridden.

    The info is round-tripped through its JSON dump, so `pex_info` itself is not modified.

    :param pex_info: The PexInfo to copy.
    :param kwargs: Top-level JSON keys to set (or replace) in the copy.
    :returns: A new PexInfo parsed from the updated JSON.
    """
    fields = json.loads(pex_info.dump())
    for key, value in kwargs.items():
        fields[key] = value
    serialized = json.dumps(fields)
    return PexInfo.from_json(serialized)


def _hydrate_pex_file(self, hydrated_pex_file):
    """Build a regular .pex at `hydrated_pex_file` from the dehydrated .ipex archive at `self`.

    The source files listed in the archive's IPEX-INFO are extracted and added to a PEXBuilder
    seeded from the archive's BOOTSTRAP-PEX-INFO; the (sanitized) requirements are then resolved
    intransitively and added as distributions before the pex is written out.

    :param self: Path to the .ipex archive currently being executed.
    :param hydrated_pex_file: Path at which the hydrated .pex file is written.
    :raises ValueError: If IPEX-INFO cannot be read or parsed, or if one of the source files it
                        lists cannot be extracted and added to the builder.
    """
    # Imported locally: only needed here, to clean up the scratch directory.
    import shutil

    # We extract source files into a temporary directory before creating the pex.
    td = tempfile.mkdtemp()
    try:
        # Stays None until IPEX-INFO has been parsed, so that the error handler below can always
        # refer to it without raising a NameError that would mask the original failure.
        ipex_info = None

        with open_zip(self) as zf:
            # Populate the pex with the pinned requirements and distribution names & hashes.
            bootstrap_info = PexInfo.from_json(zf.read('BOOTSTRAP-PEX-INFO'))
            bootstrap_builder = PEXBuilder(pex_info=bootstrap_info,
                                           interpreter=PythonInterpreter.get())

            # Populate the pex with the needed code.
            try:
                ipex_info = json.loads(zf.read('IPEX-INFO').decode('utf-8'))
                for path in ipex_info['code']:
                    unzipped_source = zf.extract(path, td)
                    bootstrap_builder.add_source(unzipped_source,
                                                 env_filename=_strip_app_code_prefix(path))
            except Exception as e:
                if ipex_info is None:
                    info = '<IPEX-INFO could not be read or parsed>'
                else:
                    info = json.dumps(ipex_info, indent=4)
                raise ValueError("Error: {e}. The IPEX-INFO for this .ipex file was:\n{info}"
                                 .format(e=e, info=info))

        # Perform a fully pinned intransitive resolve to hydrate the install cache.
        resolver_settings = ipex_info['resolver_settings']
        # TODO: Here we convert .indexes and .find_links into the old .fetchers until pants
        # upgrades to pex 2.0. At that time, we can remove anything relating to fetchers from
        # `resolver_settings`, and avoid removing the 'indexes' and 'find_links' keys, which are
        # correct for pex 2.0.
        fetchers = [PyPIFetcher(url) for url in resolver_settings.pop('indexes')]
        fetchers.extend(Fetcher([url]) for url in resolver_settings.pop('find_links'))
        resolver_settings['fetchers'] = fetchers

        sanitized_requirements = _sanitize_requirements(bootstrap_info.requirements)
        bootstrap_info = modify_pex_info(bootstrap_info, requirements=sanitized_requirements)
        bootstrap_builder.info = bootstrap_info

        resolved_distributions = resolver.resolve(
            requirements=bootstrap_info.requirements,
            cache=bootstrap_info.pex_root,
            platform='current',
            transitive=False,
            interpreter=bootstrap_builder.interpreter,
            **resolver_settings
        )
        # TODO: this shouldn't be necessary, as we should be able to use the same 'distributions'
        # from BOOTSTRAP-PEX-INFO. When the .ipex is executed, the normal pex bootstrap fails to
        # see these requirements or recognize that they should be pulled from the cache for some
        # reason.
        for resolved_dist in resolved_distributions:
            bootstrap_builder.add_distribution(resolved_dist.distribution)

        bootstrap_builder.build(hydrated_pex_file, bytecode_compile=False)
    finally:
        # mkdtemp() leaves cleanup to the caller. The extracted sources are no longer needed once
        # the pex has been built (or hydration has failed), so remove them on a best-effort basis.
        shutil.rmtree(td, ignore_errors=True)


def main(self):
    """Hydrate the .pex for the .ipex at `self` if it does not exist yet, then exec it.

    The hydrated file is written next to `self`, with the extension swapped for '.pex'. The current
    process is replaced by the interpreter running the hydrated pex with the original arguments.

    :param self: Path to the .ipex archive currently being executed.
    """
    stem, extension = os.path.splitext(self)

    # If the ipex (this pex) is already named '.pex', ensure the output filename doesn't collide by
    # inserting an intermediate '.ipex'!
    name_template = '{filename_base}.ipex.pex' if extension == '.pex' else '{filename_base}.pex'
    hydrated_pex_file = name_template.format(filename_base=stem)

    already_hydrated = os.path.exists(hydrated_pex_file)
    if not already_hydrated:
        _log('Hydrating {} to {}...'.format(self, hydrated_pex_file))
        _hydrate_pex_file(self, hydrated_pex_file)

    # Forward every argument after the script name to the hydrated pex.
    command = [sys.executable, hydrated_pex_file]
    command.extend(sys.argv[1:])
    os.execv(sys.executable, command)


# Script entrypoint: the first argv entry is passed to main() as the path of the .ipex to hydrate.
if __name__ == '__main__':
self = sys.argv[0]
main(self)
Loading

0 comments on commit 1776dea

Please sign in to comment.