Skip to content

Commit

Permalink
cache python tools in ~/.cache/pants (pantsbuild#7236)
Browse files Browse the repository at this point in the history
### Problem

This runs for (on my laptop) about 16 seconds every time I do a `clean-all`:
```
22:27:23 00:02   [native-compile]
22:27:23 00:02     [conan-prep]
22:27:23 00:02       [create-conan-pex]
22:27:39 00:18     [conan-fetch]
```

It doesn't seem like we need to be putting this tool in the task workdir as the python requirements list is pretty static. Conan in particular will be instantiated by invoking almost every goal, and it is a nontrivial piece of software to resolve each time.

Also, we aren't mixing in interpreter identity to the generated pex filename, which is a bug that has so far gone undetected: see pantsbuild#7236 (comment).

### Solution

- Take the `stable_json_sha1()` of the requirements of each python tool generated by `PythonToolPrepBase` to generate a fingerprinted pex filename.
- Stick it in the pants cachedir so it doesn't get blown away by a clean-all.
- Add an `--interpreter-constraints` option to pex tools (where previously the repo's `--python-setup-interpreter-constraints` were implicitly used).
- Ensure the selected interpreter identity is mixed into the fingerprinted filename.
- Add a test for the pex filename fingerprinting and that the pex can be successfully executed for python 2 and 3 constraints.

### Result

A significant amount of time spent waiting after clean builds is removed, and pex tools can have their own interpreter constraints as necessary.
  • Loading branch information
cosmicexplorer authored Feb 22, 2019
1 parent 4097052 commit 8069653
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 4 deletions.
1 change: 1 addition & 0 deletions src/python/pants/backend/native/subsystems/conan.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class Conan(PythonToolBase):
'pylint==1.9.3',
]
default_entry_point = 'conans.conan'
default_interpreter_constraints = ['CPython>=2.7,<4']

@classmethod
def register_options(cls, register):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,26 @@ class PythonToolBase(Subsystem):
# Subclasses must set.
default_requirements = None
default_entry_point = None
# Subclasses need not override.
default_interpreter_constraints = []

@classmethod
def register_options(cls, register):
super(PythonToolBase, cls).register_options(register)
register('--interpreter-constraints', type=list, advanced=True, fingerprint=True,
default=cls.default_interpreter_constraints,
help='Python interpreter constraints for this tool. An empty list uses the default '
'interpreter constraints for the repo.')
register('--requirements', type=list, advanced=True, fingerprint=True,
default=cls.default_requirements,
help='Python requirement strings for the tool.')
register('--entry-point', type=str, advanced=True, fingerprint=True,
default=cls.default_entry_point,
help='The main module for the tool.')

def get_interpreter_constraints(self):
return self.get_options().interpreter_constraints

def get_requirement_specs(self):
return self.get_options().requirements

Expand Down
36 changes: 32 additions & 4 deletions src/python/pants/backend/python/tasks/python_tool_prep_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import os
from builtins import str
from contextlib import contextmanager

from pex.pex import PEX
Expand All @@ -13,6 +14,8 @@
from pants.backend.python.interpreter_cache import PythonInterpreterCache
from pants.backend.python.python_requirement import PythonRequirement
from pants.backend.python.subsystems.pex_build_util import PexBuilderWrapper
from pants.base.build_environment import get_pants_cachedir
from pants.base.hash_utils import stable_json_sha1
from pants.base.workunit import WorkUnitLabel
from pants.task.task import Task
from pants.util.dirutil import safe_concurrent_creation
Expand All @@ -23,11 +26,16 @@
class PythonToolInstance(object):
def __init__(self, pex_path, interpreter):
self._pex = PEX(pex_path, interpreter=interpreter)
self._interpreter = interpreter

@property
def pex(self):
return self._pex

@property
def interpreter(self):
return self._interpreter

def _pretty_cmdline(self, args):
return safe_shlex_join(self._pex.cmdline(args))

Expand Down Expand Up @@ -63,6 +71,12 @@ def run(self, *args, **kwargs):
return cmdline, exit_code


# TODO: This python tool setup ends up eagerly generating each pex for each task in every goal which
# is transitively required by the command-line goals, even for tasks which no-op. This requires each
# pex for each relevant python tool to be buildable on the current host, even if it may never be
# intended to be invoked. Especially given the existing clear separation of concerns into
# PythonToolBase/PythonToolInstance/PythonToolPrepBase, this seems like an extremely ripe use case
# for some v2 rules for free caching and no-op when not required for the command-line goals.
class PythonToolPrepBase(Task):
"""Base class for tasks that resolve a python tool to be invoked out-of-process."""

Expand Down Expand Up @@ -97,16 +111,30 @@ def _build_tool_pex(self, tool_subsystem, interpreter, pex_path):
pex_builder.set_entry_point(tool_subsystem.get_entry_point())
pex_builder.freeze()

def _generate_fingerprinted_pex_path(self, tool_subsystem, interpreter):
# `tool_subsystem.get_requirement_specs()` is a list, but order shouldn't actually matter. This
# should probably be sorted, but it's possible a user could intentionally tweak order to work
# around a particular requirement resolution resolve-order issue. In practice the lists are
# expected to be mostly static, so we accept the risk of too-fine-grained caching creating lots
# of pexes in the cache dir.
specs_fingerprint = stable_json_sha1(tool_subsystem.get_requirement_specs())
return os.path.join(
get_pants_cachedir(),
'python',
str(interpreter.identity),
self.fingerprint,
'{}-{}.pex'.format(tool_subsystem.options_scope, specs_fingerprint),
)

def execute(self):
tool_subsystem = self.tool_subsystem_cls.scoped_instance(self)
pex_name = tool_subsystem.options_scope
pex_path = os.path.join(self.workdir, self.fingerprint, '{}.pex'.format(pex_name))

interpreter_cache = PythonInterpreterCache.global_instance()
interpreter = interpreter_cache.select_interpreter_for_targets([])
interpreter = min(interpreter_cache.setup(filters=tool_subsystem.get_interpreter_constraints()))

pex_path = self._generate_fingerprinted_pex_path(tool_subsystem, interpreter)
if not os.path.exists(pex_path):
with self.context.new_workunit(name='create-{}-pex'.format(pex_name),
with self.context.new_workunit(name='create-{}-pex'.format(tool_subsystem.options_scope),
labels=[WorkUnitLabel.PREP]):
self._build_tool_pex(tool_subsystem=tool_subsystem,
interpreter=interpreter,
Expand Down
87 changes: 87 additions & 0 deletions tests/python/pants_test/backend/python/tasks/test_python_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# coding=utf-8
# Copyright 2019 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import absolute_import, division, print_function, unicode_literals

import os
import re

from pants.backend.python.subsystems.python_tool_base import PythonToolBase
from pants.backend.python.tasks.python_tool_prep_base import PythonToolInstance, PythonToolPrepBase
from pants.task.task import Task
from pants.util.contextutil import temporary_dir
from pants_test.backend.python.tasks.python_task_test_base import PythonTaskTestBase


class Tool(PythonToolBase):
options_scope = 'test-tool'
default_requirements = [
'pex==1.5.3',
]
default_entry_point = 'pex.bin.pex:main'


class ToolInstance(PythonToolInstance):
pass


class ToolPrep(PythonToolPrepBase):
options_scope = 'tool-prep-task'
tool_subsystem_cls = Tool
tool_instance_cls = ToolInstance


class ToolTask(Task):
options_scope = 'tool-task'

@classmethod
def prepare(cls, options, round_manager):
super(ToolTask, cls).prepare(options, round_manager)
round_manager.require_data(ToolPrep.tool_instance_cls)

def execute(self):
tool_for_pex = self.context.products.get_data(ToolPrep.tool_instance_cls)
stdout, _, exit_code, _ = tool_for_pex.output(['--version'])
assert re.match(r'.*\.pex 1.5.3', stdout)
assert 0 == exit_code


class PythonToolPrepTest(PythonTaskTestBase):

@classmethod
def task_type(cls):
return ToolTask

def _assert_tool_execution_for_python_version(self, use_py3=True):
scope_string = '3' if use_py3 else '2'
constraint_string = 'CPython>=3' if use_py3 else 'CPython<3'
tool_prep_type = self.synthesize_task_subtype(ToolPrep, 'tp_scope_py{}'.format(scope_string))
with temporary_dir() as tmp_dir:
context = self.context(for_task_types=[tool_prep_type], for_subsystems=[Tool], options={
'': {
'pants_bootstrapdir': tmp_dir,
},
'test-tool': {
'interpreter_constraints': [constraint_string],
},
})
tool_prep_task = tool_prep_type(context, os.path.join(
self.pants_workdir, 'tp_py{}'.format(scope_string)))
tool_prep_task.execute()
# Check that the tool can be created and executed successfully.
self.create_task(context).execute()
pex_tool = context.products.get_data(ToolPrep.tool_instance_cls)
# Check that our pex tool wrapper was constructed with the expected interpreter.
self.assertTrue(pex_tool.interpreter.identity.matches(constraint_string))
return pex_tool

def test_tool_execution(self):
"""Test that python tools are fingerprinted by python interpreter."""
py3_pex_tool = self._assert_tool_execution_for_python_version(use_py3=True)
py3_pex_tool_path = py3_pex_tool.pex.path()
self.assertTrue(os.path.isdir(py3_pex_tool_path))
py2_pex_tool = self._assert_tool_execution_for_python_version(use_py3=False)
py2_pex_tool_path = py2_pex_tool.pex.path()
self.assertTrue(os.path.isdir(py2_pex_tool_path))
self.assertNotEqual(py3_pex_tool_path, py2_pex_tool_path)

0 comments on commit 8069653

Please sign in to comment.