Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support tests for BinaryPaths. #10770

Merged
merged 2 commits into from
Sep 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 36 additions & 3 deletions src/python/pants/backend/python/util_rules/pex_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import os
from dataclasses import dataclass
from textwrap import dedent
from typing import Iterable, Mapping, Optional, Tuple, cast

from pants.core.util_rules import subprocess_environment
Expand Down Expand Up @@ -132,7 +133,7 @@ async def find_pex_python(
pex_runtime_env: PexRuntimeEnvironment,
subprocess_env_vars: SubprocessEnvironmentVars,
) -> PexEnvironment:
# PEX files are compatible with bootstrapping via python2.7 or python 3.5+. The bootstrap
# PEX files are compatible with bootstrapping via Python 2.7 or Python 3.5+. The bootstrap
# code will then re-exec itself if the underlying PEX user code needs a more specific python
# interpreter. As such, we look for many Pythons usable by the PEX bootstrap code here for
# maximum flexibility.
Expand All @@ -141,7 +142,39 @@ async def find_pex_python(
Get(
BinaryPaths,
BinaryPathRequest(
search_path=python_setup.interpreter_search_paths, binary_name=binary_name
search_path=python_setup.interpreter_search_paths,
binary_name=binary_name,
test_args=[
jsirois marked this conversation as resolved.
Show resolved Hide resolved
"-c",
# N.B.: The following code snippet must be compatible with Python 2.7 and
# Python 3.5+.
dedent(
"""\
import sys
major, minor = sys.version_info[:2]
if (major, minor) == (2, 7) or (major == 3 and minor >= 5):
# Here we hash the underlying python interpreter executable to
# ensure we detect changes in the real interpreter that might
# otherwise be masked by pyenv shim scripts found on the search
# path. Naively, just printing out the full version_info would be
# enough, but that does not account for supported abi changes (e.g.:
# a pyenv switch from a py27mu interpreter to a py27m interpreter.
import hashlib
hasher = hashlib.sha256()
with open(sys.executable, "rb") as fp:
# We pick 8192 for efficiency of reads and fingerprint updates
# (writes) since it's a common OS buffer size and an even
# multiple of the hash block size.
for chunk in iter(lambda: fp.read(8192), b""):
hasher.update(chunk)
sys.stdout.write(hasher.hexdigest())
sys.exit(0)
else:
sys.exit(1)
"""
),
],
),
)
for binary_name in pex_runtime_env.bootstrap_interpreter_names
Expand All @@ -151,7 +184,7 @@ async def find_pex_python(
def first_python_binary() -> Optional[str]:
for binary_paths in all_python_binary_paths:
if binary_paths.first_path:
return binary_paths.first_path
return binary_paths.first_path.path
return None

return PexEnvironment(
Expand Down
6 changes: 4 additions & 2 deletions src/python/pants/engine/desktop.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,15 @@ async def find_open_program(request: OpenFilesRequest, plat: Platform) -> OpenFi
if plat == Platform.darwin:
processes = [
InteractiveProcess(
argv=(open_program_paths.first_path, *(str(f) for f in request.files)),
argv=(open_program_paths.first_path.path, *(str(f) for f in request.files)),
run_in_workspace=True,
)
]
else:
processes = [
InteractiveProcess(argv=(open_program_paths.first_path, str(f)), run_in_workspace=True)
InteractiveProcess(
argv=(open_program_paths.first_path.path, str(f)), run_in_workspace=True
)
for f in request.files
]

Expand Down
82 changes: 73 additions & 9 deletions src/python/pants/engine/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import dataclasses
import hashlib
import logging
from dataclasses import dataclass
from textwrap import dedent
Expand All @@ -11,6 +12,7 @@
from pants.base.exception_sink import ExceptionSink
from pants.engine.engine_aware import EngineAwareReturnType
from pants.engine.fs import EMPTY_DIGEST, CreateDigest, Digest, FileContent
from pants.engine.internals.selectors import MultiGet
from pants.engine.internals.uuid import UUIDRequest
from pants.engine.platform import Platform, PlatformConstraint
from pants.engine.rules import Get, collect_rules, rule, side_effecting
Expand Down Expand Up @@ -314,23 +316,64 @@ def run(self, request: InteractiveProcess) -> InteractiveProcessResult:
@frozen_after_init
@dataclass(unsafe_hash=True)
class BinaryPathRequest:
"""Request to find a binary of a given name.
If `test_args` are specified all binaries that are found will be executed with these args and
only those binaries whose test executions exit with return code 0 will be retained.
Additionally, if test execution includes stdout content, that will be used to fingerprint the
binary path so that upgrades and downgrades can be detected. A reasonable test for many programs
might be to pass `["--version"]` for `test_args` since it will both ensure the program runs and
also produce stdout text that changes upon upgrade or downgrade of the binary at the discovered
path.
"""

search_path: Tuple[str, ...]
binary_name: str
test_args: Optional[Tuple[str, ...]]

def __init__(self, *, search_path: Iterable[str], binary_name: str) -> None:
def __init__(
self,
*,
search_path: Iterable[str],
binary_name: str,
test_args: Optional[Iterable[str]] = None,
jsirois marked this conversation as resolved.
Show resolved Hide resolved
) -> None:
self.search_path = tuple(OrderedSet(search_path))
self.binary_name = binary_name
self.test_args = tuple(test_args or ())


@frozen_after_init
@dataclass(unsafe_hash=True)
class BinaryPath:
path: str
fingerprint: str

def __init__(self, path: str, fingerprint: Optional[str] = None) -> None:
self.path = path
self.fingerprint = self._fingerprint() if fingerprint is None else fingerprint

@staticmethod
def _fingerprint(content: Optional[Union[bytes, bytearray, memoryview]] = None) -> str:
hasher = hashlib.sha256() if content is None else hashlib.sha256(content)
return hasher.hexdigest()

@classmethod
def fingerprinted(
cls, path: str, representative_content: Union[bytes, bytearray, memoryview]
) -> "BinaryPath":
return cls(path, fingerprint=cls._fingerprint(representative_content))


@frozen_after_init
@dataclass(unsafe_hash=True)
class BinaryPaths(EngineAwareReturnType):
binary_name: str
paths: Tuple[str, ...]
paths: Tuple[BinaryPath, ...]

def __init__(self, binary_name: str, paths: Iterable[str]):
def __init__(self, binary_name: str, paths: Optional[Iterable[BinaryPath]] = None):
self.binary_name = binary_name
self.paths = tuple(OrderedSet(paths))
self.paths = tuple(OrderedSet(paths) if paths else ())

def message(self) -> str:
if not self.paths:
Expand All @@ -341,7 +384,7 @@ def message(self) -> str:
return found_msg

@property
def first_path(self) -> Optional[str]:
def first_path(self) -> Optional[BinaryPath]:
"""Return the first path to the binary that was discovered, if any."""
return next(iter(self.paths), None)

Expand Down Expand Up @@ -398,7 +441,6 @@ async def find_binary(request: BinaryPathRequest) -> BinaryPaths:
CreateDigest([FileContent(script_path, script_content.encode(), is_executable=True)]),
)

paths = []
search_path = create_path_env_var(request.search_path)
result = await Get(
FallibleProcessResult,
Expand All @@ -416,10 +458,32 @@ async def find_binary(request: BinaryPathRequest) -> BinaryPaths:
)
),
)
if result.exit_code == 0:
paths.extend(result.stdout.decode().splitlines())

return BinaryPaths(binary_name=request.binary_name, paths=paths)
binary_paths = BinaryPaths(binary_name=request.binary_name)
if result.exit_code != 0:
return binary_paths

found_paths = result.stdout.decode().splitlines()
if not request.test_args:
return dataclasses.replace(binary_paths, paths=[BinaryPath(path) for path in found_paths])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dataclasses.replace is neat, but also more cognitive load. How about instead having line 453 use return BinaryPaths(binary_name=request.binary_name), and likewise creating a new BinaryPaths with these returns?

Yes, more duplication. But simpler.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I started with the more duplication code (and more nested code, which I'm fine with but you tend to push back on) and landed here so this appears to be eye of the beholder. Since dataclasses are ubiquitous in rules, using standard dataclasses APIs does not strike me as neat, its just pedestrian.


results = await MultiGet(
Get(
FallibleProcessResult,
UncacheableProcess(
Process(argv=[path, *request.test_args], description=f"Test binary {path}.")
),
)
for path in found_paths
)
return dataclasses.replace(
binary_paths,
paths=[
BinaryPath.fingerprinted(path, result.stdout)
for path, result in zip(found_paths, results)
if result.exit_code == 0
],
)


def rules():
Expand Down