pantsbuild · jsirois · Sep 13, 2020 · Sep 13, 2020 · Sep 13, 2020 · Eric-Arellano
diff --git a/src/python/pants/backend/python/util_rules/pex_environment.py b/src/python/pants/backend/python/util_rules/pex_environment.py
@@ -3,6 +3,7 @@
 
 import os
 from dataclasses import dataclass
+from textwrap import dedent
 from typing import Iterable, Mapping, Optional, Tuple, cast
 
 from pants.core.util_rules import subprocess_environment
@@ -132,7 +133,7 @@ async def find_pex_python(
     pex_runtime_env: PexRuntimeEnvironment,
     subprocess_env_vars: SubprocessEnvironmentVars,
 ) -> PexEnvironment:
-    # PEX files are compatible with bootstrapping via python2.7 or python 3.5+. The bootstrap
+    # PEX files are compatible with bootstrapping via Python 2.7 or Python 3.5+. The bootstrap
     # code will then re-exec itself if the underlying PEX user code needs a more specific python
     # interpreter. As such, we look for many Pythons usable by the PEX bootstrap code here for
     # maximum flexibility.
@@ -141,7 +142,39 @@ async def find_pex_python(
             Get(
                 BinaryPaths,
                 BinaryPathRequest(
-                    search_path=python_setup.interpreter_search_paths, binary_name=binary_name
+                    search_path=python_setup.interpreter_search_paths,
+                    binary_name=binary_name,
+                    test_args=[
+                        "-c",
+                        # N.B.: The following code snippet must be compatible with Python 2.7 and
+                        # Python 3.5+.
+                        dedent(
+                            """\
+                            import sys
+
+                            major, minor = sys.version_info[:2]
+                            if (major, minor) == (2, 7) or (major == 3 and minor >= 5):
+                                # Here we hash the underlying python interpreter executable to
+                                # ensure we detect changes in the real interpreter that might
+                                # otherwise be masked by pyenv shim scripts found on the search
+                                # path. Naively, just printing out the full version_info would be
+                                # enough, but that does not account for supported abi changes (e.g.:
+                                # a pyenv switch from a py27mu interpreter to a py27m interpreter.
+                                import hashlib
+                                hasher = hashlib.sha256()
+                                with open(sys.executable, "rb") as fp:
+                                    # We pick 8192 for efficiency of reads and fingerprint updates
+                                    # (writes) since it's a common OS buffer size and an even
+                                    # multiple of the hash block size.
+                                    for chunk in iter(lambda: fp.read(8192), b""):
+                                        hasher.update(chunk)
+                                sys.stdout.write(hasher.hexdigest())
+                                sys.exit(0)
+                            else:
+                                sys.exit(1)
+                            """
+                        ),
+                    ],
                 ),
             )
             for binary_name in pex_runtime_env.bootstrap_interpreter_names
@@ -151,7 +184,7 @@ async def find_pex_python(
     def first_python_binary() -> Optional[str]:
         for binary_paths in all_python_binary_paths:
             if binary_paths.first_path:
-                return binary_paths.first_path
+                return binary_paths.first_path.path
         return None
 
     return PexEnvironment(

diff --git a/src/python/pants/engine/desktop.py b/src/python/pants/engine/desktop.py
@@ -50,13 +50,15 @@ async def find_open_program(request: OpenFilesRequest, plat: Platform) -> OpenFi
     if plat == Platform.darwin:
         processes = [
             InteractiveProcess(
-                argv=(open_program_paths.first_path, *(str(f) for f in request.files)),
+                argv=(open_program_paths.first_path.path, *(str(f) for f in request.files)),
                 run_in_workspace=True,
             )
         ]
     else:
         processes = [
-            InteractiveProcess(argv=(open_program_paths.first_path, str(f)), run_in_workspace=True)
+            InteractiveProcess(
+                argv=(open_program_paths.first_path.path, str(f)), run_in_workspace=True
+            )
             for f in request.files
         ]
 

diff --git a/src/python/pants/engine/process.py b/src/python/pants/engine/process.py
@@ -2,6 +2,7 @@
 # Licensed under the Apache License, Version 2.0 (see LICENSE).
 
 import dataclasses
+import hashlib
 import logging
 from dataclasses import dataclass
 from textwrap import dedent
@@ -11,6 +12,7 @@
 from pants.base.exception_sink import ExceptionSink
 from pants.engine.engine_aware import EngineAwareReturnType
 from pants.engine.fs import EMPTY_DIGEST, CreateDigest, Digest, FileContent
+from pants.engine.internals.selectors import MultiGet
 from pants.engine.internals.uuid import UUIDRequest
 from pants.engine.platform import Platform, PlatformConstraint
 from pants.engine.rules import Get, collect_rules, rule, side_effecting
@@ -314,23 +316,64 @@ def run(self, request: InteractiveProcess) -> InteractiveProcessResult:
 @frozen_after_init
 @dataclass(unsafe_hash=True)
 class BinaryPathRequest:
+    """Request to find a binary of a given name.
+
+    If `test_args` are specified all binaries that are found will be executed with these args and
+    only those binaries whose test executions exit with return code 0 will be retained.
+    Additionally, if test execution includes stdout content, that will be used to fingerprint the
+    binary path so that upgrades and downgrades can be detected. A reasonable test for many programs
+    might be to pass `["--version"]` for `test_args` since it will both ensure the program runs and
+    also produce stdout text that changes upon upgrade or downgrade of the binary at the discovered
+    path.
+    """
+
     search_path: Tuple[str, ...]
     binary_name: str
+    test_args: Optional[Tuple[str, ...]]
 
-    def __init__(self, *, search_path: Iterable[str], binary_name: str) -> None:
+    def __init__(
+        self,
+        *,
+        search_path: Iterable[str],
+        binary_name: str,
+        test_args: Optional[Iterable[str]] = None,
+    ) -> None:
         self.search_path = tuple(OrderedSet(search_path))
         self.binary_name = binary_name
+        self.test_args = tuple(test_args or ())
+
+
+@frozen_after_init
+@dataclass(unsafe_hash=True)
+class BinaryPath:
+    path: str
+    fingerprint: str
+
+    def __init__(self, path: str, fingerprint: Optional[str] = None) -> None:
+        self.path = path
+        self.fingerprint = self._fingerprint() if fingerprint is None else fingerprint
+
+    @staticmethod
+    def _fingerprint(content: Optional[Union[bytes, bytearray, memoryview]] = None) -> str:
+        hasher = hashlib.sha256() if content is None else hashlib.sha256(content)
+        return hasher.hexdigest()
+
+    @classmethod
+    def fingerprinted(
+        cls, path: str, representative_content: Union[bytes, bytearray, memoryview]
+    ) -> "BinaryPath":
+        return cls(path, fingerprint=cls._fingerprint(representative_content))
 
 
 @frozen_after_init
 @dataclass(unsafe_hash=True)
 class BinaryPaths(EngineAwareReturnType):
     binary_name: str
-    paths: Tuple[str, ...]
+    paths: Tuple[BinaryPath, ...]
 
-    def __init__(self, binary_name: str, paths: Iterable[str]):
+    def __init__(self, binary_name: str, paths: Optional[Iterable[BinaryPath]] = None):
         self.binary_name = binary_name
-        self.paths = tuple(OrderedSet(paths))
+        self.paths = tuple(OrderedSet(paths) if paths else ())
 
     def message(self) -> str:
         if not self.paths:
@@ -341,7 +384,7 @@ def message(self) -> str:
         return found_msg
 
     @property
-    def first_path(self) -> Optional[str]:
+    def first_path(self) -> Optional[BinaryPath]:
         """Return the first path to the binary that was discovered, if any."""
         return next(iter(self.paths), None)
 
@@ -398,7 +441,6 @@ async def find_binary(request: BinaryPathRequest) -> BinaryPaths:
         CreateDigest([FileContent(script_path, script_content.encode(), is_executable=True)]),
     )
 
-    paths = []
     search_path = create_path_env_var(request.search_path)
     result = await Get(
         FallibleProcessResult,
@@ -416,10 +458,32 @@ async def find_binary(request: BinaryPathRequest) -> BinaryPaths:
             )
         ),
     )
-    if result.exit_code == 0:
-        paths.extend(result.stdout.decode().splitlines())
 
-    return BinaryPaths(binary_name=request.binary_name, paths=paths)
+    binary_paths = BinaryPaths(binary_name=request.binary_name)
+    if result.exit_code != 0:
+        return binary_paths
+
+    found_paths = result.stdout.decode().splitlines()
+    if not request.test_args:
+        return dataclasses.replace(binary_paths, paths=[BinaryPath(path) for path in found_paths])
+
+    results = await MultiGet(
+        Get(
+            FallibleProcessResult,
+            UncacheableProcess(
+                Process(argv=[path, *request.test_args], description=f"Test binary {path}.")
+            ),
+        )
+        for path in found_paths
+    )
+    return dataclasses.replace(
+        binary_paths,
+        paths=[
+            BinaryPath.fingerprinted(path, result.stdout)
+            for path, result in zip(found_paths, results)
+            if result.exit_code == 0
+        ],
+    )
 
 
 def rules():