diff --git a/src/python/pants/backend/python/goals/pytest_runner.py b/src/python/pants/backend/python/goals/pytest_runner.py index 515cc86860e..d9043c44e72 100644 --- a/src/python/pants/backend/python/goals/pytest_runner.py +++ b/src/python/pants/backend/python/goals/pytest_runner.py @@ -6,7 +6,6 @@ from dataclasses import dataclass from pathlib import PurePath from typing import Optional, Tuple -from uuid import UUID from pants.backend.python.goals.coverage_py import ( CoverageConfig, @@ -43,7 +42,6 @@ from pants.core.util_rules.source_files import SourceFiles, SourceFilesRequest from pants.engine.addresses import Address, Addresses, AddressInput from pants.engine.fs import AddPrefix, Digest, DigestSubset, MergeDigests, PathGlobs, Snapshot -from pants.engine.internals.uuid import UUIDRequest from pants.engine.process import FallibleProcessResult, InteractiveProcess, Process from pants.engine.rules import Get, MultiGet, collect_rules, rule from pants.engine.target import ( @@ -253,17 +251,6 @@ async def setup_pytest_for_target( extra_env.update(test_extra_env.env) - if test_subsystem.force and not request.is_debug: - # This is a slightly hacky way to force the process to run: since the env var - # value is unique, this input combination will never have been seen before, - # and therefore never cached. The two downsides are: - # 1. This leaks into the test's environment, albeit with a funky var name that is - # unlikely to cause problems in practice. - # 2. This run will be cached even though it can never be re-used. - # TODO: A more principled way of forcing rules to run? - uuid = await Get(UUID, UUIDRequest()) - extra_env["__PANTS_FORCE_TEST_RUN__"] = str(uuid) - process = await Get( Process, PexProcess( @@ -276,6 +263,7 @@ async def setup_pytest_for_target( execution_slot_variable=pytest.options.execution_slot_var, description=f"Run Pytest for {request.field_set.address}", level=LogLevel.DEBUG, + uncacheable=test_subsystem.force and not request.is_debug, ), ) return TestSetup(process, results_file_name=results_file_name) diff --git a/src/python/pants/backend/python/util_rules/pex.py b/src/python/pants/backend/python/util_rules/pex.py index 4fbc22aaf51..7f01a751591 100644 --- a/src/python/pants/backend/python/util_rules/pex.py +++ b/src/python/pants/backend/python/util_rules/pex.py @@ -40,7 +40,7 @@ PathGlobs, ) from pants.engine.platform import Platform, PlatformConstraint -from pants.engine.process import MultiPlatformProcess, Process, ProcessResult +from pants.engine.process import MultiPlatformProcess, Process, ProcessResult, UncacheableProcess from pants.engine.rules import Get, collect_rules, rule from pants.python.python_repos import PythonRepos from pants.python.python_setup import PythonSetup @@ -564,6 +564,7 @@ class PexProcess: output_directories: Optional[Tuple[str, ...]] timeout_seconds: Optional[int] execution_slot_variable: Optional[str] + uncacheable: bool def __init__( self, @@ -578,6 +579,7 @@ def __init__( output_directories: Optional[Iterable[str]] = None, timeout_seconds: Optional[int] = None, execution_slot_variable: Optional[str] = None, + uncacheable: bool = False, ) -> None: self.pex = pex self.argv = tuple(argv) @@ -589,6 +591,7 @@ def __init__( self.output_directories = tuple(output_directories) if output_directories else None self.timeout_seconds = timeout_seconds self.execution_slot_variable = execution_slot_variable + self.uncacheable = uncacheable @rule @@ -601,7 +604,7 @@ async def setup_pex_process(request: PexProcess, pex_environment: PexEnvironment always_use_shebang=request.pex.internal_only, ) env = {**pex_environment.environment_dict, **(request.extra_env or {})} - return Process( + process = Process( argv, description=request.description, level=request.level, @@ -612,6 +615,7 @@ async def setup_pex_process(request: PexProcess, pex_environment: PexEnvironment timeout_seconds=request.timeout_seconds, execution_slot_variable=request.execution_slot_variable, ) + return await Get(Process, UncacheableProcess(process)) if request.uncacheable else process def rules(): diff --git a/src/python/pants/engine/process.py b/src/python/pants/engine/process.py index e50946e0b9f..cdea5e870e9 100644 --- a/src/python/pants/engine/process.py +++ b/src/python/pants/engine/process.py @@ -6,10 +6,12 @@ from dataclasses import dataclass from textwrap import dedent from typing import TYPE_CHECKING, Dict, Iterable, Mapping, Optional, Tuple, Union +from uuid import UUID from pants.base.exception_sink import ExceptionSink from pants.engine.engine_aware import EngineAwareReturnType from pants.engine.fs import EMPTY_DIGEST, CreateDigest, Digest, FileContent +from pants.engine.internals.uuid import UUIDRequest from pants.engine.platform import Platform, PlatformConstraint from pants.engine.rules import Get, collect_rules, rule, side_effecting from pants.util.frozendict import FrozenDict @@ -344,13 +346,37 @@ def first_path(self) -> Optional[str]: return next(iter(self.paths), None) +@dataclass(frozen=True) +class UncacheableProcess: + """Ensures the wrapped Process will always be run and its results never re-used.""" + + process: Process + + +@rule +async def make_process_uncacheable(uncacheable_process: UncacheableProcess) -> Process: + uuid = await Get(UUID, UUIDRequest()) + + process = uncacheable_process.process + env = dict(process.env) + + # This is a slightly hacky way to force the process to run: since the env var + # value is unique, this input combination will never have been seen before, + # and therefore never cached. The two downsides are: + # 1. This leaks into the process' environment, albeit with a funky var name that is + # unlikely to cause problems in practice. + # 2. This run will be cached even though it can never be re-used. + # TODO: A more principled way of forcing rules to run? + env["__PANTS_FORCE_PROCESS_RUN__"] = str(uuid) + + return dataclasses.replace(process, env=FrozenDict(env)) + + @rule(desc="Find binary path", level=LogLevel.DEBUG) async def find_binary(request: BinaryPathRequest) -> BinaryPaths: # TODO(John Sirois): Replace this script with a statically linked native binary so we don't # depend on either /bin/bash being available on the Process host. - # TODO(#10507): Running the script directly from a shebang sometimes results in a "Text file - # busy" error. - # + # Note: the backslash after the """ marker ensures that the shebang is at the start of the # script file. Many OSs will not see the shebang if there is intervening whitespace. script_path = "./script.sh" @@ -376,12 +402,18 @@ async def find_binary(request: BinaryPathRequest) -> BinaryPaths: search_path = create_path_env_var(request.search_path) result = await Get( FallibleProcessResult, - Process( - description=f"Searching for `{request.binary_name}` on PATH={search_path}", - level=LogLevel.DEBUG, - input_digest=script_digest, - argv=[script_path, request.binary_name], - env={"PATH": search_path}, + # We use a volatile process to force re-run since any binary found on the host system today + # could be gone tomorrow. Ideally we'd only do this for local processes since all known + # remoting configurations include a static container image as part of their cache key which + # automatically avoids this problem. + UncacheableProcess( + Process( + description=f"Searching for `{request.binary_name}` on PATH={search_path}", + level=LogLevel.DEBUG, + input_digest=script_digest, + argv=[script_path, request.binary_name], + env={"PATH": search_path}, + ) ), ) if result.exit_code == 0: