From 8105c6bac8213c4c52ad41807bced42fb8b0c03d Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Tue, 21 Jul 2020 21:24:09 -0700 Subject: [PATCH] Implement cycle detection in transitive_targets, and tolerate cycles in file-addresses. (#10409) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Problem After more deeply investigating the issues with #10230 (demonstrated in #10393 and its revert), it doesn't seem like the right idea to rely on the engine's cycle detection (which the implementation of #10230 showed should primarily be used for deadlock detection) to expose high level cycle tolerance for the build graph. ### Solution Move to iterative transitive target expansion (à la #10046), and cycle detect afterward using DFS with a stack of visited entries. ### Result Fixes #10059, and closes #10229. A followup will back out portions of #10230 (but not all of it, as there were a number of other improvements mixed in). [ci skip-rust-tests] --- src/python/pants/engine/internals/graph.py | 89 ++++++++++++------ .../pants/engine/internals/graph_test.py | 92 +++++++++++++++++-- src/python/pants/engine/target.py | 8 -- 3 files changed, 144 insertions(+), 45 deletions(-) diff --git a/src/python/pants/engine/internals/graph.py b/src/python/pants/engine/internals/graph.py index 17178130f6a..91dd062a4be 100644 --- a/src/python/pants/engine/internals/graph.py +++ b/src/python/pants/engine/internals/graph.py @@ -5,7 +5,7 @@ import itertools import logging import os.path -from collections import defaultdict, deque +from collections import defaultdict from dataclasses import dataclass from pathlib import PurePath from typing import DefaultDict, Dict, Iterable, List, NamedTuple, Sequence, Set, Tuple, Type, Union @@ -61,7 +61,6 @@ TargetsToValidFieldSetsRequest, TargetsWithOrigins, TargetWithOrigin, - TransitiveTarget, TransitiveTargets, UnrecognizedTargetTypeException, WrappedTarget, @@ -135,37 +134,74 @@ async def resolve_targets_with_origins( # ----------------------------------------------------------------------------------------------- -@rule -async def transitive_target(wrapped_root: WrappedTarget) -> TransitiveTarget: - root = wrapped_root.target - if not root.has_field(Dependencies): - return TransitiveTarget(root, ()) - dependency_addresses = await Get(Addresses, DependenciesRequest(root[Dependencies])) - dependencies = await MultiGet(Get(TransitiveTarget, Address, d) for d in dependency_addresses) - return TransitiveTarget(root, dependencies) +class CycleException(Exception): + def __init__(self, subject: Address, path: Tuple[Address, ...]) -> None: + path_string = "\n".join((f"-> {a}" if a == subject else f" {a}") for a in path) + super().__init__(f"Dependency graph contained a cycle:\n{path_string}") + self.subject = subject + self.path = path + + +def _detect_cycles( + roots: Tuple[Address, ...], dependency_mapping: Dict[Address, Tuple[Address, ...]] +) -> None: + path_stack: OrderedSet[Address] = OrderedSet() + visited: Set[Address] = set() + + def visit(address: Address): + if address in visited: + # NB: File-level dependencies are cycle tolerant. + if not address.generated_base_target_name and address in path_stack: + raise CycleException(address, (*path_stack, address)) + return + path_stack.add(address) + visited.add(address) + + for dep_address in dependency_mapping[address]: + visit(dep_address) + + path_stack.remove(address) + + for root in roots: + visit(root) + if path_stack: + raise AssertionError( + f"The stack of visited nodes should have been empty at the end of recursion, " + f"but it still contained: {path_stack}" + ) @rule -async def transitive_targets(addresses: Addresses) -> TransitiveTargets: - """Given Addresses, kicks off recursion on expansion of TransitiveTargets. +async def transitive_targets(targets: Targets) -> TransitiveTargets: + """Find all the targets transitively depended upon by the target roots. - The TransitiveTarget dataclass represents a structure-shared graph, which we walk and flatten - here. The engine memoizes the computation of TransitiveTarget, so when multiple - TransitiveTargets objects are being constructed for multiple roots, their structure will be - shared. + This uses iteration, rather than recursion, so that we can tolerate dependency cycles. Unlike a + traditional BFS algorithm, we batch each round of traversals via `MultiGet` for improved + performance / concurrency. """ - tts = await MultiGet(Get(TransitiveTarget, Address, a) for a in addresses) + visited: OrderedSet[Target] = OrderedSet() + queued = FrozenOrderedSet(targets) + dependency_mapping: Dict[Address, Tuple[Address, ...]] = {} + while queued: + direct_dependencies = await MultiGet( + Get(Targets, DependenciesRequest(tgt.get(Dependencies))) for tgt in queued + ) - dependencies: OrderedSet[Target] = OrderedSet() - to_visit = deque(itertools.chain.from_iterable(tt.dependencies for tt in tts)) - while to_visit: - tt = to_visit.popleft() - if tt.root in dependencies: - continue - dependencies.add(tt.root) - to_visit.extend(tt.dependencies) + dependency_mapping.update( + zip( + (t.address for t in queued), + (tuple(t.address for t in deps) for deps in direct_dependencies), + ) + ) + + queued = FrozenOrderedSet(itertools.chain.from_iterable(direct_dependencies)).difference( + visited + ) + visited.update(queued) - return TransitiveTargets(tuple(tt.root for tt in tts), FrozenOrderedSet(dependencies)) + transitive_targets = TransitiveTargets(tuple(targets), FrozenOrderedSet(visited)) + _detect_cycles(tuple(t.address for t in targets), dependency_mapping) + return transitive_targets # ----------------------------------------------------------------------------------------------- @@ -916,7 +952,6 @@ def rules(): resolve_target_with_origin, resolve_targets_with_origins, # TransitiveTargets - transitive_target, transitive_targets, # Owners find_owners, diff --git a/src/python/pants/engine/internals/graph_test.py b/src/python/pants/engine/internals/graph_test.py index 58e63510d65..63bee0f8dcc 100644 --- a/src/python/pants/engine/internals/graph_test.py +++ b/src/python/pants/engine/internals/graph_test.py @@ -5,7 +5,7 @@ from dataclasses import dataclass from pathlib import PurePath from textwrap import dedent -from typing import Iterable, List, Sequence, Type +from typing import Iterable, List, Sequence, Tuple, Type import pytest @@ -29,6 +29,7 @@ from pants.engine.internals.graph import ( AmbiguousCodegenImplementationsException, AmbiguousImplementationsException, + CycleException, InvalidFileDependencyException, NoValidTargetsException, Owners, @@ -61,7 +62,6 @@ TargetsToValidFieldSetsRequest, TargetsWithOrigins, TargetWithOrigin, - TransitiveTarget, TransitiveTargets, WrappedTarget, ) @@ -116,14 +116,6 @@ def get_target(name: str) -> Target: ) assert direct_deps == Targets([d1, d2, d3]) - transitive_target = self.request_single_product( - TransitiveTarget, Params(WrappedTarget(root), create_options_bootstrapper()) - ) - assert transitive_target.root == root - assert { - dep_transitive_target.root for dep_transitive_target in transitive_target.dependencies - } == {d1, d2, d3} - transitive_targets = self.request_single_product( TransitiveTargets, Params(Addresses([root.address, d2.address]), create_options_bootstrapper()), @@ -133,6 +125,86 @@ def get_target(name: str) -> Target: assert transitive_targets.dependencies == FrozenOrderedSet([d1, d2, d3, t2, t1]) assert transitive_targets.closure == FrozenOrderedSet([root, d2, d1, d3, t2, t1]) + def test_transitive_targets_tolerates_subtarget_cycles(self) -> None: + """For generated subtargets, we should tolerate cycles between targets. + + This only works with generated subtargets, so we use explicit file dependencies in this + test. + """ + self.create_files("", ["dep.txt", "t1.txt", "t2.txt"]) + self.add_to_build_file( + "", + dedent( + """\ + target(name='dep', sources=['dep.txt']) + target(name='t1', sources=['t1.txt'], dependencies=['dep.txt', 't2.txt']) + target(name='t2', sources=['t2.txt'], dependencies=['t1.txt']) + """ + ), + ) + result = self.request_single_product( + TransitiveTargets, + Params(Addresses([Address.parse("//:t2")]), create_options_bootstrapper()), + ) + assert len(result.roots) == 1 + assert result.roots[0].address == Address.parse("//:t2") + assert [tgt.address for tgt in result.dependencies] == [ + Address("", target_name="t1.txt", generated_base_target_name="t1"), + Address("", target_name="dep.txt", generated_base_target_name="dep"), + Address("", target_name="t2.txt", generated_base_target_name="t2"), + ] + + def assert_failed_cycle( + self, root_addr: str, subject_addr: str, path_addrs: Tuple[str, ...] + ) -> None: + with self.assertRaises(ExecutionError) as e: + self.request_single_product( + TransitiveTargets, + Params(Addresses([Address.parse(root_addr)]), create_options_bootstrapper()), + ) + (cycle_exception,) = e.exception.wrapped_exceptions + assert isinstance(cycle_exception, CycleException) + assert cycle_exception.subject == Address.parse(subject_addr) + assert cycle_exception.path == tuple(Address.parse(p) for p in path_addrs) + + def test_cycle_self(self) -> None: + self.add_to_build_file( + "", + dedent( + """\ + target(name='t1', dependencies=[':t1']) + """ + ), + ) + self.assert_failed_cycle("//:t1", "//:t1", ("//:t1", "//:t1")) + + def test_cycle_direct(self) -> None: + self.add_to_build_file( + "", + dedent( + """\ + target(name='t1', dependencies=[':t2']) + target(name='t2', dependencies=[':t1']) + """ + ), + ) + self.assert_failed_cycle("//:t1", "//:t1", ("//:t1", "//:t2", "//:t1")) + self.assert_failed_cycle("//:t2", "//:t2", ("//:t2", "//:t1", "//:t2")) + + def test_cycle_indirect(self) -> None: + self.add_to_build_file( + "", + dedent( + """\ + target(name='t1', dependencies=[':t2']) + target(name='t2', dependencies=[':t3']) + target(name='t3', dependencies=[':t2']) + """ + ), + ) + self.assert_failed_cycle("//:t1", "//:t2", ("//:t1", "//:t2", "//:t3", "//:t2")) + self.assert_failed_cycle("//:t2", "//:t2", ("//:t2", "//:t3", "//:t2")) + def test_resolve_generated_subtarget(self) -> None: self.add_to_build_file("demo", "target(sources=['f1.txt', 'f2.txt'])") generated_target_addresss = Address( diff --git a/src/python/pants/engine/target.py b/src/python/pants/engine/target.py index 9f1492f36a4..4b203f2a929 100644 --- a/src/python/pants/engine/target.py +++ b/src/python/pants/engine/target.py @@ -524,14 +524,6 @@ def targets(self) -> Tuple[Target, ...]: return tuple(tgt_with_origin.target for tgt_with_origin in self) -@dataclass(frozen=True) -class TransitiveTarget: - """A recursive structure wrapping a Target root and TransitiveTarget deps.""" - - root: Target - dependencies: Tuple["TransitiveTarget", ...] - - @dataclass(frozen=True) class TransitiveTargets: """A set of Target roots, and their transitive, flattened, de-duped dependencies.