From d8b533a47eb318832438c99f20dde1a1d9c6d0a8 Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Mon, 20 Jul 2020 16:24:23 -0700 Subject: [PATCH 1/3] Implement cycle detection in transitive_targets, and tolerate cycles in file-addresses. [ci skip-rust-tests] --- src/python/pants/engine/internals/graph.py | 89 +++++++++++++------ .../pants/engine/internals/graph_test.py | 86 ++++++++++++++++-- 2 files changed, 139 insertions(+), 36 deletions(-) diff --git a/src/python/pants/engine/internals/graph.py b/src/python/pants/engine/internals/graph.py index 17178130f6a..e894825e433 100644 --- a/src/python/pants/engine/internals/graph.py +++ b/src/python/pants/engine/internals/graph.py @@ -5,7 +5,7 @@ import itertools import logging import os.path -from collections import defaultdict, deque +from collections import defaultdict from dataclasses import dataclass from pathlib import PurePath from typing import DefaultDict, Dict, Iterable, List, NamedTuple, Sequence, Set, Tuple, Type, Union @@ -61,7 +61,6 @@ TargetsToValidFieldSetsRequest, TargetsWithOrigins, TargetWithOrigin, - TransitiveTarget, TransitiveTargets, UnrecognizedTargetTypeException, WrappedTarget, @@ -135,37 +134,74 @@ async def resolve_targets_with_origins( # ----------------------------------------------------------------------------------------------- -@rule -async def transitive_target(wrapped_root: WrappedTarget) -> TransitiveTarget: - root = wrapped_root.target - if not root.has_field(Dependencies): - return TransitiveTarget(root, ()) - dependency_addresses = await Get(Addresses, DependenciesRequest(root[Dependencies])) - dependencies = await MultiGet(Get(TransitiveTarget, Address, d) for d in dependency_addresses) - return TransitiveTarget(root, dependencies) +class CycleException(Exception): + def __init__(self, subject: Address, path: Tuple[Address, ...]) -> None: + path_string = "\n".join((f"-> {a}" if a == subject else f" {a}") for a in path) + super().__init__(f"Dependency graph contained a cycle:\n{path_string}") + self.subject = subject + self.path = path + + +def _detect_cycles( + roots: Tuple[Address, ...], dependencies: Dict[Address, Tuple[Address, ...]] +) -> None: + path_stack: OrderedSet[Address] = OrderedSet() + visited: Set[Address] = set() + + def visit(address: Address): + if address in visited: + # NB: File-level dependencies are cycle tolerant. + if not address.generated_base_target_name and address in path_stack: + raise CycleException(address, tuple(path_stack) + (address,)) + return + path_stack.add(address) + visited.add(address) + + for dep_address in dependencies[address]: + visit(dep_address) + + path_stack.remove(address) + + for root in roots: + visit(root) + if path_stack: + raise AssertionError( + f"The stack of visited nodes should have been empty at the end of recursion, " + f"but it still contained: {path_stack}" + ) @rule -async def transitive_targets(addresses: Addresses) -> TransitiveTargets: - """Given Addresses, kicks off recursion on expansion of TransitiveTargets. +async def transitive_targets(targets: Targets) -> TransitiveTargets: + """Find all the targets transitively depended upon by the target roots. - The TransitiveTarget dataclass represents a structure-shared graph, which we walk and flatten - here. The engine memoizes the computation of TransitiveTarget, so when multiple - TransitiveTargets objects are being constructed for multiple roots, their structure will be - shared. + This uses iteration, rather than recursion, so that we can tolerate dependency cycles. Unlike a + traditional BFS algorithm, we batch each round of traversals via `MultiGet` for improved + performance / concurrency. """ - tts = await MultiGet(Get(TransitiveTarget, Address, a) for a in addresses) + visited: OrderedSet[Target] = OrderedSet() + queued = FrozenOrderedSet(targets) + dependency_mapping: Dict[Address, Tuple[Address, ...]] = {} + while queued: + direct_dependencies = await MultiGet( + Get(Targets, DependenciesRequest(tgt.get(Dependencies))) for tgt in queued + ) - dependencies: OrderedSet[Target] = OrderedSet() - to_visit = deque(itertools.chain.from_iterable(tt.dependencies for tt in tts)) - while to_visit: - tt = to_visit.popleft() - if tt.root in dependencies: - continue - dependencies.add(tt.root) - to_visit.extend(tt.dependencies) + dependency_mapping.update( + zip( + (t.address for t in queued), + (tuple(t.address for t in deps) for deps in direct_dependencies), + ) + ) + + queued = FrozenOrderedSet(itertools.chain.from_iterable(direct_dependencies)).difference( + visited + ) + visited.update(queued) - return TransitiveTargets(tuple(tt.root for tt in tts), FrozenOrderedSet(dependencies)) + transitive_targets = TransitiveTargets(tuple(targets), FrozenOrderedSet(visited)) + _detect_cycles(tuple(t.address for t in targets), dependency_mapping) + return transitive_targets # ----------------------------------------------------------------------------------------------- @@ -916,7 +952,6 @@ def rules(): resolve_target_with_origin, resolve_targets_with_origins, # TransitiveTargets - transitive_target, transitive_targets, # Owners find_owners, diff --git a/src/python/pants/engine/internals/graph_test.py b/src/python/pants/engine/internals/graph_test.py index 58e63510d65..ba77494e855 100644 --- a/src/python/pants/engine/internals/graph_test.py +++ b/src/python/pants/engine/internals/graph_test.py @@ -61,7 +61,6 @@ TargetsToValidFieldSetsRequest, TargetsWithOrigins, TargetWithOrigin, - TransitiveTarget, TransitiveTargets, WrappedTarget, ) @@ -116,14 +115,6 @@ def get_target(name: str) -> Target: ) assert direct_deps == Targets([d1, d2, d3]) - transitive_target = self.request_single_product( - TransitiveTarget, Params(WrappedTarget(root), create_options_bootstrapper()) - ) - assert transitive_target.root == root - assert { - dep_transitive_target.root for dep_transitive_target in transitive_target.dependencies - } == {d1, d2, d3} - transitive_targets = self.request_single_product( TransitiveTargets, Params(Addresses([root.address, d2.address]), create_options_bootstrapper()), @@ -133,6 +124,83 @@ def get_target(name: str) -> Target: assert transitive_targets.dependencies == FrozenOrderedSet([d1, d2, d3, t2, t1]) assert transitive_targets.closure == FrozenOrderedSet([root, d2, d1, d3, t2, t1]) + def test_transitive_targets_tolerates_subtarget_cycles(self) -> None: + """For generated subtargets, we should tolerate cycles between targets. + + This only works with generated subtargets, so we use explicit file dependencies in this + test. + """ + self.create_files("", ["dep.txt", "t1.txt", "t2.txt"]) + self.add_to_build_file( + "", + dedent( + """\ + target(name='dep', sources=['dep.txt']) + target(name='t1', sources=['t1.txt'], dependencies=['dep.txt', 't2.txt']) + target(name='t2', sources=['t2.txt'], dependencies=['t1.txt']) + """ + ), + ) + result = self.request_single_product( + TransitiveTargets, + Params(Addresses([Address.parse("//:t2")]), create_options_bootstrapper()), + ) + assert len(result.roots) == 1 + assert result.roots[0].address == Address.parse("//:t2") + assert [tgt.address for tgt in result.dependencies] == [ + Address("", target_name="t1.txt", generated_base_target_name="t1"), + Address("", target_name="dep.txt", generated_base_target_name="dep"), + Address("", target_name="t2.txt", generated_base_target_name="t2"), + ] + + def do_test_failed_cycle(self, address_str: str, cyclic_address_str: str) -> None: + with self.assertRaisesRegex( + Exception, + f"(?ms)Dependency graph contained a cycle:.*-> {cyclic_address_str}.*-> {cyclic_address_str}.*", + ): + self.request_single_product( + TransitiveTargets, + Params(Addresses([Address.parse(address_str)]), create_options_bootstrapper()), + ) + + def test_cycle_self(self) -> None: + self.add_to_build_file( + "", + dedent( + """\ + target(name='t1', dependencies=[':t1']) + """ + ), + ) + self.do_test_failed_cycle("//:t1", "//:t1") + + def test_cycle_direct(self) -> None: + self.add_to_build_file( + "", + dedent( + """\ + target(name='t1', dependencies=[':t2']) + target(name='t2', dependencies=[':t1']) + """ + ), + ) + self.do_test_failed_cycle("//:t1", "//:t1") + self.do_test_failed_cycle("//:t2", "//:t2") + + def test_cycle_indirect(self) -> None: + self.add_to_build_file( + "", + dedent( + """\ + target(name='t1', dependencies=[':t2']) + target(name='t2', dependencies=[':t3']) + target(name='t3', dependencies=[':t2']) + """ + ), + ) + self.do_test_failed_cycle("//:t1", "//:t2") + self.do_test_failed_cycle("//:t2", "//:t2") + def test_resolve_generated_subtarget(self) -> None: self.add_to_build_file("demo", "target(sources=['f1.txt', 'f2.txt'])") generated_target_addresss = Address( From 48067242b09e594a215cdc0a69a3388f03f731b3 Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Mon, 20 Jul 2020 19:42:45 -0700 Subject: [PATCH 2/3] Review feedback. [ci skip-rust-tests] --- src/python/pants/engine/internals/graph_test.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/python/pants/engine/internals/graph_test.py b/src/python/pants/engine/internals/graph_test.py index ba77494e855..d0d1a39d541 100644 --- a/src/python/pants/engine/internals/graph_test.py +++ b/src/python/pants/engine/internals/graph_test.py @@ -153,7 +153,7 @@ def test_transitive_targets_tolerates_subtarget_cycles(self) -> None: Address("", target_name="t2.txt", generated_base_target_name="t2"), ] - def do_test_failed_cycle(self, address_str: str, cyclic_address_str: str) -> None: + def assert_failed_cycle(self, address_str: str, cyclic_address_str: str) -> None: with self.assertRaisesRegex( Exception, f"(?ms)Dependency graph contained a cycle:.*-> {cyclic_address_str}.*-> {cyclic_address_str}.*", @@ -172,7 +172,7 @@ def test_cycle_self(self) -> None: """ ), ) - self.do_test_failed_cycle("//:t1", "//:t1") + self.assert_failed_cycle("//:t1", "//:t1") def test_cycle_direct(self) -> None: self.add_to_build_file( @@ -184,8 +184,8 @@ def test_cycle_direct(self) -> None: """ ), ) - self.do_test_failed_cycle("//:t1", "//:t1") - self.do_test_failed_cycle("//:t2", "//:t2") + self.assert_failed_cycle("//:t1", "//:t1") + self.assert_failed_cycle("//:t2", "//:t2") def test_cycle_indirect(self) -> None: self.add_to_build_file( @@ -198,8 +198,8 @@ def test_cycle_indirect(self) -> None: """ ), ) - self.do_test_failed_cycle("//:t1", "//:t2") - self.do_test_failed_cycle("//:t2", "//:t2") + self.assert_failed_cycle("//:t1", "//:t2") + self.assert_failed_cycle("//:t2", "//:t2") def test_resolve_generated_subtarget(self) -> None: self.add_to_build_file("demo", "target(sources=['f1.txt', 'f2.txt'])") From 43aa18816554b1f3fe8cb2295d197a2c1bffd651 Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Tue, 21 Jul 2020 19:56:03 -0700 Subject: [PATCH 3/3] Review feedback. [ci skip-rust-tests] --- src/python/pants/engine/internals/graph.py | 6 ++-- .../pants/engine/internals/graph_test.py | 28 +++++++++++-------- src/python/pants/engine/target.py | 8 ------ 3 files changed, 19 insertions(+), 23 deletions(-) diff --git a/src/python/pants/engine/internals/graph.py b/src/python/pants/engine/internals/graph.py index e894825e433..91dd062a4be 100644 --- a/src/python/pants/engine/internals/graph.py +++ b/src/python/pants/engine/internals/graph.py @@ -143,7 +143,7 @@ def __init__(self, subject: Address, path: Tuple[Address, ...]) -> None: def _detect_cycles( - roots: Tuple[Address, ...], dependencies: Dict[Address, Tuple[Address, ...]] + roots: Tuple[Address, ...], dependency_mapping: Dict[Address, Tuple[Address, ...]] ) -> None: path_stack: OrderedSet[Address] = OrderedSet() visited: Set[Address] = set() @@ -152,12 +152,12 @@ def visit(address: Address): if address in visited: # NB: File-level dependencies are cycle tolerant. if not address.generated_base_target_name and address in path_stack: - raise CycleException(address, tuple(path_stack) + (address,)) + raise CycleException(address, (*path_stack, address)) return path_stack.add(address) visited.add(address) - for dep_address in dependencies[address]: + for dep_address in dependency_mapping[address]: visit(dep_address) path_stack.remove(address) diff --git a/src/python/pants/engine/internals/graph_test.py b/src/python/pants/engine/internals/graph_test.py index d0d1a39d541..63bee0f8dcc 100644 --- a/src/python/pants/engine/internals/graph_test.py +++ b/src/python/pants/engine/internals/graph_test.py @@ -5,7 +5,7 @@ from dataclasses import dataclass from pathlib import PurePath from textwrap import dedent -from typing import Iterable, List, Sequence, Type +from typing import Iterable, List, Sequence, Tuple, Type import pytest @@ -29,6 +29,7 @@ from pants.engine.internals.graph import ( AmbiguousCodegenImplementationsException, AmbiguousImplementationsException, + CycleException, InvalidFileDependencyException, NoValidTargetsException, Owners, @@ -153,15 +154,18 @@ def test_transitive_targets_tolerates_subtarget_cycles(self) -> None: Address("", target_name="t2.txt", generated_base_target_name="t2"), ] - def assert_failed_cycle(self, address_str: str, cyclic_address_str: str) -> None: - with self.assertRaisesRegex( - Exception, - f"(?ms)Dependency graph contained a cycle:.*-> {cyclic_address_str}.*-> {cyclic_address_str}.*", - ): + def assert_failed_cycle( + self, root_addr: str, subject_addr: str, path_addrs: Tuple[str, ...] + ) -> None: + with self.assertRaises(ExecutionError) as e: self.request_single_product( TransitiveTargets, - Params(Addresses([Address.parse(address_str)]), create_options_bootstrapper()), + Params(Addresses([Address.parse(root_addr)]), create_options_bootstrapper()), ) + (cycle_exception,) = e.exception.wrapped_exceptions + assert isinstance(cycle_exception, CycleException) + assert cycle_exception.subject == Address.parse(subject_addr) + assert cycle_exception.path == tuple(Address.parse(p) for p in path_addrs) def test_cycle_self(self) -> None: self.add_to_build_file( @@ -172,7 +176,7 @@ def test_cycle_self(self) -> None: """ ), ) - self.assert_failed_cycle("//:t1", "//:t1") + self.assert_failed_cycle("//:t1", "//:t1", ("//:t1", "//:t1")) def test_cycle_direct(self) -> None: self.add_to_build_file( @@ -184,8 +188,8 @@ def test_cycle_direct(self) -> None: """ ), ) - self.assert_failed_cycle("//:t1", "//:t1") - self.assert_failed_cycle("//:t2", "//:t2") + self.assert_failed_cycle("//:t1", "//:t1", ("//:t1", "//:t2", "//:t1")) + self.assert_failed_cycle("//:t2", "//:t2", ("//:t2", "//:t1", "//:t2")) def test_cycle_indirect(self) -> None: self.add_to_build_file( @@ -198,8 +202,8 @@ def test_cycle_indirect(self) -> None: """ ), ) - self.assert_failed_cycle("//:t1", "//:t2") - self.assert_failed_cycle("//:t2", "//:t2") + self.assert_failed_cycle("//:t1", "//:t2", ("//:t1", "//:t2", "//:t3", "//:t2")) + self.assert_failed_cycle("//:t2", "//:t2", ("//:t2", "//:t3", "//:t2")) def test_resolve_generated_subtarget(self) -> None: self.add_to_build_file("demo", "target(sources=['f1.txt', 'f2.txt'])") diff --git a/src/python/pants/engine/target.py b/src/python/pants/engine/target.py index 9f1492f36a4..4b203f2a929 100644 --- a/src/python/pants/engine/target.py +++ b/src/python/pants/engine/target.py @@ -524,14 +524,6 @@ def targets(self) -> Tuple[Target, ...]: return tuple(tgt_with_origin.target for tgt_with_origin in self) -@dataclass(frozen=True) -class TransitiveTarget: - """A recursive structure wrapping a Target root and TransitiveTarget deps.""" - - root: Target - dependencies: Tuple["TransitiveTarget", ...] - - @dataclass(frozen=True) class TransitiveTargets: """A set of Target roots, and their transitive, flattened, de-duped dependencies.