Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement cycle detection in transitive_targets, and tolerate cycles in file-addresses. #10409

Merged
merged 3 commits into from
Jul 22, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 62 additions & 27 deletions src/python/pants/engine/internals/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import itertools
import logging
import os.path
from collections import defaultdict, deque
from collections import defaultdict
from dataclasses import dataclass
from pathlib import PurePath
from typing import DefaultDict, Dict, Iterable, List, NamedTuple, Sequence, Set, Tuple, Type, Union
Expand Down Expand Up @@ -61,7 +61,6 @@
TargetsToValidFieldSetsRequest,
TargetsWithOrigins,
TargetWithOrigin,
TransitiveTarget,
TransitiveTargets,
UnrecognizedTargetTypeException,
WrappedTarget,
Expand Down Expand Up @@ -135,37 +134,74 @@ async def resolve_targets_with_origins(
# -----------------------------------------------------------------------------------------------


@rule
async def transitive_target(wrapped_root: WrappedTarget) -> TransitiveTarget:
root = wrapped_root.target
if not root.has_field(Dependencies):
return TransitiveTarget(root, ())
dependency_addresses = await Get(Addresses, DependenciesRequest(root[Dependencies]))
dependencies = await MultiGet(Get(TransitiveTarget, Address, d) for d in dependency_addresses)
return TransitiveTarget(root, dependencies)
class CycleException(Exception):
def __init__(self, subject: Address, path: Tuple[Address, ...]) -> None:
path_string = "\n".join((f"-> {a}" if a == subject else f" {a}") for a in path)
super().__init__(f"Dependency graph contained a cycle:\n{path_string}")
stuhood marked this conversation as resolved.
Show resolved Hide resolved
self.subject = subject
self.path = path
stuhood marked this conversation as resolved.
Show resolved Hide resolved


def _detect_cycles(
roots: Tuple[Address, ...], dependency_mapping: Dict[Address, Tuple[Address, ...]]
) -> None:
path_stack: OrderedSet[Address] = OrderedSet()
visited: Set[Address] = set()

def visit(address: Address):
if address in visited:
# NB: File-level dependencies are cycle tolerant.
if not address.generated_base_target_name and address in path_stack:
raise CycleException(address, (*path_stack, address))
return
path_stack.add(address)
visited.add(address)

for dep_address in dependency_mapping[address]:
visit(dep_address)

path_stack.remove(address)

for root in roots:
visit(root)
if path_stack:
raise AssertionError(
f"The stack of visited nodes should have been empty at the end of recursion, "
f"but it still contained: {path_stack}"
)


@rule
async def transitive_targets(addresses: Addresses) -> TransitiveTargets:
"""Given Addresses, kicks off recursion on expansion of TransitiveTargets.
async def transitive_targets(targets: Targets) -> TransitiveTargets:
"""Find all the targets transitively depended upon by the target roots.
The TransitiveTarget dataclass represents a structure-shared graph, which we walk and flatten
here. The engine memoizes the computation of TransitiveTarget, so when multiple
TransitiveTargets objects are being constructed for multiple roots, their structure will be
shared.
This uses iteration, rather than recursion, so that we can tolerate dependency cycles. Unlike a
traditional BFS algorithm, we batch each round of traversals via `MultiGet` for improved
performance / concurrency.
"""
tts = await MultiGet(Get(TransitiveTarget, Address, a) for a in addresses)
visited: OrderedSet[Target] = OrderedSet()
queued = FrozenOrderedSet(targets)
dependency_mapping: Dict[Address, Tuple[Address, ...]] = {}
while queued:
direct_dependencies = await MultiGet(
Get(Targets, DependenciesRequest(tgt.get(Dependencies))) for tgt in queued
)

dependencies: OrderedSet[Target] = OrderedSet()
to_visit = deque(itertools.chain.from_iterable(tt.dependencies for tt in tts))
while to_visit:
tt = to_visit.popleft()
if tt.root in dependencies:
continue
dependencies.add(tt.root)
to_visit.extend(tt.dependencies)
dependency_mapping.update(
zip(
(t.address for t in queued),
(tuple(t.address for t in deps) for deps in direct_dependencies),
)
stuhood marked this conversation as resolved.
Show resolved Hide resolved
)

queued = FrozenOrderedSet(itertools.chain.from_iterable(direct_dependencies)).difference(
visited
)
visited.update(queued)

return TransitiveTargets(tuple(tt.root for tt in tts), FrozenOrderedSet(dependencies))
transitive_targets = TransitiveTargets(tuple(targets), FrozenOrderedSet(visited))
_detect_cycles(tuple(t.address for t in targets), dependency_mapping)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect running DFS has a real performance impact on mission-critical code. But I also don't know of another way to detect these cycles at the same time as we're resolvings deps via BFS..

Any idea how bad the impact is when running ./pants dependencies --transitive ::?

One thing Benjy and I started to discuss this morning is whether to have a global flag --tolerate-cycles. Gordon gave feedback that he wouldn't want to tolerate cycles with dep inference / generated subtargets either. We would only run this code if you have --no-tolerate-cycles.

Copy link
Member Author

@stuhood stuhood Jul 21, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It appears to add about 4.8% to the runtime.

One thing Benjy and I started to discuss this morning is whether to have a global flag --tolerate-cycles.

I'd prefer something that could be toggled on a target by target basis, but failing that, maybe a ternary of --tolerate-cycles=file|target|none.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that would be fine with me to be a target by target basis. I'd rather have that than always no matter what tolerating generated subtargets.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As long as there's a way to opt-in to (or out of) enforcing cycle hygiene.

return transitive_targets


# -----------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -916,7 +952,6 @@ def rules():
resolve_target_with_origin,
resolve_targets_with_origins,
# TransitiveTargets
transitive_target,
stuhood marked this conversation as resolved.
Show resolved Hide resolved
transitive_targets,
# Owners
find_owners,
Expand Down
92 changes: 82 additions & 10 deletions src/python/pants/engine/internals/graph_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from dataclasses import dataclass
from pathlib import PurePath
from textwrap import dedent
from typing import Iterable, List, Sequence, Type
from typing import Iterable, List, Sequence, Tuple, Type

import pytest

Expand All @@ -29,6 +29,7 @@
from pants.engine.internals.graph import (
AmbiguousCodegenImplementationsException,
AmbiguousImplementationsException,
CycleException,
InvalidFileDependencyException,
NoValidTargetsException,
Owners,
Expand Down Expand Up @@ -61,7 +62,6 @@
TargetsToValidFieldSetsRequest,
TargetsWithOrigins,
TargetWithOrigin,
TransitiveTarget,
TransitiveTargets,
WrappedTarget,
)
Expand Down Expand Up @@ -116,14 +116,6 @@ def get_target(name: str) -> Target:
)
assert direct_deps == Targets([d1, d2, d3])

transitive_target = self.request_single_product(
TransitiveTarget, Params(WrappedTarget(root), create_options_bootstrapper())
)
assert transitive_target.root == root
assert {
dep_transitive_target.root for dep_transitive_target in transitive_target.dependencies
} == {d1, d2, d3}

transitive_targets = self.request_single_product(
TransitiveTargets,
Params(Addresses([root.address, d2.address]), create_options_bootstrapper()),
Expand All @@ -133,6 +125,86 @@ def get_target(name: str) -> Target:
assert transitive_targets.dependencies == FrozenOrderedSet([d1, d2, d3, t2, t1])
assert transitive_targets.closure == FrozenOrderedSet([root, d2, d1, d3, t2, t1])

def test_transitive_targets_tolerates_subtarget_cycles(self) -> None:
"""For generated subtargets, we should tolerate cycles between targets.
This only works with generated subtargets, so we use explicit file dependencies in this
test.
"""
self.create_files("", ["dep.txt", "t1.txt", "t2.txt"])
self.add_to_build_file(
"",
dedent(
"""\
target(name='dep', sources=['dep.txt'])
target(name='t1', sources=['t1.txt'], dependencies=['dep.txt', 't2.txt'])
target(name='t2', sources=['t2.txt'], dependencies=['t1.txt'])
"""
),
)
result = self.request_single_product(
TransitiveTargets,
Params(Addresses([Address.parse("//:t2")]), create_options_bootstrapper()),
)
assert len(result.roots) == 1
assert result.roots[0].address == Address.parse("//:t2")
assert [tgt.address for tgt in result.dependencies] == [
Address("", target_name="t1.txt", generated_base_target_name="t1"),
Address("", target_name="dep.txt", generated_base_target_name="dep"),
Address("", target_name="t2.txt", generated_base_target_name="t2"),
]

def assert_failed_cycle(
self, root_addr: str, subject_addr: str, path_addrs: Tuple[str, ...]
) -> None:
with self.assertRaises(ExecutionError) as e:
self.request_single_product(
TransitiveTargets,
Params(Addresses([Address.parse(root_addr)]), create_options_bootstrapper()),
)
(cycle_exception,) = e.exception.wrapped_exceptions
assert isinstance(cycle_exception, CycleException)
assert cycle_exception.subject == Address.parse(subject_addr)
assert cycle_exception.path == tuple(Address.parse(p) for p in path_addrs)

def test_cycle_self(self) -> None:
self.add_to_build_file(
"",
dedent(
"""\
target(name='t1', dependencies=[':t1'])
"""
),
)
self.assert_failed_cycle("//:t1", "//:t1", ("//:t1", "//:t1"))

def test_cycle_direct(self) -> None:
self.add_to_build_file(
"",
dedent(
"""\
target(name='t1', dependencies=[':t2'])
target(name='t2', dependencies=[':t1'])
"""
),
)
self.assert_failed_cycle("//:t1", "//:t1", ("//:t1", "//:t2", "//:t1"))
self.assert_failed_cycle("//:t2", "//:t2", ("//:t2", "//:t1", "//:t2"))

def test_cycle_indirect(self) -> None:
self.add_to_build_file(
"",
dedent(
"""\
target(name='t1', dependencies=[':t2'])
target(name='t2', dependencies=[':t3'])
target(name='t3', dependencies=[':t2'])
"""
),
)
self.assert_failed_cycle("//:t1", "//:t2", ("//:t1", "//:t2", "//:t3", "//:t2"))
self.assert_failed_cycle("//:t2", "//:t2", ("//:t2", "//:t3", "//:t2"))

def test_resolve_generated_subtarget(self) -> None:
self.add_to_build_file("demo", "target(sources=['f1.txt', 'f2.txt'])")
generated_target_addresss = Address(
Expand Down
8 changes: 0 additions & 8 deletions src/python/pants/engine/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,14 +524,6 @@ def targets(self) -> Tuple[Target, ...]:
return tuple(tgt_with_origin.target for tgt_with_origin in self)


@dataclass(frozen=True)
class TransitiveTarget:
"""A recursive structure wrapping a Target root and TransitiveTarget deps."""

root: Target
dependencies: Tuple["TransitiveTarget", ...]


@dataclass(frozen=True)
class TransitiveTargets:
"""A set of Target roots, and their transitive, flattened, de-duped dependencies.
Expand Down