diff --git a/CHANGELOG.md b/CHANGELOG.md index a01d69138d1..a66a3e98b7d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ ### Changes - Graph refactor: fix common issues with load order ([#292](https://github.com/fishtown-analytics/dbt/pull/292)) +- Speedup: detect cycles at the end of compilation ([#307](https://github.com/fishtown-analytics/dbt/pull/307)) ## dbt 0.7.1 (February 28, 2017) diff --git a/dbt/compilation.py b/dbt/compilation.py index 7feb0e4edd7..e7fc5bce45d 100644 --- a/dbt/compilation.py +++ b/dbt/compilation.py @@ -464,6 +464,11 @@ def compile_nodes(self, linker, nodes, macro_generator): "dependency {} not found in graph!".format( dependency)) + cycle = linker.find_cycles() + + if cycle: + raise RuntimeError("Found a cycle: {}".format(cycle)) + return wrapped_nodes, written_nodes def generate_macros(self, all_macros): diff --git a/dbt/linker.py b/dbt/linker.py index a56dc29777b..0fe716ecbb6 100644 --- a/dbt/linker.py +++ b/dbt/linker.py @@ -28,6 +28,17 @@ def run_type(self): def get_node(self, node): return self.graph.node[node] + def find_cycles(self): + try: + cycles = nx.algorithms.find_cycle(self.graph) + except nx.exception.NetworkXNoCycle: + return None + + if cycles: + return " --> ".join([".".join(node) for node in cycles]) + + return None + def as_topological_ordering(self, limit_to=None): try: return nx.topological_sort(self.graph, nbunch=limit_to) @@ -103,11 +114,6 @@ def dependency(self, node1, node2): self.graph.add_node(node2) self.graph.add_edge(node2, node1) - if len(list(nx.simple_cycles(self.graph))) > 0: - raise ValidationException( - "Detected a cycle when adding dependency from {} to {}" - .format(node1, node2)) - def add_node(self, node): self.graph.add_node(node) diff --git a/test/unit/test_linker.py b/test/unit/test_linker.py index 9b9b77cb8d2..0cbf81645a2 100644 --- a/test/unit/test_linker.py +++ b/test/unit/test_linker.py @@ -70,3 +70,19 @@ def test_linker_bad_limit_throws_runtime_error(self): self.linker.dependency(l, r) self.assertRaises(RuntimeError, self.linker.as_dependency_list, ['ZZZ']) + + def test__find_cycles__cycles(self): + actual_deps = [('A', 'B'), ('B', 'C'), ('C', 'A')] + + for (l, r) in actual_deps: + self.linker.dependency(l, r) + + self.assertIsNotNone(self.linker.find_cycles()) + + def test__find_cycles__no_cycles(self): + actual_deps = [('A', 'B'), ('B', 'C'), ('C', 'D')] + + for (l, r) in actual_deps: + self.linker.dependency(l, r) + + self.assertIsNone(self.linker.find_cycles())