
Consistent node execution order by sorting nodes with SequentialRunner #1604

Merged
2 changes: 2 additions & 0 deletions RELEASE.md
@@ -25,9 +25,11 @@
* Reduced number of log lines by changing the logging level from `INFO` to `DEBUG` for low priority messages.
* Kedro's framework-side logging configuration no longer performs file-based logging. Hence superfluous `info.log`/`errors.log` files are no longer created in your project root, and running Kedro on read-only file systems such as Databricks Repos is now possible.
* The `root` logger is now set to the Python default level of `WARNING` rather than `INFO`. Kedro's logger is still set to emit `INFO` level messages.
* Kedro pipeline will have consistent execution order given the same set of nodes when using with `SequentialRunner`.
Contributor:
Suggested change
* Kedro pipeline will have consistent execution order given the same set of nodes when using with `SequentialRunner`.
* `SequentialRunner` now consistently runs nodes in the same order across multiple runs.

I still don't think this is a very clear explanation though. Maybe what you have is better 🤔

Contributor Author:

Maybe @MerelTheisenQB has a better idea.

Member:

I don't know if this is any better to be honest 😅 "Added sorting of nodes for the SequentialRunner to facilitate consistent execution order across multiple runs. "

* `kedro jupyter notebook/lab` no longer reuses a Jupyter kernel.
* Required `cookiecutter>=2.1.1` to address a [known command injection vulnerability](https://security.snyk.io/vuln/SNYK-PYTHON-COOKIECUTTER-2414281).


## Upcoming deprecations for Kedro 0.19.0
* `kedro.extras.ColorHandler` will be removed in 0.19.0.

10 changes: 7 additions & 3 deletions kedro/pipeline/pipeline.py
@@ -352,7 +352,7 @@ def nodes(self) -> List[Node]:
return list(chain.from_iterable(self._topo_sorted_nodes))

@property
def grouped_nodes(self) -> List[Set[Node]]:
def grouped_nodes(self) -> List[List[Node]]:
"""Return a list of the pipeline nodes in topologically ordered groups,
i.e. if node A needs to be run before node B, it will appear in an
earlier group.
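
For illustration (not part of the diff): each inner list returned by `grouped_nodes` is one topological layer, and after this change the layers are ordered lists rather than sets. A minimal sketch, assuming a two-node pipeline with made-up names:

```python
from kedro.pipeline import Pipeline, node


def identity(x):
    return x


pipeline = Pipeline(
    [
        node(identity, "a", "b", name="first"),
        node(identity, "b", "c", name="second"),
    ]
)

# "first" produces "b", which "second" consumes, so "first" appears in an
# earlier group; each group is now a list, not a set.
for group in pipeline.grouped_nodes:
    print([n.name for n in group])
# Prints: ['first'] then ['second']
```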
@@ -870,7 +870,7 @@ def _validate_transcoded_inputs_outputs(nodes: List[Node]) -> None:
)


def _topologically_sorted(node_dependencies) -> List[Set[Node]]:
def _topologically_sorted(node_dependencies) -> List[List[Node]]:
"""Topologically group and sort (order) nodes such that no node depends on
a node that appears in the same or a later group.

@@ -894,7 +894,11 @@ def _circle_error_message(error_data: Dict[str, str]) -> str:
return f"Circular dependencies exist among these items: {circular}"

try:
return list(toposort(node_dependencies))
result = []
for dependencies in toposort(node_dependencies):
# Sort each group so the order is consistent across runs, e.g. with SequentialRunner
result.append(sorted(dependencies))
return result
noklam marked this conversation as resolved.
except ToposortCircleError as exc:
message = _circle_error_message(exc.data)
raise CircularDependencyError(message) from exc
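To see why the sort is needed, here is a standalone sketch (not part of the diff), assuming the `toposort` package: `toposort` yields each dependency level as a Python `set`, whose iteration order can vary between interpreter runs, so materialising each level with `sorted` pins down one deterministic order. Strings stand in for `Node` objects here; Kedro nodes are order-comparable, which is what makes `sorted(dependencies)` valid above.

```python
from toposort import toposort

# F depends on five independent source nodes, mirroring the test DAG below
node_dependencies = {"F": {"A", "B", "C", "D", "E"}}

# toposort() yields {"A", ..., "E"} first, then {"F"}; set iteration
# order is not guaranteed, so each group is sorted into a list.
groups = [sorted(group) for group in toposort(node_dependencies)]

assert groups == [["A", "B", "C", "D", "E"], ["F"]]  # identical every run
```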
54 changes: 52 additions & 2 deletions tests/pipeline/test_pipeline.py
@@ -1,10 +1,13 @@
import random
import re
from itertools import chain
from typing import List

import pytest

import kedro
from kedro.pipeline import Pipeline, node
from kedro.pipeline.node import Node
from kedro.pipeline.pipeline import (
CircularDependencyError,
ConfirmNotUniqueError,
@@ -253,8 +256,9 @@ def test_grouped_nodes(self, input_data):
grouped = pipeline.grouped_nodes
# Flatten a list of grouped nodes
assert pipeline.nodes == list(chain.from_iterable(grouped))
# Check each grouped node matches with expected group
assert all(g == e for g, e in zip(grouped, expected))
# Check each group of nodes matches the expected group; the order within a
# group is non-deterministic, so we only check they contain the same set of nodes.
assert all(set(g) == e for g, e in zip(grouped, expected))

def test_free_input(self, input_data):
nodes = input_data["nodes"]
@@ -588,6 +592,52 @@ def test_connected_pipeline(self, disjoint_pipeline):
assert len(pipeline.inputs()) == 1
assert len(pipeline.outputs()) == 1

def test_pipeline_consistent_nodes_order(self, mocker):
"""
A pipeline with multiple possible execution orders should resolve to a
consistent solution.
Possible solutions:
1. A -> B -> C -> D -> E -> F
2. B -> A -> C -> D -> E -> F
3. ... any permutation, as long as F is executed last.

Although we cannot know in advance which permutation will be chosen, the
pipeline should always output the same one.

A-- \
B--- \
C---- F
D--- /
E-- /
"""
Contributor:

This is nice 👍


def multiconcat(*args):
return sum(args)
noklam marked this conversation as resolved.

mock_hash = mocker.patch(f"{__name__}.Node.__hash__")
expected_sorted_nodes: List[Node] = None

# Repeat 10 times so we can be sure it is not purely by chance
for _ in range(10):
mock_hash.return_value = random.randint(1, 10**20)
Contributor:

I don't think this is doing what you want it to do. This is currently fixing the hash of every Node instance in this pipeline to be the same. We don't want hash(node1) to be the same as hash(node2). What we should have is:

        n1 = node(constant_output, None, "A")
        n2 = node(constant_output, None, "B")
        n3 = node(constant_output, None, "C")
        n4 = node(constant_output, None, "D")
        n5 = node(constant_output, None, "E")
        n6 = node(multiconcat, ["A", "B", "C", "D", "E"], "F")

       # You don't have to nest these, you can put them all in one with block
       # But actually for Python < 3.10 it's super ugly formatting still: https://stackoverflow.com/questions/3024925/create-a-with-block-on-several-context-managers
        for _ in range(10):
            with mock.patch.object(n1, "__hash__", random.randint(1, 1e20)):
                with mock.patch.object(n2, "__hash__", random.randint(1, 1e20)):
                    with mock.patch.object(n3, "__hash__", random.randint(1, 1e20)):
                        with mock.patch.object(n4, "__hash__", random.randint(1, 1e20)):
                            with mock.patch.object(n5, "__hash__", random.randint(1, 1e20)):
                                with mock.patch.object(n6, "__hash__", random.randint(1, 1e20)):
                                    inverted_fork_dags = Pipeline([n1, n2, n3, n4, n5, n6])
                                    # use inverted_fork_dags.nodes

... but this is still not a great test because the current code in main doesn't fail it 😬

After spending a looooong time playing around with this, I think it might just not be worth writing a test for it at all... So long as it works as it should in manual testing, then I think we're fine.

Happy to explain more about what I discovered while playing around with the testing here if you want to hear. It's certainly a tricky one.

Contributor Author:

Let's chat tomorrow!


inverted_fork_dags = Pipeline(
[
node(constant_output, None, "A"),
node(constant_output, None, "B"),
node(constant_output, None, "C"),
node(constant_output, None, "D"),
node(constant_output, None, "E"),
node(multiconcat, ["A", "B", "C", "D", "E"], "F"),
]
)
if not expected_sorted_nodes:
    expected_sorted_nodes = inverted_fork_dags.nodes
else:
    assert expected_sorted_nodes == inverted_fork_dags.nodes
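
Following up on the review thread above, a hedged sketch of one alternative the discussion points towards: patch `Node.__hash__` once with a function that caches a distinct random value per instance, which avoids both the shared-hash problem and the nested `with` blocks. `patch_random_node_hashes` is a hypothetical helper, not part of this PR.

```python
import random
from unittest import mock

from kedro.pipeline.node import Node


def patch_random_node_hashes():
    """Give every Node instance its own random hash, stable per instance."""
    cache = {}

    def _random_hash(self):
        # setdefault keeps a node's hash constant for the patch's lifetime
        return cache.setdefault(id(self), random.randint(1, 10**20))

    return mock.patch.object(Node, "__hash__", _random_hash)


# Usage inside the test body:
# with patch_random_node_hashes():
#     inverted_fork_dags = Pipeline([...])
#     assert inverted_fork_dags.nodes == expected_sorted_nodes
```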


class TestPipelineDescribe:
def test_names_only(self, str_node_inputs_list):
2 changes: 1 addition & 1 deletion tests/pipeline/test_pipeline_with_transcoding.py
@@ -121,7 +121,7 @@ def test_grouped_nodes(self, input_data):
# Flatten a list of grouped nodes
assert pipeline.nodes == list(chain.from_iterable(grouped))
# Check each grouped node matches with expected group
assert all(g == e for g, e in zip(grouped, expected))
assert all(set(g) == e for g, e in zip(grouped, expected))

def test_free_input(self, input_data):
nodes = input_data["nodes"]