diff --git a/lib/Doctrine/ORM/Internal/TopologicalSort.php b/lib/Doctrine/ORM/Internal/TopologicalSort.php new file mode 100644 index 00000000000..35bc43c7269 --- /dev/null +++ b/lib/Doctrine/ORM/Internal/TopologicalSort.php @@ -0,0 +1,165 @@ + + */ + private $nodes = []; + + /** + * DFS state for the different nodes, indexed by node object id and using one of + * this class' constants as value. + * + * @var array + */ + private $states = []; + + /** + * Edges between the nodes. The first-level key is the object id of the outgoing + * node; the second array maps the destination node by object id as key. The final + * boolean value indicates whether the edge is optional or not. + * + * @var array> + */ + private $edges = []; + + /** + * Builds up the result during the DFS. + * + * @psalm-var list + */ + private $sortResult = []; + + /** @param object $node */ + public function addNode($node): void + { + $id = spl_object_id($node); + $this->nodes[$id] = $node; + $this->states[$id] = self::NOT_VISITED; + $this->edges[$id] = []; + } + + /** @param object $node */ + public function hasNode($node): bool + { + return isset($this->nodes[spl_object_id($node)]); + } + + /** + * Adds a new edge between two nodes to the graph + * + * @param object $from + * @param object $to + * @param bool $optional This indicates whether the edge may be ignored during the topological sort if it is necessary to break cycles. + */ + public function addEdge($from, $to, bool $optional): void + { + $fromId = spl_object_id($from); + $toId = spl_object_id($to); + + if (isset($this->edges[$fromId][$toId]) && $this->edges[$fromId][$toId] === false) { + return; // we already know about this dependency, and it is not optional + } + + $this->edges[$fromId][$toId] = $optional; + } + + /** + * Returns a topological sort of all nodes. When we have an edge A->B between two nodes + * A and B, then A will be listed before B in the result. + * + * @psalm-return list + */ + public function sort() + { + /* + * When possible, keep objects in the result in the same order in which they were added as nodes. + * Since nodes are unshifted into $this->>sortResult (see the visit() method), that means we + * need to work them in array_reverse order here. + */ + foreach (array_reverse(array_keys($this->nodes)) as $oid) { + if ($this->states[$oid] === self::NOT_VISITED) { + $this->visit($oid); + } + } + + return $this->sortResult; + } + + private function visit(int $oid): void + { + if ($this->states[$oid] === self::IN_PROGRESS) { + // This node is already on the current DFS stack. We've found a cycle! + throw new CycleDetectedException($this->nodes[$oid]); + } + + if ($this->states[$oid] === self::VISITED) { + // We've reached a node that we've already seen, including all + // other nodes that are reachable from here. We're done here, return. + return; + } + + $this->states[$oid] = self::IN_PROGRESS; + + // Continue the DFS downwards the edge list + foreach ($this->edges[$oid] as $adjacentId => $optional) { + try { + $this->visit($adjacentId); + } catch (CycleDetectedException $exception) { + if ($exception->isCycleCollected()) { + // There is a complete cycle downstream of the current node. We cannot + // do anything about that anymore. + throw $exception; + } + + if ($optional) { + // The current edge is part of a cycle, but it is optional and the closest + // such edge while backtracking. Break the cycle here by skipping the edge + // and continuing with the next one. + continue; + } + + // We have found a cycle and cannot break it at $edge. Best we can do + // is to retreat from the current vertex, hoping that somewhere up the + // stack this can be salvaged. + $this->states[$oid] = self::NOT_VISITED; + $exception->addToCycle($this->nodes[$oid]); + + throw $exception; + } + } + + // We have traversed all edges and visited all other nodes reachable from here. + // So we're done with this vertex as well. + + $this->states[$oid] = self::VISITED; + array_unshift($this->sortResult, $this->nodes[$oid]); + } +} diff --git a/lib/Doctrine/ORM/Internal/TopologicalSort/CycleDetectedException.php b/lib/Doctrine/ORM/Internal/TopologicalSort/CycleDetectedException.php new file mode 100644 index 00000000000..9b0bc49d257 --- /dev/null +++ b/lib/Doctrine/ORM/Internal/TopologicalSort/CycleDetectedException.php @@ -0,0 +1,55 @@ + */ + private $cycle; + + /** @var object */ + private $startNode; + + /** + * Do we have the complete cycle collected? + * + * @var bool + */ + private $cycleCollected = false; + + /** @param object $startNode */ + public function __construct($startNode) + { + parent::__construct('A cycle has been detected, so a topological sort is not possible. The getCycle() method provides the list of nodes that form the cycle.'); + + $this->startNode = $startNode; + $this->cycle = [$startNode]; + } + + /** @return list */ + public function getCycle(): array + { + return $this->cycle; + } + + /** @param object $node */ + public function addToCycle($node): void + { + array_unshift($this->cycle, $node); + + if ($node === $this->startNode) { + $this->cycleCollected = true; + } + } + + public function isCycleCollected(): bool + { + return $this->cycleCollected; + } +} diff --git a/tests/Doctrine/Tests/ORM/Internal/TopologicalSortTest.php b/tests/Doctrine/Tests/ORM/Internal/TopologicalSortTest.php new file mode 100644 index 00000000000..bba00d2d44e --- /dev/null +++ b/tests/Doctrine/Tests/ORM/Internal/TopologicalSortTest.php @@ -0,0 +1,248 @@ + */ + private $nodes = []; + + /** @var TopologicalSort */ + private $topologicalSort; + + protected function setUp(): void + { + $this->topologicalSort = new TopologicalSort(); + } + + public function testSimpleOrdering(): void + { + $this->addNodes('C', 'B', 'A', 'E'); + + $this->addEdge('A', 'B'); + $this->addEdge('B', 'C'); + $this->addEdge('E', 'A'); + + // There is only 1 valid ordering for this constellation + self::assertSame(['E', 'A', 'B', 'C'], $this->computeResult()); + } + + public function testSkipOptionalEdgeToBreakCycle(): void + { + $this->addNodes('A', 'B'); + + $this->addEdge('A', 'B', true); + $this->addEdge('B', 'A', false); + + self::assertSame(['B', 'A'], $this->computeResult()); + } + + public function testBreakCycleByBacktracking(): void + { + $this->addNodes('A', 'B', 'C', 'D'); + + $this->addEdge('A', 'B'); + $this->addEdge('B', 'C', true); + $this->addEdge('C', 'D'); + $this->addEdge('D', 'A'); // closes the cycle + + // We can only break B -> C, so the result must be C -> D -> A -> B + self::assertSame(['C', 'D', 'A', 'B'], $this->computeResult()); + } + + public function testCycleRemovedByEliminatingLastOptionalEdge(): void + { + // The cycle-breaking algorithm is currently very naive. It breaks the cycle + // at the last optional edge while it backtracks. In this example, we might + // get away with one extra update if we'd break A->B; instead, we break up + // B->C and B->D. + + $this->addNodes('A', 'B', 'C', 'D'); + + $this->addEdge('A', 'B', true); + $this->addEdge('B', 'C', true); + $this->addEdge('C', 'A'); + $this->addEdge('B', 'D', true); + $this->addEdge('D', 'A'); + + self::assertSame(['C', 'D', 'A', 'B'], $this->computeResult()); + } + + public function testGH7180Example(): void + { + // Example given in https://github.com/doctrine/orm/pull/7180#issuecomment-381341943 + + $this->addNodes('E', 'F', 'D', 'G'); + + $this->addEdge('D', 'G'); + $this->addEdge('D', 'F', true); + $this->addEdge('F', 'E'); + $this->addEdge('E', 'D'); + + self::assertSame(['F', 'E', 'D', 'G'], $this->computeResult()); + } + + public function testCommitOrderingFromGH7259Test(): void + { + // this test corresponds to the GH7259Test::testPersistFileBeforeVersion functional test + $this->addNodes('A', 'B', 'C', 'D'); + + $this->addEdge('D', 'A'); + $this->addEdge('A', 'B'); + $this->addEdge('D', 'C'); + $this->addEdge('A', 'D', true); + + // There is only multiple valid ordering for this constellation, but + // the D -> A -> B ordering is important to break the cycle + // on the nullable link. + $correctOrders = [ + ['D', 'A', 'B', 'C'], + ['D', 'A', 'C', 'B'], + ['D', 'C', 'A', 'B'], + ]; + + self::assertContains($this->computeResult(), $correctOrders); + } + + public function testCommitOrderingFromGH8349Case1Test(): void + { + $this->addNodes('A', 'B', 'C', 'D'); + + $this->addEdge('D', 'A'); + $this->addEdge('A', 'B', true); + $this->addEdge('B', 'D', true); + $this->addEdge('B', 'C', true); + $this->addEdge('C', 'D', true); + + // Many orderings are possible here, but the bottom line is D must be before A (it's the only hard requirement). + $result = $this->computeResult(); + + $indexA = array_search('A', $result, true); + $indexD = array_search('D', $result, true); + self::assertTrue($indexD < $indexA); + } + + public function testCommitOrderingFromGH8349Case2Test(): void + { + $this->addNodes('A', 'B'); + + $this->addEdge('B', 'A'); + $this->addEdge('B', 'A', true); // interesting: We have two edges in that direction + $this->addEdge('A', 'B', true); + + // The B -> A requirement determines the result here + self::assertSame(['B', 'A'], $this->computeResult()); + } + + public function testNodesMaintainOrderWhenNoDepencency(): void + { + $this->addNodes('A', 'B', 'C'); + + // Nodes that are not constrained by dependencies shall maintain the order + // in which they were added + self::assertSame(['A', 'B', 'C'], $this->computeResult()); + } + + public function testDetectSmallCycle(): void + { + $this->addNodes('A', 'B'); + + $this->addEdge('A', 'B'); + $this->addEdge('B', 'A'); + + $this->expectException(CycleDetectedException::class); + $this->computeResult(); + } + + public function testMultipleEdges(): void + { + // There may be more than one association between two given entities. + // For the commit order, we only need to track this once, since the + // result is the same (one entity must be processed before the other). + // + // In case one of the associations is optional and the other one is not, + // we must honor the non-optional one, regardless of the order in which + // they were declared. + + $this->addNodes('A', 'B'); + + $this->addEdge('A', 'B', true); // optional comes first + $this->addEdge('A', 'B', false); + $this->addEdge('B', 'A', false); + $this->addEdge('B', 'A', true); // optional comes last + + // Both edges A -> B and B -> A are non-optional, so this is a cycle + // that cannot be broken. + + $this->expectException(CycleDetectedException::class); + $this->computeResult(); + } + + public function testDetectLargerCycleNotIncludingStartNode(): void + { + $this->addNodes('A', 'B', 'C', 'D'); + + $this->addEdge('A', 'B'); + $this->addEdge('B', 'C'); + $this->addEdge('C', 'D'); + $this->addEdge('D', 'B'); + + // The sort has to start with the last node being added to make it possible that + // the result is in the order the nodes were added (if permitted by edges). + // That means the cycle will be detected when starting at D, so it is D -> B -> C -> D. + + try { + $this->computeResult(); + } catch (CycleDetectedException $exception) { + self::assertEquals( + [$this->nodes['D'], $this->nodes['B'], $this->nodes['C'], $this->nodes['D']], + $exception->getCycle() + ); + } + } + + private function addNodes(string ...$names): void + { + foreach ($names as $name) { + $node = new Node($name); + $this->nodes[$name] = $node; + $this->topologicalSort->addNode($node); + } + } + + private function addEdge(string $from, string $to, bool $optional = false): void + { + $this->topologicalSort->addEdge($this->nodes[$from], $this->nodes[$to], $optional); + } + + /** + * @return list + */ + private function computeResult(): array + { + return array_map(static function (Node $n): string { + return $n->name; + }, array_values($this->topologicalSort->sort())); + } +} + +class Node +{ + /** @var string */ + public $name; + + public function __construct(string $name) + { + $this->name = $name; + } +}