Rollup merge of rust-lang#123980 - WaffleLapkin:graph-average-refacto…

…r, r=wesleywiser Add an opt-in to store incoming edges in `VecGraph` + misc r? `@fmease` needed for rust-lang#123939
GuillaumeGomez · Apr 18, 2024 · abdeb13 · abdeb13
2 parents aa82d66 + 523fe2b
commit abdeb13
Show file tree

Hide file tree

Showing 5 changed files with 265 additions and 70 deletions.
diff --git a/compiler/rustc_data_structures/src/graph/iterate/mod.rs b/compiler/rustc_data_structures/src/graph/iterate/mod.rs
@@ -70,21 +70,21 @@ pub fn reverse_post_order<G: DirectedGraph + Successors>(
 }
 
 /// A "depth-first search" iterator for a directed graph.
-pub struct DepthFirstSearch<'graph, G>
+pub struct DepthFirstSearch<G>
 where
-    G: ?Sized + DirectedGraph + Successors,
+    G: DirectedGraph + Successors,
 {
-    graph: &'graph G,
+    graph: G,
     stack: Vec<G::Node>,
     visited: BitSet<G::Node>,
 }
 
-impl<'graph, G> DepthFirstSearch<'graph, G>
+impl<G> DepthFirstSearch<G>
 where
-    G: ?Sized + DirectedGraph + Successors,
+    G: DirectedGraph + Successors,
 {
-    pub fn new(graph: &'graph G) -> Self {
-        Self { graph, stack: vec![], visited: BitSet::new_empty(graph.num_nodes()) }
+    pub fn new(graph: G) -> Self {
+        Self { stack: vec![], visited: BitSet::new_empty(graph.num_nodes()), graph }
     }
 
     /// Version of `push_start_node` that is convenient for chained
@@ -125,9 +125,9 @@ where
     }
 }
 
-impl<G> std::fmt::Debug for DepthFirstSearch<'_, G>
+impl<G> std::fmt::Debug for DepthFirstSearch<G>
 where
-    G: ?Sized + DirectedGraph + Successors,
+    G: DirectedGraph + Successors,
 {
     fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         let mut f = fmt.debug_set();
@@ -138,9 +138,9 @@ where
     }
 }
 
-impl<G> Iterator for DepthFirstSearch<'_, G>
+impl<G> Iterator for DepthFirstSearch<G>
 where
-    G: ?Sized + DirectedGraph + Successors,
+    G: DirectedGraph + Successors,
 {
     type Item = G::Node;
 

diff --git a/compiler/rustc_data_structures/src/graph/mod.rs b/compiler/rustc_data_structures/src/graph/mod.rs
@@ -46,9 +46,35 @@ where
         .is_some()
 }
 
-pub fn depth_first_search<G>(graph: &G, from: G::Node) -> iterate::DepthFirstSearch<'_, G>
+pub fn depth_first_search<G>(graph: G, from: G::Node) -> iterate::DepthFirstSearch<G>
 where
-    G: ?Sized + Successors,
+    G: Successors,
 {
     iterate::DepthFirstSearch::new(graph).with_start_node(from)
 }
+
+pub fn depth_first_search_as_undirected<G>(
+    graph: G,
+    from: G::Node,
+) -> iterate::DepthFirstSearch<impl Successors<Node = G::Node>>
+where
+    G: Successors + Predecessors,
+{
+    struct AsUndirected<G>(G);
+
+    impl<G: DirectedGraph> DirectedGraph for AsUndirected<G> {
+        type Node = G::Node;
+
+        fn num_nodes(&self) -> usize {
+            self.0.num_nodes()
+        }
+    }
+
+    impl<G: Successors + Predecessors> Successors for AsUndirected<G> {
+        fn successors(&self, node: Self::Node) -> impl Iterator<Item = Self::Node> {
+            self.0.successors(node).chain(self.0.predecessors(node))
+        }
+    }
+
+    iterate::DepthFirstSearch::new(AsUndirected(graph)).with_start_node(from)
+}
diff --git a/compiler/rustc_data_structures/src/graph/vec_graph/mod.rs b/compiler/rustc_data_structures/src/graph/vec_graph/mod.rs
@@ -1,99 +1,235 @@
-use crate::graph::{DirectedGraph, NumEdges, Successors};
+use crate::graph::{DirectedGraph, NumEdges, Predecessors, Successors};
 use rustc_index::{Idx, IndexVec};
 
 #[cfg(test)]
 mod tests;
 
-pub struct VecGraph<N: Idx> {
-    /// Maps from a given node to an index where the set of successors
-    /// for that node starts. The index indexes into the `edges`
-    /// vector. To find the range for a given node, we look up the
-    /// start for that node and then the start for the next node
-    /// (i.e., with an index 1 higher) and get the range between the
-    /// two. This vector always has an extra entry so that this works
-    /// even for the max element.
+/// A directed graph, efficient for cases where node indices are pre-existing.
+///
+/// If `BR` is true, the graph will store back-references, allowing you to get predecessors.
+pub struct VecGraph<N: Idx, const BR: bool = false> {
+    // This is basically a `HashMap<N, (Vec<N>, If<BR, Vec<N>>)>` -- a map from a node index, to
+    // a list of targets of outgoing edges and (if enabled) a list of sources of incoming edges.
+    //
+    // However, it is condensed into two arrays as an optimization.
+    //
+    // `node_starts[n]` is the start of the list of targets of outgoing edges for node `n`.
+    // So you can get node's successors with `edge_targets[node_starts[n]..node_starts[n + 1]]`.
+    //
+    // If `BR` is true (back references are enabled), then `node_starts[n + edge_count]` is the
+    // start of the list of *sources* of incoming edges. You can get predecessors of a node
+    // similarly to its successors but offsetting by `edge_count`. `edge_count` is
+    // `edge_targets.len()/2` (again, in case BR is true) because half of the vec is back refs.
+    //
+    // All of this might be confusing, so here is an example graph and its representation:
+    //
+    //       n3 ----+
+    //        ^     |                           (if BR = true)
+    //        |     v     outgoing edges        incoming edges
+    // n0 -> n1 -> n2     ______________      __________________
+    //                   /              \    /                  \
+    //  node indices[1]:  n0, n1, n2, n3,     n0, n1, n2, n3,       n/a
+    //      vec indices:  n0, n1, n2, n3,     n4, n5, n6, n7,       n8
+    //      node_starts:  [0,  1,  3,  4       4,  4,  5,  7,        8]
+    //                     |   |   |   |       |   |   |   |         |
+    //                     |   |   +---+       +---+   |   +---+     |
+    //                     |   |       |           |   |       |     |
+    //                     v   v       v           v   v       v     v
+    //     edge_targets: [n1, n2, n3, n2          n0, n1, n3, n1]
+    //                   /    \____/   |           |  \____/    \
+    //             n0->n1     /        |           |       \     n3<-n1
+    //                       /    n3->n2 [2]  n1<-n0 [2]    \
+    //         n1->n2, n1->n3                                n2<-n1, n2<-n3
+    //
+    // The incoming edges are basically stored in the same way as outgoing edges, but offset and
+    // the graph they store is the inverse of the original. Last index in the `node_starts` array
+    // always points to one-past-the-end, so that we don't need to bound check `node_starts[n + 1]`
+    //
+    // [1]: "node indices" are the indices a user of `VecGraph` might use,
+    //      note that they are different from "vec indices",
+    //      which are the real indices you need to index `node_starts`
+    //
+    // [2]: Note that even though n2 also points to here,
+    //      the next index also points here, so n2 has no
+    //      successors (`edge_targets[3..3] = []`).
+    //      Similarly with n0 and incoming edges
+    //
+    // If this is still confusing... then sorry :(
+    //
+    /// Indices into `edge_targets` that signify a start of list of edges.
     node_starts: IndexVec<N, usize>,
 
+    /// Targets (or sources for back refs) of edges
     edge_targets: Vec<N>,
 }
 
-impl<N: Idx + Ord> VecGraph<N> {
+impl<N: Idx + Ord, const BR: bool> VecGraph<N, BR> {
     pub fn new(num_nodes: usize, mut edge_pairs: Vec<(N, N)>) -> Self {
+        let num_edges = edge_pairs.len();
+
+        let nodes_cap = match BR {
+            // +1 for special entry at the end, pointing one past the end of `edge_targets`
+            false => num_nodes + 1,
+            // *2 for back references
+            true => (num_nodes * 2) + 1,
+        };
+
+        let edges_cap = match BR {
+            false => num_edges,
+            // *2 for back references
+            true => num_edges * 2,
+        };
+
+        let mut node_starts = IndexVec::with_capacity(nodes_cap);
+        let mut edge_targets = Vec::with_capacity(edges_cap);
+
         // Sort the edges by the source -- this is important.
         edge_pairs.sort();
 
-        let num_edges = edge_pairs.len();
+        // Fill forward references
+        create_index(
+            num_nodes,
+            &mut edge_pairs.iter().map(|&(src, _)| src),
+            &mut edge_pairs.iter().map(|&(_, tgt)| tgt),
+            &mut edge_targets,
+            &mut node_starts,
+        );
 
-        // Store the *target* of each edge into `edge_targets`.
-        let edge_targets: Vec<N> = edge_pairs.iter().map(|&(_, target)| target).collect();
-
-        // Create the *edge starts* array. We are iterating over the
-        // (sorted) edge pairs. We maintain the invariant that the
-        // length of the `node_starts` array is enough to store the
-        // current source node -- so when we see that the source node
-        // for an edge is greater than the current length, we grow the
-        // edge-starts array by just enough.
-        let mut node_starts = IndexVec::with_capacity(num_edges);
-        for (index, &(source, _)) in edge_pairs.iter().enumerate() {
-            // If we have a list like `[(0, x), (2, y)]`:
-            //
-            // - Start out with `node_starts` of `[]`
-            // - Iterate to `(0, x)` at index 0:
-            //   - Push one entry because `node_starts.len()` (0) is <= the source (0)
-            //   - Leaving us with `node_starts` of `[0]`
-            // - Iterate to `(2, y)` at index 1:
-            //   - Push one entry because `node_starts.len()` (1) is <= the source (2)
-            //   - Push one entry because `node_starts.len()` (2) is <= the source (2)
-            //   - Leaving us with `node_starts` of `[0, 1, 1]`
-            // - Loop terminates
-            while node_starts.len() <= source.index() {
-                node_starts.push(index);
-            }
-        }
+        // Fill back references
+        if BR {
+            // Pop the special "last" entry, it will be replaced by first back ref
+            node_starts.pop();
 
-        // Pad out the `node_starts` array so that it has `num_nodes +
-        // 1` entries. Continuing our example above, if `num_nodes` is
-        // be `3`, we would push one more index: `[0, 1, 1, 2]`.
-        //
-        // Interpretation of that vector:
-        //
-        // [0, 1, 1, 2]
-        //        ---- range for N=2
-        //     ---- range for N=1
-        //  ---- range for N=0
-        while node_starts.len() <= num_nodes {
-            node_starts.push(edge_targets.len());
-        }
+            // Re-sort the edges so that they are sorted by target
+            edge_pairs.sort_by_key(|&(src, tgt)| (tgt, src));
 
-        assert_eq!(node_starts.len(), num_nodes + 1);
+            create_index(
+                // Back essentially double the number of nodes
+                num_nodes * 2,
+                // NB: the source/target are switched here too
+                // NB: we double the key index, so that we can later use *2 to get the back references
+                &mut edge_pairs.iter().map(|&(_, tgt)| N::new(tgt.index() + num_nodes)),
+                &mut edge_pairs.iter().map(|&(src, _)| src),
+                &mut edge_targets,
+                &mut node_starts,
+            );
+        }
 
         Self { node_starts, edge_targets }
     }
 
     /// Gets the successors for `source` as a slice.
     pub fn successors(&self, source: N) -> &[N] {
+        assert!(source.index() < self.num_nodes());
+
         let start_index = self.node_starts[source];
         let end_index = self.node_starts[source.plus(1)];
         &self.edge_targets[start_index..end_index]
     }
 }
 
-impl<N: Idx> DirectedGraph for VecGraph<N> {
+impl<N: Idx + Ord> VecGraph<N, true> {
+    /// Gets the predecessors for `target` as a slice.
+    pub fn predecessors(&self, target: N) -> &[N] {
+        assert!(target.index() < self.num_nodes());
+
+        let target = N::new(target.index() + self.num_nodes());
+
+        let start_index = self.node_starts[target];
+        let end_index = self.node_starts[target.plus(1)];
+        &self.edge_targets[start_index..end_index]
+    }
+}
+
+/// Creates/initializes the index for the [`VecGraph`]. A helper for [`VecGraph::new`].
+///
+/// - `num_nodes` is the target number of nodes in the graph
+/// - `sorted_edge_sources` are the edge sources, sorted
+/// - `associated_edge_targets` are the edge *targets* in the same order as sources
+/// - `edge_targets` is the vec of targets to be extended
+/// - `node_starts` is the index to be filled
+fn create_index<N: Idx + Ord>(
+    num_nodes: usize,
+    sorted_edge_sources: &mut dyn Iterator<Item = N>,
+    associated_edge_targets: &mut dyn Iterator<Item = N>,
+    edge_targets: &mut Vec<N>,
+    node_starts: &mut IndexVec<N, usize>,
+) {
+    let offset = edge_targets.len();
+
+    // Store the *target* of each edge into `edge_targets`.
+    edge_targets.extend(associated_edge_targets);
+
+    // Create the *edge starts* array. We are iterating over the
+    // (sorted) edge pairs. We maintain the invariant that the
+    // length of the `node_starts` array is enough to store the
+    // current source node -- so when we see that the source node
+    // for an edge is greater than the current length, we grow the
+    // edge-starts array by just enough.
+    for (index, source) in sorted_edge_sources.enumerate() {
+        // If we have a list like `[(0, x), (2, y)]`:
+        //
+        // - Start out with `node_starts` of `[]`
+        // - Iterate to `(0, x)` at index 0:
+        //   - Push one entry because `node_starts.len()` (0) is <= the source (0)
+        //   - Leaving us with `node_starts` of `[0]`
+        // - Iterate to `(2, y)` at index 1:
+        //   - Push one entry because `node_starts.len()` (1) is <= the source (2)
+        //   - Push one entry because `node_starts.len()` (2) is <= the source (2)
+        //   - Leaving us with `node_starts` of `[0, 1, 1]`
+        // - Loop terminates
+        while node_starts.len() <= source.index() {
+            node_starts.push(index + offset);
+        }
+    }
+
+    // Pad out the `node_starts` array so that it has `num_nodes +
+    // 1` entries. Continuing our example above, if `num_nodes` is
+    // be `3`, we would push one more index: `[0, 1, 1, 2]`.
+    //
+    // Interpretation of that vector:
+    //
+    // [0, 1, 1, 2]
+    //        ---- range for N=2
+    //     ---- range for N=1
+    //  ---- range for N=0
+    while node_starts.len() <= num_nodes {
+        node_starts.push(edge_targets.len());
+    }
+
+    assert_eq!(node_starts.len(), num_nodes + 1);
+}
+
+impl<N: Idx, const BR: bool> DirectedGraph for VecGraph<N, BR> {
     type Node = N;
 
     fn num_nodes(&self) -> usize {
-        self.node_starts.len() - 1
+        match BR {
+            false => self.node_starts.len() - 1,
+            // If back refs are enabled, half of the array is said back refs
+            true => (self.node_starts.len() - 1) / 2,
+        }
     }
 }
 
-impl<N: Idx> NumEdges for VecGraph<N> {
+impl<N: Idx, const BR: bool> NumEdges for VecGraph<N, BR> {
     fn num_edges(&self) -> usize {
-        self.edge_targets.len()
+        match BR {
+            false => self.edge_targets.len(),
+            // If back refs are enabled, half of the array is reversed edges for them
+            true => self.edge_targets.len() / 2,
+        }
     }
 }
 
-impl<N: Idx + Ord> Successors for VecGraph<N> {
+impl<N: Idx + Ord, const BR: bool> Successors for VecGraph<N, BR> {
     fn successors(&self, node: N) -> impl Iterator<Item = Self::Node> {
         self.successors(node).iter().cloned()
     }
 }
+
+impl<N: Idx + Ord> Predecessors for VecGraph<N, true> {
+    fn predecessors(&self, node: Self::Node) -> impl Iterator<Item = Self::Node> {
+        self.predecessors(node).iter().cloned()
+    }
+}