From 86a576528c003992c7a4ba9dd0601ad637340efb Mon Sep 17 00:00:00 2001 From: Maybe Waffle Date: Mon, 15 Apr 2024 18:14:28 +0000 Subject: [PATCH 1/4] Add an opt-in to store incoming edges in `VecGraph` + some docs --- .../src/graph/vec_graph/mod.rs | 248 ++++++++++++++---- .../infer/region_constraints/leak_check.rs | 2 +- 2 files changed, 193 insertions(+), 57 deletions(-) diff --git a/compiler/rustc_data_structures/src/graph/vec_graph/mod.rs b/compiler/rustc_data_structures/src/graph/vec_graph/mod.rs index 26c86469fad84..120244c8918a0 100644 --- a/compiler/rustc_data_structures/src/graph/vec_graph/mod.rs +++ b/compiler/rustc_data_structures/src/graph/vec_graph/mod.rs @@ -1,99 +1,235 @@ -use crate::graph::{DirectedGraph, NumEdges, Successors}; +use crate::graph::{DirectedGraph, NumEdges, Predecessors, Successors}; use rustc_index::{Idx, IndexVec}; #[cfg(test)] mod tests; -pub struct VecGraph { - /// Maps from a given node to an index where the set of successors - /// for that node starts. The index indexes into the `edges` - /// vector. To find the range for a given node, we look up the - /// start for that node and then the start for the next node - /// (i.e., with an index 1 higher) and get the range between the - /// two. This vector always has an extra entry so that this works - /// even for the max element. +/// A directed graph, efficient for cases where node indices are pre-existing. +/// +/// If `BR` is true, the graph will store back-references, allowing you to get predecessors. +pub struct VecGraph { + // This is basically a `HashMap, If>)>` -- a map from a node index, to + // a list of targets of outgoing edges and (if enabled) a list of sources of incoming edges. + // + // However, it is condensed into two arrays as an optimization. + // + // `node_starts[n]` is the start of the list of targets of outgoing edges for node `n`. + // So you can get node's successors with `edge_targets[node_starts[n]..node_starts[n + 1]]`. + // + // If `BR` is true (back references are enabled), then `node_starts[n + edge_count]` is the + // start of the list of *sources* of incoming edges. You can get predecessors of a node + // similarly to its successors but offsetting by `edge_count`. `edge_count` is + // `edge_targets.len()/2` (again, in case BR is true) because half of the vec is back refs. + // + // All of this might be confusing, so here is an example graph and its representation: + // + // n3 ----+ + // ^ | (if BR = true) + // | v outgoing edges incoming edges + // n0 -> n1 -> n2 ______________ __________________ + // / \ / \ + // node indices[1]: n0, n1, n2, n3, n0, n1, n2, n3, n/a + // vec indices: n0, n1, n2, n3, n4, n5, n6, n7, n8 + // node_starts: [0, 1, 3, 4 4, 4, 5, 7, 8] + // | | | | | | | | | + // | | +---+ +---+ | +---+ | + // | | | | | | | + // v v v v v v v + // edge_targets: [n1, n2, n3, n2 n0, n1, n3, n1] + // / \____/ | | \____/ \ + // n0->n1 / | | \ n3<-n1 + // / n3->n2 [2] n1<-n0 [2] \ + // n1->n2, n1->n3 n2<-n1, n2<-n3 + // + // The incoming edges are basically stored in the same way as outgoing edges, but offset and + // the graph they store is the inverse of the original. Last index in the `node_starts` array + // always points to one-past-the-end, so that we don't need to bound check `node_starts[n + 1]` + // + // [1]: "node indices" are the indices a user of `VecGraph` might use, + // note that they are different from "vec indices", + // which are the real indices you need to index `node_starts` + // + // [2]: Note that even though n2 also points to here, + // the next index also points here, so n2 has no + // successors (`edge_targets[3..3] = []`). + // Similarly with n0 and incoming edges + // + // If this is still confusing... then sorry :( + // + /// Indices into `edge_targets` that signify a start of list of edges. node_starts: IndexVec, + /// Targets (or sources for back refs) of edges edge_targets: Vec, } -impl VecGraph { +impl VecGraph { pub fn new(num_nodes: usize, mut edge_pairs: Vec<(N, N)>) -> Self { + let num_edges = edge_pairs.len(); + + let nodes_cap = match BR { + // +1 for special entry at the end, pointing one past the end of `edge_targets` + false => num_nodes + 1, + // *2 for back references + true => (num_nodes * 2) + 1, + }; + + let edges_cap = match BR { + false => num_edges, + // *2 for back references + true => num_edges * 2, + }; + + let mut node_starts = IndexVec::with_capacity(nodes_cap); + let mut edge_targets = Vec::with_capacity(edges_cap); + // Sort the edges by the source -- this is important. edge_pairs.sort(); - let num_edges = edge_pairs.len(); + // Fill forward references + create_index( + num_nodes, + &mut edge_pairs.iter().map(|&(src, _)| src), + &mut edge_pairs.iter().map(|&(_, tgt)| tgt), + &mut edge_targets, + &mut node_starts, + ); - // Store the *target* of each edge into `edge_targets`. - let edge_targets: Vec = edge_pairs.iter().map(|&(_, target)| target).collect(); - - // Create the *edge starts* array. We are iterating over the - // (sorted) edge pairs. We maintain the invariant that the - // length of the `node_starts` array is enough to store the - // current source node -- so when we see that the source node - // for an edge is greater than the current length, we grow the - // edge-starts array by just enough. - let mut node_starts = IndexVec::with_capacity(num_edges); - for (index, &(source, _)) in edge_pairs.iter().enumerate() { - // If we have a list like `[(0, x), (2, y)]`: - // - // - Start out with `node_starts` of `[]` - // - Iterate to `(0, x)` at index 0: - // - Push one entry because `node_starts.len()` (0) is <= the source (0) - // - Leaving us with `node_starts` of `[0]` - // - Iterate to `(2, y)` at index 1: - // - Push one entry because `node_starts.len()` (1) is <= the source (2) - // - Push one entry because `node_starts.len()` (2) is <= the source (2) - // - Leaving us with `node_starts` of `[0, 1, 1]` - // - Loop terminates - while node_starts.len() <= source.index() { - node_starts.push(index); - } - } + // Fill back references + if BR { + // Pop the special "last" entry, it will be replaced by first back ref + node_starts.pop(); - // Pad out the `node_starts` array so that it has `num_nodes + - // 1` entries. Continuing our example above, if `num_nodes` is - // be `3`, we would push one more index: `[0, 1, 1, 2]`. - // - // Interpretation of that vector: - // - // [0, 1, 1, 2] - // ---- range for N=2 - // ---- range for N=1 - // ---- range for N=0 - while node_starts.len() <= num_nodes { - node_starts.push(edge_targets.len()); - } + // Re-sort the edges so that they are sorted by target + edge_pairs.sort_by_key(|&(src, tgt)| (tgt, src)); - assert_eq!(node_starts.len(), num_nodes + 1); + create_index( + // Back essentially double the number of nodes + num_nodes * 2, + // NB: the source/target are switched here too + // NB: we double the key index, so that we can later use *2 to get the back references + &mut edge_pairs.iter().map(|&(_, tgt)| N::new(tgt.index() + num_nodes)), + &mut edge_pairs.iter().map(|&(src, _)| src), + &mut edge_targets, + &mut node_starts, + ); + } Self { node_starts, edge_targets } } /// Gets the successors for `source` as a slice. pub fn successors(&self, source: N) -> &[N] { + assert!(source.index() < self.num_nodes()); + let start_index = self.node_starts[source]; let end_index = self.node_starts[source.plus(1)]; &self.edge_targets[start_index..end_index] } } -impl DirectedGraph for VecGraph { +impl VecGraph { + /// Gets the predecessors for `target` as a slice. + pub fn predecessors(&self, target: N) -> &[N] { + assert!(target.index() < self.num_nodes()); + + let target = N::new(target.index() + self.num_nodes()); + + let start_index = self.node_starts[target]; + let end_index = self.node_starts[target.plus(1)]; + &self.edge_targets[start_index..end_index] + } +} + +/// Creates/initializes the index for the [`VecGraph`]. A helper for [`VecGraph::new`]. +/// +/// - `num_nodes` is the target number of nodes in the graph +/// - `sorted_edge_sources` are the edge sources, sorted +/// - `associated_edge_targets` are the edge *targets* in the same order as sources +/// - `edge_targets` is the vec of targets to be extended +/// - `node_starts` is the index to be filled +fn create_index( + num_nodes: usize, + sorted_edge_sources: &mut dyn Iterator, + associated_edge_targets: &mut dyn Iterator, + edge_targets: &mut Vec, + node_starts: &mut IndexVec, +) { + let offset = edge_targets.len(); + + // Store the *target* of each edge into `edge_targets`. + edge_targets.extend(associated_edge_targets); + + // Create the *edge starts* array. We are iterating over the + // (sorted) edge pairs. We maintain the invariant that the + // length of the `node_starts` array is enough to store the + // current source node -- so when we see that the source node + // for an edge is greater than the current length, we grow the + // edge-starts array by just enough. + for (index, source) in sorted_edge_sources.enumerate() { + // If we have a list like `[(0, x), (2, y)]`: + // + // - Start out with `node_starts` of `[]` + // - Iterate to `(0, x)` at index 0: + // - Push one entry because `node_starts.len()` (0) is <= the source (0) + // - Leaving us with `node_starts` of `[0]` + // - Iterate to `(2, y)` at index 1: + // - Push one entry because `node_starts.len()` (1) is <= the source (2) + // - Push one entry because `node_starts.len()` (2) is <= the source (2) + // - Leaving us with `node_starts` of `[0, 1, 1]` + // - Loop terminates + while node_starts.len() <= source.index() { + node_starts.push(index + offset); + } + } + + // Pad out the `node_starts` array so that it has `num_nodes + + // 1` entries. Continuing our example above, if `num_nodes` is + // be `3`, we would push one more index: `[0, 1, 1, 2]`. + // + // Interpretation of that vector: + // + // [0, 1, 1, 2] + // ---- range for N=2 + // ---- range for N=1 + // ---- range for N=0 + while node_starts.len() <= num_nodes { + node_starts.push(edge_targets.len()); + } + + assert_eq!(node_starts.len(), num_nodes + 1); +} + +impl DirectedGraph for VecGraph { type Node = N; fn num_nodes(&self) -> usize { - self.node_starts.len() - 1 + match BR { + false => self.node_starts.len() - 1, + // If back refs are enabled, half of the array is said back refs + true => (self.node_starts.len() - 1) / 2, + } } } -impl NumEdges for VecGraph { +impl NumEdges for VecGraph { fn num_edges(&self) -> usize { - self.edge_targets.len() + match BR { + false => self.edge_targets.len(), + // If back refs are enabled, half of the array is reversed edges for them + true => self.edge_targets.len() / 2, + } } } -impl Successors for VecGraph { +impl Successors for VecGraph { fn successors(&self, node: N) -> impl Iterator { self.successors(node).iter().cloned() } } + +impl Predecessors for VecGraph { + fn predecessors(&self, node: Self::Node) -> impl Iterator { + self.predecessors(node).iter().cloned() + } +} diff --git a/compiler/rustc_infer/src/infer/region_constraints/leak_check.rs b/compiler/rustc_infer/src/infer/region_constraints/leak_check.rs index 06f8dd4a4c6cd..6e8efa3e7c182 100644 --- a/compiler/rustc_infer/src/infer/region_constraints/leak_check.rs +++ b/compiler/rustc_infer/src/infer/region_constraints/leak_check.rs @@ -382,7 +382,7 @@ impl<'tcx> MiniGraph<'tcx> { edges.push((source_node, target_node)); }, ); - let graph = VecGraph::new(nodes.len(), edges); + let graph = VecGraph::<_, false>::new(nodes.len(), edges); let sccs = Sccs::new(&graph); Self { nodes, sccs } } From 7d2cb3dda732028c7f8dfa8de5876da5be4bad9d Mon Sep 17 00:00:00 2001 From: Maybe Waffle Date: Mon, 15 Apr 2024 18:17:51 +0000 Subject: [PATCH 2/4] Make `graph::DepthFirstSearch` accept `G` by value It's required for the next commit. Note that you can still have `G = &H`, since there are implementations of all the graph traits for references. --- .../src/graph/iterate/mod.rs | 22 +++++++++---------- .../rustc_data_structures/src/graph/mod.rs | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/compiler/rustc_data_structures/src/graph/iterate/mod.rs b/compiler/rustc_data_structures/src/graph/iterate/mod.rs index 7a77f2c0dbbf1..78d05a6e1951b 100644 --- a/compiler/rustc_data_structures/src/graph/iterate/mod.rs +++ b/compiler/rustc_data_structures/src/graph/iterate/mod.rs @@ -70,21 +70,21 @@ pub fn reverse_post_order( } /// A "depth-first search" iterator for a directed graph. -pub struct DepthFirstSearch<'graph, G> +pub struct DepthFirstSearch where - G: ?Sized + DirectedGraph + Successors, + G: DirectedGraph + Successors, { - graph: &'graph G, + graph: G, stack: Vec, visited: BitSet, } -impl<'graph, G> DepthFirstSearch<'graph, G> +impl DepthFirstSearch where - G: ?Sized + DirectedGraph + Successors, + G: DirectedGraph + Successors, { - pub fn new(graph: &'graph G) -> Self { - Self { graph, stack: vec![], visited: BitSet::new_empty(graph.num_nodes()) } + pub fn new(graph: G) -> Self { + Self { stack: vec![], visited: BitSet::new_empty(graph.num_nodes()), graph } } /// Version of `push_start_node` that is convenient for chained @@ -125,9 +125,9 @@ where } } -impl std::fmt::Debug for DepthFirstSearch<'_, G> +impl std::fmt::Debug for DepthFirstSearch where - G: ?Sized + DirectedGraph + Successors, + G: DirectedGraph + Successors, { fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut f = fmt.debug_set(); @@ -138,9 +138,9 @@ where } } -impl Iterator for DepthFirstSearch<'_, G> +impl Iterator for DepthFirstSearch where - G: ?Sized + DirectedGraph + Successors, + G: DirectedGraph + Successors, { type Item = G::Node; diff --git a/compiler/rustc_data_structures/src/graph/mod.rs b/compiler/rustc_data_structures/src/graph/mod.rs index 3ae3023a91b34..0906c04716bd0 100644 --- a/compiler/rustc_data_structures/src/graph/mod.rs +++ b/compiler/rustc_data_structures/src/graph/mod.rs @@ -46,9 +46,9 @@ where .is_some() } -pub fn depth_first_search(graph: &G, from: G::Node) -> iterate::DepthFirstSearch<'_, G> +pub fn depth_first_search(graph: G, from: G::Node) -> iterate::DepthFirstSearch where - G: ?Sized + Successors, + G: Successors, { iterate::DepthFirstSearch::new(graph).with_start_node(from) } From fa134b5e0f87c44074549af8bd6d7479b0954799 Mon Sep 17 00:00:00 2001 From: Maybe Waffle Date: Mon, 15 Apr 2024 18:20:30 +0000 Subject: [PATCH 3/4] Add `graph::depth_first_search_as_undirected` --- .../rustc_data_structures/src/graph/mod.rs | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/compiler/rustc_data_structures/src/graph/mod.rs b/compiler/rustc_data_structures/src/graph/mod.rs index 0906c04716bd0..103ddd917bf84 100644 --- a/compiler/rustc_data_structures/src/graph/mod.rs +++ b/compiler/rustc_data_structures/src/graph/mod.rs @@ -52,3 +52,29 @@ where { iterate::DepthFirstSearch::new(graph).with_start_node(from) } + +pub fn depth_first_search_as_undirected( + graph: G, + from: G::Node, +) -> iterate::DepthFirstSearch> +where + G: Successors + Predecessors, +{ + struct AsUndirected(G); + + impl DirectedGraph for AsUndirected { + type Node = G::Node; + + fn num_nodes(&self) -> usize { + self.0.num_nodes() + } + } + + impl Successors for AsUndirected { + fn successors(&self, node: Self::Node) -> impl Iterator { + self.0.successors(node).chain(self.0.predecessors(node)) + } + } + + iterate::DepthFirstSearch::new(AsUndirected(graph)).with_start_node(from) +} From 523fe2b67ba7a977ceda3e76c5b605b304d9b6bf Mon Sep 17 00:00:00 2001 From: Maybe Waffle Date: Thu, 18 Apr 2024 17:32:42 +0000 Subject: [PATCH 4/4] Add tests for predecessor-aware `VecGraph` mode --- .../src/graph/vec_graph/tests.rs | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/compiler/rustc_data_structures/src/graph/vec_graph/tests.rs b/compiler/rustc_data_structures/src/graph/vec_graph/tests.rs index 87c8d25f09492..a077d9d081364 100644 --- a/compiler/rustc_data_structures/src/graph/vec_graph/tests.rs +++ b/compiler/rustc_data_structures/src/graph/vec_graph/tests.rs @@ -18,10 +18,18 @@ fn create_graph() -> VecGraph { VecGraph::new(7, vec![(0, 1), (1, 2), (1, 3), (3, 4), (5, 1)]) } +fn create_graph_with_back_refs() -> VecGraph { + // Same as above + VecGraph::new(7, vec![(0, 1), (1, 2), (1, 3), (3, 4), (5, 1)]) +} + #[test] fn num_nodes() { let graph = create_graph(); assert_eq!(graph.num_nodes(), 7); + + let graph = create_graph_with_back_refs(); + assert_eq!(graph.num_nodes(), 7); } #[test] @@ -34,6 +42,27 @@ fn successors() { assert_eq!(graph.successors(4), &[] as &[usize]); assert_eq!(graph.successors(5), &[1]); assert_eq!(graph.successors(6), &[] as &[usize]); + + let graph = create_graph_with_back_refs(); + assert_eq!(graph.successors(0), &[1]); + assert_eq!(graph.successors(1), &[2, 3]); + assert_eq!(graph.successors(2), &[] as &[usize]); + assert_eq!(graph.successors(3), &[4]); + assert_eq!(graph.successors(4), &[] as &[usize]); + assert_eq!(graph.successors(5), &[1]); + assert_eq!(graph.successors(6), &[] as &[usize]); +} + +#[test] +fn predecessors() { + let graph = create_graph_with_back_refs(); + assert_eq!(graph.predecessors(0), &[]); + assert_eq!(graph.predecessors(1), &[0, 5]); + assert_eq!(graph.predecessors(2), &[1]); + assert_eq!(graph.predecessors(3), &[1]); + assert_eq!(graph.predecessors(4), &[3]); + assert_eq!(graph.predecessors(5), &[]); + assert_eq!(graph.predecessors(6), &[]); } #[test] @@ -41,4 +70,8 @@ fn dfs() { let graph = create_graph(); let dfs: Vec<_> = graph::depth_first_search(&graph, 0).collect(); assert_eq!(dfs, vec![0, 1, 3, 4, 2]); + + let graph = create_graph_with_back_refs(); + let dfs: Vec<_> = graph::depth_first_search(&graph, 0).collect(); + assert_eq!(dfs, vec![0, 1, 3, 4, 2]); }