Skip to content

Commit

Permalink
Add an opt-in to store incoming edges in VecGraph + some docs
Browse files Browse the repository at this point in the history
  • Loading branch information
WaffleLapkin committed Apr 15, 2024
1 parent 3493a56 commit 86a5765
Show file tree
Hide file tree
Showing 2 changed files with 193 additions and 57 deletions.
248 changes: 192 additions & 56 deletions compiler/rustc_data_structures/src/graph/vec_graph/mod.rs
Original file line number Diff line number Diff line change
@@ -1,99 +1,235 @@
use crate::graph::{DirectedGraph, NumEdges, Successors};
use crate::graph::{DirectedGraph, NumEdges, Predecessors, Successors};
use rustc_index::{Idx, IndexVec};

#[cfg(test)]
mod tests;

pub struct VecGraph<N: Idx> {
/// Maps from a given node to an index where the set of successors
/// for that node starts. The index indexes into the `edges`
/// vector. To find the range for a given node, we look up the
/// start for that node and then the start for the next node
/// (i.e., with an index 1 higher) and get the range between the
/// two. This vector always has an extra entry so that this works
/// even for the max element.
/// A directed graph, efficient for cases where node indices are pre-existing.
///
/// If `BR` is true, the graph will store back-references, allowing you to get predecessors.
pub struct VecGraph<N: Idx, const BR: bool = false> {
// This is basically a `HashMap<N, (Vec<N>, If<BR, Vec<N>>)>` -- a map from a node index, to
// a list of targets of outgoing edges and (if enabled) a list of sources of incoming edges.
//
// However, it is condensed into two arrays as an optimization.
//
// `node_starts[n]` is the start of the list of targets of outgoing edges for node `n`.
// So you can get node's successors with `edge_targets[node_starts[n]..node_starts[n + 1]]`.
//
// If `BR` is true (back references are enabled), then `node_starts[n + edge_count]` is the
// start of the list of *sources* of incoming edges. You can get predecessors of a node
// similarly to its successors but offsetting by `edge_count`. `edge_count` is
// `edge_targets.len()/2` (again, in case BR is true) because half of the vec is back refs.
//
// All of this might be confusing, so here is an example graph and its representation:
//
// n3 ----+
// ^ | (if BR = true)
// | v outgoing edges incoming edges
// n0 -> n1 -> n2 ______________ __________________
// / \ / \
// node indices[1]: n0, n1, n2, n3, n0, n1, n2, n3, n/a
// vec indices: n0, n1, n2, n3, n4, n5, n6, n7, n8
// node_starts: [0, 1, 3, 4 4, 4, 5, 7, 8]
// | | | | | | | | |
// | | +---+ +---+ | +---+ |
// | | | | | | |
// v v v v v v v
// edge_targets: [n1, n2, n3, n2 n0, n1, n3, n1]
// / \____/ | | \____/ \
// n0->n1 / | | \ n3<-n1
// / n3->n2 [2] n1<-n0 [2] \
// n1->n2, n1->n3 n2<-n1, n2<-n3
//
// The incoming edges are basically stored in the same way as outgoing edges, but offset and
// the graph they store is the inverse of the original. Last index in the `node_starts` array
// always points to one-past-the-end, so that we don't need to bound check `node_starts[n + 1]`
//
// [1]: "node indices" are the indices a user of `VecGraph` might use,
// note that they are different from "vec indices",
// which are the real indices you need to index `node_starts`
//
// [2]: Note that even though n2 also points to here,
// the next index also points here, so n2 has no
// successors (`edge_targets[3..3] = []`).
// Similarly with n0 and incoming edges
//
// If this is still confusing... then sorry :(
//
/// Indices into `edge_targets` that signify a start of list of edges.
node_starts: IndexVec<N, usize>,

/// Targets (or sources for back refs) of edges
edge_targets: Vec<N>,
}

impl<N: Idx + Ord> VecGraph<N> {
impl<N: Idx + Ord, const BR: bool> VecGraph<N, BR> {
pub fn new(num_nodes: usize, mut edge_pairs: Vec<(N, N)>) -> Self {
let num_edges = edge_pairs.len();

let nodes_cap = match BR {
// +1 for special entry at the end, pointing one past the end of `edge_targets`
false => num_nodes + 1,
// *2 for back references
true => (num_nodes * 2) + 1,
};

let edges_cap = match BR {
false => num_edges,
// *2 for back references
true => num_edges * 2,
};

let mut node_starts = IndexVec::with_capacity(nodes_cap);
let mut edge_targets = Vec::with_capacity(edges_cap);

// Sort the edges by the source -- this is important.
edge_pairs.sort();

let num_edges = edge_pairs.len();
// Fill forward references
create_index(
num_nodes,
&mut edge_pairs.iter().map(|&(src, _)| src),
&mut edge_pairs.iter().map(|&(_, tgt)| tgt),
&mut edge_targets,
&mut node_starts,
);

// Store the *target* of each edge into `edge_targets`.
let edge_targets: Vec<N> = edge_pairs.iter().map(|&(_, target)| target).collect();

// Create the *edge starts* array. We are iterating over the
// (sorted) edge pairs. We maintain the invariant that the
// length of the `node_starts` array is enough to store the
// current source node -- so when we see that the source node
// for an edge is greater than the current length, we grow the
// edge-starts array by just enough.
let mut node_starts = IndexVec::with_capacity(num_edges);
for (index, &(source, _)) in edge_pairs.iter().enumerate() {
// If we have a list like `[(0, x), (2, y)]`:
//
// - Start out with `node_starts` of `[]`
// - Iterate to `(0, x)` at index 0:
// - Push one entry because `node_starts.len()` (0) is <= the source (0)
// - Leaving us with `node_starts` of `[0]`
// - Iterate to `(2, y)` at index 1:
// - Push one entry because `node_starts.len()` (1) is <= the source (2)
// - Push one entry because `node_starts.len()` (2) is <= the source (2)
// - Leaving us with `node_starts` of `[0, 1, 1]`
// - Loop terminates
while node_starts.len() <= source.index() {
node_starts.push(index);
}
}
// Fill back references
if BR {
// Pop the special "last" entry, it will be replaced by first back ref
node_starts.pop();

// Pad out the `node_starts` array so that it has `num_nodes +
// 1` entries. Continuing our example above, if `num_nodes` is
// be `3`, we would push one more index: `[0, 1, 1, 2]`.
//
// Interpretation of that vector:
//
// [0, 1, 1, 2]
// ---- range for N=2
// ---- range for N=1
// ---- range for N=0
while node_starts.len() <= num_nodes {
node_starts.push(edge_targets.len());
}
// Re-sort the edges so that they are sorted by target
edge_pairs.sort_by_key(|&(src, tgt)| (tgt, src));

assert_eq!(node_starts.len(), num_nodes + 1);
create_index(
// Back essentially double the number of nodes
num_nodes * 2,
// NB: the source/target are switched here too
// NB: we double the key index, so that we can later use *2 to get the back references
&mut edge_pairs.iter().map(|&(_, tgt)| N::new(tgt.index() + num_nodes)),
&mut edge_pairs.iter().map(|&(src, _)| src),
&mut edge_targets,
&mut node_starts,
);
}

Self { node_starts, edge_targets }
}

/// Gets the successors for `source` as a slice.
pub fn successors(&self, source: N) -> &[N] {
assert!(source.index() < self.num_nodes());

let start_index = self.node_starts[source];
let end_index = self.node_starts[source.plus(1)];
&self.edge_targets[start_index..end_index]
}
}

impl<N: Idx> DirectedGraph for VecGraph<N> {
impl<N: Idx + Ord> VecGraph<N, true> {
/// Gets the predecessors for `target` as a slice.
pub fn predecessors(&self, target: N) -> &[N] {
assert!(target.index() < self.num_nodes());

let target = N::new(target.index() + self.num_nodes());

let start_index = self.node_starts[target];
let end_index = self.node_starts[target.plus(1)];
&self.edge_targets[start_index..end_index]
}
}

/// Creates/initializes the index for the [`VecGraph`]. A helper for [`VecGraph::new`].
///
/// - `num_nodes` is the target number of nodes in the graph
/// - `sorted_edge_sources` are the edge sources, sorted
/// - `associated_edge_targets` are the edge *targets* in the same order as sources
/// - `edge_targets` is the vec of targets to be extended
/// - `node_starts` is the index to be filled
fn create_index<N: Idx + Ord>(
num_nodes: usize,
sorted_edge_sources: &mut dyn Iterator<Item = N>,
associated_edge_targets: &mut dyn Iterator<Item = N>,
edge_targets: &mut Vec<N>,
node_starts: &mut IndexVec<N, usize>,
) {
let offset = edge_targets.len();

// Store the *target* of each edge into `edge_targets`.
edge_targets.extend(associated_edge_targets);

// Create the *edge starts* array. We are iterating over the
// (sorted) edge pairs. We maintain the invariant that the
// length of the `node_starts` array is enough to store the
// current source node -- so when we see that the source node
// for an edge is greater than the current length, we grow the
// edge-starts array by just enough.
for (index, source) in sorted_edge_sources.enumerate() {
// If we have a list like `[(0, x), (2, y)]`:
//
// - Start out with `node_starts` of `[]`
// - Iterate to `(0, x)` at index 0:
// - Push one entry because `node_starts.len()` (0) is <= the source (0)
// - Leaving us with `node_starts` of `[0]`
// - Iterate to `(2, y)` at index 1:
// - Push one entry because `node_starts.len()` (1) is <= the source (2)
// - Push one entry because `node_starts.len()` (2) is <= the source (2)
// - Leaving us with `node_starts` of `[0, 1, 1]`
// - Loop terminates
while node_starts.len() <= source.index() {
node_starts.push(index + offset);
}
}

// Pad out the `node_starts` array so that it has `num_nodes +
// 1` entries. Continuing our example above, if `num_nodes` is
// be `3`, we would push one more index: `[0, 1, 1, 2]`.
//
// Interpretation of that vector:
//
// [0, 1, 1, 2]
// ---- range for N=2
// ---- range for N=1
// ---- range for N=0
while node_starts.len() <= num_nodes {
node_starts.push(edge_targets.len());
}

assert_eq!(node_starts.len(), num_nodes + 1);
}

impl<N: Idx, const BR: bool> DirectedGraph for VecGraph<N, BR> {
type Node = N;

fn num_nodes(&self) -> usize {
self.node_starts.len() - 1
match BR {
false => self.node_starts.len() - 1,
// If back refs are enabled, half of the array is said back refs
true => (self.node_starts.len() - 1) / 2,
}
}
}

impl<N: Idx> NumEdges for VecGraph<N> {
impl<N: Idx, const BR: bool> NumEdges for VecGraph<N, BR> {
fn num_edges(&self) -> usize {
self.edge_targets.len()
match BR {
false => self.edge_targets.len(),
// If back refs are enabled, half of the array is reversed edges for them
true => self.edge_targets.len() / 2,
}
}
}

impl<N: Idx + Ord> Successors for VecGraph<N> {
impl<N: Idx + Ord, const BR: bool> Successors for VecGraph<N, BR> {
fn successors(&self, node: N) -> impl Iterator<Item = Self::Node> {
self.successors(node).iter().cloned()
}
}

impl<N: Idx + Ord> Predecessors for VecGraph<N, true> {
fn predecessors(&self, node: Self::Node) -> impl Iterator<Item = Self::Node> {
self.predecessors(node).iter().cloned()
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ impl<'tcx> MiniGraph<'tcx> {
edges.push((source_node, target_node));
},
);
let graph = VecGraph::new(nodes.len(), edges);
let graph = VecGraph::<_, false>::new(nodes.len(), edges);
let sccs = Sccs::new(&graph);
Self { nodes, sccs }
}
Expand Down

0 comments on commit 86a5765

Please sign in to comment.