Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge Compaction and Promotion #1914

Merged
merged 15 commits into from
Feb 16, 2024
200 changes: 200 additions & 0 deletions calyx-opt/src/analysis/compaction_analysis.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
use crate::analysis::{ControlOrder, PromotionAnalysis};
use calyx_ir::{self as ir};
use ir::GetAttributes;
use itertools::Itertools;
use petgraph::{algo, graph::NodeIndex};
use std::collections::HashMap;

use super::read_write_set::AssignmentAnalysis;

/// Struct to perform compaction on `seqs`.
/// It will only work correctly if you call `update_cont_read_writes` for
/// each component that you run it on, so that continuous assignments are
/// included in the data-dependency analysis.
#[derive(Debug, Default)]
pub struct CompactionAnalysis {
// Cells read by the component's continuous assignments.
cont_reads: Vec<ir::RRC<ir::Cell>>,
// Cells written by the component's continuous assignments.
cont_writes: Vec<ir::RRC<ir::Cell>>,
}

impl CompactionAnalysis {
    /// Updates `self` so that compaction will take the component's continuous
    /// assignments into account. Call this once per component before running
    /// `compact_control_vec` on that component's control.
    pub fn update_cont_read_writes(&mut self, comp: &mut ir::Component) {
        let (cont_reads, cont_writes) = (
            comp.continuous_assignments
                .iter()
                .analysis()
                .cell_reads()
                .collect(),
            comp.continuous_assignments
                .iter()
                .analysis()
                .cell_writes()
                .collect(),
        );
        self.cont_reads = cont_reads;
        self.cont_writes = cont_writes;
    }

    // Given a total_order and sorted schedule, builds a vec of the original seq.
    // Note that this function assumes the `total_order` and `sorted_schedule`
    // represent a completely sequential schedule.
    fn recover_seq(
        mut total_order: petgraph::graph::DiGraph<Option<ir::Control>, ()>,
        sorted_schedule: Vec<(NodeIndex, u64)>,
    ) -> Vec<ir::Control> {
        sorted_schedule
            .into_iter()
            .map(|(i, _)| total_order[i].take().unwrap())
            .collect_vec()
    }

    /// Takes a vec of ctrl stmts and turns it into a compacted schedule.
    /// If compaction doesn't lead to any latency decreases, it just returns
    /// a vec of stmts in the original order.
    /// If it can compact, then it returns a vec with one
    /// element: a compacted static par.
    ///
    /// # Panics
    /// Panics if the data-dependency graph built from `stmts` has a cycle,
    /// or if the latency of the built static par does not match the computed
    /// schedule (an internal invariant check).
    pub fn compact_control_vec(
        &mut self,
        stmts: Vec<ir::Control>,
        promotion_analysis: &mut PromotionAnalysis,
        builder: &mut ir::Builder,
    ) -> Vec<ir::Control> {
        // Records the corresponding node indices that each control program
        // has data dependency on.
        let mut dependency: HashMap<NodeIndex, Vec<NodeIndex>> = HashMap::new();
        // Records the latency of corresponding control operator for each
        // node index.
        let mut latency_map: HashMap<NodeIndex, u64> = HashMap::new();
        // Records the scheduled start time of corresponding control operator
        // for each node index.
        let mut schedule: HashMap<NodeIndex, u64> = HashMap::new();

        // Latency of the original, fully sequential schedule; used to decide
        // whether compaction bought us anything.
        let og_latency: u64 = stmts
            .iter()
            .map(PromotionAnalysis::get_inferred_latency)
            .sum();

        let mut total_order = ControlOrder::<false>::get_dependency_graph_seq(
            stmts.into_iter(),
            (&self.cont_reads, &self.cont_writes),
            &mut dependency,
            &mut latency_map,
        );

        let order = algo::toposort(&total_order, None).unwrap_or_else(|_| {
            panic!(
                "Error when producing topo sort. Dependency graph has a cycle."
            )
        });

        let mut total_time: u64 = 0;

        // First we build the schedule: an ASAP start time for each node.
        for i in order {
            // Start time is when the latest dependency finishes.
            let start = dependency[&i]
                .iter()
                .map(|node| schedule[node] + latency_map[node])
                .max()
                .unwrap_or(0);
            schedule.insert(i, start);
            total_time = total_time.max(start + latency_map[&i]);
        }

        // We sort the schedule by start time, breaking ties by node index
        // (i.e., original program order). Keys are unique, so an unstable
        // sort is safe and avoids an allocation.
        let mut sorted_schedule: Vec<(NodeIndex, u64)> =
            schedule.into_iter().collect();
        sorted_schedule.sort_unstable_by_key(|&(node, start)| (start, node));

        if total_time == og_latency {
            // If we can't compact at all, then just recover and return
            // the original seq.
            return Self::recover_seq(total_order, sorted_schedule);
        }

        // Threads for the static par, where each entry is (thread, thread_latency).
        let mut par_threads: Vec<(Vec<ir::Control>, u64)> = Vec::new();

        // We encode the schedule while trying to minimize the number of
        // par threads: greedily place each stmt on the first thread that is
        // free by the stmt's scheduled start time.
        'outer: for (i, start) in sorted_schedule {
            let control = total_order[i].take().unwrap();
            for (thread, thread_latency) in par_threads.iter_mut() {
                if *thread_latency <= start {
                    if *thread_latency < start {
                        // Need a no-op group so the schedule starts correctly.
                        let no_op = builder.add_static_group(
                            "no-op",
                            start - *thread_latency,
                        );
                        thread.push(ir::Control::Static(
                            ir::StaticControl::Enable(ir::StaticEnable {
                                group: no_op,
                                attributes: ir::Attributes::default(),
                            }),
                        ));
                        *thread_latency = start;
                    }
                    thread.push(control);
                    *thread_latency += latency_map[&i];
                    continue 'outer;
                }
            }
            // No existing thread is free in time: create a new par thread.
            if start > 0 {
                // If start > 0, then we must add a delay to the start of the
                // group.
                let no_op = builder.add_static_group("no-op", start);
                let no_op_enable = ir::Control::Static(
                    ir::StaticControl::Enable(ir::StaticEnable {
                        group: no_op,
                        attributes: ir::Attributes::default(),
                    }),
                );
                par_threads.push((
                    vec![no_op_enable, control],
                    start + latency_map[&i],
                ));
            } else {
                par_threads.push((vec![control], latency_map[&i]));
            }
        }

        // Turn each thread (Vec<ir::Control>) into a single ir::StaticControl,
        // wrapping in a StaticSeq only when the thread has multiple stmts.
        let mut par_control_threads: Vec<ir::StaticControl> = Vec::new();
        for (thread, thread_latency) in par_threads {
            let mut promoted_stmts = thread
                .into_iter()
                .map(|mut stmt| {
                    promotion_analysis.convert_to_static(&mut stmt, builder)
                })
                .collect_vec();
            if promoted_stmts.len() == 1 {
                // Don't wrap in static seq if we don't need to.
                par_control_threads.push(promoted_stmts.pop().unwrap());
            } else {
                par_control_threads.push(ir::StaticControl::Seq(
                    ir::StaticSeq {
                        stmts: promoted_stmts,
                        attributes: ir::Attributes::default(),
                        latency: thread_latency,
                    },
                ));
            }
        }
        // Double checking that we have built the static par correctly:
        // its longest thread must match the scheduled total time.
        let max: Option<u64> =
            par_control_threads.iter().map(|c| c.get_latency()).max();
        assert_eq!(
            max.unwrap(),
            total_time,
            "The schedule expects latency {}. The static par that was built has latency {}",
            total_time,
            max.unwrap()
        );

        let mut s_par = ir::StaticControl::Par(ir::StaticPar {
            stmts: par_control_threads,
            attributes: ir::Attributes::default(),
            latency: total_time,
        });
        // Mark the par as promoted so later passes know it was inferred,
        // not written by the user.
        s_par.get_mut_attributes().insert(ir::BoolAttr::Promoted, 1);
        vec![ir::Control::Static(s_par)]
    }
}
37 changes: 34 additions & 3 deletions calyx-opt/src/analysis/inference_analysis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,11 @@ impl InferenceAnalysis {
/// Note that this expects that the component already is accounted for
/// in self.latency_data and self.static_component_latencies.
pub fn remove_component(&mut self, comp_name: ir::Id) {
self.updated_components.insert(comp_name);
if self.latency_data.contains_key(&comp_name) {
// To make inference as strong as possible, only update updated_components
// if we actually updated it.
self.updated_components.insert(comp_name);
}
self.latency_data.remove(&comp_name);
self.static_component_latencies.remove(&comp_name);
}
Expand All @@ -216,15 +220,22 @@ impl InferenceAnalysis {
&mut self,
(comp_name, adjusted_latency): (ir::Id, u64),
) {
self.updated_components.insert(comp_name);
// Check whether we actually updated the component's latency.
let mut updated = false;
self.latency_data.entry(comp_name).and_modify(|go_done| {
for (_, _, cur_latency) in &mut go_done.ports {
// Updating components with latency data.
*cur_latency = adjusted_latency;
if *cur_latency != adjusted_latency {
*cur_latency = adjusted_latency;
updated = true;
}
}
});
self.static_component_latencies
.insert(comp_name, adjusted_latency);
if updated {
self.updated_components.insert(comp_name);
}
}

/// Return true if the edge (`src`, `dst`) meet one these criteria, and false otherwise:
Expand Down Expand Up @@ -502,6 +513,26 @@ impl InferenceAnalysis {
seq.update_static(&self.static_component_latencies);
}

/// "Fixes up" a `par` block by delegating to its `update_static` with this
/// analysis's static component latencies (e.g., after latencies changed).
pub fn fixup_par(&self, par: &mut ir::Par) {
par.update_static(&self.static_component_latencies);
}

/// "Fixes up" an `if` block by delegating to its `update_static` with this
/// analysis's static component latencies.
pub fn fixup_if(&self, _if: &mut ir::If) {
_if.update_static(&self.static_component_latencies);
}

/// "Fixes up" a `while` block by delegating to its `update_static` with
/// this analysis's static component latencies.
pub fn fixup_while(&self, _while: &mut ir::While) {
_while.update_static(&self.static_component_latencies);
}

/// "Fixes up" a `repeat` block by delegating to its `update_static` with
/// this analysis's static component latencies.
pub fn fixup_repeat(&self, repeat: &mut ir::Repeat) {
repeat.update_static(&self.static_component_latencies);
}

/// "Fixes up" an arbitrary control node by delegating to its
/// `update_static` with this analysis's static component latencies.
pub fn fixup_ctrl(&self, ctrl: &mut ir::Control) {
ctrl.update_static(&self.static_component_latencies);
}

/// "Fixes Up" the component. In particular:
/// 1. Removes @promotable annotations for any groups that write to any
/// `updated_components`.
Expand Down
2 changes: 2 additions & 0 deletions calyx-opt/src/analysis/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
//! The analyses construct data-structures that make answering certain queries
//! about Calyx programs easier.

mod compaction_analysis;
mod compute_static;
mod control_id;
mod control_order;
Expand All @@ -22,6 +23,7 @@ mod share_set;
mod static_par_timing;
mod variable_detection;

pub use compaction_analysis::CompactionAnalysis;
pub use compute_static::IntoStatic;
pub use compute_static::WithStatic;
pub use control_id::ControlId;
Expand Down
11 changes: 4 additions & 7 deletions calyx-opt/src/default_passes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ use crate::passes::{
DeadAssignmentRemoval, DeadCellRemoval, DeadGroupRemoval, DiscoverExternal,
Externalize, GoInsertion, GroupToInvoke, GroupToSeq, HoleInliner,
InferShare, LowerGuards, MergeAssign, Papercut, ParToSeq,
RegisterUnsharing, RemoveIds, ResetInsertion, ScheduleCompaction,
SimplifyStaticGuards, SimplifyWithControl, StaticInference, StaticInliner,
StaticPromotion, SynthesisPapercut, TopDownCompileControl, UnrollBounded,
WellFormed, WireInliner, WrapMain,
RegisterUnsharing, RemoveIds, ResetInsertion, SimplifyStaticGuards,
SimplifyWithControl, StaticInference, StaticInliner, StaticPromotion,
SynthesisPapercut, TopDownCompileControl, UnrollBounded, WellFormed,
WireInliner, WrapMain,
};
use crate::traversal::Named;
use crate::{pass_manager::PassManager, register_alias};
Expand All @@ -35,7 +35,6 @@ impl PassManager {
pm.register_pass::<GroupToSeq>()?;
pm.register_pass::<InferShare>()?;
pm.register_pass::<CellShare>()?;
pm.register_pass::<ScheduleCompaction>()?;
pm.register_pass::<StaticInference>()?;
pm.register_pass::<StaticPromotion>()?;
pm.register_pass::<SimplifyStaticGuards>()?;
Expand Down Expand Up @@ -94,8 +93,6 @@ impl PassManager {
SimplifyWithControl, // Must run before compile-invoke
CompileInvoke, // creates dead comb groups
StaticInference,
ScheduleCompaction,
StaticPromotion,
StaticPromotion,
CompileRepeat,
DeadGroupRemoval, // Since previous passes potentially create dead groups
Expand Down
2 changes: 0 additions & 2 deletions calyx-opt/src/passes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ mod par_to_seq;
mod register_unsharing;
mod remove_ids;
mod reset_insertion;
mod schedule_compaction;
mod simplify_static_guards;
mod static_inference;
mod static_inliner;
Expand Down Expand Up @@ -72,7 +71,6 @@ pub use par_to_seq::ParToSeq;
pub use register_unsharing::RegisterUnsharing;
pub use remove_ids::RemoveIds;
pub use reset_insertion::ResetInsertion;
pub use schedule_compaction::ScheduleCompaction;
pub use simplify_static_guards::SimplifyStaticGuards;
pub use simplify_with_control::SimplifyWithControl;
pub use static_inference::StaticInference;
Expand Down
Loading
Loading