diff --git a/Cargo.lock b/Cargo.lock index 53d5ebd..428115e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,15 +4,15 @@ version = 3 [[package]] name = "arrayvec" -version = "0.7.4" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "bytemuck" -version = "1.14.1" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed2490600f404f2b94c167e31d3ed1d5f3c225a0f3b80230053b3e0b7b962bd9" +checksum = "94bbb0ad554ad961ddc5da507a12a29b14e4ae5bda06b19f575a3e6079d2e2ae" [[package]] name = "convert_case" @@ -22,22 +22,22 @@ checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" [[package]] name = "derive_more" -version = "0.99.17" +version = "0.99.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce" dependencies = [ "convert_case", "proc-macro2", "quote", "rustc_version", - "syn 1.0.109", + "syn", ] [[package]] name = "either" -version = "1.9.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "elsa" @@ -57,15 +57,15 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "hashbrown" -version = "0.14.3" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" [[package]] name = "indexmap" -version = "2.2.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "433de089bd45971eecf4668ee0ee8f4cec17db4f8bd8f7bc3197a6ce37aa7d9b" +checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" dependencies = [ "equivalent", "hashbrown", @@ -88,15 +88,15 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "longest-increasing-subsequence" @@ -104,20 +104,26 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3bd0dd2cd90571056fdb71f6275fada10131182f84899f4b2a916e565d81d86" +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + [[package]] name = "proc-macro2" -version = "1.0.78" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.35" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] @@ -130,61 +136,62 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustc_version" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ "semver", ] [[package]] name = "ryu" -version = "1.0.16" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "semver" -version = "1.0.21" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] name = "serde" -version = "1.0.196" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "870026e60fa08c69f064aa766c10f10b1d62db9ccd4d0abb206472bee0ce3b32" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.196" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn", ] [[package]] name = "serde_json" -version = "1.0.113" +version = "1.0.128" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69801b70b1c3dac963ecb03a364ba0ceda9cf60c71cfe475e99864759c8b8a79" +checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] [[package]] name = "smallvec" -version = "1.13.1" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" dependencies = [ "serde", ] @@ -216,20 +223,9 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "syn" -version = "2.0.48" +version = "2.0.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" dependencies = [ 
"proc-macro2", "quote", @@ -238,6 +234,6 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" diff --git a/src/cfg.rs b/src/cfg.rs index de8a629..75f5220 100644 --- a/src/cfg.rs +++ b/src/cfg.rs @@ -1,10 +1,10 @@ //! Control-flow graph (CFG) abstractions and utilities. +use crate::transform::{InnerInPlaceTransform as _, Transformer}; use crate::{ - spv, AttrSet, Const, ConstDef, ConstKind, Context, ControlNode, ControlNodeDef, - ControlNodeKind, ControlNodeOutputDecl, ControlRegion, ControlRegionDef, - EntityOrientedDenseMap, FuncDefBody, FxIndexMap, FxIndexSet, SelectionKind, Type, TypeKind, - Value, + AttrSet, Const, ConstDef, ConstKind, Context, ControlNode, ControlNodeDef, ControlNodeKind, + ControlNodeOutputDecl, ControlRegion, ControlRegionDef, EntityOrientedDenseMap, FuncDefBody, + FxIndexMap, FxIndexSet, SelectionKind, Type, TypeKind, Value, spv, }; use itertools::{Either, Itertools}; use smallvec::SmallVec; @@ -78,26 +78,23 @@ impl ControlFlowGraph { /// reverse post-order (RPO). /// /// RPO iteration over a CFG provides certain guarantees, most importantly - /// that SSA definitions are visited before any of their uses. + /// that dominators are visited before the entire subgraph they dominate. pub fn rev_post_order( &self, func_def_body: &FuncDefBody, ) -> impl DoubleEndedIterator { let mut post_order = SmallVec::<[_; 8]>::new(); - self.traverse_whole_func( - func_def_body, - &mut TraversalState { - incoming_edge_counts: EntityOrientedDenseMap::new(), + self.traverse_whole_func(func_def_body, &mut TraversalState { + incoming_edge_counts: EntityOrientedDenseMap::new(), - pre_order_visit: |_| {}, - post_order_visit: |region| post_order.push(region), + pre_order_visit: |_| {}, + post_order_visit: |region| post_order.push(region), - // NOTE(eddyb) this doesn't impact semantics, but combined with - // the final reversal, it should keep targets in the original - // order in the cases when they didn't get deduplicated. - reverse_targets: true, - }, - ); + // NOTE(eddyb) this doesn't impact semantics, but combined with + // the final reversal, it should keep targets in the original + // order in the cases when they didn't get deduplicated. + reverse_targets: true, + }); post_order.into_iter().rev() } } @@ -228,19 +225,52 @@ struct LoopFinder<'a> { scc_state: EntityOrientedDenseMap, } -// FIXME(eddyb) make `Option>` the same size somehow. #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] struct SccStackIdx(u32); #[derive(PartialEq, Eq)] enum SccState { /// CFG node has been reached and ended up somewhere on the `scc_stack`, - /// where it will remain after the SCC it's part of will be completed. + /// where it will remain until the SCC it's part of will be completed. Pending(SccStackIdx), /// CFG node had been reached once, but is no longer on the `scc_stack`, its /// parent SCC having been completed (or it wasn't in an SCC to begin with). - Complete, + Complete(EventualCfgExits), +} + +/// Summary of all the ways in which a CFG node may eventually leave the CFG. +/// +// HACK(eddyb) a loop can reach a CFG subgraph that happens to always "diverge" +// (e.g. 
ending in `unreachable`, `ExitInvocation`, or even infinite loops, +// though those have other issues) and strictly speaking that would always be +// an edge leaving the SCC of the loop (as it can't reach a backedge), but it +// still shouldn't be treated as an exit because it doesn't reconverge to the +// rest of the function, i.e. it can't reach any `return`s, which is what this +// tracks in order to later make a more accurate decision wrt loop exits. +// +// NOTE(eddyb) only in the case where a loop *also* has non-"diverging" exits, +// do the "diverging" ones not get treated as exits, as the presence of both +// disambiguates `break`s from naturally "diverging" sections of the loop body +// (at least for CFGs built from languages without labelled `break` or `goto`, +// but even then it would be pretty convoluted to set up `break` to diverge, +// while `break some_outer_label` to reconverge to the rest of the function). +#[derive(Copy, Clone, Default, PartialEq, Eq)] +struct EventualCfgExits { + // FIXME(eddyb) do the other situations need their own flags here? + may_return_from_func: bool, +} + +impl std::ops::BitOr for EventualCfgExits { + type Output = Self; + fn bitor(self, other: Self) -> Self { + Self { may_return_from_func: self.may_return_from_func | other.may_return_from_func } + } +} +impl std::ops::BitOrAssign for EventualCfgExits { + fn bitor_assign(&mut self, other: Self) { + *self = *self | other; + } } impl<'a> LoopFinder<'a> { @@ -261,12 +291,25 @@ impl<'a> LoopFinder<'a> { /// /// Here we track stack indices (as the stack order is the traversal order), /// and distinguish the acyclic case to avoid treating most nodes as self-loops. - fn find_earliest_scc_root_of(&mut self, node: ControlRegion) -> Option { + // + // FIXME(eddyb) name of the function is a bit clunky wrt its return type. + fn find_earliest_scc_root_of( + &mut self, + node: ControlRegion, + ) -> (Option, EventualCfgExits) { let state_entry = self.scc_state.entry(node); if let Some(state) = &state_entry { return match *state { - SccState::Pending(scc_stack_idx) => Some(scc_stack_idx), - SccState::Complete => None, + SccState::Pending(scc_stack_idx) => { + // HACK(eddyb) this means that `EventualCfgExits`s will be + // inconsistently observed across the `Pending` nodes of a + // loop body, but that is sound as it cannot feed into any + // `Complete` state until the loop header itself is complete, + // and the monotonic nature of `EventualCfgExits` means that + // the loop header will still get to see the complete picture. 
+ (Some(scc_stack_idx), EventualCfgExits::default()) + } + SccState::Complete(eventual_cfg_exits) => (None, eventual_cfg_exits), }; } let scc_stack_idx = SccStackIdx(self.scc_stack.len().try_into().unwrap()); @@ -279,10 +322,20 @@ impl<'a> LoopFinder<'a> { .get(node) .expect("cfg: missing `ControlInst`, despite having left structured control-flow"); + let mut eventual_cfg_exits = EventualCfgExits::default(); + + if let ControlInstKind::Return = control_inst.kind { + eventual_cfg_exits.may_return_from_func = true; + } + let earliest_scc_root = control_inst .targets .iter() .flat_map(|&target| { + let (earliest_scc_root_of_target, eventual_cfg_exits_of_target) = + self.find_earliest_scc_root_of(target); + eventual_cfg_exits |= eventual_cfg_exits_of_target; + // HACK(eddyb) if one of the edges is already known to be a loop exit // (from `OpLoopMerge` specifically), treat it almost like a backedge, // but with the additional requirement that the loop header is already @@ -295,9 +348,7 @@ impl<'a> LoopFinder<'a> { } }); - self.find_earliest_scc_root_of(target) - .into_iter() - .chain(root_candidate_from_loop_merge) + earliest_scc_root_of_target.into_iter().chain(root_candidate_from_loop_merge) }) .min(); @@ -307,21 +358,36 @@ impl<'a> LoopFinder<'a> { // It's now possible to find all the loop exits: they're all the // edges from nodes of this SCC (loop) to nodes not in the SCC. - let target_in_scc = |target| match self.scc_state.get(target) { - Some(&SccState::Pending(i)) => i >= scc_stack_idx, - _ => false, + let target_is_exit = |target| { + match self.scc_state[target] { + SccState::Pending(i) => { + assert!(i >= scc_stack_idx); + false + } + SccState::Complete(eventual_cfg_exits_of_target) => { + let EventualCfgExits { may_return_from_func: loop_may_reconverge } = + eventual_cfg_exits; + let EventualCfgExits { may_return_from_func: target_may_reconverge } = + eventual_cfg_exits_of_target; + + // HACK(eddyb) see comment on `EventualCfgExits` for why + // edges leaving the SCC aren't treated as loop exits + // when they're "more divergent" than the loop itself, + // i.e. if any edges leaving the SCC can reconverge, + // (and therefore the loop as a whole can reconverge) + // only those edges are kept as loop exits. + target_may_reconverge == loop_may_reconverge + } + } }; self.loop_header_to_exit_targets.insert( node, self.scc_stack[scc_start..] .iter() .flat_map(|&scc_node| { - self.cfg.control_inst_on_exit_from[scc_node] - .targets - .iter() - .copied() - .filter(|&target| !target_in_scc(target)) + self.cfg.control_inst_on_exit_from[scc_node].targets.iter().copied() }) + .filter(|&target| target_is_exit(target)) .collect(), ); @@ -331,7 +397,7 @@ impl<'a> LoopFinder<'a> { // loop header itself, are already marked as complete, meaning that // all exits and backedges will be ignored, and the recursion will // only find more SCCs within the loop body (i.e. nested loops). - self.scc_state[node] = SccState::Complete; + self.scc_state[node] = SccState::Complete(eventual_cfg_exits); let loop_body_range = scc_start + 1..self.scc_stack.len(); for &scc_node in &self.scc_stack[loop_body_range.clone()] { self.scc_state.remove(scc_node); @@ -344,16 +410,16 @@ impl<'a> LoopFinder<'a> { // Remove the entire SCC from the accumulation stack all at once. self.scc_stack.truncate(scc_start); - return None; + return (None, eventual_cfg_exits); } // Not actually in an SCC at all, just some node outside any CFG cycles. 
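// --------------------------------------------------------------------------
// [editorial sketch, not part of the patch] The `Pending(SccStackIdx)` /
// `Complete` bookkeeping above is a Tarjan-style SCC walk: the smallest
// stack index reachable from a node is what distinguishes backedges (loops)
// from plain forward edges. A minimal model on a toy graph (`usize` nodes,
// no `EventualCfgExits`); all names below are illustrative, not from spirt:
fn find_earliest_root(
    node: usize,
    succs: &[Vec<usize>],
    // `Some(Ok(stack_idx))` = pending on the stack, `Some(Err(()))` = complete.
    state: &mut [Option<Result<usize, ()>>],
    stack: &mut Vec<usize>,
) -> Option<usize> {
    match state[node] {
        Some(Ok(stack_idx)) => return Some(stack_idx), // backedge into the stack
        Some(Err(())) => return None,                  // already completed
        None => {}
    }
    let stack_idx = stack.len();
    state[node] = Some(Ok(stack_idx));
    stack.push(node);
    let earliest = succs[node]
        .iter()
        .filter_map(|&s| find_earliest_root(s, succs, state, stack))
        .min();
    // This node roots its own SCC (or is in no cycle at all): pop the whole
    // SCC off the accumulation stack and mark it complete, analogous to the
    // `scc_start..` truncation in the real code.
    if earliest.map_or(true, |e| e >= stack_idx) {
        for n in stack.drain(stack_idx..) {
            state[n] = Some(Err(()));
        }
        return None;
    }
    earliest
}
// --------------------------------------------------------------------------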
         if earliest_scc_root.is_none() {
             assert!(self.scc_stack.pop() == Some(node));
-            self.scc_state[node] = SccState::Complete;
+            self.scc_state[node] = SccState::Complete(eventual_cfg_exits);
         }

-        earliest_scc_root
+        (earliest_scc_root, eventual_cfg_exits)
     }
 }

@@ -406,23 +472,91 @@ pub struct Structurizer<'a> {
     incoming_edge_counts_including_loop_exits:
         EntityOrientedDenseMap<ControlRegion, IncomingEdgeCount>,

-    /// Keyed by the input to `structurize_region_from` (the start [`ControlRegion`]),
-    /// and describing the state of that partial structurization step.
-    ///
-    /// See also [`StructurizeRegionState`]'s docs.
+    /// `structurize_region_state[region]` tracks `.structurize_region(region)`
+    /// progress/results (see also [`StructurizeRegionState`]'s docs).
     //
     // FIXME(eddyb) use `EntityOrientedDenseMap` (which lacks iteration by design).
     structurize_region_state: FxIndexMap<ControlRegion, StructurizeRegionState>,

-    /// Accumulated replacements (caused by `target_inputs`s), i.e.:
-    /// `Value::ControlRegionInput { region, input_idx }` must be replaced
-    /// with `control_region_input_replacements[region][input_idx]`, as
-    /// the original `region` cannot have be directly reused.
-    control_region_input_replacements: EntityOrientedDenseMap<ControlRegion, SmallVec<[Value; 2]>>,
+    /// Accumulated rewrites (caused by e.g. `target_inputs`s, but not only),
+    /// i.e.: `Value::ControlRegionInput { region, input_idx }` must be
+    /// rewritten based on `control_region_input_rewrites[region]`, as either
+    /// the original `region` wasn't reused, or its inputs were renumbered.
+    control_region_input_rewrites:
+        EntityOrientedDenseMap<ControlRegion, ControlRegionInputRewrites>,
+}
+
+/// How all `Value::ControlRegionInput { region, input_idx }` for a `region`
+/// must be rewritten (see also `control_region_input_rewrites` docs).
+enum ControlRegionInputRewrites {
+    /// Complete replacement with another value (which can take any form), as
+    /// `region` wasn't kept in its original form in the final structured IR.
+    ///
+    /// **Note**: such replacement can be chained, i.e. a replacement value can
+    /// be `Value::ControlRegionInput { region: other_region, .. }`, and then
+    /// `other_region` itself may have its inputs rewritten.
+    ReplaceWith(SmallVec<[Value; 2]>),
+
+    /// The value may remain an input of the same `region`, only changing its
+    /// `input_idx` (e.g. if indices need compaction after removing some inputs),
+    /// or get replaced anyway, depending on the `Result` for `input_idx`.
+    ///
+    /// **Note**: renumbering can only be the last rewrite step of a value,
+    /// as `region` must've been chosen to be kept in the final structured IR,
+    /// but the `Err` cases are transitive just like `ReplaceWith`.
+    //
+    // FIXME(eddyb) this is a bit silly, maybe try to rely more on hermeticity
+    // to get rid of this?
+    RenumberOrReplaceWith(SmallVec<[Result<u32, Value>; 2]>),
+}
+
+impl ControlRegionInputRewrites {
+    // HACK(eddyb) this is here because it depends on a field of `Structurizer`
+    // and borrowing issues ensue if it's made a method of `Structurizer`.
+    fn rewrite_all(
+        rewrites: &EntityOrientedDenseMap<ControlRegion, ControlRegionInputRewrites>,
+    ) -> impl crate::transform::Transformer + '_ {
+        // FIXME(eddyb) maybe this should be provided by `transform`.
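// --------------------------------------------------------------------------
// [editorial sketch, not part of the patch] The chained `ReplaceWith`
// rewrites described above behave like union-find lookups without path
// compression: a value keeps being looked up until it escapes the map
// (termination relies on the chains being acyclic, as in the real code).
// Hypothetical stand-alone version over a plain map of `u32` value IDs:
fn resolve(mut v: u32, rewrites: &std::collections::HashMap<u32, u32>) -> u32 {
    while let Some(&next) = rewrites.get(&v) {
        v = next;
    }
    v
}
// Path compression (the FIXME a bit further down) would additionally write
// the final `resolve(v)` back into the map for each key visited on the chain.
// --------------------------------------------------------------------------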
+        use crate::transform::*;
+        struct ReplaceValueWith<F>(F);
+        impl<F: Fn(Value) -> Option<Value>> Transformer for ReplaceValueWith<F> {
+            fn transform_value_use(&mut self, v: &Value) -> Transformed<Value> {
+                self.0(*v).map_or(Transformed::Unchanged, Transformed::Changed)
+            }
+        }
+
+        ReplaceValueWith(move |v| {
+            let mut new_v = v;
+            while let Value::ControlRegionInput { region, input_idx } = new_v {
+                match rewrites.get(region) {
+                    // NOTE(eddyb) this needs to be able to apply multiple replacements,
+                    // due to the input potentially having redundantly chained `OpPhi`s.
+                    //
+                    // FIXME(eddyb) union-find-style "path compression" could record the
+                    // final value inside `rewrites` while replacements are being made,
+                    // to avoid going through a chain more than once (and some of these
+                    // replacements could also be applied early).
+                    Some(ControlRegionInputRewrites::ReplaceWith(replacements)) => {
+                        new_v = replacements[input_idx as usize];
+                    }
+                    Some(ControlRegionInputRewrites::RenumberOrReplaceWith(
+                        renumbering_and_replacements,
+                    )) => match renumbering_and_replacements[input_idx as usize] {
+                        Ok(new_idx) => {
+                            new_v = Value::ControlRegionInput { region, input_idx: new_idx };
+                            break;
+                        }
+                        Err(replacement) => new_v = replacement,
+                    },
+                    None => break,
+                }
+            }
+            (v != new_v).then_some(new_v)
+        })
+    }
+}

-/// The state of one `structurize_region_from` invocation (keyed on its start
-/// [`ControlRegion`] in [`Structurizer`]) and its [`PartialControlRegion`] output.
+/// The state of one `.structurize_region(region)` invocation, and its result.
 ///
 /// There is a fourth (or 0th) implicit state, which is where nothing has yet
 /// observed some region, and [`Structurizer`] isn't tracking it at all.
@@ -434,22 +568,24 @@ enum StructurizeRegionState {

     /// Structurization completed, and this region can now be claimed.
     Ready {
-        /// If this region had backedges (targeting its start [`ControlRegion`]),
-        /// their bundle is taken from the region's [`DeferredEdgeBundleSet`],
-        /// and kept in this field instead (for simpler/faster access).
+        /// Cached `region_deferred_edges[region].edge_bundle.accumulated_count`,
+        /// i.e. the total count of backedges (if any exist) pointing to `region`
+        /// from the CFG subgraph that `region` itself dominates.
         ///
         /// Claiming a region with backedges can combine them with the bundled
         /// edges coming into the CFG cycle from outside, and instead of failing
         /// due to the latter not being enough to claim the region on their own,
         /// actually perform loop structurization.
-        backedge: Option<DeferredEdgeBundle>,
+        accumulated_backedge_count: IncomingEdgeCount,

-        region: PartialControlRegion,
+        // HACK(eddyb) the only part of a `ClaimedRegion` that is computed by
+        // `structurize_region` (the rest comes from `try_claim_edge_bundle`).
+        region_deferred_edges: DeferredEdgeBundleSet,
     },

     /// Region was claimed (by an [`IncomingEdgeBundle`], with the appropriate
-    /// total [`IncomingEdgeCount`], minus any `consumed_backedges`), and has
-    /// since likely been incorporated as part of some larger region.
+    /// total [`IncomingEdgeCount`], minus `accumulated_backedge_count`), and
+    /// must eventually be incorporated as part of some larger region.
     Claimed,
 }

@@ -470,6 +606,13 @@ struct IncomingEdgeBundle<T> {
     target_inputs: SmallVec<[Value; 2]>,
 }

+impl<T> IncomingEdgeBundle<T> {
+    fn with_target<U>(self, target: U) -> IncomingEdgeBundle<U> {
+        let IncomingEdgeBundle { target: _, accumulated_count, target_inputs } = self;
+        IncomingEdgeBundle { target, accumulated_count, target_inputs }
+    }
+}
+
 /// A "deferred (incoming) edge bundle" is an [`IncomingEdgeBundle`] that cannot
 /// be structurized immediately, but instead waits for its `accumulated_count`
 /// to reach the full count of its `target`, before it can be grafted into some
@@ -511,24 +654,15 @@ struct IncomingEdgeBundle<T> {
 ///
 /// **Note**: `edge_bundle.target` has a generic type `T` to reduce redundancy
 /// when it's already implied (e.g. by the key in [`DeferredEdgeBundleSet`]'s map).
-struct DeferredEdgeBundle<T = ()> {
+struct DeferredEdgeBundle<T = DeferredTarget> {
     condition: LazyCond,
     edge_bundle: IncomingEdgeBundle<T>,
 }

 impl<T> DeferredEdgeBundle<T> {
-    fn into_target_keyed_kv_pair(self) -> (T, DeferredEdgeBundle<()>) {
-        let DeferredEdgeBundle {
-            condition,
-            edge_bundle: IncomingEdgeBundle { target, accumulated_count, target_inputs },
-        } = self;
-        (
-            target,
-            DeferredEdgeBundle {
-                condition,
-                edge_bundle: IncomingEdgeBundle { target: (), accumulated_count, target_inputs },
-            },
-        )
+    fn with_target<U>(self, target: U) -> DeferredEdgeBundle<U> {
+        let DeferredEdgeBundle { condition, edge_bundle } = self;
+        DeferredEdgeBundle { condition, edge_bundle: edge_bundle.with_target(target) }
     }
 }

@@ -540,16 +674,26 @@ impl<T> DeferredEdgeBundle<T> {
 /// that might be needed, and then removing the unused ones, but this way we
 /// never generate unused outputs, and can potentially even optimize away some
 /// redundant dataflow (e.g. `if cond { true } else { false }` is just `cond`).
+#[derive(Clone)]
 enum LazyCond {
-    // FIXME(eddyb) remove `False` in favor of `Option<LazyCond>`?
+    // HACK(eddyb) `Undef` is used when the condition comes from e.g. a `Select`
+    // case that diverges and/or represents `unreachable`.
+    Undef,
+
     False,
     True,
-    MergeSelect {
+
+    Merge(Rc<LazyCondMerge>),
+}
+
+enum LazyCondMerge {
+    Select {
         control_node: ControlNode,
-        // FIXME(eddyb) the lowest level of this ends up with a `Vec` containing
-        // only `LazyCond::{False,True}`, and that could more easily be expressed
-        // as e.g. a bitset? (or even `SmallVec<[bool; 16]>`, tho that's silly)
-        per_case_conds: Vec<LazyCond>,
+        // FIXME(eddyb) the lowest level of `LazyCond` ends up containing only
+        // `LazyCond::{Undef,False,True}`, and that could more efficiently be
+        // expressed using e.g. bitsets, but the `Rc` in `LazyCond::Merge`
+        // means that this is more compact than it would otherwise be.
+        per_case_conds: SmallVec<[LazyCond; 4]>,
     },
 }

@@ -565,26 +709,276 @@ enum DeferredTarget {
 }

 /// Set of [`DeferredEdgeBundle`]s, uniquely keyed by their `target`s.
-//
-// FIXME(eddyb) consider implementing `FromIterator<DeferredEdgeBundle>`.
-struct DeferredEdgeBundleSet {
-    target_to_deferred: FxIndexMap<DeferredTarget, DeferredEdgeBundle<()>>,
+///
+/// Semantically equivalent to an unordered series of conditional branches
+/// to each possible `target`, which corresponds to an unenforced invariant
+/// that exactly one [`DeferredEdgeBundle`] condition must be `true` at any
+/// given time (the only non-trivial case, [`DeferredEdgeBundleSet::Choice`],
+/// satisfies it because it's only used for merging `Select` cases, and so
+/// all the conditions will end up using disjoint [`LazyCond::Merge`]s).
+enum DeferredEdgeBundleSet {
+    Unreachable,
+
+    // NOTE(eddyb) this erases the condition (by not using `DeferredEdgeBundle`).
+    Always {
+        // HACK(eddyb) fields are split here to allow e.g. iteration.
+        target: DeferredTarget,
+        edge_bundle: IncomingEdgeBundle<()>,
+    },
+
+    Choice {
+        target_to_deferred: FxIndexMap<DeferredTarget, DeferredEdgeBundle<()>>,
+    },
+}
+
+impl FromIterator<DeferredEdgeBundle> for DeferredEdgeBundleSet {
+    fn from_iter<T: IntoIterator<Item = DeferredEdgeBundle>>(iter: T) -> Self {
+        let mut iter = iter.into_iter();
+        match iter.next() {
+            None => Self::Unreachable,
+            Some(first) => match iter.next() {
+                // NOTE(eddyb) this erases the condition (by not using `DeferredEdgeBundle`).
+                None => Self::Always {
+                    target: first.edge_bundle.target,
+                    edge_bundle: first.edge_bundle.with_target(()),
+                },
+                Some(second) => Self::Choice {
+                    target_to_deferred: ([first, second].into_iter().chain(iter))
+                        .map(|d| (d.edge_bundle.target, d.with_target(())))
+                        .collect(),
+                },
+            },
+        }
+    }
+}
+
+impl From<FxIndexMap<DeferredTarget, DeferredEdgeBundle<()>>> for DeferredEdgeBundleSet {
+    fn from(target_to_deferred: FxIndexMap<DeferredTarget, DeferredEdgeBundle<()>>) -> Self {
+        if target_to_deferred.len() <= 1 {
+            target_to_deferred
+                .into_iter()
+                .map(|(target, deferred)| deferred.with_target(target))
+                .collect()
+        } else {
+            Self::Choice { target_to_deferred }
+        }
+    }
+}

-/// Partially structurized [`ControlRegion`], the result of combining together
-/// several smaller [`ControlRegion`]s, based on CFG edges between them.
-struct PartialControlRegion {
-    // FIXME(eddyb) theoretically this field could be removed, but in practice
-    // this still holds a mix of original `ControlRegion`s and transient ones
-    // which only hold a `ControlNode`, and which would need to be handled
-    // separately (e.g. `(ControlNode, DeferredEdgeBundleSet)`, generics, etc.).
-    structured_body_holder: Option<ControlRegion>,
-
-    /// The transitive targets which can't be claimed into the [`ControlRegion`]
-    /// remain as deferred exits, and will blocking further structurization until
+// HACK(eddyb) this API is a mess, is there an uncompromising way to clean it up?
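// --------------------------------------------------------------------------
// [editorial sketch, not part of the patch] The `FromIterator` impl above
// collapses zero/one/many deferred edges into `Unreachable`/`Always`/`Choice`
// respectively; the same arity rule in isolation, with placeholder types:
#[derive(Debug, PartialEq)]
enum Arity<T> {
    Zero,
    One(T),
    Many(Vec<T>),
}

fn collapse<T>(items: Vec<T>) -> Arity<T> {
    match items.len() {
        0 => Arity::Zero,
        1 => Arity::One(items.into_iter().next().unwrap()),
        _ => Arity::Many(items),
    }
}
// e.g. `collapse::<u32>(vec![])` is `Arity::Zero`, the analogue of a region
// whose deferred-edge set is `Unreachable` (it can never exit).
// --------------------------------------------------------------------------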
+impl DeferredEdgeBundleSet {
+    fn get_edge_bundle_by_target(
+        &self,
+        search_target: DeferredTarget,
+    ) -> Option<&IncomingEdgeBundle<()>> {
+        match self {
+            DeferredEdgeBundleSet::Unreachable => None,
+            DeferredEdgeBundleSet::Always { target, edge_bundle } => {
+                (*target == search_target).then_some(edge_bundle)
+            }
+            DeferredEdgeBundleSet::Choice { target_to_deferred } => {
+                Some(&target_to_deferred.get(&search_target)?.edge_bundle)
+            }
+        }
+    }
+
+    fn get_edge_bundle_mut_by_target(
+        &mut self,
+        search_target: DeferredTarget,
+    ) -> Option<&mut IncomingEdgeBundle<()>> {
+        match self {
+            DeferredEdgeBundleSet::Unreachable => None,
+            DeferredEdgeBundleSet::Always { target, edge_bundle } => {
+                (*target == search_target).then_some(edge_bundle)
+            }
+            DeferredEdgeBundleSet::Choice { target_to_deferred } => {
+                Some(&mut target_to_deferred.get_mut(&search_target)?.edge_bundle)
+            }
+        }
+    }
+
+    fn iter_targets_with_edge_bundle(
+        &self,
+    ) -> impl Iterator<Item = (DeferredTarget, &IncomingEdgeBundle<()>)> {
+        match self {
+            DeferredEdgeBundleSet::Unreachable => Either::Left(None.into_iter()),
+            DeferredEdgeBundleSet::Always { target, edge_bundle } => {
+                Either::Left(Some((*target, edge_bundle)).into_iter())
+            }
+            DeferredEdgeBundleSet::Choice { target_to_deferred } => Either::Right(
+                target_to_deferred
+                    .iter()
+                    .map(|(&target, deferred)| (target, &deferred.edge_bundle)),
+            ),
+        }
+    }
+
+    fn iter_targets_with_edge_bundle_mut(
+        &mut self,
+    ) -> impl Iterator<Item = (DeferredTarget, &mut IncomingEdgeBundle<()>)> {
+        match self {
+            DeferredEdgeBundleSet::Unreachable => Either::Left(None.into_iter()),
+            DeferredEdgeBundleSet::Always { target, edge_bundle } => {
+                Either::Left(Some((*target, edge_bundle)).into_iter())
+            }
+            DeferredEdgeBundleSet::Choice { target_to_deferred } => Either::Right(
+                target_to_deferred
+                    .iter_mut()
+                    .map(|(&target, deferred)| (target, &mut deferred.edge_bundle)),
+            ),
+        }
+    }
+
+    // HACK(eddyb) this only exists because of `DeferredEdgeBundleSet`'s lossy
+    // representation wrt conditions, so removal from a `DeferredEdgeBundleSet`
+    // cannot be used for e.g. `Select` iterating over per-case deferreds.
+    fn steal_deferred_by_target_without_removal(
+        &mut self,
+        search_target: DeferredTarget,
+    ) -> Option<DeferredEdgeBundle<()>> {
+        let steal_edge_bundle = |edge_bundle: &mut IncomingEdgeBundle<()>| IncomingEdgeBundle {
+            target: (),
+            accumulated_count: edge_bundle.accumulated_count,
+            target_inputs: mem::take(&mut edge_bundle.target_inputs),
+        };
+        match self {
+            DeferredEdgeBundleSet::Unreachable => None,
+            DeferredEdgeBundleSet::Always { target, edge_bundle } => (*target == search_target)
+                .then(|| DeferredEdgeBundle {
+                    condition: LazyCond::True,
+                    edge_bundle: steal_edge_bundle(edge_bundle),
+                }),
+            DeferredEdgeBundleSet::Choice { target_to_deferred } => {
+                let DeferredEdgeBundle { condition, edge_bundle } =
+                    target_to_deferred.get_mut(&search_target)?;
+                Some(DeferredEdgeBundle {
+                    condition: mem::replace(condition, LazyCond::False),
+                    edge_bundle: steal_edge_bundle(edge_bundle),
+                })
+            }
+        }
+    }
+
+    // NOTE(eddyb) the returned `DeferredEdgeBundleSet` exists under the assumption
+    // that `split_target` is not reachable from it, so this method is not suitable
+    // for e.g. uniformly draining `DeferredEdgeBundleSet` in a way that preserves
+    // conditions (but rather it's almost a kind of control-flow "slicing").
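// --------------------------------------------------------------------------
// [editorial sketch, not part of the patch] `split_out_target` (just below)
// is morally "remove one key, keep the rest"; a hypothetical stand-alone
// equivalent over `indexmap` (already a dependency here, via `FxIndexMap`):
fn split_out<K: std::hash::Hash + Eq, V>(
    mut map: indexmap::IndexMap<K, V>,
    key: &K,
) -> (Option<V>, indexmap::IndexMap<K, V>) {
    // `swap_remove` avoids shifting later entries, at the cost of perturbing
    // their order -- the same trade-off the FIXME in the real code points at.
    let taken = map.swap_remove(key);
    (taken, map)
}
// --------------------------------------------------------------------------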
+    fn split_out_target(self, split_target: DeferredTarget) -> (Option<DeferredEdgeBundle>, Self) {
+        match self {
+            DeferredEdgeBundleSet::Unreachable => (None, DeferredEdgeBundleSet::Unreachable),
+            DeferredEdgeBundleSet::Always { target, edge_bundle } => {
+                if target == split_target {
+                    (
+                        Some(DeferredEdgeBundle {
+                            condition: LazyCond::True,
+                            edge_bundle: edge_bundle.with_target(target),
+                        }),
+                        DeferredEdgeBundleSet::Unreachable,
+                    )
+                } else {
+                    (None, DeferredEdgeBundleSet::Always { target, edge_bundle })
+                }
+            }
+            DeferredEdgeBundleSet::Choice { mut target_to_deferred } => {
+                // FIXME(eddyb) should this use `shift_remove` and/or emulate
+                // extra tombstones, to avoid impacting the order?
+                (
+                    target_to_deferred
+                        .swap_remove(&split_target)
+                        .map(|d| d.with_target(split_target)),
+                    Self::from(target_to_deferred),
+                )
+            }
+        }
+    }
+
+    // HACK(eddyb) the strange signature is overfitted to its own callsite.
+    fn split_out_matching<T>(
+        self,
+        mut matches: impl FnMut(DeferredEdgeBundle) -> Result<T, DeferredEdgeBundle>,
+    ) -> (Option<T>, Self) {
+        match self {
+            DeferredEdgeBundleSet::Unreachable => (None, DeferredEdgeBundleSet::Unreachable),
+            DeferredEdgeBundleSet::Always { target, edge_bundle } => {
+                match matches(DeferredEdgeBundle {
+                    condition: LazyCond::True,
+                    edge_bundle: edge_bundle.with_target(target),
+                }) {
+                    Ok(x) => (Some(x), DeferredEdgeBundleSet::Unreachable),
+                    Err(new_deferred) => {
+                        assert!(new_deferred.edge_bundle.target == target);
+                        assert!(matches!(new_deferred.condition, LazyCond::True));
+                        (None, DeferredEdgeBundleSet::Always {
+                            target,
+                            edge_bundle: new_deferred.edge_bundle.with_target(()),
+                        })
+                    }
+                }
+            }
+            DeferredEdgeBundleSet::Choice { mut target_to_deferred } => {
+                let mut result = None;
+                for (i, (&target, deferred)) in target_to_deferred.iter_mut().enumerate() {
+                    // HACK(eddyb) "take" `deferred` so it can be passed to
+                    // `matches` (and put back if that returned `Err`).
+                    let taken_deferred = mem::replace(deferred, DeferredEdgeBundle {
+                        condition: LazyCond::False,
+                        edge_bundle: IncomingEdgeBundle {
+                            target: Default::default(),
+                            accumulated_count: Default::default(),
+                            target_inputs: Default::default(),
+                        },
+                    });
+
+                    match matches(taken_deferred.with_target(target)) {
+                        Ok(x) => {
+                            result = Some(x);
+                            // FIXME(eddyb) should this use `swap_remove_index`?
+                            target_to_deferred.shift_remove_index(i).unwrap();
+                            break;
+                        }
+
+                        // Put back the `DeferredEdgeBundle` and keep looking.
+                        Err(new_deferred) => {
+                            assert!(new_deferred.edge_bundle.target == target);
+                            *deferred = new_deferred.with_target(());
+                        }
+                    }
+                }
+                (result, Self::from(target_to_deferred))
+            }
+        }
+    }
+}
+
+/// A successfully "claimed" (via `try_claim_edge_bundle`) partially structurized
+/// CFG subgraph (i.e. set of [`ControlRegion`]s previously connected by CFG edges),
+/// which is effectively owned by the "claimer" and **must** be used for:
+/// - the whole function body (if `deferred_edges` only contains `Return`)
+/// - one of the cases of a `Select` node
+/// - merging into a larger region (i.e. its nearest dominator)
+//
+// FIXME(eddyb) consider never having to claim the function body itself,
+// by wrapping the CFG in a `ControlNode` instead.
+struct ClaimedRegion {
+    // FIXME(eddyb) find a way to clarify that this can differ from the target
+    // of `try_claim_edge_bundle`, and also that `deferred_edges` are from the
+    // perspective of being "inside" `structured_body` (wrt hermeticity).
+    structured_body: ControlRegion,
+
+    /// The [`Value`]s that `Value::ControlRegionInput { region: structured_body, ..
}` + /// will get on entry into `structured_body`, when this region ends up + /// merged into a larger region, or as a child of a new [`ControlNode`]. + // + // FIXME(eddyb) don't replace `Value::ControlRegionInput { region: structured_body, .. }` + // with `region_inputs` when `structured_body` ends up a `ControlNode` child, + // but instead make all `ControlRegion`s entirely hermetic wrt inputs. + structured_body_inputs: SmallVec<[Value; 2]>, + + /// The transitive targets which couldn't be claimed into `structured_body` + /// remain as deferred exits, and will block further structurization until /// all other edges to those same targets are gathered together. /// - /// **Note**: this will only be empty if the [`ControlRegion`] never exits, + /// **Note**: this will only be empty if the region can never exit, /// i.e. it has divergent control-flow (such as an infinite loop), as any /// control-flow path that can (eventually) return from the function, will /// end up using a deferred target for that (see [`DeferredTarget::Return`]). @@ -669,7 +1063,7 @@ impl<'a> Structurizer<'a> { incoming_edge_counts_including_loop_exits, structurize_region_state: FxIndexMap::default(), - control_region_input_replacements: EntityOrientedDenseMap::new(), + control_region_input_rewrites: EntityOrientedDenseMap::new(), } } @@ -679,143 +1073,140 @@ impl<'a> Structurizer<'a> { return; } - let mut body_region = - self.claim_or_defer_single_edge(self.func_def_body.body, SmallVec::new()); + // FIXME(eddyb) it might work much better to have the unstructured CFG + // wrapped in a `ControlNode` inside the function body, instead. + let func_body_deferred_edges = { + let func_entry_pseudo_edge = { + let target = self.func_def_body.body; + move || IncomingEdgeBundle { + target, + accumulated_count: IncomingEdgeCount::ONE, + target_inputs: [].into_iter().collect(), + } + }; - if body_region.deferred_edges.target_to_deferred.len() == 1 { - // Structured return, the function is fully structurized. - // - // FIXME(eddyb) also support structured return when the whole body - // is divergent, by generating undef constants (needs access to the - // whole `FuncDecl`, not just `FuncDefBody`, to get the right types). - if let Some(deferred_return) = - body_region.deferred_edges.target_to_deferred.swap_remove(&DeferredTarget::Return) + // HACK(eddyb) it's easier to assume the function never loops back + // to its body, than fix up the broken CFG if that never happens. + if self.incoming_edge_counts_including_loop_exits[func_entry_pseudo_edge().target] + != func_entry_pseudo_edge().accumulated_count { - assert!(body_region.structured_body_holder == Some(self.func_def_body.body)); - let body_def = self.func_def_body.at_mut_body().def(); - body_def.outputs = deferred_return.edge_bundle.target_inputs; - self.func_def_body.unstructured_cfg = None; - - self.apply_value_replacements(); + // FIXME(eddyb) find a way to attach (diagnostic) attributes + // to a `FuncDefBody`, would be useful to have that here. return; } - } - // Repair all the regions that remain unclaimed, including the body. - let structurize_region_state = - mem::take(&mut self.structurize_region_state).into_iter().chain([( - self.func_def_body.body, - StructurizeRegionState::Ready { region: body_region, backedge: None }, - )]); - for (target, state) in structurize_region_state { - if let StructurizeRegionState::Ready { mut region, backedge } = state { - // Undo `backedge` extraction from deferred edges, if needed. 
-            if let Some(backedge) = backedge {
-                assert!(
-                    region
-                        .deferred_edges
-                        .target_to_deferred
-                        .insert(DeferredTarget::Region(target), backedge)
-                        .is_none()
-                );
-            }
+        let ClaimedRegion { structured_body, structured_body_inputs, deferred_edges } =
+            self.try_claim_edge_bundle(func_entry_pseudo_edge()).ok().unwrap();
+        assert!(structured_body == func_entry_pseudo_edge().target);
+        assert!(structured_body_inputs == func_entry_pseudo_edge().target_inputs);
+        deferred_edges
+    };

-            self.repair_unclaimed_region(target, region);
+        match func_body_deferred_edges {
+            // FIXME(eddyb) also support structured return when the whole body
+            // is divergent, by generating undef constants (needs access to the
+            // whole `FuncDecl`, not just `FuncDefBody`, to get the right types).
+            DeferredEdgeBundleSet::Unreachable => {
+                // HACK(eddyb) replace the CFG with one that only contains an
+                // `Unreachable` terminator for the body, comparable to what
+                // `rebuild_cfg_from_unclaimed_region_deferred_edges` would do
+                // in the general case (but special-cased because this is very
+                // close to being structurizable, just needs a bit of plumbing).
+                let mut control_inst_on_exit_from = EntityOrientedDenseMap::new();
+                control_inst_on_exit_from.insert(self.func_def_body.body, ControlInst {
+                    attrs: AttrSet::default(),
+                    kind: ControlInstKind::Unreachable,
+                    inputs: [].into_iter().collect(),
+                    targets: [].into_iter().collect(),
+                    target_inputs: FxIndexMap::default(),
+                });
+                self.func_def_body.unstructured_cfg = Some(ControlFlowGraph {
+                    control_inst_on_exit_from,
+                    loop_merge_to_loop_header: Default::default(),
+                });
             }
-        }

-        self.apply_value_replacements();
-    }
-
-    /// The last step of structurization is processing bulk replacements
-    /// collected while structurizing (like `control_region_input_replacements`).
-    fn apply_value_replacements(self) {
-        // FIXME(eddyb) maybe this should be provided by `transform`.
-        use crate::transform::*;
-        struct ReplaceValueWith<F>(F);
-        impl<F: Fn(Value) -> Option<Value>> Transformer for ReplaceValueWith<F> {
-            fn transform_value_use(&mut self, v: &Value) -> Transformed<Value> {
-                self.0(*v).map_or(Transformed::Unchanged, Transformed::Changed)
+            // Structured return, the function is fully structurized.
+            DeferredEdgeBundleSet::Always { target: DeferredTarget::Return, edge_bundle } => {
+                let body_def = self.func_def_body.at_mut_body().def();
+                body_def.outputs = edge_bundle.target_inputs;
+                self.func_def_body.unstructured_cfg = None;
             }
-        }

-        self.func_def_body.inner_in_place_transform_with(&mut ReplaceValueWith(|v| match v {
-            Value::ControlRegionInput { region, input_idx } => {
-                Some(self.control_region_input_replacements.get(region)?[input_idx as usize])
+            _ => {
+                // Repair all the regions that remain unclaimed, including the body.
+                let structurize_region_state = mem::take(&mut self.structurize_region_state)
+                    .into_iter()
+                    .chain([(self.func_def_body.body, StructurizeRegionState::Ready {
+                        accumulated_backedge_count: IncomingEdgeCount::default(),
+
+                        region_deferred_edges: func_body_deferred_edges,
+                    })]);
+                for (target, state) in structurize_region_state {
+                    if let StructurizeRegionState::Ready { region_deferred_edges, .. } = state {
+                        self.rebuild_cfg_from_unclaimed_region_deferred_edges(
+                            target,
+                            region_deferred_edges,
+                        );
+                    }
+                }
             }
-            _ => None,
-        }));
-    }
+        }

-    // FIXME(eddyb) the names `target` and `region` are an issue, they
-    // should be treated as "the region" vs "its (deferred) successors".
-    fn claim_or_defer_single_edge(
-        &mut self,
-        target: ControlRegion,
-        target_inputs: SmallVec<[Value; 2]>,
-    ) -> PartialControlRegion {
-        self.try_claim_edge_bundle(IncomingEdgeBundle {
-            target,
-            accumulated_count: IncomingEdgeCount::ONE,
-            target_inputs,
-        })
-        .unwrap_or_else(|deferred| {
-            let (deferred_target, deferred) = deferred.into_target_keyed_kv_pair();
-            PartialControlRegion {
-                structured_body_holder: None,
-                deferred_edges: DeferredEdgeBundleSet {
-                    target_to_deferred: [(DeferredTarget::Region(deferred_target), deferred)]
-                        .into_iter()
-                        .collect(),
-                },
-            }
-        })
+        // The last step of structurization is applying rewrites accumulated
+        // while structurizing (i.e. `control_region_input_rewrites`).
+        //
+        // FIXME(eddyb) obsolete this by fully taking advantage of hermeticity,
+        // and only replacing `Value::ControlRegionInput { region, .. }` within
+        // `region`'s children, shallowly, whenever `region` gets claimed.
+        self.func_def_body.inner_in_place_transform_with(
+            &mut ControlRegionInputRewrites::rewrite_all(&self.control_region_input_rewrites),
+        );
     }

-    // FIXME(eddyb) make it possible to return a fresh new node, as opposed to
-    // an existing region?
     fn try_claim_edge_bundle(
         &mut self,
-        mut edge_bundle: IncomingEdgeBundle<ControlRegion>,
-    ) -> Result<PartialControlRegion, DeferredEdgeBundle<ControlRegion>> {
+        edge_bundle: IncomingEdgeBundle<ControlRegion>,
+    ) -> Result<ClaimedRegion, IncomingEdgeBundle<ControlRegion>> {
         let target = edge_bundle.target;

         // Always attempt structurization before checking the `IncomingEdgeCount`,
         // to be able to make use of backedges (if any were found).
         if self.structurize_region_state.get(&target).is_none() {
-            self.structurize_region_from(target);
+            self.structurize_region(target);
         }

-        let backedge = match &self.structurize_region_state[&target] {
+        let backedge_count = match self.structurize_region_state[&target] {
             // This `try_claim_edge_bundle` call is itself a backedge, and it's
             // coherent to not let any of them claim the loop itself, and only
             // allow claiming the whole loop (if successfully structurized).
-            StructurizeRegionState::InProgress => None,
+            StructurizeRegionState::InProgress => IncomingEdgeCount::default(),

-            StructurizeRegionState::Ready { backedge, .. } => backedge.as_ref(),
+            StructurizeRegionState::Ready { accumulated_backedge_count, .. } => {
+                accumulated_backedge_count
+            }

             StructurizeRegionState::Claimed => {
                 unreachable!("cfg::Structurizer::try_claim_edge_bundle: already claimed");
             }
         };
-        let backedge_count = backedge.map(|e| e.edge_bundle.accumulated_count).unwrap_or_default();

         if self.incoming_edge_counts_including_loop_exits[target]
             != edge_bundle.accumulated_count + backedge_count
         {
-            return Err(DeferredEdgeBundle { condition: LazyCond::True, edge_bundle });
+            return Err(edge_bundle);
         }

         let state =
             self.structurize_region_state.insert(target, StructurizeRegionState::Claimed).unwrap();
-        let (backedge, mut region) = match state {
+        let mut deferred_edges = match state {
             StructurizeRegionState::InProgress => unreachable!(
                 "cfg::Structurizer::try_claim_edge_bundle: cyclic calls \
                  should not get this far"
             ),

-            StructurizeRegionState::Ready { backedge, region } => (backedge, region),
+            StructurizeRegionState::Ready { region_deferred_edges, .. } => region_deferred_edges,

             StructurizeRegionState::Claimed => {
                 // Handled above.
@@ -823,33 +1214,92 @@
             }
         };

-        // FIXME(eddyb) remove this field in most cases.
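// --------------------------------------------------------------------------
// [editorial sketch, not part of the patch] The claiming rule enforced by
// `try_claim_edge_bundle` above reduces to a saturation check: a region is
// only claimable once the bundle carries *every* incoming edge, with
// backedges exempt (they get folded into a `Loop` node instead). Toy version:
fn claimable(total_incoming: u32, bundle_accumulated: u32, backedges: u32) -> bool {
    bundle_accumulated + backedges == total_incoming
}
// e.g. a loop header with two entry edges and one backedge is claimable by
// a bundle that accumulated both entries: `claimable(3, 2, 1)` is `true`.
// --------------------------------------------------------------------------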
- assert!(region.structured_body_holder == Some(target)); + let mut backedge = None; + if backedge_count != IncomingEdgeCount::default() { + (backedge, deferred_edges) = + deferred_edges.split_out_target(DeferredTarget::Region(target)); + } // If the target contains any backedge to itself, that's a loop, with: // * entry: `edge_bundle` (unconditional, i.e. `do`-`while`-like) - // * body: `target` / `region.structured_body_holder` + // * body: `target` // * repeat ("continue") edge: `backedge` (with its `condition`) - // * exit ("break") edges: `region.deferred_*` - if let Some(backedge) = backedge { + // * exit ("break") edges: `deferred_edges` + let structured_body = if let Some(backedge) = backedge { let DeferredEdgeBundle { condition: repeat_condition, edge_bundle: backedge } = backedge; + let body = target; + + // HACK(eddyb) due to `Loop` `ControlNode`s not being hermetic on + // the output side yet (i.e. they still have SSA-like semantics), + // it gets wrapped in a `ControlRegion`, which can be as hermetic as + // the loop body itself was originally. + // NOTE(eddyb) both input declarations and the child `Loop` node are + // added later down below, after the `Loop` node is created. + let wrapper_region = + self.func_def_body.control_regions.define(self.cx, ControlRegionDef::default()); - // If the body starts at a region with any `inputs`, receiving values - // from both the loop entry and the backedge, that has to become - // "loop state" (with values being passed to `body` `inputs`, i.e. - // the structurized `body` region as a whole takes the same `inputs`). + // Any loop body region inputs, which must receive values from both + // the loop entry and the backedge, become explicit "loop state", + // starting as `initial_inputs` and being replaced with body outputs + // after every loop iteration. // - // FIXME(eddyb) the names `target` and `region` are an issue, they - // should be treated as "the region" vs "its (deferred) successors". - let body = target; + // FIXME(eddyb) `Loop` `ControlNode`s should be changed to be hermetic + // and have the loop state be output from the whole node itself, + // for any outside uses of values defined within the loop body. let body_def = self.func_def_body.at_mut(body).def(); - let initial_inputs = mem::take(&mut edge_bundle.target_inputs); - body_def.outputs = backedge.target_inputs; + let original_input_decls = mem::take(&mut body_def.inputs); + assert!(body_def.outputs.is_empty()); + + // HACK(eddyb) some dataflow through the loop body is redundant, + // and can be lifted out of it, but the worst part is that applying + // the replacement requires leaving alone all the non-redundant + // `body` region inputs at the same time, and it's not really + // feasible to move `body`'s children into a new region without + // wasting it completely (i.e. can't swap with `wrapper_region`). 
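// --------------------------------------------------------------------------
// [editorial sketch, not part of the patch] The renumbering built just below
// keeps a loop-state slot only when the backedge feeds it a value other than
// the slot itself; a toy version where slot `i` is loop-invariant iff its
// backedge value is `i` (standing in for "the body input, unchanged"):
fn partition_loop_state(backedge_value_of: &[usize]) -> (Vec<usize>, Vec<usize>) {
    // Returns (invariant slots, genuine loop-state slots).
    (0..backedge_value_of.len()).partition(|&i| backedge_value_of[i] == i)
}
// Invariant slots become plain `wrapper_region` inputs, while the rest get
// renumbered (compacted) into the loop body's inputs/outputs.
// --------------------------------------------------------------------------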
+ let mut initial_inputs = SmallVec::<[_; 2]>::new(); + let body_input_rewrites = ControlRegionInputRewrites::RenumberOrReplaceWith( + backedge + .target_inputs + .into_iter() + .enumerate() + .map(|(original_idx, mut backedge_value)| { + ControlRegionInputRewrites::rewrite_all( + &self.control_region_input_rewrites, + ) + .transform_value_use(&backedge_value) + .apply_to(&mut backedge_value); + + let original_idx = u32::try_from(original_idx).unwrap(); + if backedge_value + == (Value::ControlRegionInput { region: body, input_idx: original_idx }) + { + // FIXME(eddyb) does this have to be general purpose, + // or could this be handled as `None` with a single + // `wrapper_region` per `ControlRegionInputRewrites`? + Err(Value::ControlRegionInput { + region: wrapper_region, + input_idx: original_idx, + }) + } else { + let renumbered_idx = u32::try_from(body_def.inputs.len()).unwrap(); + initial_inputs.push(Value::ControlRegionInput { + region: wrapper_region, + input_idx: original_idx, + }); + body_def.inputs.push(original_input_decls[original_idx as usize]); + body_def.outputs.push(backedge_value); + Ok(renumbered_idx) + } + }) + .collect(), + ); + self.control_region_input_rewrites.insert(body, body_input_rewrites); + assert_eq!(initial_inputs.len(), body_def.inputs.len()); assert_eq!(body_def.outputs.len(), body_def.inputs.len()); - let repeat_condition = self.materialize_lazy_cond(repeat_condition); + let repeat_condition = self.materialize_lazy_cond(&repeat_condition); let loop_node = self.func_def_body.control_nodes.define( self.cx, ControlNodeDef { @@ -859,19 +1309,11 @@ impl<'a> Structurizer<'a> { .into(), ); - // Replace the region with the whole loop, any exits out of the loop - // being encoded in `region.deferred_*`. - // - // FIXME(eddyb) the names `target` and `region` are an issue, they - // should be treated as "the region" vs "its (deferred) successors". - // - // FIXME(eddyb) do not define a region just to keep a `ControlNode` in it. - let wasteful_loop_region = - self.func_def_body.control_regions.define(self.cx, ControlRegionDef::default()); - self.func_def_body.control_regions[wasteful_loop_region] + let wrapper_region_def = &mut self.func_def_body.control_regions[wrapper_region]; + wrapper_region_def.inputs = original_input_decls; + wrapper_region_def .children .insert_last(loop_node, &mut self.func_def_body.control_nodes); - region.structured_body_holder = Some(wasteful_loop_region); // HACK(eddyb) we've treated loop exits as extra "false edges", so // here they have to be added to the loop (potentially unlocking @@ -879,40 +1321,39 @@ impl<'a> Structurizer<'a> { if let Some(exit_targets) = self.loop_header_to_exit_targets.get(&target) { for &exit_target in exit_targets { // FIXME(eddyb) what if this is `None`, is that impossible? 
- if let Some(deferred) = region - .deferred_edges - .target_to_deferred - .get_mut(&DeferredTarget::Region(exit_target)) + if let Some(exit_edge_bundle) = deferred_edges + .get_edge_bundle_mut_by_target(DeferredTarget::Region(exit_target)) { - deferred.edge_bundle.accumulated_count += IncomingEdgeCount::ONE; + exit_edge_bundle.accumulated_count += IncomingEdgeCount::ONE; } } } - } - if !edge_bundle.target_inputs.is_empty() { - self.control_region_input_replacements.insert(target, edge_bundle.target_inputs); - } - - Ok(region) + wrapper_region + } else { + target + }; + Ok(ClaimedRegion { + structured_body, + structured_body_inputs: edge_bundle.target_inputs, + deferred_edges, + }) } - /// Structurize a region starting from `unstructured_region`, and extending - /// it (by combining the smaller [`ControlRegion`]s) as much as possible into - /// the CFG (likely everything dominated by `unstructured_region`). + /// Structurize `region` by absorbing into it the entire CFG subgraph which + /// it dominates (and deferring any other edges to the rest of the CFG). /// /// The output of this process is stored in, and any other bookkeeping is - /// done through, `self.structurize_region_state[unstructured_region]`. + /// done through, `self.structurize_region_state[region]`. /// /// See also [`StructurizeRegionState`]'s docs. - fn structurize_region_from(&mut self, unstructured_region: ControlRegion) { + fn structurize_region(&mut self, region: ControlRegion) { { - let old_state = self - .structurize_region_state - .insert(unstructured_region, StructurizeRegionState::InProgress); + let old_state = + self.structurize_region_state.insert(region, StructurizeRegionState::InProgress); if let Some(old_state) = old_state { unreachable!( - "cfg::Structurizer::structurize_region_from: \ + "cfg::Structurizer::structurize_region: \ already {}, when attempting to start structurization", match old_state { StructurizeRegionState::InProgress => "in progress (cycle detected)", @@ -923,91 +1364,130 @@ impl<'a> Structurizer<'a> { } } - let control_inst = self + let control_inst_on_exit = self .func_def_body .unstructured_cfg .as_mut() .unwrap() .control_inst_on_exit_from - .remove(unstructured_region) + .remove(region) .expect( - "cfg::Structurizer::structurize_region_from: missing \ + "cfg::Structurizer::structurize_region: missing \ `ControlInst` (CFG wasn't unstructured in the first place?)", ); - /// Marker error type for unhandled [`ControlInst`]s below. - struct UnsupportedControlInst(ControlInst); - - let region_from_control_inst = { - let ControlInst { attrs, kind, inputs, targets, target_inputs } = control_inst; + // Start with the concatenation of `region` and `control_inst_on_exit`, + // always appending `ControlNode`s (including the children of entire + // `ClaimedRegion`s) to `region`'s definition itself. + let mut deferred_edges = { + let ControlInst { attrs, kind, inputs, targets, target_inputs } = control_inst_on_exit; // FIXME(eddyb) this loses `attrs`. let _ = attrs; - let child_regions: SmallVec<[_; 8]> = targets + let target_regions: SmallVec<[_; 8]> = targets .iter() .map(|&target| { - self.claim_or_defer_single_edge( + self.try_claim_edge_bundle(IncomingEdgeBundle { target, - target_inputs.get(&target).cloned().unwrap_or_default(), - ) + accumulated_count: IncomingEdgeCount::ONE, + target_inputs: target_inputs.get(&target).cloned().unwrap_or_default(), + }) + .map_err(|edge_bundle| { + // HACK(eddyb) special-case "shared `unreachable`" to + // always inline it and avoid awkward "merges". 
+ // FIXME(eddyb) should this be in a separate CFG pass? + // (i.e. is there a risk of other logic needing this?) + let target_is_trivial_unreachable = + match self.structurize_region_state.get(&edge_bundle.target) { + Some(StructurizeRegionState::Ready { + region_deferred_edges: DeferredEdgeBundleSet::Unreachable, + .. + }) => { + // FIXME(eddyb) DRY this "is empty region" check. + self.func_def_body + .at(edge_bundle.target) + .at_children() + .into_iter() + .next() + .is_none() + } + _ => false, + }; + if target_is_trivial_unreachable { + DeferredEdgeBundleSet::Unreachable + } else { + DeferredEdgeBundleSet::Always { + target: DeferredTarget::Region(edge_bundle.target), + edge_bundle: edge_bundle.with_target(()), + } + } + }) }) .collect(); match kind { ControlInstKind::Unreachable => { - assert_eq!((inputs.len(), child_regions.len()), (0, 0)); + assert_eq!((inputs.len(), target_regions.len()), (0, 0)); // FIXME(eddyb) this may result in lost optimizations over // actually encoding it in `ControlNode`/`ControlRegion` - // (e.g. a new `ControlKind`, or replacing region `outputs`), + // (e.g. a new `ControlNodeKind`, or replacing region `outputs`), // but it's simpler to handle it like this. - Ok(PartialControlRegion { - structured_body_holder: None, - deferred_edges: DeferredEdgeBundleSet { - target_to_deferred: [].into_iter().collect(), - }, - }) + // + // NOTE(eddyb) actually, this encoding is lossless *during* + // structurization, and a divergent region can only end up as: + // - the function body, where it implies the function can + // never actually return: not fully structurized currently + // (but only for a silly reason, and is entirely fixable) + // - a `Select` case, where it implies that case never merges + // back into the `Select` node, and potentially that the + // case can never be taken: this is where a structured + // encoding can be introduced, by pruning unreachable + // cases, and potentially even introducing `assume`s + // - a `Loop` body is not actually possible when divergent + // (as there can be no backedge to form a cyclic CFG) + DeferredEdgeBundleSet::Unreachable } - ControlInstKind::ExitInvocation(_) => { - assert_eq!(child_regions.len(), 0); - - // FIXME(eddyb) introduce equivalent `ControlNodeKind` for these. 
- Err(UnsupportedControlInst(ControlInst { - attrs, - kind, - inputs, - targets, - target_inputs, - })) + ControlInstKind::ExitInvocation(kind) => { + assert_eq!(target_regions.len(), 0); + + let control_node = self.func_def_body.control_nodes.define( + self.cx, + ControlNodeDef { + kind: ControlNodeKind::ExitInvocation { kind, inputs }, + outputs: [].into_iter().collect(), + } + .into(), + ); + self.func_def_body.control_regions[region] + .children + .insert_last(control_node, &mut self.func_def_body.control_nodes); + + DeferredEdgeBundleSet::Unreachable } ControlInstKind::Return => { - assert_eq!(child_regions.len(), 0); - - Ok(PartialControlRegion { - structured_body_holder: None, - deferred_edges: DeferredEdgeBundleSet { - target_to_deferred: [DeferredEdgeBundle { - condition: LazyCond::True, - edge_bundle: IncomingEdgeBundle { - accumulated_count: IncomingEdgeCount::default(), - target: DeferredTarget::Return, - target_inputs: inputs, - }, - } - .into_target_keyed_kv_pair()] - .into_iter() - .collect(), + assert_eq!(target_regions.len(), 0); + + DeferredEdgeBundleSet::Always { + target: DeferredTarget::Return, + edge_bundle: IncomingEdgeBundle { + accumulated_count: IncomingEdgeCount::default(), + target: (), + target_inputs: inputs, }, - }) + } } ControlInstKind::Branch => { - assert_eq!((inputs.len(), child_regions.len()), (0, 1)); + assert_eq!((inputs.len(), target_regions.len()), (0, 1)); - Ok(child_regions.into_iter().next().unwrap()) + self.append_maybe_claimed_region( + region, + target_regions.into_iter().next().unwrap(), + ) } ControlInstKind::SelectBranch(kind) => { @@ -1015,171 +1495,63 @@ impl<'a> Structurizer<'a> { let scrutinee = inputs[0]; - Ok(self.structurize_select(kind, scrutinee, child_regions)) + self.structurize_select_into(region, kind, Ok(scrutinee), target_regions) } } }; - let region_from_control_inst = - region_from_control_inst.unwrap_or_else(|UnsupportedControlInst(control_inst)| { - // HACK(eddyb) this only remains used for `ExitInvocation`. - assert!(control_inst.targets.is_empty()); - - // HACK(eddyb) attach the unsupported `ControlInst` to a fresh - // new "proxy" `ControlRegion`, that can then be the target of - // a deferred edge, specially crafted to be unclaimable. - let proxy = - self.func_def_body.control_regions.define(self.cx, ControlRegionDef::default()); - self.func_def_body - .unstructured_cfg - .as_mut() - .unwrap() - .control_inst_on_exit_from - .insert(proxy, control_inst); - self.structurize_region_state.insert(proxy, StructurizeRegionState::InProgress); - self.incoming_edge_counts_including_loop_exits - .insert(proxy, IncomingEdgeCount::ONE); - let deferred_proxy = DeferredEdgeBundle { - condition: LazyCond::True, - edge_bundle: IncomingEdgeBundle { - target: DeferredTarget::Region(proxy), - accumulated_count: IncomingEdgeCount::default(), - target_inputs: [].into_iter().collect(), - }, - }; - - PartialControlRegion { - structured_body_holder: None, - deferred_edges: DeferredEdgeBundleSet { - target_to_deferred: [deferred_proxy] - .into_iter() - .map(|d| d.into_target_keyed_kv_pair()) - .collect(), - }, - } - }); - - // Prepend `unstructured_region`'s children to `region_from_control_inst`. 
- let mut region = { - let children_from_control_inst = region_from_control_inst - .structured_body_holder - .map(|structured_body_holder_from_control_inst| { - mem::take( - &mut self - .func_def_body - .at_mut(structured_body_holder_from_control_inst) - .def() - .children, - ) - }) - .unwrap_or_default(); - - self.func_def_body.control_regions[unstructured_region] - .children - .append(children_from_control_inst, &mut self.func_def_body.control_nodes); - - PartialControlRegion { - structured_body_holder: Some(unstructured_region), - ..region_from_control_inst - } - }; - // Try to resolve deferred edges that may have accumulated, and keep // going until there's no more deferred edges that can be claimed. - let try_claim_any_deferred_edge = - |this: &mut Self, deferred_edges: &mut DeferredEdgeBundleSet| { - // FIXME(eddyb) this should try to take as many edges as possible, - // and incorporate them all at once, potentially with a switch instead - // of N individual branches with their own booleans etc. - for (i, (&deferred_target, deferred)) in - deferred_edges.target_to_deferred.iter_mut().enumerate() - { - let deferred_target = match deferred_target { - DeferredTarget::Region(target) => target, - DeferredTarget::Return => continue, - }; - - // HACK(eddyb) "take" `deferred.edge_bundle` so it can be - // passed to `try_claim_edge_bundle` (and put back if `Err`). - let DeferredEdgeBundle { condition: _, ref mut edge_bundle } = *deferred; - let taken_edge_bundle = IncomingEdgeBundle { - target: deferred_target, - accumulated_count: edge_bundle.accumulated_count, - target_inputs: mem::take(&mut edge_bundle.target_inputs), - }; + loop { + // FIXME(eddyb) this should try to take as many edges as possible, + // and incorporate them all at once, potentially with a switch instead + // of N individual branches with their own booleans etc. + let (claimed, else_deferred_edges) = deferred_edges.split_out_matching(|deferred| { + let deferred_target = deferred.edge_bundle.target; + let DeferredEdgeBundle { condition, edge_bundle } = match deferred_target { + DeferredTarget::Region(target) => deferred.with_target(target), + DeferredTarget::Return => return Err(deferred), + }; - match this.try_claim_edge_bundle(taken_edge_bundle) { - Ok(claimed_region) => { - // FIXME(eddyb) should this use `swap_remove_index`? - let (_, DeferredEdgeBundle { condition, edge_bundle: _ }) = - deferred_edges.target_to_deferred.shift_remove_index(i).unwrap(); - return Some((condition, claimed_region)); - } + match self.try_claim_edge_bundle(edge_bundle) { + Ok(claimed_region) => Ok((condition, claimed_region)), - // Put back the `IncomingEdgeBundle` and keep looking. - Err(new_deferred) => { - let (new_deferred_target, new_deferred) = - new_deferred.into_target_keyed_kv_pair(); - assert!(new_deferred_target == deferred_target); - *edge_bundle = new_deferred.edge_bundle; - } + Err(new_edge_bundle) => { + let new_target = DeferredTarget::Region(new_edge_bundle.target); + Err(DeferredEdgeBundle { + condition, + edge_bundle: new_edge_bundle.with_target(new_target), + }) } } - None - }; - while let Some((condition, then_region)) = - try_claim_any_deferred_edge(self, &mut region.deferred_edges) - { - let else_region = PartialControlRegion { structured_body_holder: None, ..region }; - let else_is_unreachable = else_region.deferred_edges.target_to_deferred.is_empty(); - - // `then_region` is only taken if `condition` holds, except that - // `condition` can be ignored when `else_region` is unreachable. 
- let mut merged_region = if else_is_unreachable { - then_region - } else { - let condition = self.materialize_lazy_cond(condition); - self.structurize_select( - SelectionKind::BoolCond, - condition, - [then_region, else_region].into_iter().collect(), - ) + }); + let Some((condition, then_region)) = claimed else { + deferred_edges = else_deferred_edges; + break; }; - // FIXME(eddyb) remove this field in most cases. - assert!(region.structured_body_holder == Some(unstructured_region)); - - // Append the freshly merged region's children to the original region. - let original_structured_body_holder = region.structured_body_holder.unwrap(); - if let Some(merged_structured_body_holder) = merged_region.structured_body_holder { - let merged_children = mem::take( - &mut self.func_def_body.at_mut(merged_structured_body_holder).def().children, - ); - - self.func_def_body.control_regions[original_structured_body_holder] - .children - .append(merged_children, &mut self.func_def_body.control_nodes); - } - merged_region.structured_body_holder = Some(original_structured_body_holder); - - region = merged_region; + deferred_edges = self.structurize_select_into( + region, + SelectionKind::BoolCond, + Err(&condition), + [Ok(then_region), Err(else_deferred_edges)].into_iter().collect(), + ); } - // Try to extract (deferred) backedges (which later get turned into loops). - let backedge = region - .deferred_edges - .target_to_deferred - .swap_remove(&DeferredTarget::Region(unstructured_region)); - - // FIXME(eddyb) remove this field in most cases. - assert!(region.structured_body_holder == Some(unstructured_region)); + // Cache the edge count for backedges (which later get turned into loops). + let accumulated_backedge_count = deferred_edges + .get_edge_bundle_by_target(DeferredTarget::Region(region)) + .map(|backedge| backedge.accumulated_count) + .unwrap_or_default(); - let old_state = self - .structurize_region_state - .insert(unstructured_region, StructurizeRegionState::Ready { backedge, region }); + let old_state = + self.structurize_region_state.insert(region, StructurizeRegionState::Ready { + accumulated_backedge_count, + region_deferred_edges: deferred_edges, + }); if !matches!(old_state, Some(StructurizeRegionState::InProgress)) { unreachable!( - "cfg::Structurizer::structurize_region_from: \ + "cfg::Structurizer::structurize_region: \ already {}, when attempting to store structurization result", match old_state { None => "reverted to missing (removed from the map?)", @@ -1191,216 +1563,347 @@ impl<'a> Structurizer<'a> { } } - /// Build a `Select` [`ControlNode`], from partially structured `cases`, - /// merging all of their `deferred_{edges,returns}` together. - fn structurize_select( + /// Append to `parent_region` a new `Select` [`ControlNode`] built from + /// partially structured `cases`, merging all of their `deferred_edges` + /// together into a combined `DeferredEdgeBundleSet` (which gets returned). + // + // FIXME(eddyb) handle `unreachable` cases losslessly. + fn structurize_select_into( &mut self, + parent_region: ControlRegion, kind: SelectionKind, - scrutinee: Value, - cases: SmallVec<[PartialControlRegion; 8]>, - ) -> PartialControlRegion { - // `Select` isn't actually needed unless there's at least two `cases`. 
-        if cases.len() <= 1 {
-            return cases.into_iter().next().unwrap_or_else(|| PartialControlRegion {
-                structured_body_holder: None,
-                deferred_edges: DeferredEdgeBundleSet {
-                    target_to_deferred: [].into_iter().collect(),
-                },
-            });
-        }
-
-        // Gather the full set of deferred edges (and returns), along with the
-        // necessary information for the `Select`'s `ControlNodeOutputDecl`s.
-        let mut deferred_edges_to_input_count_and_total_edge_count = FxIndexMap::default();
-        let mut deferred_return_types = None;
-        for case in &cases {
-            for (&target, deferred) in &case.deferred_edges.target_to_deferred {
-                let input_count = deferred.edge_bundle.target_inputs.len();
-
-                let (old_input_count, accumulated_edge_count) =
-                    deferred_edges_to_input_count_and_total_edge_count
-                        .entry(target)
-                        .or_insert((input_count, IncomingEdgeCount::default()));
-                assert_eq!(*old_input_count, input_count);
-                *accumulated_edge_count += deferred.edge_bundle.accumulated_count;
-
-                if target == DeferredTarget::Return && deferred_return_types.is_none() {
-                    // HACK(eddyb) because there's no `FuncDecl` available, take the
-                    // types from the returned values and hope they match.
-                    deferred_return_types = Some(
-                        deferred
-                            .edge_bundle
-                            .target_inputs
-                            .iter()
-                            .map(|&v| self.func_def_body.at(v).type_of(self.cx)),
-                    );
-                }
+        scrutinee: Result<Value, &LazyCond>,
+        mut cases: SmallVec<[Result<ClaimedRegion, DeferredEdgeBundleSet>; 8]>,
+    ) -> DeferredEdgeBundleSet {
+        // HACK(eddyb) don't nest a sole convergent case inside the `Select`,
+        // and instead prefer early convergence (see also `EventualCfgExits`).
+        // NOTE(eddyb) this also happens to handle the situation where `Select`
+        // isn't even needed (i.e. the other cases don't even have side-effects),
+        // via the `any_non_empty_case` check (after taking `convergent_case`).
+        // FIXME(eddyb) consider introducing some kind of `assume` for `scrutinee`,
+        // to preserve its known value (whenever `convergent_case` is reached).
+        let convergent_cases = cases.iter_mut().filter(|case| match case {
+            Ok(ClaimedRegion { deferred_edges, .. }) | Err(deferred_edges) => {
+                !matches!(deferred_edges, DeferredEdgeBundleSet::Unreachable)
             }
+        });
+        if let Ok(convergent_case) = convergent_cases.exactly_one() {
+            // HACK(eddyb) this relies on `structurize_select_into`'s behavior
+            // for `unreachable` cases being largely equivalent to empty cases.
+            let convergent_case =
+                mem::replace(convergent_case, Err(DeferredEdgeBundleSet::Unreachable));
+
+            // FIXME(eddyb) avoid needing recursion, by instead changing the
+            // "`Select` node insertion cursor" (into `parent_region`), and
+            // stashing `convergent_case`'s deferred edges to return later.
+            let deferred_edges =
+                self.structurize_select_into(parent_region, kind, scrutinee, cases);
+            assert!(matches!(deferred_edges, DeferredEdgeBundleSet::Unreachable));
+
+            // The sole convergent case goes in the `parent_region`, and its
+            // relationship with the `Select` (if it was even necessary at all)
+            // is at most one of side-effect sequencing.
+            return self.append_maybe_claimed_region(parent_region, convergent_case);
         }
 
-        // The `Select` outputs are the concatenation of `target_inputs`, for
-        // each unique `deferred_edges` target.
+        // Support lazily defining the `Select` node, as soon as it's necessary
+        // (i.e. to plumb per-case dataflow through `Value::ControlNodeOutput`s),
+        // but also if any of the cases actually have non-empty regions, which
+        // is checked after the special-cases (which return w/o a `Select` at all).
         //
-        // FIXME(eddyb) this `struct` only really exists for readability.
- struct Deferred { - target: DeferredTarget, - target_input_count: usize, + // FIXME(eddyb) some cases may be `unreachable`, and that's erased here. + let mut cached_select_node = None; + let mut non_move_kind = Some(kind); + let mut get_or_define_select_node = |this: &mut Self, cases: &[_]| { + *cached_select_node.get_or_insert_with(|| { + let kind = non_move_kind.take().unwrap(); + let cases = cases + .iter() + .map(|case| { + let case_region = match case { + &Ok(ClaimedRegion { structured_body, .. }) => structured_body, + Err(_) => this + .func_def_body + .control_regions + .define(this.cx, ControlRegionDef::default()), + }; - /// Sum of `accumulated_count` for this `target` across all `cases`. + // FIXME(eddyb) should these be asserts that it's already empty? + let case_region_def = this.func_def_body.at_mut(case_region).def(); + case_region_def.outputs.clear(); + case_region + }) + .collect(); + let scrutinee = + scrutinee.unwrap_or_else(|lazy_cond| this.materialize_lazy_cond(lazy_cond)); + let select_node = this.func_def_body.control_nodes.define( + this.cx, + ControlNodeDef { + kind: ControlNodeKind::Select { kind, scrutinee, cases }, + outputs: [].into_iter().collect(), + } + .into(), + ); + this.func_def_body.control_regions[parent_region] + .children + .insert_last(select_node, &mut this.func_def_body.control_nodes); + select_node + }) + }; + + // Ensure the `Select` exists if needed for any per-case side-effects. + let any_non_empty_case = cases.iter().any(|case| { + case.as_ref().is_ok_and(|&ClaimedRegion { structured_body, .. }| { + self.func_def_body.at(structured_body).at_children().into_iter().next().is_some() + }) + }); + if any_non_empty_case { + get_or_define_select_node(self, &cases); + } + + // Gather the full set of deferred edges (and returns). + struct DeferredTargetSummary { + input_count: usize, total_edge_count: IncomingEdgeCount, } - let deferreds = || { - deferred_edges_to_input_count_and_total_edge_count.iter().map( - |(&target, &(target_input_count, total_edge_count))| Deferred { - target, - target_input_count, - total_edge_count, - }, - ) - }; - let mut output_decls: SmallVec<[_; 2]> = - SmallVec::with_capacity(deferreds().map(|deferred| deferred.target_input_count).sum()); - for deferred in deferreds() { - let target_input_types = match deferred.target { - DeferredTarget::Region(target) => { - Either::Left(self.func_def_body.at(target).def().inputs.iter().map(|i| i.ty)) - } - DeferredTarget::Return => Either::Right(deferred_return_types.take().unwrap()), + let mut deferred_targets = FxIndexMap::default(); + for case in &cases { + let case_deferred_edges = match case { + Ok(ClaimedRegion { deferred_edges, .. 
}) | Err(deferred_edges) => deferred_edges, }; - assert_eq!(target_input_types.len(), deferred.target_input_count); + for (target, edge_bundle) in case_deferred_edges.iter_targets_with_edge_bundle() { + let input_count = edge_bundle.target_inputs.len(); + + let summary = deferred_targets.entry(target).or_insert(DeferredTargetSummary { + input_count, + total_edge_count: IncomingEdgeCount::default(), + }); + assert_eq!(summary.input_count, input_count); + summary.total_edge_count += edge_bundle.accumulated_count; + } + } - output_decls.extend( - target_input_types - .map(|ty| ControlNodeOutputDecl { attrs: AttrSet::default(), ty }), + // FIXME(eddyb) `control_region_input_rewrites` mappings, generated + // for every `ClaimedRegion` that has been merged into a larger region, + // only get applied after structurization fully completes, but here it's + // very useful to have the fully resolved values across all `cases`' + // incoming/outgoing edges (note, however, that within outgoing edges, + // i.e. `case_deferred_edges`' `target_inputs`, `Value::ControlRegionInput` + // are not resolved using the contents of `case_structured_body_inputs`, + // which is kept hermetic until just before `structurize_select` returns). + for case in &mut cases { + let (case_structured_body_inputs, case_deferred_edges) = match case { + Ok(ClaimedRegion { structured_body_inputs, deferred_edges, .. }) => { + (&mut structured_body_inputs[..], deferred_edges) + } + Err(deferred_edges) => (&mut [][..], deferred_edges), + }; + let all_values = case_structured_body_inputs.iter_mut().chain( + case_deferred_edges + .iter_targets_with_edge_bundle_mut() + .flat_map(|(_, edge_bundle)| &mut edge_bundle.target_inputs), ); + for v in all_values { + ControlRegionInputRewrites::rewrite_all(&self.control_region_input_rewrites) + .transform_value_use(v) + .apply_to(v); + } } - // Convert the cases into `ControlRegion`s, each outputting the full set - // of values described by `outputs` (with undef filling in any gaps), + // Merge all `deferred_edges` by plumbing their per-case `target_input`s + // (into per-case region outputs, and therefore the `Select` outputs) + // out of all cases that can reach them, with undef constants used to + // fill any gaps (i.e. for the targets not reached through each case), // while deferred conditions are collected separately (for `LazyCond`). - let mut deferred_per_case_conditions: SmallVec<[_; 8]> = - deferreds().map(|_| Vec::with_capacity(cases.len())).collect(); - let cases = cases - .into_iter() - .enumerate() - .map(|(case_idx, case)| { - let PartialControlRegion { structured_body_holder, mut deferred_edges } = case; - - let mut outputs = SmallVec::with_capacity(output_decls.len()); - for (deferred, per_case_conditions) in - deferreds().zip_eq(&mut deferred_per_case_conditions) - { - let (edge_condition, values_or_count) = - match deferred_edges.target_to_deferred.swap_remove(&deferred.target) { - Some(DeferredEdgeBundle { condition, edge_bundle }) => { - (Some(condition), Ok(edge_bundle.target_inputs)) - } + let deferred_edges = deferred_targets.into_iter().map(|(target, target_summary)| { + let DeferredTargetSummary { input_count, total_edge_count } = target_summary; + + // HACK(eddyb) `Err` wraps only `LazyCond::{Undef,False}`, which allows + // distinguishing between "not taken" and "not even reachable". + let per_case_deferred: SmallVec<[Result, LazyCond>; 8]> = cases + .iter_mut() + .map(|case| match case { + Ok(ClaimedRegion { deferred_edges, .. 
}) | Err(deferred_edges) => { + if let DeferredEdgeBundleSet::Unreachable = deferred_edges { + Err(LazyCond::Undef) + } else { + deferred_edges + .steal_deferred_by_target_without_removal(target) + .ok_or(LazyCond::False) + } + } + }) + .collect(); - None => (Some(LazyCond::False), Err(deferred.target_input_count)), - }; + let target_inputs = (0..input_count) + .map(|target_input_idx| { + let per_case_target_input = per_case_deferred.iter().map(|per_case_deferred| { + per_case_deferred.as_ref().ok().map( + |DeferredEdgeBundle { edge_bundle, .. }| { + edge_bundle.target_inputs[target_input_idx] + }, + ) + }); + + // Avoid introducing dynamic dataflow when the same value is + // used across all cases (which can reach this `target`). + let unique_target_input_value = per_case_target_input + .clone() + .zip_eq(&cases) + .filter_map(|(v, case)| Some((v?, case))) + .map(|(v, case)| { + // If possible, resolve `v` to a `Value` valid in + // `parent_region` (i.e. the `Select` node parent). + match case { + // `case`'s `structured_body` effectively "wraps" + // its `deferred_edges` (where `v` came from), + // so values from `parent_region` can only be + // hermetically used via `structured_body` inputs. + Ok(ClaimedRegion { + structured_body, + structured_body_inputs, + .. + }) => match v { + Value::Const(_) => Ok(v), + Value::ControlRegionInput { region, input_idx } + if region == *structured_body => + { + Ok(structured_body_inputs[input_idx as usize]) + } + _ => Err(()), + }, - if let Some(edge_condition) = edge_condition { - assert_eq!(per_case_conditions.len(), case_idx); - per_case_conditions.push(edge_condition); + // `case` has no region of its own, so everything + // it carries is already from within `parent_region`. + Err(_) => Ok(v), + } + }) + .dedup() + .exactly_one(); + if let Ok(Ok(v)) = unique_target_input_value { + return v; } - match values_or_count { - Ok(values) => outputs.extend(values), - Err(missing_value_count) => { - let decls_for_missing_values = - &output_decls[outputs.len()..][..missing_value_count]; - outputs.extend( - decls_for_missing_values - .iter() - .map(|output| Value::Const(self.const_undef(output.ty))), - ); + let ty = match target { + DeferredTarget::Region(target) => { + self.func_def_body.at(target).def().inputs[target_input_idx].ty } - } - } + // HACK(eddyb) in the absence of `FuncDecl`, infer the + // type from each returned value (and require they match). + DeferredTarget::Return => per_case_target_input + .clone() + .flatten() + .map(|v| self.func_def_body.at(v).type_of(self.cx)) + .dedup() + .exactly_one() + .ok() + .expect("mismatched `return`ed value types"), + }; + + let select_node = get_or_define_select_node(self, &cases); + let output_decls = &mut self.func_def_body.at_mut(select_node).def().outputs; + let output_idx = output_decls.len(); + output_decls.push(ControlNodeOutputDecl { attrs: AttrSet::default(), ty }); + for (case_idx, v) in per_case_target_input.enumerate() { + let v = v.unwrap_or_else(|| Value::Const(self.const_undef(ty))); - // All deferrals must have been converted into outputs above. - assert!(deferred_edges.target_to_deferred.is_empty()); - assert_eq!(outputs.len(), output_decls.len()); + let case_region = match &self.func_def_body.at(select_node).def().kind { + ControlNodeKind::Select { cases, .. 
} => cases[case_idx], + _ => unreachable!(), + }; + let outputs = &mut self.func_def_body.at_mut(case_region).def().outputs; + assert_eq!(outputs.len(), output_idx); + outputs.push(v); + } + Value::ControlNodeOutput { + control_node: select_node, + output_idx: output_idx.try_into().unwrap(), + } + }) + .collect(); - let case_region = structured_body_holder.unwrap_or_else(|| { - self.func_def_body.control_regions.define(self.cx, ControlRegionDef::default()) + // Simplify `LazyCond`s eagerly, to reduce costs later on, or even + // outright avoid defining the `Select` node in the first place. + // + // FIXME(eddyb) move all simplifications from `materialize_lazy_cond` + // to here (allowing e.g. not defining the `Select` in more cases). + let per_case_conds = + per_case_deferred.iter().map(|per_case_deferred| match per_case_deferred { + Ok(DeferredEdgeBundle { condition, .. }) => condition, + Err(undef_or_false) => undef_or_false, }); - self.func_def_body.at_mut(case_region).def().inputs.clear(); - self.func_def_body.at_mut(case_region).def().outputs = outputs; - case_region - }) - .collect(); + let condition = if per_case_conds + .clone() + .all(|cond| matches!(cond, LazyCond::Undef | LazyCond::True)) + { + LazyCond::True + } else { + LazyCond::Merge(Rc::new(LazyCondMerge::Select { + control_node: get_or_define_select_node(self, &cases), + per_case_conds: per_case_conds.cloned().collect(), + })) + }; - let kind = ControlNodeKind::Select { kind, scrutinee, cases }; - let select_node = self - .func_def_body - .control_nodes - .define(self.cx, ControlNodeDef { kind, outputs: output_decls }.into()); - - // Build `deferred_edges` for the whole `Select`, pointing to - // the outputs of the `select_node` `ControlNode` for all `Value`s. - let mut outputs = (0..) - .map(|output_idx| Value::ControlNodeOutput { control_node: select_node, output_idx }); - let deferreds = deferreds().zip_eq(deferred_per_case_conditions).map( - |(deferred, per_case_conditions)| { - let target_inputs = outputs.by_ref().take(deferred.target_input_count).collect(); - - // Simplify `LazyCond`s eagerly, to reduce costs later on. - let condition = - if per_case_conditions.iter().all(|cond| matches!(cond, LazyCond::True)) { - LazyCond::True - } else { - LazyCond::MergeSelect { - control_node: select_node, - per_case_conds: per_case_conditions, - } - }; + DeferredEdgeBundle { + condition, + edge_bundle: IncomingEdgeBundle { + target, + accumulated_count: total_edge_count, + target_inputs, + }, + } + }); + let deferred_edges = deferred_edges.collect(); - DeferredEdgeBundle { - condition, - edge_bundle: IncomingEdgeBundle { - target: deferred.target, - accumulated_count: deferred.total_edge_count, - target_inputs, - }, + // Only as the very last step, can per-case `region_inputs` be added to + // `control_region_input_rewrites`. + // + // FIXME(eddyb) don't replace `Value::ControlRegionInput { region, .. }` + // with `region_inputs` when the `region` ends up a `ControlNode` child, + // but instead make all `ControlRegion`s entirely hermetic wrt inputs. + #[allow(clippy::manual_flatten)] + for case in cases { + if let Ok(ClaimedRegion { structured_body, structured_body_inputs, .. }) = case { + if !structured_body_inputs.is_empty() { + self.control_region_input_rewrites.insert( + structured_body, + ControlRegionInputRewrites::ReplaceWith(structured_body_inputs), + ); + self.func_def_body.at_mut(structured_body).def().inputs.clear(); } - }, - ); - - // FIXME(eddyb) do not define a region just to keep a `ControlNode` in it. 
-        let wasteful_select_region =
-            self.func_def_body.control_regions.define(self.cx, ControlRegionDef::default());
-        self.func_def_body.control_regions[wasteful_select_region]
-            .children
-            .insert_last(select_node, &mut self.func_def_body.control_nodes);
-        PartialControlRegion {
-            structured_body_holder: Some(wasteful_select_region),
-            deferred_edges: DeferredEdgeBundleSet {
-                target_to_deferred: deferreds
-                    .map(|deferred| deferred.into_target_keyed_kv_pair())
-                    .collect(),
-            },
+            }
         }
+
+        deferred_edges
     }
 
     // FIXME(eddyb) this should try to handle as many `LazyCond` as are available,
     // for incorporating them all at once, ideally with a switch instead
     // of N individual branches with their own booleans etc.
-    fn materialize_lazy_cond(&mut self, cond: LazyCond) -> Value {
+    fn materialize_lazy_cond(&mut self, cond: &LazyCond) -> Value {
         match cond {
+            LazyCond::Undef => Value::Const(self.const_undef(self.type_bool)),
             LazyCond::False => Value::Const(self.const_false),
             LazyCond::True => Value::Const(self.const_true),
-            LazyCond::MergeSelect { control_node, per_case_conds } => {
-                // HACK(eddyb) this should not allocate most of the time, and
-                // avoids complications later below, when mutating the cases.
+
+            // `LazyCond::Merge` was only created in the first place if a merge
+            // was actually necessary, so there shouldn't be simplifications to
+            // do here (i.e. ideally `materialize_lazy_cond` never even gets
+            // called, because the target has become unconditional).
+            //
+            // FIXME(eddyb) there is still an `if cond { true } else { false }`
+            // special-case (replacing with just `cond`), which cannot be expressed
+            // currently in `LazyCond` itself (but maybe it should be).
+            LazyCond::Merge(merge) => {
+                let LazyCondMerge::Select { control_node, ref per_case_conds } = **merge;
+
+                // HACK(eddyb) this won't actually allocate most of the time,
+                // and avoids complications later below, when mutating the cases.
                 let per_case_conds: SmallVec<[_; 8]> = per_case_conds
                     .into_iter()
                     .map(|cond| self.materialize_lazy_cond(cond))
                     .collect();
 
-                // FIXME(eddyb) this should handle an all-`true` `per_case_conds`
-                // (but `structurize_select` currently takes care of those).
-
                 let ControlNodeDef { kind, outputs: output_decls } =
                     &mut *self.func_def_body.control_nodes[control_node];
                 let cases = match kind {
@@ -1442,64 +1945,96 @@ impl<'a> Structurizer<'a> {
         }
     }
 
+    /// Append to `parent_region` the children of `maybe_claimed_region` (if `Ok`),
+    /// returning the `DeferredEdgeBundleSet` from `maybe_claimed_region`.
+    //
+    // FIXME(eddyb) the name isn't great, but e.g. "absorb into" would also be
+    // weird (and on top of that, the append direction can be tricky to express).
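+    //
+    // NOTE: for example, `ControlInstKind::Branch` (above) uses this to append
+    // its sole claimed target region directly onto the current region, leaving
+    // only that region's `deferred_edges` to be resolved by the caller.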
+ fn append_maybe_claimed_region( + &mut self, + parent_region: ControlRegion, + maybe_claimed_region: Result, + ) -> DeferredEdgeBundleSet { + match maybe_claimed_region { + Ok(ClaimedRegion { structured_body, structured_body_inputs, deferred_edges }) => { + if !structured_body_inputs.is_empty() { + self.control_region_input_rewrites.insert( + structured_body, + ControlRegionInputRewrites::ReplaceWith(structured_body_inputs), + ); + } + let new_children = + mem::take(&mut self.func_def_body.at_mut(structured_body).def().children); + self.func_def_body.control_regions[parent_region] + .children + .append(new_children, &mut self.func_def_body.control_nodes); + deferred_edges + } + Err(deferred_edges) => deferred_edges, + } + } + /// When structurization is only partial, and there remain unclaimed regions, /// they have to be reintegrated into the CFG, putting back [`ControlInst`]s - /// where `structurize_region_from` has taken them from. + /// where `structurize_region` has taken them from. /// /// This function handles one region at a time to make it more manageable, /// despite it having a single call site (in a loop in `structurize_func`). - fn repair_unclaimed_region( + fn rebuild_cfg_from_unclaimed_region_deferred_edges( &mut self, - unstructured_region: ControlRegion, - partial_control_region: PartialControlRegion, + region: ControlRegion, + mut deferred_edges: DeferredEdgeBundleSet, ) { assert!( self.structurize_region_state.is_empty(), - "cfg::Structurizer::repair_unclaimed_region: must only be called \ - from `structurize_func`, after it takes `structurize_region_state`" + "cfg::Structurizer::rebuild_cfg_from_unclaimed_region_deferred_edges: + must only be called from `structurize_func`, \ + after it takes `structurize_region_state`" ); - let PartialControlRegion { structured_body_holder, mut deferred_edges } = - partial_control_region; - - // FIXME(eddyb) remove this field in most cases. - assert!(structured_body_holder == Some(unstructured_region)); - // Build a chain of conditional branches to apply deferred edges. - let deferred_return = deferred_edges - .target_to_deferred - .swap_remove(&DeferredTarget::Return) - .map(|deferred| deferred.edge_bundle.target_inputs); - let mut deferred_edge_targets = - deferred_edges.target_to_deferred.into_iter().map(|(deferred_target, deferred)| { - let deferred_target = match deferred_target { - DeferredTarget::Region(target) => target, - DeferredTarget::Return => unreachable!(), - }; - (deferred.condition, (deferred_target, deferred.edge_bundle.target_inputs)) - }); - let mut control_source = Some(unstructured_region); - while let Some((condition, then_target_and_inputs)) = deferred_edge_targets.next() { + let mut control_source = Some(region); + loop { + let taken_then; + (taken_then, deferred_edges) = + deferred_edges.split_out_matching(|deferred| match deferred.edge_bundle.target { + DeferredTarget::Region(target) => { + Ok((deferred.condition, (target, deferred.edge_bundle.target_inputs))) + } + DeferredTarget::Return => Err(deferred), + }); + let Some((condition, then_target_and_inputs)) = taken_then else { + break; + }; let branch_source = control_source.take().unwrap(); - let else_target_and_inputs = if deferred_edge_targets.len() <= 1 - && deferred_return.is_none() - { + let else_target_and_inputs = match deferred_edges { // At most one deferral left, so it can be used as the "else" // case, or the branch left unconditional in its absence. 
-                deferred_edge_targets.next().map(|(_, t)| t)
-            } else {
+                DeferredEdgeBundleSet::Unreachable => None,
+                DeferredEdgeBundleSet::Always {
+                    target: DeferredTarget::Region(else_target),
+                    edge_bundle,
+                } => {
+                    deferred_edges = DeferredEdgeBundleSet::Unreachable;
+                    Some((else_target, edge_bundle.target_inputs))
+                }
+
                 // Either more branches, or a deferred return, are needed, so
                 // the "else" case must be a `ControlRegion` that itself can
                 // have a `ControlInst` attached to it later on.
-                let new_empty_region =
-                    self.func_def_body.control_regions.define(self.cx, ControlRegionDef::default());
-                control_source = Some(new_empty_region);
-                Some((new_empty_region, [].into_iter().collect()))
+                _ => {
+                    let new_empty_region = self
+                        .func_def_body
+                        .control_regions
+                        .define(self.cx, ControlRegionDef::default());
+                    control_source = Some(new_empty_region);
+                    Some((new_empty_region, [].into_iter().collect()))
+                }
             };
             let condition = Some(condition)
                 .filter(|_| else_target_and_inputs.is_some())
-                .map(|cond| self.materialize_lazy_cond(cond));
+                .map(|cond| self.materialize_lazy_cond(&cond));
             let branch_control_inst = ControlInst {
                 attrs: AttrSet::default(),
                 kind: if condition.is_some() {
@@ -1530,6 +2065,14 @@ impl<'a> Structurizer<'a> {
             );
         }
 
+        let deferred_return = match deferred_edges {
+            DeferredEdgeBundleSet::Unreachable => None,
+            DeferredEdgeBundleSet::Always { target: DeferredTarget::Return, edge_bundle } => {
+                Some(edge_bundle.target_inputs)
+            }
+            _ => unreachable!(),
+        };
+
         let final_source = match control_source {
             Some(region) => region,
             None => {
diff --git a/src/cfgssa.rs b/src/cfgssa.rs
new file mode 100644
index 0000000..a14aa5e
--- /dev/null
+++ b/src/cfgssa.rs
@@ -0,0 +1,717 @@
+//! Tools for working with control-flow graphs that contain SSA dataflow
+//! (often abbreviated to `CFG` or similar).
+//!
+//! The defining characteristic of SSA dataflow in a control-flow graph is that
+//! SSA definitions (of values, e.g. the result of an instruction) are "visible"
+//! from all the CFG locations they dominate (i.e. the locations that can only
+//! be reached by passing through the definition first), and can therefore be
+//! directly used arbitrarily far away in the CFG with no annotations required
+//! anywhere in the CFG between the definition and its uses.
+//!
+//! While "def dominates use" is sufficient to ensure the value can traverse
+//! the necessary paths (between def and use) in the CFG, a lot of care must
+//! be taken to preserve the correctness of such implicit dataflow across all
+//! transformations, and it's overall far more fragile than the local dataflow
+//! of e.g. phi nodes (or their alternative "block arguments"), or in SPIR-T's
+//! case, `ControlRegion` inputs and `ControlNode` outputs (inspired by RVSDG,
+//! which has even stricter isolation/locality in its regions).
+
+use crate::{FxIndexMap, FxIndexSet};
+use itertools::Either;
+use rustc_hash::FxHashMap;
+use std::collections::VecDeque;
+use std::hash::Hash;
+
+// HACK(eddyb) to be able to propagate many uses at once while avoiding expensive
+// hierarchical indexing (and because the parent block of a def is significant),
+// each block's defs get chunked, with chunks being the size of `FixedBitSet`
+// (i.e. each bit tracks one def in the chunk, and can be propagated together).
+// FIXME(eddyb) in theory, a sparse bitset could expose some of its sparseness
+// to allow chunked addressing/iteration/etc. (but that requires more API design).
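+//
+// For example (assuming the 64-bit `FixedBitSetUint` used below, so that
+// `CHUNK_SIZE == 64`): `DefIdx(130)` falls in `ChunkIdx(2)` (i.e. `130 / 64`),
+// as bit `130 % 64 == 2` of that chunk, so all of chunk 2's uses reaching a
+// given block can be propagated to a predecessor with a single `u64` OR.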
+const CHUNK_SIZE: usize = data::FixedBitSet::SIZE;
+
+#[derive(Copy, Clone, PartialEq, Eq, Hash, derive_more::From, derive_more::Into)]
+struct BlockIdx(usize);
+#[derive(Copy, Clone, PartialEq, Eq, Hash, derive_more::From, derive_more::Into)]
+struct ChunkIdx(usize);
+#[derive(Copy, Clone, PartialEq, Eq, Hash, derive_more::From, derive_more::Into)]
+struct DefIdx(usize);
+
+impl DefIdx {
+    fn chunk(self) -> ChunkIdx {
+        ChunkIdx(self.0 / CHUNK_SIZE)
+    }
+}
+
+/// All blocks and definitions they contain, which have to be computed first,
+/// and remain immutable, because where a value is defined (or whether it's at
+/// all part of the function itself) can have non-monotonic effects elsewhere.
+pub struct DefMap<BlockId, DefId, DefType> {
+    blocks_by_id: FxIndexMap<BlockId, BlockDef>,
+    // FIXME(eddyb) should this contain `BlockIdx` instead?
+    chunk_to_block_id: data::KeyedVec<ChunkIdx, BlockId>,
+    chunk_defs: data::KeyedVec<ChunkIdx, [Option<(DefId, DefType)>; CHUNK_SIZE]>,
+    def_id_to_def_idx: FxHashMap<DefId, DefIdx>,
+}
+
+struct BlockDef {
+    last_def_idx: DefIdx,
+}
+
+impl<BlockId: Copy + Eq + Hash, DefId: Copy + Eq + Hash, DefType: Copy> Default
+    for DefMap<BlockId, DefId, DefType>
+{
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<BlockId: Copy + Eq + Hash, DefId: Copy + Eq + Hash, DefType: Copy>
+    DefMap<BlockId, DefId, DefType>
+{
+    pub fn new() -> Self {
+        Self {
+            blocks_by_id: Default::default(),
+            chunk_to_block_id: Default::default(),
+            chunk_defs: Default::default(),
+            def_id_to_def_idx: Default::default(),
+        }
+    }
+
+    pub fn add_block(&mut self, block_id: BlockId) {
+        // FIXME(eddyb) disallow accidental re-insertion.
+        self.blocks_by_id.insert(block_id, BlockDef { last_def_idx: DefIdx(!0) });
+    }
+
+    pub fn add_def(&mut self, block_id: BlockId, def_id: DefId, def_type: DefType) {
+        // HACK(eddyb) optimize for repeated definitions in the same block.
+        let block = match self.blocks_by_id.last_mut() {
+            Some((&last_block_id, last_block)) if last_block_id == block_id => last_block,
+            _ => &mut self.blocks_by_id[&block_id],
+        };
+        let def_idx = Some(DefIdx(block.last_def_idx.0.wrapping_add(1)))
+            .filter(|def_idx| def_idx.chunk() == block.last_def_idx.chunk())
+            .unwrap_or_else(|| {
+                let chunk_idx = self.chunk_to_block_id.push(block_id);
+                assert!(chunk_idx == self.chunk_defs.push([None; CHUNK_SIZE]));
+                DefIdx(chunk_idx.0 * CHUNK_SIZE)
+            });
+        block.last_def_idx = def_idx;
+
+        self.chunk_defs[def_idx.chunk()][def_idx.0 % CHUNK_SIZE] = Some((def_id, def_type));
+
+        // FIXME(eddyb) disallow accidental re-insertion.
+        self.def_id_to_def_idx.insert(def_id, def_idx);
+    }
+
+    fn block_id_from_idx(&self, block_idx: BlockIdx) -> BlockId {
+        *self.blocks_by_id.get_index(block_idx.0).unwrap().0
+    }
+}
+
+/// Incremental tracker for definition uses (and CFG edges between blocks),
+/// accumulating the complete set of transitive uses for each block, also known
+/// as the SSA "live set" (corresponding to the starting position of each block).
+pub struct UseAccumulator<'a, BlockId, DefId, DefType> {
+    def_map: &'a DefMap<BlockId, DefId, DefType>,
+
+    blocks: data::KeyedVec<BlockIdx, BlockAcc>,
+
+    // HACK(eddyb) optimize for repeated uses from the same block.
+    most_recent_block_idx: BlockIdx,
+
+    /// Every `block_idx` with non-empty `blocks[block_idx].dirty_chunks`,
+    /// and used for breadth-first propagation through predecessors.
+    //
+    // FIXME(eddyb) some traversal orders might be more effective, but also
+    // the "chunk" granularity might itself be enough to paper over that?
+    propagate_queue: VecDeque<BlockIdx>,
+}
+
+#[derive(Default)]
+struct BlockAcc {
+    // FIXME(eddyb) should this use a bitset? seems likely to be inefficient
+    preds: FxIndexSet<BlockIdx>,
+
+    /// All definitions used in this block (or any other block reachable from it),
+    /// excluding its own definitions, and represented as a sparse bitset.
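+    ///
+    /// For example: if a block `B` uses a value defined in a different block `A`,
+    /// the def's bit is set in `B`'s `uses` (and, after propagation, in those of
+    /// every other block on a path from `A` to `B`), with backwards propagation
+    /// stopping at `A` itself (the defining block).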
+    uses: data::SparseMap<ChunkIdx, data::FixedBitSet<usize>>,
+
+    /// All chunks `c` where `uses[c]` has changed since this block has last
+    /// propagated any of its `uses` to its predecessors.
+    dirty_chunks: data::BitSet<ChunkIdx>,
+}
+
+enum AddUsesSource {
+    New(DefIdx),
+    PropagateBackwardsAcrossEdge { target: BlockIdx, only_dirty: bool },
+}
+
+impl<'a, BlockId: Copy + Eq + Hash, DefId: Copy + Eq + Hash, DefType: Copy>
+    UseAccumulator<'a, BlockId, DefId, DefType>
+{
+    pub fn new(def_map: &'a DefMap<BlockId, DefId, DefType>) -> Self {
+        Self {
+            def_map,
+
+            blocks: data::KeyedVec::from_fn(..BlockIdx(def_map.blocks_by_id.len()), |_| {
+                Default::default()
+            }),
+
+            most_recent_block_idx: BlockIdx(0),
+
+            propagate_queue: VecDeque::new(),
+        }
+    }
+
+    // FIXME(eddyb) how inefficient is `FxIndexMap`?
+    // (vs e.g. a bitset combined with not duplicating `DefType`s per-block?)
+    // FIXME(eddyb) naming might not be enough to clarify the semantics,
+    // might be useful to use the liveness (e.g. "live set") jargon?
+    pub fn into_inter_block_uses(
+        mut self,
+    ) -> impl Iterator<Item = (BlockId, FxIndexMap<DefId, DefType>)> + 'a {
+        self.propagate();
+
+        assert!(self.propagate_queue.is_empty());
+
+        self.blocks.into_iter().map(|(block_idx, block_acc)| {
+            assert!(block_acc.dirty_chunks.is_empty());
+
+            (
+                self.def_map.block_id_from_idx(block_idx),
+                block_acc
+                    .uses
+                    .iter()
+                    .flat_map(|(chunk_idx, chunk_uses)| {
+                        let chunk_defs = &self.def_map.chunk_defs[chunk_idx];
+                        chunk_uses.keys().map(move |i| chunk_defs[i].unwrap())
+                    })
+                    .collect(),
+            )
+        })
+    }
+
+    pub fn add_use(&mut self, block_id: BlockId, used_def_id: DefId) {
+        // FIXME(eddyb) use `let ... else`?
+        let &used_def_idx = match self.def_map.def_id_to_def_idx.get(&used_def_id) {
+            // HACK(eddyb) silently ignoring unrecognized defs.
+            None => return,
+            Some(def_idx) => def_idx,
+        };
+
+        // Intra-block uses are not tracked.
+        if self.def_map.chunk_to_block_id[used_def_idx.chunk()] == block_id {
+            return;
+        }
+
+        let block_idx = Some(self.most_recent_block_idx)
+            .filter(|&block_idx| self.def_map.block_id_from_idx(block_idx) == block_id)
+            .unwrap_or_else(|| {
+                BlockIdx(self.def_map.blocks_by_id.get_index_of(&block_id).unwrap())
+            });
+
+        self.add_uses_to(block_idx, AddUsesSource::New(used_def_idx));
+    }
+
+    pub fn add_edge(&mut self, source_block_id: BlockId, target_block_id: BlockId) {
+        // Self-loops require no tracking (could never introduce more uses).
+        if source_block_id == target_block_id {
+            return;
+        }
+
+        // FIXME(eddyb) is this necessary? (the concern is that dirty-tracking
+        // might get confused, but it shouldn't actually be an issue)
+        self.propagate();
+
+        let [source_block_idx, target_block_idx] = [source_block_id, target_block_id]
+            .map(|block_id| BlockIdx(self.def_map.blocks_by_id.get_index_of(&block_id).unwrap()));
+
+        if self.blocks[target_block_idx].preds.insert(source_block_idx) {
+            self.add_uses_to(source_block_idx, AddUsesSource::PropagateBackwardsAcrossEdge {
+                target: target_block_idx,
+                only_dirty: false,
+            });
+        }
+    }
+
+    fn propagate(&mut self) {
+        while let Some(block_idx) = self.propagate_queue.pop_front() {
+            for i in 0..self.blocks[block_idx].preds.len() {
+                let pred_block_idx = self.blocks[block_idx].preds[i];
+
+                self.add_uses_to(pred_block_idx, AddUsesSource::PropagateBackwardsAcrossEdge {
+                    target: block_idx,
+                    only_dirty: true,
+                });
+            }
+            self.blocks[block_idx].dirty_chunks.clear();
+        }
+    }
+
+    fn add_uses_to(&mut self, block_idx: BlockIdx, uses: AddUsesSource) {
+        // FIXME(eddyb) make this unnecessary for a comparison later, perhaps?
+        let block_id = self.def_map.block_id_from_idx(block_idx);
+
+        let mut new_uses;
+        let (block_acc, chunked_uses) = match uses {
+            AddUsesSource::New(def_idx) => {
+                new_uses = data::FixedBitSet::new();
+                new_uses.insert(def_idx.0 % CHUNK_SIZE, ());
+                (
+                    &mut self.blocks[block_idx],
+                    Either::Left([(def_idx.chunk(), &new_uses)].into_iter()),
+                )
+            }
+            AddUsesSource::PropagateBackwardsAcrossEdge { target, only_dirty } => {
+                let [block_acc, target_block_acc] =
+                    self.blocks.get_mut2([block_idx, target]).unwrap();
+
+                (
+                    block_acc,
+                    Either::Right(if only_dirty {
+                        Either::Left(target_block_acc.dirty_chunks.iter().map(|(chunk_idx, _)| {
+                            (chunk_idx, target_block_acc.uses.get(chunk_idx).unwrap())
+                        }))
+                    } else {
+                        Either::Right(target_block_acc.uses.iter())
+                    }),
+                )
+            }
+        };
+
+        let block_was_dirty = !block_acc.dirty_chunks.is_empty();
+        for (chunk_idx, new_uses) in chunked_uses {
+            // Use tracking terminates in the defining block.
+            if self.def_map.chunk_to_block_id[chunk_idx] == block_id {
+                continue;
+            }
+
+            let uses = block_acc.uses.entry(chunk_idx).or_default();
+
+            let old_and_new_uses = uses.union(new_uses);
+            if *uses != old_and_new_uses {
+                *uses = old_and_new_uses;
+                block_acc.dirty_chunks.entry(chunk_idx).insert(());
+            }
+        }
+        if !block_was_dirty && !block_acc.dirty_chunks.is_empty() {
+            self.propagate_queue.push_back(block_idx);
+        }
+    }
+}
+
+// HACK(eddyb) the hierarchy/breadth/sparsity/etc. of these data structures is
+// somewhat arbitrary, but they should do better than naive non-sparse solutions.
+// FIXME(eddyb) attempt to fine-tune this for realistic workloads.
+// FIXME(eddyb) move this out of here.
+mod data {
+    use smallvec::SmallVec;
+    use std::marker::PhantomData;
+    use std::{iter, mem, ops};
+
+    // FIXME(eddyb) should this be `FlatVec`? also, does it belong here?
+    pub struct KeyedVec<K, T> {
+        vec: Vec<T>,
+        _marker: PhantomData<K>,
+    }
+
+    impl<K: Copy + Into<usize> + From<usize>, T> Default for KeyedVec<K, T> {
+        fn default() -> Self {
+            Self::new()
+        }
+    }
+    impl<K: Copy + Into<usize> + From<usize>, T> KeyedVec<K, T> {
+        pub fn new() -> Self {
+            Self { vec: vec![], _marker: PhantomData }
+        }
+        pub fn from_fn(keys: ops::RangeTo<K>, mut f: impl FnMut(K) -> T) -> Self {
+            KeyedVec {
+                vec: (0..keys.end.into()).map(|i| f(K::from(i))).collect(),
+                _marker: PhantomData,
+            }
+        }
+        pub fn push(&mut self, x: T) -> K {
+            let k = K::from(self.vec.len());
+            self.vec.push(x);
+            k
+        }
+        pub fn into_iter(self) -> impl Iterator<Item = (K, T)> {
+            self.vec.into_iter().enumerate().map(|(i, x)| (K::from(i), x))
+        }
+
+        // FIXME(eddyb) replace this when `get_many_mut` gets stabilized.
+        pub fn get_mut2(&mut self, keys: [K; 2]) -> Option<[&mut T; 2]> {
+            let [k_i, k_j] = keys;
+            let (i, j) = (k_i.into(), k_j.into());
+            if i > j {
+                let [y, x] = self.get_mut2([k_j, k_i])?;
+                return Some([x, y]);
+            }
+            if i == j || j >= self.vec.len() {
+                return None;
+            }
+
+            let (xs, ys) = self.vec.split_at_mut(j);
+            Some([&mut xs[i], &mut ys[0]])
+        }
+    }
+    impl<K: Copy + Into<usize> + From<usize>, T> ops::Index<K> for KeyedVec<K, T> {
+        type Output = T;
+        fn index(&self, k: K) -> &T {
+            &self.vec[k.into()]
+        }
+    }
+    impl<K: Copy + Into<usize> + From<usize>, T> ops::IndexMut<K> for KeyedVec<K, T> {
+        fn index_mut(&mut self, k: K) -> &mut T {
+            &mut self.vec[k.into()]
+        }
+    }
+
+    // HACK(eddyb) abstraction to enable code sharing between maps and sets.
+    pub trait ValueStorage<V> {
+        // HACK(eddyb) most of the need for this arises from avoidance of
+        // `unsafe` code (i.e. `MaybeUninit` could suffice in most cases).
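+        // (e.g. `EfficientValue` below stores `V` directly as `Slot = V`, while
+        // `WrapNonDefaultValueInOption` uses `Slot = Option<V>`, and `IgnoreValue`
+        // collapses `Slot` to `()` for set-like uses such as `BitSet`)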
+        type Slot: Default;
+        fn slot_unwrap(slot: Self::Slot) -> V;
+        fn slot_unwrap_ref(slot: &Self::Slot) -> &V;
+        fn slot_unwrap_mut(slot: &mut Self::Slot) -> &mut V;
+        fn slot_insert(slot: &mut Self::Slot, v: V) -> &mut V;
+
+        // FIXME(eddyb) ideally whether this allocates would be size-based.
+        // FIXME(eddyb) the name and APIs probably don't make it clear this is
+        // for holding some number of `Self::Slot`s specifically.
+        type LazyBox<T>;
+        fn lazy_box_default<T>(default: impl Fn() -> T) -> Self::LazyBox<T>;
+        fn lazy_box_unwrap_ref<T>(lb: &Self::LazyBox<T>) -> &T;
+        fn lazy_box_unwrap_mut_or_alloc<T>(
+            lb: &mut Self::LazyBox<T>,
+            default: impl Fn() -> T,
+        ) -> &mut T;
+    }
+
+    pub enum IgnoreValue {}
+    impl ValueStorage<()> for IgnoreValue {
+        type Slot = ();
+        fn slot_unwrap(_: ()) {}
+        fn slot_unwrap_ref(_: &()) -> &() {
+            &()
+        }
+        fn slot_unwrap_mut(slot: &mut ()) -> &mut () {
+            slot
+        }
+        fn slot_insert(slot: &mut (), _: ()) -> &mut () {
+            slot
+        }
+
+        type LazyBox<T> = T;
+        fn lazy_box_default<T>(default: impl Fn() -> T) -> T {
+            default()
+        }
+        fn lazy_box_unwrap_ref<T>(lb: &T) -> &T {
+            lb
+        }
+        fn lazy_box_unwrap_mut_or_alloc<T>(lb: &mut T, _: impl Fn() -> T) -> &mut T {
+            lb
+        }
+    }
+
+    pub enum EfficientValue {}
+    impl<V: Default> ValueStorage<V> for EfficientValue {
+        type Slot = V;
+        fn slot_unwrap(slot: V) -> V {
+            slot
+        }
+        fn slot_unwrap_ref(slot: &V) -> &V {
+            slot
+        }
+        fn slot_unwrap_mut(slot: &mut V) -> &mut V {
+            slot
+        }
+        fn slot_insert(slot: &mut V, v: V) -> &mut V {
+            *slot = v;
+            slot
+        }
+
+        // FIXME(eddyb) this is far from "efficient", maybe this part belongs
+        // in another `trait`, or some better automation could be found?
+        type LazyBox<T> = Option<Box<T>>;
+        fn lazy_box_default<T>(_: impl Fn() -> T) -> Option<Box<T>> {
+            None
+        }
+        fn lazy_box_unwrap_ref<T>(lb: &Option<Box<T>>) -> &T {
+            lb.as_deref().unwrap()
+        }
+        fn lazy_box_unwrap_mut_or_alloc<T>(
+            lb: &mut Option<Box<T>>,
+            default: impl Fn() -> T,
+        ) -> &mut T {
+            lb.get_or_insert_with(|| Box::new(default()))
+        }
+    }
+
+    // HACK(eddyb) most of the need for this arises from avoidance of
+    // `unsafe` code (i.e. `MaybeUninit` could suffice in most cases).
+    pub enum WrapNonDefaultValueInOption {}
+    impl<V> ValueStorage<V> for WrapNonDefaultValueInOption {
+        type Slot = Option<V>;
+        fn slot_unwrap(slot: Option<V>) -> V {
+            slot.unwrap()
+        }
+        fn slot_unwrap_ref(slot: &Option<V>) -> &V {
+            slot.as_ref().unwrap()
+        }
+        fn slot_unwrap_mut(slot: &mut Option<V>) -> &mut V {
+            slot.as_mut().unwrap()
+        }
+        fn slot_insert(slot: &mut Option<V>, v: V) -> &mut V {
+            slot.insert(v)
+        }
+
+        type LazyBox<T> = Option<Box<T>>;
+        fn lazy_box_default<T>(_: impl Fn() -> T) -> Option<Box<T>> {
+            None
+        }
+        fn lazy_box_unwrap_ref<T>(lb: &Option<Box<T>>) -> &T {
+            lb.as_deref().unwrap()
+        }
+        fn lazy_box_unwrap_mut_or_alloc<T>(
+            lb: &mut Option<Box<T>>,
+            default: impl Fn() -> T,
+        ) -> &mut T {
+            lb.get_or_insert_with(|| Box::new(default()))
+        }
+    }
+
+    // FIXME(eddyb) maybe make this parameterizable?
+    type FixedBitSetUint = u64;
+
+    pub type FixedBitSet<K = usize> = FixedFlatMap<K, (), IgnoreValue>;
+    const _: () =
+        assert!(mem::size_of::<FixedBitSet<usize>>() == mem::size_of::<FixedBitSetUint>());
+
+    pub struct FixedFlatMap<K, V, VS: ValueStorage<V> = EfficientValue> {
+        occupied: FixedBitSetUint,
+        slots: VS::LazyBox<[VS::Slot; FixedBitSet::SIZE]>,
+        _marker: PhantomData<K>,
+    }
+
+    impl FixedBitSet {
+        pub const SIZE: usize = {
+            let bit_width = mem::size_of::<FixedBitSetUint>() * 8;
+            assert!(FixedBitSetUint::count_ones(!0) == bit_width as u32);
+            bit_width
+        };
+    }
+    impl<K> PartialEq for FixedBitSet<K> {
+        fn eq(&self, other: &Self) -> bool {
+            self.occupied == other.occupied
+        }
+    }
+    impl<K: Copy + Into<usize> + From<usize>> FixedBitSet<K> {
+        pub fn union(&self, other: &Self) -> Self {
+            Self { occupied: self.occupied | other.occupied, ..Self::default() }
+        }
+    }
+
+    impl<K: Copy + Into<usize> + From<usize>, V, VS: ValueStorage<V>> Default
+        for FixedFlatMap<K, V, VS>
+    {
+        fn default() -> Self {
+            Self::new()
+        }
+    }
+    impl<K: Copy + Into<usize> + From<usize>, V, VS: ValueStorage<V>> FixedFlatMap<K, V, VS> {
+        pub fn new() -> Self {
+            Self {
+                occupied: 0,
+                slots: VS::lazy_box_default(|| std::array::from_fn(|_| Default::default())),
+                _marker: PhantomData,
+            }
+        }
+        pub fn contains(&self, k: K) -> bool {
+            u32::try_from(k.into()).ok().and_then(|k| Some(self.occupied.checked_shr(k)? & 1))
+                == Some(1)
+        }
+        pub fn get(&self, k: K) -> Option<&V> {
+            self.contains(k)
+                .then(|| VS::slot_unwrap_ref(&VS::lazy_box_unwrap_ref(&self.slots)[k.into()]))
+        }
+        pub fn entry(&mut self, k: K) -> FixedFlatMapEntry<'_, V, VS> {
+            let k = k.into();
+            let key_mask = FixedBitSetUint::checked_shl(1, u32::try_from(k).unwrap()).unwrap();
+            FixedFlatMapEntry {
+                key_mask,
+                occupied: &mut self.occupied,
+                slot: &mut VS::lazy_box_unwrap_mut_or_alloc(&mut self.slots, || {
+                    std::array::from_fn(|_| Default::default())
+                })[k],
+            }
+        }
+
+        pub fn insert(&mut self, k: K, v: V) {
+            self.entry(k).insert(v);
+        }
+        fn remove(&mut self, k: K) -> Option<V> {
+            self.contains(k).then(|| self.entry(k).remove().unwrap())
+        }
+
+        pub fn keys(&self) -> impl Iterator<Item = K> {
+            let mut i = 0;
+            let mut remaining = self.occupied;
+            iter::from_fn(move || {
+                (remaining != 0).then(|| {
+                    let gap = remaining.trailing_zeros() as usize;
+                    i += gap;
+                    remaining >>= gap;
+
+                    let k = K::from(i);
+
+                    // Skip the lowest bit (which should always be `1` here).
+                    i += 1;
+                    remaining >>= 1;
+
+                    k
+                })
+            })
+        }
+        pub fn iter(&self) -> impl Iterator<Item = (K, &V)> + '_ {
+            self.keys().map(|k| (k, self.get(k).unwrap()))
+        }
+        pub fn drain(&mut self) -> impl Iterator<Item = (K, V)> + '_ {
+            self.keys().map(|k| (k, self.remove(k).unwrap()))
+        }
+
+        // FIXME(eddyb) does this fully replace `drain`?
+        pub fn clear(&mut self) {
+            // FIXME(eddyb) theoretically this could be more efficient, but
+            // it doesn't seem worth it wrt `VS` abstraction complexity.
+            for _ in self.drain() {}
+        }
+    }
+
+    pub struct FixedFlatMapEntry<'a, V, VS: ValueStorage<V> = EfficientValue> {
+        key_mask: FixedBitSetUint,
+        occupied: &'a mut FixedBitSetUint,
+        // FIXME(eddyb) in theory, this forces the `Box` to be allocated even
+        // when it might not be needed, so it optimizes for e.g. insertion.
+        slot: &'a mut VS::Slot,
+    }
+
+    impl<'a, V, VS: ValueStorage<V>> FixedFlatMapEntry<'a, V, VS> {
+        pub fn occupied(&self) -> bool {
+            (*self.occupied & self.key_mask) != 0
+        }
+        fn into_mut(self) -> Option<&'a mut V> {
+            self.occupied().then(|| VS::slot_unwrap_mut(self.slot))
+        }
+        pub fn or_insert_with(self, f: impl FnOnce() -> V) -> &'a mut V {
+            if self.occupied() { self.into_mut().unwrap() } else { self.insert(f()) }
+        }
+        pub fn insert(self, v: V) -> &'a mut V {
+            *self.occupied |= self.key_mask;
+            VS::slot_insert(self.slot, v)
+        }
+        pub fn remove(&mut self) -> Option<V> {
+            self.occupied().then(|| {
+                *self.occupied &= !self.key_mask;
+                VS::slot_unwrap(mem::take(self.slot))
+            })
+        }
+    }
+
+    // FIXME(eddyb) not a sparse bitset because of how `SparseMap` is only sparse
+    // wrt not allocating space to store values until needed, but `BitSet` has no
+    // real values (and uses `IgnoreValue` to completely remove that allocation).
+    pub type BitSet<K> = SparseMap<K, (), IgnoreValue>;
+
+    pub struct SparseMap<K, V, VS: ValueStorage<V> = EfficientValue> {
+        // NOTE(eddyb) this is really efficient when the keys don't exceed
+        // `FixedBitSet::SIZE`, and this can be further amplified by using
+        // e.g. `SparseMap<_, FixedFlatMap<_, V>>`, which adds another layer
+        // of sparseness (more concretely, if `FixedBitSet::SIZE == 64`, then
+        // that combination can effectively handle up to 64*64 = 4096 entries
+        // without causing the outermost `SmallFlatMap` to allocate a `Vec`).
+        outer_map: SmallVec<[FixedFlatMap<usize, V, VS>; 1]>,
+        count: usize,
+        _marker: PhantomData<K>,
+    }
+
+    impl<K: Copy + Into<usize> + From<usize>, V, VS: ValueStorage<V>> Default
+        for SparseMap<K, V, VS>
+    {
+        fn default() -> Self {
+            Self::new()
+        }
+    }
+    impl<K: Copy + Into<usize> + From<usize>, V, VS: ValueStorage<V>> SparseMap<K, V, VS> {
+        pub fn new() -> Self {
+            Self { outer_map: SmallVec::new(), count: 0, _marker: PhantomData }
+        }
+        pub fn is_empty(&self) -> bool {
+            self.count == 0
+        }
+        pub fn get(&self, k: K) -> Option<&V> {
+            let k = k.into();
+            let (outer_key, inner_key) = (k / FixedBitSet::SIZE, k % FixedBitSet::SIZE);
+            self.outer_map.get(outer_key)?.get(inner_key)
+        }
+        pub fn entry(&mut self, k: K) -> SparseMapEntry<'_, V, VS> {
+            let k = k.into();
+            let (outer_key, inner_key) = (k / FixedBitSet::SIZE, k % FixedBitSet::SIZE);
+            let needed_outer_len = outer_key + 1;
+            if self.outer_map.len() < needed_outer_len {
+                self.outer_map.resize_with(needed_outer_len, Default::default);
+            }
+            SparseMapEntry {
+                inner: self.outer_map[outer_key].entry(inner_key),
+                count: &mut self.count,
+            }
+        }
+
+        pub fn iter(&self) -> impl Iterator<Item = (K, &V)> + '_ {
+            (!self.is_empty())
+                .then(|| {
+                    self.outer_map.iter().enumerate().flat_map(|(outer_key, inner_map)| {
+                        inner_map.iter().map(move |(inner_key, v)| {
+                            (K::from(outer_key * FixedBitSet::SIZE + inner_key), v)
+                        })
+                    })
+                })
+                .into_iter()
+                .flatten()
+        }
+
+        pub fn clear(&mut self) {
+            for inner_map in &mut self.outer_map {
+                inner_map.clear();
+            }
+            self.count = 0;
+        }
+    }
+
+    pub struct SparseMapEntry<'a, V, VS: ValueStorage<V> = EfficientValue> {
+        inner: FixedFlatMapEntry<'a, V, VS>,
+        count: &'a mut usize,
+    }
+
+    impl<'a, V, VS: ValueStorage<V>> SparseMapEntry<'a, V, VS> {
+        pub fn or_insert_with(self, f: impl FnOnce() -> V) -> &'a mut V {
+            self.inner.or_insert_with(|| {
+                *self.count += 1;
+                f()
+            })
+        }
+        #[allow(clippy::unwrap_or_default)]
+        pub fn or_default(self) -> &'a mut V
+        where
+            V: Default,
+        {
+            self.or_insert_with(V::default)
+        }
+        pub fn insert(self, v: V) {
+            if !self.inner.occupied() {
+                *self.count += 1;
+            }
+            self.inner.insert(v);
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 9ccb043..88a94a0 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -153,6 +153,7 @@
 // NOTE(eddyb) all the modules are declared here, but they're documented "inside"
 // (i.e. using inner doc comments).
 pub mod cfg;
+pub mod cfgssa;
 mod context;
 pub mod func_at;
 pub mod print;
@@ -705,13 +706,12 @@ pub struct FuncDefBody {
 /// * dominance is simpler, so values defined in a [`ControlRegion`](crate::ControlRegion) can be used:
 ///   * later in that region, including in the region's `outputs`
 ///     (which allows "exporting" values out to the rest of the function)
-///   * outside that region, but *only* if the parent [`ControlNode`](crate::ControlNode) only has
-///     exactly one child region (i.e. a single-case `Select`, or a `Loop`)
+///   * outside that region, but *only* if the parent [`ControlNode`](crate::ControlNode)
+///     is a `Loop` (that is, when the region is a loop's body)
 ///     * this is an "emergent" property, stemming from the region having to
-///       execute (at least once) before the parent [`ControlNode`](crate::ControlNode) can complete,
-///       but is not is not ideal (especially for reasoning about loops) and
-///       should eventually be replaced with passing all such values through
-///       the region `outputs` (or by inlining the region, in the `Select` case)
+///       execute (at least once) before the parent [`ControlNode`](crate::ControlNode)
+///       can complete, but is not ideal and should eventually be replaced
+///       with passing all such values through loop (body) `outputs`
 /// * instead of φ ("phi") nodes, SPIR-T uses region `outputs` to merge values
 ///   coming from separate control-flow paths (i.e. the cases of a `Select`),
 ///   and region `inputs` for passing values back along loop backedges
@@ -722,7 +722,13 @@ pub struct FuncDefBody {
 ///     instead of in the merge (where phi nodes require special-casing, as
 ///     their "uses" of all the "source" values would normally be illegal)
 ///   * in unstructured control-flow, region `inputs` are additionally used for
-///     phi nodes, as [`cfg::ControlInst`](crate::cfg::ControlInst)s passing values to their target regions
+///     representing phi nodes, as [`cfg::ControlInst`](crate::cfg::ControlInst)s
+///     passing values to their target regions
+///     * all value uses across unstructured control-flow edges (i.e. not in the
+///       same region containing the value definition) *require* explicit passing,
+///       as unstructured control-flow [`ControlRegion`](crate::ControlRegion)s
+///       do *not* themselves get *any* implied dominance relations from the
+///       shape of the control-flow graph (unlike most typical CFG+SSA IRs)
 pub use context::ControlRegion;
 
 /// Definition for a [`ControlRegion`]: a control-flow region.
@@ -823,6 +829,19 @@ pub enum ControlNodeKind {
         // have any ambiguity as to whether it can see `body`-computed values)
         repeat_condition: Value,
     },
+
+    /// Leave the current invocation, similar to returning from every function
+    /// call in the stack (up to and including the entry-point), but potentially
+    /// indicating a fatal error as well.
+    //
+    // FIXME(eddyb) make this less shader-controlflow-centric.
+    ExitInvocation {
+        kind: cfg::ExitInvocationKind,
+
+        // FIXME(eddyb) centralize `Value` inputs across `ControlNode`s,
+        // and only use stricter types for building/traversing the IR.
+        inputs: SmallVec<[Value; 2]>,
+    },
 }
 
 #[derive(Clone)]
@@ -834,10 +853,13 @@ pub enum SelectionKind {
     SpvInst(spv::Inst),
 }
 
-/// Entity handle for a [`DataInstDef`](crate::DataInstDef) (an SSA instruction).
+/// Entity handle for a [`DataInstDef`](crate::DataInstDef) (a leaf instruction).
 pub use context::DataInst;

-/// Definition for a [`DataInst`]: an SSA instruction.
+/// Definition for a [`DataInst`]: a leaf (non-control-flow) instruction.
+//
+// FIXME(eddyb) `DataInstKind::FuncCall` should probably be a `ControlNodeKind`,
+// but also `DataInst` vs `ControlNode` is a purely artificial distinction.
 #[derive(Clone)]
 pub struct DataInstDef {
     pub attrs: AttrSet,
@@ -884,7 +906,7 @@ pub enum DataInstKind {
     },
 }

-#[derive(Copy, Clone, PartialEq, Eq)]
+#[derive(Copy, Clone, PartialEq, Eq, Hash)]
 pub enum Value {
     Const(Const),
diff --git a/src/passes/legalize.rs b/src/passes/legalize.rs
index 6bdc6d6..57e9690 100644
--- a/src/passes/legalize.rs
+++ b/src/passes/legalize.rs
@@ -1,6 +1,6 @@
 use crate::visit::{InnerVisit, Visitor};
 use crate::{
-    cfg, AttrSet, Const, Context, DataInstForm, DeclDef, Func, FxIndexSet, GlobalVar, Module, Type,
+    AttrSet, Const, Context, DataInstForm, DeclDef, Func, FxIndexSet, GlobalVar, Module, Type, cfg,
 };

 /// Apply the [`cfg::Structurizer`] algorithm to all function definitions in `module`.
diff --git a/src/passes/qptr.rs b/src/passes/qptr.rs
index c4f49a4..a0fb9b0 100644
--- a/src/passes/qptr.rs
+++ b/src/passes/qptr.rs
@@ -1,8 +1,8 @@
 //! [`QPtr`](crate::TypeKind::QPtr) transforms.

 use crate::visit::{InnerVisit, Visitor};
-use crate::{qptr, DataInstForm};
 use crate::{AttrSet, Const, Context, Func, FxIndexSet, GlobalVar, Module, Type};
+use crate::{DataInstForm, qptr};

 pub fn lower_from_spv_ptrs(module: &mut Module, layout_config: &qptr::LayoutConfig) {
     let cx = &module.cx();
diff --git a/src/print/mod.rs b/src/print/mod.rs
index c37eca0..185d23b 100644
--- a/src/print/mod.rs
+++ b/src/print/mod.rs
@@ -24,13 +24,13 @@
 use crate::print::multiversion::Versions;
 use crate::qptr::{self, QPtrAttr, QPtrMemUsage, QPtrMemUsageKind, QPtrOp, QPtrUsage};
 use crate::visit::{InnerVisit, Visit, Visitor};
 use crate::{
-    cfg, spv, AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, Context,
-    ControlNode, ControlNodeDef, ControlNodeKind, ControlNodeOutputDecl, ControlRegion,
-    ControlRegionDef, ControlRegionInputDecl, DataInst, DataInstDef, DataInstForm, DataInstFormDef,
-    DataInstKind, DeclDef, Diag, DiagLevel, DiagMsgPart, EntityListIter, ExportKey, Exportee, Func,
-    FuncDecl, FuncParam, FxIndexMap, FxIndexSet, GlobalVar, GlobalVarDecl, GlobalVarDefBody,
-    Import, Module, ModuleDebugInfo, ModuleDialect, OrdAssertEq, SelectionKind, Type, TypeDef,
-    TypeKind, TypeOrConst, Value,
+    AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, Context, ControlNode,
+    ControlNodeDef, ControlNodeKind, ControlNodeOutputDecl, ControlRegion, ControlRegionDef,
+    ControlRegionInputDecl, DataInst, DataInstDef, DataInstForm, DataInstFormDef, DataInstKind,
+    DeclDef, Diag, DiagLevel, DiagMsgPart, EntityListIter, ExportKey, Exportee, Func, FuncDecl,
+    FuncParam, FxIndexMap, FxIndexSet, GlobalVar, GlobalVarDecl, GlobalVarDefBody, Import, Module,
+    ModuleDebugInfo, ModuleDialect, OrdAssertEq, SelectionKind, Type, TypeDef, TypeKind,
+    TypeOrConst, Value, cfg, spv,
 };
 use arrayvec::ArrayVec;
 use itertools::Either;
@@ -1839,10 +1839,9 @@ impl Print for spv::Dialect {
                     printer.pretty_spv_print_tokens_for_operand({
                         let mut tokens = spv::print::operand_from_imms(cap_imms(cap));
                         tokens.tokens.drain(..tokens.tokens.len() - 1);
-                        assert!(matches!(
-                            tokens.tokens[..],
-                            [spv::print::Token::EnumerandName(_)]
-                        ));
+                        assert!(matches!(tokens.tokens[..], [
+                            spv::print::Token::EnumerandName(_)
+                        ]));
                         tokens
                     })
                 });
@@ -1971,12 +1970,8 @@ impl Print for spv::ModuleDebugInfo {
                                     printer.pretty_string_literal(
                                         &printer.cx[file],
                                     ),
-                                    pretty::join_space(
-                                        ":",
-                                        [printer.pretty_string_literal(
-                                            contents,
-                                        )],
-                                    ),
+                                    pretty::join_space(":", [printer
+                                        .pretty_string_literal(contents)]),
                                 ])
                             })
                             .map(|entry| {
@@ -2999,13 +2994,10 @@ impl Print for FuncAt<'_, ControlNode> {
                 let (inputs_header, body_suffix) = if !inputs.is_empty() {
                     let input_decls_and_uses =
                         inputs.iter().enumerate().map(|(input_idx, input)| {
-                            (
-                                input,
-                                Value::ControlRegionInput {
-                                    region: *body,
-                                    input_idx: input_idx.try_into().unwrap(),
-                                },
-                            )
+                            (input, Value::ControlRegionInput {
+                                region: *body,
+                                input_idx: input_idx.try_into().unwrap(),
+                            })
                         });
                     (
                         pretty::join_comma_sep(
@@ -3053,6 +3045,15 @@ impl Print for FuncAt<'_, ControlNode> {
                     repeat_condition.print(printer),
                 ])
             }
+            ControlNodeKind::ExitInvocation {
+                kind: cfg::ExitInvocationKind::SpvInst(spv::Inst { opcode, imms }),
+                inputs,
+            } => printer.pretty_spv_inst(
+                kw_style,
+                *opcode,
+                imms,
+                inputs.iter().map(|v| v.print(printer)),
+            ),
         };
         pretty::Fragment::new([
             Use::AlignmentAnchorForControlNode(self.position).print_as_def(printer),
diff --git a/src/print/multiversion.rs b/src/print/multiversion.rs
index d23f5ac..48e45ea 100644
--- a/src/print/multiversion.rs
+++ b/src/print/multiversion.rs
@@ -1,7 +1,7 @@
 //! Multi-version pretty-printing support (e.g. for comparing the IR between passes).

-use crate::print::pretty::{self, TextOp};
 use crate::FxIndexMap;
+use crate::print::pretty::{self, TextOp};
 use internal_iterator::{
     FromInternalIterator, InternalIterator, IntoInternalIterator, IteratorExt,
 };
diff --git a/src/qptr/analyze.rs b/src/qptr/analyze.rs
index daaf139..57134f7 100644
--- a/src/qptr/analyze.rs
+++ b/src/qptr/analyze.rs
@@ -1,9 +1,9 @@
 //! [`QPtr`](crate::TypeKind::QPtr) usage analysis (for legalizing/lifting).

 // HACK(eddyb) sharing layout code with other modules.
-use super::{layout::*, QPtrMemUsageKind};
+use super::{QPtrMemUsageKind, layout::*};

-use super::{shapes, QPtrAttr, QPtrMemUsage, QPtrOp, QPtrUsage};
+use super::{QPtrAttr, QPtrMemUsage, QPtrOp, QPtrUsage, shapes};
 use crate::func_at::FuncAt;
 use crate::visit::{InnerVisit, Visitor};
 use crate::{
diff --git a/src/qptr/layout.rs b/src/qptr/layout.rs
index 00def11..c5e081b 100644
--- a/src/qptr/layout.rs
+++ b/src/qptr/layout.rs
@@ -2,7 +2,7 @@

 use crate::qptr::shapes;
 use crate::{
-    spv, AddrSpace, Attr, Const, ConstKind, Context, Diag, FxIndexMap, Type, TypeKind, TypeOrConst,
+    AddrSpace, Attr, Const, ConstKind, Context, Diag, FxIndexMap, Type, TypeKind, TypeOrConst, spv,
 };
 use itertools::Either;
 use smallvec::SmallVec;
diff --git a/src/qptr/lift.rs b/src/qptr/lift.rs
index f962ded..1aca985 100644
--- a/src/qptr/lift.rs
+++ b/src/qptr/lift.rs
@@ -4,13 +4,13 @@

 use super::layout::*;
 use crate::func_at::FuncAtMut;
-use crate::qptr::{shapes, QPtrAttr, QPtrMemUsage, QPtrMemUsageKind, QPtrOp, QPtrUsage};
+use crate::qptr::{QPtrAttr, QPtrMemUsage, QPtrMemUsageKind, QPtrOp, QPtrUsage, shapes};
 use crate::transform::{InnerInPlaceTransform, InnerTransform, Transformed, Transformer};
 use crate::{
-    spv, AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, Context, ControlNode,
+    AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, Context, ControlNode,
     ControlNodeKind, DataInst, DataInstDef, DataInstFormDef, DataInstKind, DeclDef, Diag,
     DiagLevel, EntityDefs, EntityOrientedDenseMap, Func, FuncDecl, FxIndexMap, GlobalVar,
-    GlobalVarDecl, Module, Type, TypeDef, TypeKind, TypeOrConst, Value,
+    GlobalVarDecl, Module, Type, TypeDef, TypeKind, TypeOrConst, Value, spv,
 };
 use smallvec::SmallVec;
 use std::cell::Cell;
@@ -520,15 +520,12 @@ impl LiftToSpvPtrInstsInFunc<'_> {
             _ => return Err(LiftError(Diag::bug(["non-Buffer pointee".into()]))),
         };

-        self.deferred_ptr_noops.insert(
-            data_inst,
-            DeferredPtrNoop {
-                output_pointer: buf_ptr,
-                output_pointer_addr_space: addr_space,
-                output_pointee_layout: buf_data_layout,
-                parent_block,
-            },
-        );
+        self.deferred_ptr_noops.insert(data_inst, DeferredPtrNoop {
+            output_pointer: buf_ptr,
+            output_pointer_addr_space: addr_space,
+            output_pointee_layout: buf_data_layout,
+            parent_block,
+        });

         DataInstDef {
             // FIXME(eddyb) avoid the repeated call to `type_of_val`
@@ -557,10 +554,10 @@ impl LiftToSpvPtrInstsInFunc<'_> {
                             last_field.mem_layout.fixed_base.size == 0
                                 && last_field.mem_layout.dyn_unit_stride
                                     == Some(dyn_unit_stride)
-                                && matches!(
-                                    last_field.components,
-                                    Components::Elements { fixed_len: None, .. }
-                                )
+                                && matches!(last_field.components, Components::Elements {
+                                    fixed_len: None,
+                                    ..
+                                })
                         }) =>
                     {
                         u32::try_from(offsets.len() - 1).unwrap()
@@ -660,15 +657,12 @@ impl LiftToSpvPtrInstsInFunc<'_> {
             }

             if access_chain_inputs.len() == 1 {
-                self.deferred_ptr_noops.insert(
-                    data_inst,
-                    DeferredPtrNoop {
-                        output_pointer: base_ptr,
-                        output_pointer_addr_space: addr_space,
-                        output_pointee_layout: TypeLayout::Concrete(layout),
-                        parent_block,
-                    },
-                );
+                self.deferred_ptr_noops.insert(data_inst, DeferredPtrNoop {
+                    output_pointer: base_ptr,
+                    output_pointer_addr_space: addr_space,
+                    output_pointee_layout: TypeLayout::Concrete(layout),
+                    parent_block,
+                });

                 DataInstDef {
                     // FIXME(eddyb) avoid the repeated call to `type_of_val`
                     // (and the interning of a temporary `DataInstFormDef`),
diff --git a/src/qptr/lower.rs b/src/qptr/lower.rs
index 512b685..5f880da 100644
--- a/src/qptr/lower.rs
+++ b/src/qptr/lower.rs
@@ -4,12 +4,12 @@

 use super::layout::*;
 use crate::func_at::FuncAtMut;
-use crate::qptr::{shapes, QPtrAttr, QPtrOp};
+use crate::qptr::{QPtrAttr, QPtrOp, shapes};
 use crate::transform::{InnerInPlaceTransform, Transformed, Transformer};
 use crate::{
-    spv, AddrSpace, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, Context, ControlNode,
+    AddrSpace, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, Context, ControlNode,
     ControlNodeKind, DataInst, DataInstDef, DataInstForm, DataInstFormDef, DataInstKind, Diag,
-    FuncDecl, GlobalVarDecl, OrdAssertEq, Type, TypeKind, TypeOrConst, Value,
+    FuncDecl, GlobalVarDecl, OrdAssertEq, Type, TypeKind, TypeOrConst, Value, spv,
 };
 use smallvec::SmallVec;
 use std::cell::Cell;
diff --git a/src/spv/lift.rs b/src/spv/lift.rs
index 690dd44..7fcd1f5 100644
--- a/src/spv/lift.rs
+++ b/src/spv/lift.rs
@@ -4,12 +4,12 @@
 use crate::func_at::FuncAt;
 use crate::spv::{self, spec};
 use crate::visit::{InnerVisit, Visitor};
 use crate::{
-    cfg, AddrSpace, Attr, AttrSet, Const, ConstDef, ConstKind, Context, ControlNode,
-    ControlNodeKind, ControlNodeOutputDecl, ControlRegion, ControlRegionInputDecl, DataInst,
-    DataInstDef, DataInstForm, DataInstFormDef, DataInstKind, DeclDef, EntityList, ExportKey,
-    Exportee, Func, FuncDecl, FuncParam, FxIndexMap, FxIndexSet, GlobalVar, GlobalVarDefBody,
-    Import, Module, ModuleDebugInfo, ModuleDialect, SelectionKind, Type, TypeDef, TypeKind,
-    TypeOrConst, Value,
+    AddrSpace, Attr, AttrSet, Const, ConstDef, ConstKind, Context, ControlNode, ControlNodeKind,
+    ControlNodeOutputDecl, ControlRegion, ControlRegionInputDecl, DataInst, DataInstDef,
+    DataInstForm, DataInstFormDef, DataInstKind, DeclDef, EntityList, ExportKey, Exportee, Func,
+    FuncDecl, FuncParam, FxIndexMap, FxIndexSet, GlobalVar, GlobalVarDefBody, Import, Module,
+    ModuleDebugInfo, ModuleDialect, SelectionKind, Type, TypeDef, TypeKind, TypeOrConst, Value,
+    cfg,
 };
 use rustc_hash::FxHashMap;
 use smallvec::SmallVec;
@@ -430,7 +430,9 @@ impl<'a, 'p> FuncAt<'a, CfgCursor<'p>> {
                     parent: cursor.parent,
                 }),

-                ControlNodeKind::Select { .. } | ControlNodeKind::Loop { .. } => None,
+                ControlNodeKind::Select { .. }
+                | ControlNodeKind::Loop { .. }
+                | ControlNodeKind::ExitInvocation { .. } => None,
            },

             // Exiting a `ControlNode` chains to a sibling/parent.
@@ -466,7 +468,7 @@ impl<'a> FuncAt<'a, ControlRegion> {
     /// and being able to stop iteration by returning `Err`.
     ///
     /// RPO iteration over a CFG provides certain guarantees, most importantly
-    /// that SSA definitions are visited before any of their uses.
+    /// that dominators are visited before the entire subgraph they dominate.
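The reworded guarantee above matters because, once inter-block value uses are passed explicitly (the `cfgssa` machinery later in this patch), "defs before uses" is no longer the right framing; what RPO really provides is that a dominator is visited before everything it dominates. A minimal, dependency-free sketch of reverse post-order showing that property on a diamond CFG:

```rust
// A node is only appended to the post-order after every successor reachable
// from it (without revisiting) has been finished; reversing that order puts
// each dominator ahead of the whole subgraph it dominates.
fn rev_post_order(succs: &[Vec<usize>], entry: usize) -> Vec<usize> {
    fn dfs(n: usize, succs: &[Vec<usize>], seen: &mut Vec<bool>, post: &mut Vec<usize>) {
        if seen[n] {
            return;
        }
        seen[n] = true;
        for &s in &succs[n] {
            dfs(s, succs, seen, post);
        }
        post.push(n); // post-order: after all successors are done
    }
    let mut seen = vec![false; succs.len()];
    let mut post = Vec::new();
    dfs(entry, succs, &mut seen, &mut post);
    post.reverse(); // reverse post-order
    post
}

fn main() {
    // Diamond CFG: 0 -> {1, 2} -> 3; node 0 dominates all three others.
    let succs = vec![vec![1, 2], vec![3], vec![3], vec![]];
    let rpo = rev_post_order(&succs, 0);
    assert_eq!(rpo[0], 0); // the entry (everyone's dominator) comes first
    assert_eq!(*rpo.last().unwrap(), 3); // the merge comes after both branches
    println!("RPO: {rpo:?}");
}
```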
     fn rev_post_order_try_for_each<E>(
         self,
         mut f: impl FnMut(CfgCursor<'_>) -> Result<(), E>,
@@ -482,10 +484,10 @@
         let region = self.position;
         f(CfgCursor { point: CfgPoint::RegionEntry(region), parent })?;
         for func_at_control_node in self.at_children() {
-            func_at_control_node.rev_post_order_try_for_each_inner(
-                f,
-                &CfgCursor { point: ControlParent::Region(region), parent },
-            )?;
+            func_at_control_node.rev_post_order_try_for_each_inner(f, &CfgCursor {
+                point: ControlParent::Region(region),
+                parent,
+            })?;
         }
         f(CfgCursor { point: CfgPoint::RegionExit(region), parent })
     }
@@ -498,7 +500,7 @@ impl<'a> FuncAt<'a, ControlNode> {
     fn rev_post_order_try_for_each_inner<E>(
         parent: &CfgCursor<'_, ControlParent>,
     ) -> Result<(), E> {
         let child_regions: &[_] = match &self.def().kind {
-            ControlNodeKind::Block { .. } => &[],
+            ControlNodeKind::Block { .. } | ControlNodeKind::ExitInvocation { .. } => &[],
             ControlNodeKind::Select { cases, .. } => cases,
             ControlNodeKind::Loop { body, .. } => slice::from_ref(body),
         };
@@ -679,7 +681,7 @@ impl<'a> FuncLifting<'a> {
                 }
             }

-            // Entering a `ControlNode` with child `ControlRegion`s.
+            // Entering a `ControlNode` with child `ControlRegion`s (or diverging).
             (CfgPoint::ControlNodeEntry(control_node), None) => {
                 let control_node_def = func_def_body.at(control_node).def();
                 match &control_node_def.kind {
@@ -719,6 +721,15 @@ impl<'a> FuncLifting<'a> {
                             }),
                         }
                     }
+
+                    ControlNodeKind::ExitInvocation { kind, inputs } => Terminator {
+                        attrs: AttrSet::default(),
+                        kind: Cow::Owned(cfg::ControlInstKind::ExitInvocation(kind.clone())),
+                        inputs: inputs.clone(),
+                        targets: [].into_iter().collect(),
+                        target_phi_values: FxIndexMap::default(),
+                        merge: None,
+                    },
                 }
             }

@@ -734,7 +745,7 @@ impl<'a> FuncLifting<'a> {
            };

             match func_def_body.at(parent_node).def().kind {
-                ControlNodeKind::Block { .. } => {
+                ControlNodeKind::Block { .. } | ControlNodeKind::ExitInvocation { .. } => {
                     unreachable!()
                 }

@@ -829,7 +840,7 @@
         // of a linear branch chain (and potentially fusable), later on.
        //
        // FIXME(eddyb) use `EntityOrientedDenseMap` here.
-        let mut use_counts = FxHashMap::default();
+        let mut use_counts = FxHashMap::<CfgPoint, usize>::default();
         use_counts.reserve(blocks.len());
         let all_edges = blocks.first().map(|(&entry_point, _)| entry_point).into_iter().chain(
             blocks.values().flat_map(|block| {
@@ -896,17 +907,14 @@
                 // they start being tracked.
                 if target_phis.is_empty() {
                     let extra_insts = mem::take(extra_insts);
-                    let new_terminator = mem::replace(
-                        new_terminator,
-                        Terminator {
-                            attrs: Default::default(),
-                            kind: Cow::Owned(cfg::ControlInstKind::Unreachable),
-                            inputs: Default::default(),
-                            targets: Default::default(),
-                            target_phi_values: Default::default(),
-                            merge: None,
-                        },
-                    );
+                    let new_terminator = mem::replace(new_terminator, Terminator {
+                        attrs: Default::default(),
+                        kind: Cow::Owned(cfg::ControlInstKind::Unreachable),
+                        inputs: Default::default(),
+                        targets: Default::default(),
+                        target_phi_values: Default::default(),
+                        merge: None,
+                    });
                     *target_use_count = 0;

                     let combined_block = &mut blocks[block_idx];
@@ -918,7 +926,7 @@
         }

         // Remove now-unused blocks.
-        blocks.retain(|point, _| use_counts[point] > 0);
+        blocks.retain(|point, _| use_counts.get(point).is_some_and(|&count| count > 0));

         // Collect `OpPhi`s from other blocks' edges into each block.
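One behavioral fix worth calling out in the hunk just above: `blocks.retain(..)` used to index `use_counts`, which panics for any block that never received a count, while `get(..).is_some_and(..)` treats an absent entry as zero. A minimal illustration of the difference (std `HashMap` standing in for the patch's `FxHashMap`):

```rust
use std::collections::HashMap;

fn main() {
    let mut blocks: Vec<&str> = vec!["entry", "dead"];
    let mut use_counts = HashMap::<&str, usize>::new();
    use_counts.insert("entry", 1);
    // "dead" never got an entry at all (e.g. it became unreachable earlier),
    // so `use_counts[point] > 0` would panic on it; this never does:
    blocks.retain(|point| use_counts.get(point).is_some_and(|&count| count > 0));
    assert_eq!(blocks, ["entry"]);
}
```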
        //
diff --git a/src/spv/lower.rs b/src/spv/lower.rs
index 1e62dc9..1ec1aa8 100644
--- a/src/spv/lower.rs
+++ b/src/spv/lower.rs
@@ -3,11 +3,11 @@

 use crate::spv::{self, spec};
 // FIXME(eddyb) import more to avoid `crate::` everywhere.
 use crate::{
-    cfg, print, AddrSpace, Attr, AttrSet, Const, ConstDef, ConstKind, Context, ControlNodeDef,
-    ControlNodeKind, ControlRegion, ControlRegionDef, ControlRegionInputDecl, DataInstDef,
-    DataInstFormDef, DataInstKind, DeclDef, Diag, EntityDefs, EntityList, ExportKey, Exportee,
-    Func, FuncDecl, FuncDefBody, FuncParam, FxIndexMap, GlobalVarDecl, GlobalVarDefBody, Import,
-    InternedStr, Module, SelectionKind, Type, TypeDef, TypeKind, TypeOrConst, Value,
+    AddrSpace, Attr, AttrSet, Const, ConstDef, ConstKind, Context, ControlNodeDef, ControlNodeKind,
+    ControlRegion, ControlRegionDef, ControlRegionInputDecl, DataInstDef, DataInstFormDef,
+    DataInstKind, DeclDef, Diag, EntityDefs, EntityList, ExportKey, Exportee, Func, FuncDecl,
+    FuncDefBody, FuncParam, FxIndexMap, GlobalVarDecl, GlobalVarDefBody, Import, InternedStr,
+    Module, SelectionKind, Type, TypeDef, TypeKind, TypeOrConst, Value, cfg, print,
 };
 use rustc_hash::FxHashMap;
 use smallvec::SmallVec;
@@ -660,16 +660,13 @@ impl Module {
                     None => DeclDef::Present(GlobalVarDefBody { initializer }),
                 };

-                let global_var = module.global_vars.define(
-                    &cx,
-                    GlobalVarDecl {
-                        attrs: mem::take(&mut attrs),
-                        type_of_ptr_to: type_of_ptr_to_global_var,
-                        shape: None,
-                        addr_space: AddrSpace::SpvStorageClass(storage_class),
-                        def,
-                    },
-                );
+                let global_var = module.global_vars.define(&cx, GlobalVarDecl {
+                    attrs: mem::take(&mut attrs),
+                    type_of_ptr_to: type_of_ptr_to_global_var,
+                    shape: None,
+                    addr_space: AddrSpace::SpvStorageClass(storage_class),
+                    def,
+                });
                 let ptr_to_global_var = cx.intern(ConstDef {
                     attrs: AttrSet::default(),
                     ty: type_of_ptr_to_global_var,
@@ -752,17 +749,14 @@ impl Module {
                     }
                 };

-                let func = module.funcs.define(
-                    &cx,
-                    FuncDecl {
-                        attrs: mem::take(&mut attrs),
-                        ret_type: func_ret_type,
-                        params: func_type_param_types
-                            .map(|ty| FuncParam { attrs: AttrSet::default(), ty })
-                            .collect(),
-                        def,
-                    },
-                );
+                let func = module.funcs.define(&cx, FuncDecl {
+                    attrs: mem::take(&mut attrs),
+                    ret_type: func_ret_type,
+                    params: func_type_param_types
+                        .map(|ty| FuncParam { attrs: AttrSet::default(), ty })
+                        .collect(),
+                    def,
+                });
                 id_defs.insert(func_id, IdDef::Func(func));

                 current_func_body = Some(FuncBody { func_id, func, insts: vec![] });
@@ -856,20 +850,43 @@

                 struct BlockDetails {
                     label_id: spv::Id,
-                    phi_count: u32,
+                    phi_count: usize,
+
+                    // FIXME(eddyb) how inefficient is `FxIndexMap`?
+                    // (vs e.g. a bitset combined with not duplicating `Type`s per-block?)
+                    cfgssa_inter_block_uses: FxIndexMap<spv::Id, Type>,
                 }

                 // Index IDs declared within the function, first.
                 let mut local_id_defs = FxIndexMap::default();
                 // `OpPhi`s are also collected here, to assign them per-edge.
                 let mut phi_to_values = FxIndexMap::<PhiKey, SmallVec<[spv::Id; 1]>>::default();
+                // FIXME(eddyb) wouldn't `EntityOrientedDenseMap` make more sense?
                 let mut block_details = FxIndexMap::<ControlRegion, BlockDetails>::default();
                 let mut has_blocks = false;
+                let mut cfgssa_def_map = {
+                    // FIXME(eddyb) in theory, this could be a toggle, but there is
+                    // very little value in allowing dominance-based SSA use rules.
+                    const SPIRT_CFGSSA_UNDOMINATE: bool = true;
+
+                    SPIRT_CFGSSA_UNDOMINATE.then(|| {
+                        let mut def_map = crate::cfgssa::DefMap::new();
+
+                        // HACK(eddyb) allow e.g. `OpFunctionParameter` to
+                        // be treated like `OpPhi`s of the entry block.
+                        if let DeclDef::Present(func_def_body) = &func_decl.def {
+                            def_map.add_block(func_def_body.body);
+                        }
+
+                        def_map
+                    })
+                };
                 {
                     let mut next_param_idx = 0u32;
                     for raw_inst in &raw_insts {
                         let IntraFuncInst {
                             without_ids: spv::Inst { opcode, ref imms },
+                            result_type,
                             result_id,
                             ..
                         } = *raw_inst;
@@ -909,8 +926,11 @@
                                     .control_regions
                                     .define(&cx, ControlRegionDef::default())
                             };
-                            block_details
-                                .insert(block, BlockDetails { label_id: id, phi_count: 0 });
+                            block_details.insert(block, BlockDetails {
+                                label_id: id,
+                                phi_count: 0,
+                                cfgssa_inter_block_uses: Default::default(),
+                            });
                             LocalIdDef::BlockLabel(block)
                         } else if opcode == wk.OpPhi {
                             let (&current_block, block_details) = match block_details.last_mut()
@@ -922,6 +942,7 @@

                             let phi_idx = block_details.phi_count;
                             block_details.phi_count = phi_idx.checked_add(1).unwrap();
+                            let phi_idx = u32::try_from(phi_idx).unwrap();

                             assert!(imms.is_empty());
                             // FIXME(eddyb) use `array_chunks` when that's stable.
@@ -961,6 +982,29 @@
                             };
                             local_id_defs.insert(id, local_id_def);
                         }
+
+                        if let Some(def_map) = &mut cfgssa_def_map {
+                            if let DeclDef::Present(func_def_body) = &func_decl.def {
+                                let current_block = match block_details.last() {
+                                    Some((&current_block, _)) => current_block,
+                                    // HACK(eddyb) ensure e.g. `OpFunctionParameter`
+                                    // are treated like `OpPhi`s of the entry block.
+                                    None => func_def_body.body,
+                                };
+
+                                if opcode == wk.OpLabel {
+                                    // HACK(eddyb) the entry block was already added.
+                                    if current_block != func_def_body.body {
+                                        def_map.add_block(current_block);
+                                    }
+                                    continue;
+                                }
+
+                                if let Some(id) = result_id {
+                                    def_map.add_def(current_block, id, result_type.unwrap());
+                                }
+                            }
+                        }
                     }
                 }

@@ -992,7 +1036,94 @@
                     None
                 };

-                let mut current_block_control_region_and_details = None;
+                // HACK(eddyb) an entire separate traversal is required to find
+                // all inter-block uses, before any blocks get lowered to SPIR-T.
+                let mut cfgssa_use_accumulator = cfgssa_def_map
+                    .as_ref()
+                    .filter(|_| func_def_body.is_some())
+                    .map(crate::cfgssa::UseAccumulator::new);
+                if let Some(use_acc) = &mut cfgssa_use_accumulator {
+                    // HACK(eddyb) ensure e.g. `OpFunctionParameter`
+                    // are treated like `OpPhi`s of the entry block.
+                    let mut current_block = func_def_body.as_ref().unwrap().body;
+                    for raw_inst in &raw_insts {
+                        let IntraFuncInst {
+                            without_ids: spv::Inst { opcode, ref imms },
+                            result_id,
+                            ..
+                        } = *raw_inst;
+
+                        if opcode == wk.OpLabel {
+                            current_block = match local_id_defs[&result_id.unwrap()] {
+                                LocalIdDef::BlockLabel(control_region) => control_region,
+                                LocalIdDef::Value(_) => unreachable!(),
+                            };
+                            continue;
+                        }
+
+                        if opcode == wk.OpPhi {
+                            assert!(imms.is_empty());
+                            // FIXME(eddyb) use `array_chunks` when that's stable.
+                            for value_and_source_block_id in raw_inst.ids.chunks(2) {
+                                let &[value_id, source_block_id]: &[_; 2] =
+                                    value_and_source_block_id.try_into().unwrap();
+
+                                if let Some(&LocalIdDef::BlockLabel(source_block)) =
+                                    local_id_defs.get(&source_block_id)
+                                {
+                                    // HACK(eddyb) `value_id` would be explicitly used
+                                    // in `source_block`, in a "BB args" representation,
+                                    // but phis move the use to the edge's target.
+                                    use_acc.add_use(source_block, value_id);
+                                }
+                            }
+                            continue;
+                        }
+
+                        // HACK(eddyb) while including merges as edges may seem useful,
+                        // they don't participate in dominance (and thus SSA validity),
+                        // and if there's any chance `current_block` is *not* the
+                        // closest dominator of a merge, that merge could contain
+                        // uses that don't belong/are illegal in `current_block`.
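Taken together, the two passes above amount to a small dataflow API in the new `cfgssa` module: a `DefMap` built once (blocks plus the IDs each defines), then a `UseAccumulator` fed CFG edges and uses, yielding per-block inter-block uses. A condensed sketch of that flow, with signatures inferred purely from these call sites rather than from `cfgssa`'s actual declarations, so treat the exact types as assumptions:

```rust
use spirt::{ControlRegion, Type, cfgssa, spv};

// Condensed from the lowering code in this patch; not an authoritative API.
fn sketch(
    blocks: &[(ControlRegion, Vec<(spv::Id, Type)>)],
    edges: &[(ControlRegion, ControlRegion)],
    uses: &[(ControlRegion, spv::Id)],
) {
    // Pass 1: record every block, and every ID defined in it.
    let mut def_map = cfgssa::DefMap::new();
    for (block, defs) in blocks {
        def_map.add_block(*block);
        for &(id, ty) in defs {
            def_map.add_def(*block, id, ty);
        }
    }

    // Pass 2: record CFG edges and (potentially inter-block) uses;
    // per the comment in the patch, `add_use` silently ignores IDs
    // that `def_map` doesn't know about.
    let mut use_acc = cfgssa::UseAccumulator::new(&def_map);
    for &(from, to) in edges {
        use_acc.add_edge(from, to);
    }
    for &(block, id) in uses {
        use_acc.add_use(block, id);
    }

    // Result: for each block, the IDs (and types) it uses but doesn't define,
    // i.e. exactly what must become extra `ControlRegion` inputs below.
    for (block, inter_block_uses) in use_acc.into_inter_block_uses() {
        let _ = (block, inter_block_uses);
    }
}
```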
+                        if [wk.OpSelectionMerge, wk.OpLoopMerge].contains(&opcode) {
+                            continue;
+                        }
+
+                        for &id in &raw_inst.ids {
+                            // HACK(eddyb) treat all mentions of `OpLabel` IDs as
+                            // CFG edge targets, which turns out to be accurate,
+                            // except for `OpPhi`/`OpSelectionMerge`/`OpLoopMerge`
+                            // (which are already special-cased above).
+                            if let Some(&LocalIdDef::BlockLabel(target_block)) = local_id_defs.get(&id)
+                            {
+                                use_acc.add_edge(current_block, target_block);
+                            } else {
+                                // HACK(eddyb) this heavily relies on `add_use(_, id)`
+                                // ignoring `id`s which aren't recognized by `def_map`.
+                                use_acc.add_use(current_block, id);
+                            }
+                        }
+                    }
+                }
+                if let Some(use_acc) = cfgssa_use_accumulator {
+                    for (block, inter_block_uses) in use_acc.into_inter_block_uses() {
+                        block_details[&block].cfgssa_inter_block_uses = inter_block_uses;
+                    }
+                }
+
+                struct CurrentBlock<'a> {
+                    region: ControlRegion,
+
+                    // FIXME(eddyb) figure out a better name and/or organization for this.
+                    details: &'a BlockDetails,
+
+                    // HACK(eddyb) this is probably very inefficient but allows easy
+                    // access to inter-block-used IDs, in a form directly usable in
+                    // the current block (i.e. `ControlRegion` inputs).
+                    shadowed_local_id_defs: FxIndexMap<spv::Id, LocalIdDef>,
+                }
+
+                let mut current_block = None;
                 for (raw_inst_idx, raw_inst) in raw_insts.iter().enumerate() {
                     let lookahead_raw_inst =
                         |dist| raw_inst_idx.checked_add(dist).and_then(|i| raw_insts.get(i));
@@ -1051,7 +1182,7 @@
                     };

                     if opcode == wk.OpFunctionParameter {
-                        if current_block_control_region_and_details.is_some() {
+                        if current_block.is_some() {
                             return Err(invalid(
                                 "out of order: `OpFunctionParameter`s should come \
                                  before the function's blocks",
@@ -1084,22 +1215,74 @@
                         // A `ControlRegion` (using an empty `Block` `ControlNode`
                        // as its sole child) was defined earlier,
                        // to be able to have an entry in `local_id_defs`.
-                        let control_region = match local_id_defs[&result_id.unwrap()] {
-                            LocalIdDef::BlockLabel(control_region) => control_region,
+                        let region = match local_id_defs[&result_id.unwrap()] {
+                            LocalIdDef::BlockLabel(region) => region,
                             LocalIdDef::Value(_) => unreachable!(),
                         };
-                        let current_block_details = &block_details[&control_region];
-                        assert_eq!(current_block_details.label_id, result_id.unwrap());
-                        current_block_control_region_and_details =
-                            Some((control_region, current_block_details));
+                        let details = &block_details[&region];
+                        assert_eq!(details.label_id, result_id.unwrap());
+                        current_block = Some(CurrentBlock {
+                            region,
+                            details,
+
+                            // HACK(eddyb) reuse `shadowed_local_id_defs` storage.
+                            shadowed_local_id_defs: current_block
+                                .take()
+                                .map(|CurrentBlock { mut shadowed_local_id_defs, .. }| {
+                                    shadowed_local_id_defs.clear();
+                                    shadowed_local_id_defs
+                                })
+                                .unwrap_or_default(),
+                        });
                         continue;
                     }
-                    let (current_block_control_region, current_block_details) =
-                        current_block_control_region_and_details.ok_or_else(|| {
-                            invalid("out of order: not expected before the function's blocks")
-                        })?;
+                    let current_block = current_block.as_mut().ok_or_else(|| {
+                        invalid("out of order: not expected before the function's blocks")
+                    })?;
                     let current_block_control_region_def =
-                        &mut func_def_body.control_regions[current_block_control_region];
+                        &mut func_def_body.control_regions[current_block.region];
+
+                    // HACK(eddyb) the `ControlRegion` inputs for inter-block uses
+                    // have to be inserted just after all the `OpPhi`s' region inputs,
+                    // or right away (e.g. on `OpLabel`) when there are no `OpPhi`s,
+                    // so the easiest place to insert them is before handling the
+                    // first instruction in the block that's not `OpLabel`/`OpPhi`.
+                    if opcode != wk.OpPhi
+                        && current_block.shadowed_local_id_defs.is_empty()
+                        && !current_block.details.cfgssa_inter_block_uses.is_empty()
+                    {
+                        assert!(current_block_control_region_def.children.is_empty());
+
+                        current_block.shadowed_local_id_defs.extend(
+                            current_block.details.cfgssa_inter_block_uses.iter().map(
+                                |(&used_id, &ty)| {
+                                    let input_idx = current_block_control_region_def
+                                        .inputs
+                                        .len()
+                                        .try_into()
+                                        .unwrap();
+                                    current_block_control_region_def
+                                        .inputs
+                                        .push(ControlRegionInputDecl { attrs: AttrSet::default(), ty });
+                                    (
+                                        used_id,
+                                        LocalIdDef::Value(Value::ControlRegionInput {
+                                            region: current_block.region,
+                                            input_idx,
+                                        }),
+                                    )
+                                },
+                            ),
+                        );
+                    }
+
+                    // HACK(eddyb) shadowing the closure with the same name, could
+                    // it be defined here to make use of `current_block`?
+                    let lookup_global_or_local_id_for_data_or_control_inst_input =
+                        |id| match current_block.shadowed_local_id_defs.get(&id) {
+                            Some(&shadowed) => Ok(shadowed),
+                            None => lookup_global_or_local_id_for_data_or_control_inst_input(id),
+                        };

                     if is_last_in_block {
                         if opcode.def().category != spec::InstructionCategory::ControlFlow
@@ -1134,7 +1317,9 @@

                                 let target_block_details = &block_details[&target_block];

-                                if target_block_details.phi_count == 0 {
+                                if target_block_details.phi_count == 0
+                                    && target_block_details.cfgssa_inter_block_uses.is_empty()
+                                {
                                     return Ok(());
                                 }

@@ -1146,9 +1331,9 @@
                                 let inputs =
                                     (0..target_block_details.phi_count).map(|target_phi_idx| {
                                         let phi_key = PhiKey {
-                                            source_block_id: current_block_details.label_id,
+                                            source_block_id: current_block.details.label_id,
                                             target_block_id: target_block_details.label_id,
-                                            target_phi_idx,
+                                            target_phi_idx: target_phi_idx.try_into().unwrap(),
                                         };
                                         let phi_value_ids =
                                             phi_to_values.swap_remove(&phi_key).unwrap_or_default();
@@ -1165,6 +1350,16 @@
                                             ))),
                                         }
                                     });
+                                let inputs = inputs.chain(
+                                    target_block_details.cfgssa_inter_block_uses.keys().map(
+                                        |&used_id| {
+                                            match lookup_global_or_local_id_for_data_or_control_inst_input(
+                                                used_id,
+                                            )?
+                                            {
+                                                LocalIdDef::Value(v) => Ok(v),
+                                                LocalIdDef::BlockLabel(_) => unreachable!(),
+                                            }
+                                        },
+                                    ),
+                                );

                                 target_inputs_entry.insert(inputs.collect::<Result<_, _>>()?);

                                 Ok(())
@@ -1221,10 +1416,13 @@
                             .as_mut()
                             .unwrap()
                             .control_inst_on_exit_from
-                            .insert(
-                                current_block_control_region,
-                                cfg::ControlInst { attrs, kind, inputs, targets, target_inputs },
-                            );
+                            .insert(current_block.region, cfg::ControlInst {
+                                attrs,
+                                kind,
+                                inputs,
+                                targets,
+                                target_inputs,
+                            });
                     } else if opcode == wk.OpPhi {
                         if !current_block_control_region_def.children.is_empty() {
                             return Err(invalid(
@@ -1265,7 +1463,7 @@
                             .as_mut()
                             .unwrap()
                             .loop_merge_to_loop_header
-                            .insert(loop_merge_target, current_block_control_region);
+                            .insert(loop_merge_target, current_block.region);
                     }

                     // HACK(eddyb) merges are mostly ignored - this may be lossy,
diff --git a/src/spv/spec.rs b/src/spv/spec.rs
index b11b7b6..9ea6164 100644
--- a/src/spv/spec.rs
+++ b/src/spv/spec.rs
@@ -846,13 +846,10 @@ impl Spec {
                     bases[0] = "LiteralContextDependentNumber";
                 }

-                (
-                    o.kind,
-                    [
-                        operand_kinds.lookup(bases[0]).unwrap(),
-                        operand_kinds.lookup(bases[1]).unwrap(),
-                    ],
-                )
+                (o.kind, [
+                    operand_kinds.lookup(bases[0]).unwrap(),
+                    operand_kinds.lookup(bases[1]).unwrap(),
+                ])
             })
             .collect();
diff --git a/src/transform.rs b/src/transform.rs
index 6b97b8a..3fbefc2 100644
--- a/src/transform.rs
+++ b/src/transform.rs
@@ -3,12 +3,12 @@
 use crate::func_at::FuncAtMut;
 use crate::qptr::{self, QPtrAttr, QPtrMemUsage, QPtrMemUsageKind, QPtrOp, QPtrUsage};
 use crate::{
-    cfg, spv, AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, ControlNode,
-    ControlNodeDef, ControlNodeKind, ControlNodeOutputDecl, ControlRegion, ControlRegionDef,
+    AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, ControlNode, ControlNodeDef,
+    ControlNodeKind, ControlNodeOutputDecl, ControlRegion, ControlRegionDef,
     ControlRegionInputDecl, DataInst, DataInstDef, DataInstForm, DataInstFormDef, DataInstKind,
     DeclDef, EntityListIter, ExportKey, Exportee, Func, FuncDecl, FuncDefBody, FuncParam,
     GlobalVar, GlobalVarDecl, GlobalVarDefBody, Import, Module, ModuleDebugInfo, ModuleDialect,
-    OrdAssertEq, SelectionKind, Type, TypeDef, TypeKind, TypeOrConst, Value,
+    OrdAssertEq, SelectionKind, Type, TypeDef, TypeKind, TypeOrConst, Value, cfg, spv,
 };
 use std::cmp::Ordering;
 use std::rc::Rc;
@@ -615,7 +615,7 @@ impl InnerInPlaceTransform for FuncAtMut<'_, EntityListIter<ControlNode>> {
 impl FuncAtMut<'_, ControlNode> {
     fn child_regions(&mut self) -> &mut [ControlRegion] {
         match &mut self.reborrow().def().kind {
-            ControlNodeKind::Block { .. } => &mut [][..],
+            ControlNodeKind::Block { .. } | ControlNodeKind::ExitInvocation { .. } => &mut [][..],
             ControlNodeKind::Select { cases, .. } => cases,
             ControlNodeKind::Loop { body, .. } => slice::from_mut(body),
@@ -641,8 +645,12 @@ impl InnerInPlaceTransform for FuncAtMut<'_, ControlNode> {
             } => {
                 transformer.transform_value_use(scrutinee).apply_to(scrutinee);
             }
-            ControlNodeKind::Loop { initial_inputs, body: _, repeat_condition: _ } => {
-                for v in initial_inputs {
+            ControlNodeKind::Loop { initial_inputs: inputs, body: _, repeat_condition: _ }
+            | ControlNodeKind::ExitInvocation {
+                kind: cfg::ExitInvocationKind::SpvInst(_),
+                inputs,
+            } => {
+                for v in inputs {
                     transformer.transform_value_use(v).apply_to(v);
                 }
             }
@@ -660,7 +664,11 @@
         match kind {
             // Fully handled above, before recursing into any child regions.
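The net effect of the lowering above: if block B uses an ID defined in block A, B gains a `ControlRegion` input for it, and every edge into B passes the value along as a target input (each block forwarding its own local copy, which is presumably how values get threaded through intermediate blocks too). A toy rewrite, with plain maps and hypothetical names, capturing just the direct-edge case:

```rust
use std::collections::BTreeMap;

// Toy rewrite (not this crate's types): give each block an extra "input"
// for every ID it uses but does not define, and make each incoming edge
// pass that ID along, mirroring the region inputs added above.
#[derive(Default)]
struct Block {
    defs: Vec<u32>,         // IDs defined in this block
    uses: Vec<u32>,         // IDs used in this block
    extra_inputs: Vec<u32>, // filled in by the rewrite
}

fn rewrite(blocks: &mut BTreeMap<&str, Block>, edges: &[(&str, &str)]) -> Vec<(String, u32)> {
    let mut passed = Vec::new();
    let keys: Vec<&str> = blocks.keys().copied().collect();
    for k in keys {
        let needed: Vec<u32> = {
            let b = &blocks[k];
            b.uses.iter().copied().filter(|id| !b.defs.contains(id)).collect()
        };
        for id in needed {
            blocks.get_mut(k).unwrap().extra_inputs.push(id);
            // Every predecessor edge must now pass `id` as an argument.
            for &(from, to) in edges {
                if to == k {
                    passed.push((format!("{from}->{to}"), id));
                }
            }
        }
    }
    passed
}

fn main() {
    let mut blocks = BTreeMap::new();
    blocks.insert("A", Block { defs: vec![1], ..Default::default() });
    blocks.insert("B", Block { uses: vec![1], ..Default::default() });
    let passed = rewrite(&mut blocks, &[("A", "B")]);
    assert_eq!(blocks["B"].extra_inputs, [1]);
    assert_eq!(passed, [("A->B".to_string(), 1)]);
}
```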
             ControlNodeKind::Block { insts: _ }
-            | ControlNodeKind::Select { kind: _, scrutinee: _, cases: _ } => {}
+            | ControlNodeKind::Select { kind: _, scrutinee: _, cases: _ }
+            | ControlNodeKind::ExitInvocation {
+                kind: cfg::ExitInvocationKind::SpvInst(_),
+                inputs: _,
+            } => {}

             ControlNodeKind::Loop { initial_inputs: _, body: _, repeat_condition } => {
                 transformer.transform_value_use(repeat_condition).apply_to(repeat_condition);
diff --git a/src/visit.rs b/src/visit.rs
index 7bb837d..55de833 100644
--- a/src/visit.rs
+++ b/src/visit.rs
@@ -3,12 +3,12 @@
 use crate::func_at::FuncAt;
 use crate::qptr::{self, QPtrAttr, QPtrMemUsage, QPtrMemUsageKind, QPtrOp, QPtrUsage};
 use crate::{
-    cfg, spv, AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, ControlNode,
-    ControlNodeDef, ControlNodeKind, ControlNodeOutputDecl, ControlRegion, ControlRegionDef,
+    AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, ControlNode, ControlNodeDef,
+    ControlNodeKind, ControlNodeOutputDecl, ControlRegion, ControlRegionDef,
     ControlRegionInputDecl, DataInstDef, DataInstForm, DataInstFormDef, DataInstKind, DeclDef,
     DiagMsgPart, EntityListIter, ExportKey, Exportee, Func, FuncDecl, FuncDefBody, FuncParam,
     GlobalVar, GlobalVarDecl, GlobalVarDefBody, Import, Module, ModuleDebugInfo, ModuleDialect,
-    SelectionKind, Type, TypeDef, TypeKind, TypeOrConst, Value,
+    SelectionKind, Type, TypeDef, TypeKind, TypeOrConst, Value, cfg, spv,
 };

 // FIXME(eddyb) `Sized` bound shouldn't be needed but removing it requires
@@ -490,6 +490,14 @@ impl<'a> FuncAt<'a, ControlNode> {
                 visitor.visit_control_region_def(self.at(*body));
                 visitor.visit_value_use(repeat_condition);
             }
+            ControlNodeKind::ExitInvocation {
+                kind: cfg::ExitInvocationKind::SpvInst(_),
+                inputs,
+            } => {
+                for v in inputs {
+                    visitor.visit_value_use(v);
+                }
+            }
         }
         for output in outputs {
             output.inner_visit_with(visitor);
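Since `ExitInvocation` is a new `ControlNodeKind` variant with no child regions, every exhaustive match over node kinds (as in the `visit.rs` and `transform.rs` hunks above) gains an arm whose only value operands are its `inputs`. A hypothetical external traversal helper mirroring those arms, assuming the public field layout shown earlier in this patch:

```rust
use spirt::{ControlNodeKind, Value, cfg};

// Hypothetical helper, not part of the crate: `ExitInvocation` has no child
// regions, so its `inputs` are the only values such a traversal must visit.
fn for_each_value_use(kind: &ControlNodeKind, mut f: impl FnMut(&Value)) {
    match kind {
        ControlNodeKind::Block { .. } => {
            // `DataInst`s carry their own inputs; nothing at the node level.
        }
        ControlNodeKind::Select { scrutinee, .. } => f(scrutinee),
        ControlNodeKind::Loop { initial_inputs, repeat_condition, .. } => {
            for v in initial_inputs {
                f(v);
            }
            f(repeat_condition);
        }
        ControlNodeKind::ExitInvocation {
            kind: cfg::ExitInvocationKind::SpvInst(_),
            inputs,
        } => {
            for v in inputs {
                f(v);
            }
        }
    }
}
```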