From 3d031f66e52ba617175e4dbc10bad693b0f33eab Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Thu, 2 Nov 2023 13:56:39 -0700 Subject: [PATCH] Cranelift: Bitpack the egraph `Cost` structure Co-Authored-By: Chris Fallin Co-Authored-By: Trevor Elliott --- cranelift/codegen/src/egraph/cost.rs | 101 ++++++++++++++++----------- 1 file changed, 60 insertions(+), 41 deletions(-) diff --git a/cranelift/codegen/src/egraph/cost.rs b/cranelift/codegen/src/egraph/cost.rs index 8a59a8dd0456..22d9be814ab9 100644 --- a/cranelift/codegen/src/egraph/cost.rs +++ b/cranelift/codegen/src/egraph/cost.rs @@ -30,23 +30,36 @@ use crate::ir::Opcode; /// `finite()` method.) An infinite cost is used to represent a value /// that cannot be computed, or otherwise serve as a sentinel when /// performing search for the lowest-cost representation of a value. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub(crate) struct Cost { - opcode_cost: u32, - depth: u32, +#[derive(Clone, Copy, PartialEq, Eq)] +pub(crate) struct Cost(u32); + +impl core::fmt::Debug for Cost { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + if *self == Cost::infinity() { + write!(f, "Cost::Infinite") + } else { + f.debug_struct("Cost::Finite") + .field("op_cost", &self.op_cost()) + .field("depth", &self.depth()) + .finish() + } + } } impl Ord for Cost { #[inline] fn cmp(&self, other: &Self) -> std::cmp::Ordering { - // Break `opcode_cost` ties with `depth`. This means that, when the - // opcode cost is the same, we prefer shallow and wide expressions to - // narrow and deep. For example, `(a + b) + (c + d)` is preferred to - // `((a + b) + c) + d`. This is beneficial because it exposes more + // We make sure that the high bits are the op cost and the low bits are + // the depth. This means that we can use normal integer comparison to + // order by op cost and then depth. + // + // We want to break op cost ties with depth (rather than the other way + // around). When the op cost is the same, we prefer shallow and wide + // expressions to narrow and deep expressions and breaking ties with + // `depth` gives us that. For example, `(a + b) + (c + d)` is preferred + // to `((a + b) + c) + d`. This is beneficial because it exposes more // instruction-level parallelism and shortens live ranges. - self.opcode_cost - .cmp(&other.opcode_cost) - .then_with(|| self.depth.cmp(&other.depth)) + self.0.cmp(&other.0) } } @@ -58,38 +71,44 @@ impl PartialOrd for Cost { } impl Cost { + const DEPTH_BITS: u8 = 8; + const DEPTH_MASK: u32 = (1 << Self::DEPTH_BITS) - 1; + const OP_COST_MASK: u32 = !Self::DEPTH_MASK; + const MAX_OP_COST: u32 = Self::OP_COST_MASK >> Self::DEPTH_BITS; + pub(crate) fn infinity() -> Cost { // 2^32 - 1 is, uh, pretty close to infinite... (we use `Cost` // only for heuristics and always saturate so this suffices!) - Cost { - opcode_cost: u32::MAX, - depth: u32::MAX, - } + Cost(u32::MAX) } pub(crate) fn zero() -> Cost { - Cost { - opcode_cost: 0, - depth: 0, - } + Cost(0) } - pub(crate) fn new(opcode_cost: u32) -> Cost { - let cost = Cost { - opcode_cost, - depth: 0, - }; - cost.finite() + fn new(opcode_cost: u32, depth: u8) -> Cost { + debug_assert!( + opcode_cost <= Self::MAX_OP_COST, + "Cost::new: given opcode cost of {opcode_cost} is larger than max of {}", + Self::MAX_OP_COST, + ); + Cost((opcode_cost << Self::DEPTH_BITS) | u32::from(depth)) + } + + fn depth(&self) -> u8 { + let depth = self.0 & Self::DEPTH_MASK; + u8::try_from(depth).unwrap() + } + + fn op_cost(&self) -> u32 { + (self.0 & Self::OP_COST_MASK) >> Self::DEPTH_BITS } /// Clamp this cost at a "finite" value. Can be used in /// conjunction with saturating ops to avoid saturating into /// `infinity()`. fn finite(self) -> Cost { - Cost { - opcode_cost: std::cmp::min(u32::MAX - 1, self.opcode_cost), - depth: std::cmp::min(u32::MAX - 1, self.depth), - } + Cost(std::cmp::min(u32::MAX - 1, self.0)) } /// Compute the cost of the operation and its given operands. @@ -97,9 +116,8 @@ impl Cost { /// Caller is responsible for checking that the opcode came from an instruction /// that satisfies `inst_predicates::is_pure_for_egraph()`. pub(crate) fn of_pure_op(op: Opcode, operand_costs: impl IntoIterator) -> Self { - let mut c: Self = pure_op_cost(op) + operand_costs.into_iter().sum(); - c.depth = c.depth.saturating_add(1); - c + let c = pure_op_cost(op) + operand_costs.into_iter().sum(); + Cost::new(c.op_cost(), c.depth().saturating_add(1)).finite() } } @@ -123,11 +141,12 @@ impl std::ops::Add for Cost { type Output = Cost; fn add(self, other: Cost) -> Cost { - let cost = Cost { - opcode_cost: self.opcode_cost.saturating_add(other.opcode_cost), - depth: std::cmp::max(self.depth, other.depth), - }; - cost.finite() + let op_cost = std::cmp::min( + self.op_cost().saturating_add(other.op_cost()), + Self::MAX_OP_COST, + ); + let depth = std::cmp::max(self.depth(), other.depth()); + Cost::new(op_cost, depth).finite() } } @@ -138,11 +157,11 @@ impl std::ops::Add for Cost { fn pure_op_cost(op: Opcode) -> Cost { match op { // Constants. - Opcode::Iconst | Opcode::F32const | Opcode::F64const => Cost::new(1), + Opcode::Iconst | Opcode::F32const | Opcode::F64const => Cost::new(1, 0), // Extends/reduces. Opcode::Uextend | Opcode::Sextend | Opcode::Ireduce | Opcode::Iconcat | Opcode::Isplit => { - Cost::new(2) + Cost::new(2, 0) } // "Simple" arithmetic. @@ -154,9 +173,9 @@ fn pure_op_cost(op: Opcode) -> Cost { | Opcode::Bnot | Opcode::Ishl | Opcode::Ushr - | Opcode::Sshr => Cost::new(3), + | Opcode::Sshr => Cost::new(3, 0), // Everything else (pure.) - _ => Cost::new(4), + _ => Cost::new(4, 0), } }