From 9e356796c773f63e5fc11322c508cca8d0a90cd0 Mon Sep 17 00:00:00 2001 From: demilade Date: Tue, 19 Mar 2024 10:59:47 +0100 Subject: [PATCH 01/95] added implementation of SSRA for a single basic block --- src/fastalloc.rs | 363 +++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 10 +- 2 files changed, 372 insertions(+), 1 deletion(-) create mode 100644 src/fastalloc.rs diff --git a/src/fastalloc.rs b/src/fastalloc.rs new file mode 100644 index 00000000..f5d3410c --- /dev/null +++ b/src/fastalloc.rs @@ -0,0 +1,363 @@ +use core::convert::TryInto; + +use crate::{Block, InstRange, Inst, OperandKind, Operand, PReg, RegClass, VReg, SpillSlot, FxHashMap, AllocationKind}; +use crate::{Function, MachineEnv, ssa::validate_ssa, ProgPoint, Edit, Output}; +use crate::{cfg::CFGInfo, RegAllocError, Allocation, ion::Stats}; +use alloc::vec::Vec; +use hashbrown::{HashMap, HashSet}; + +use std::println; + +/// A least recently used cache organized as a linked list based on a vector +#[derive(Debug)] +struct Lru { + /// The list of node information + /// + /// Each node corresponds to a physical register. + /// The index of a node is the `address` from the perspective of the linked list. 
+ data: Vec, + /// Index of the most recently used register + head: usize, + class: RegClass, +} + +#[derive(Clone, Copy, Debug)] +struct LruNode { + /// The previous physical register in the list + prev: usize, + /// The next physical register in the list + next: usize, +} + +impl Lru { + fn new(no_of_regs: usize) -> Self { + let mut data = Vec::with_capacity(no_of_regs); + for _ in 0..no_of_regs { + data.push(LruNode { prev: 0, next: 0 }); + } + let mut lru = Self { + head: 0, + data, + class: RegClass::Int + }; + for i in 0..no_of_regs { + lru.data[i].prev = i.checked_sub(1).unwrap_or(no_of_regs - 1); + lru.data[i].next = (i + 1) % no_of_regs; + } + lru + } + + /// Marks the physical register `i` as the most recently used + /// and sets `vreg` as the virtual register it contains + fn poke(&mut self, preg: PReg) { + let prev_newest = self.head; + let i = preg.hw_enc(); + if i == prev_newest { + return; + } + if self.data[prev_newest].prev != i { + self.remove(i); + self.insert_before(i, self.head); + } + self.head = i; + } + + /// Gets the least recently used physical register. + fn pop(&mut self) -> PReg { + let oldest = self.data[self.head].prev; + PReg::new(oldest, self.class) + } + + /// Splices out a node from the list + fn remove(&mut self, i: usize) { + let (iprev, inext) = (self.data[i].prev, self.data[i].next); + self.data[iprev].next = self.data[i].next; + self.data[inext].prev = self.data[i].prev; + } + + /// Insert node `i` before node `j` in the list + fn insert_before(&mut self, i: usize, j: usize) { + let prev = self.data[j].prev; + self.data[prev].next = i; + self.data[j].prev = i; + self.data[i] = LruNode { + next: j, + prev, + }; + } +} + +/// Info about the operand currently in a `PReg`. +#[derive(Debug)] +struct OpInfo { + /// The instruction the operand is in. + inst: Inst, + /// The index of the operand in the instruction. + op_idx: usize, + /// The `VReg` in the `PReg`. 
+ vreg: VReg, +} + +#[derive(Debug)] +struct LiveRegInfo { + /// The current allocation. + alloc: Allocation, +} + +#[derive(Debug)] +pub struct Env<'a, F: Function> { + func: &'a F, + env: &'a MachineEnv, + /// What do we know about the live registers? + livevregs: HashMap, + /// Which virtual registers are held in physical registers. + preg_info: HashMap, + /// Free registers for each register class: Int, Float and Vector, + /// respectively. + freepregs: [Vec; 3], + /// Least recently used cache, for eviction. + lru: Lru, + /// All registers used in the current instruction being allocated. + regs_used_in_curr_inst: HashSet, + /// Spillslots allocated to virtual registers. + allocd_stackslots: HashMap, + /// Offset for the next stack slot allocation. + next_freestack_offset: usize, + + // Output. + allocs: Vec, + inst_alloc_offsets: Vec, + edits: Vec<(ProgPoint, Edit)>, + num_spillslots: u32, + + stats: Stats, +} + +impl<'a, F: Function> Env<'a, F> { + fn new(func: &'a F, env: &'a MachineEnv) -> Self { + let freepregs = [ + env.preferred_regs_by_class[RegClass::Int as usize].clone(), + env.preferred_regs_by_class[RegClass::Float as usize].clone(), + env.preferred_regs_by_class[RegClass::Vector as usize].clone(), + ]; + let lru = Lru::new(freepregs[RegClass::Int as usize].len()); + let inst_alloc_offsets: Vec = (0..func.num_insts()) + .map(|_| 0).collect(); + Self { + func, + env, + allocs: Vec::with_capacity(func.num_vregs()), + edits: Vec::new(), + inst_alloc_offsets, + num_spillslots: 0, + stats: Stats::default(), + livevregs: HashMap::with_capacity(func.num_vregs()), + freepregs, + lru, + regs_used_in_curr_inst: HashSet::new(), + allocd_stackslots: HashMap::new(), + preg_info: HashMap::new(), + next_freestack_offset: 0, + } + } + + fn init_operands_allocs(&mut self, operands: &[Operand], inst: Inst) { + let no_of_operands: u32 = operands.len().try_into().unwrap(); + let offset = self.allocs.len(); + for _ in 0..no_of_operands { + 
self.allocs.push(Allocation::none()); + } + self.inst_alloc_offsets[inst.index()] = offset.try_into().unwrap(); + } + + fn assigned_reg(&self, operand: &Operand) -> Option { + self.livevregs.get(&operand.vreg()) + .and_then(|info| info.alloc.as_reg().map(|reg| reg)) + } + + fn no_of_operands(&self, inst: Inst) -> usize { + self.func.inst_operands(inst).len() + } + + fn curralloc_mut(&mut self, inst: Inst, op_idx: usize) -> &mut Allocation { + let inst_offset = self.inst_alloc_offsets[inst.index()]; + let inst_offset: usize = inst_offset.try_into().unwrap(); + let no_of_operands = self.no_of_operands(inst); + // The end results will be reversed + // So, the operands have to be put in reverse order to + // avoid breaking the external API. + &mut self.allocs[inst_offset + (no_of_operands - op_idx - 1)] + } + + fn allocstack(&mut self, vreg: VReg) -> Allocation { + let ss = if let Some(ss) = self.allocd_stackslots.get(&vreg) { + *ss + } else { + let size = self.func.spillslot_size(vreg.class()); + let offset = self.next_freestack_offset; + let slot = (offset + size - 1) & !(size - 1); + self.next_freestack_offset = offset + size; + let ss = SpillSlot::new(slot); + self.allocd_stackslots.insert(vreg, ss); + ss + }; + Allocation::stack(ss) + } + + fn evictreg(&mut self, inst: Inst) -> PReg { + let preg = self.lru.pop(); + let evicted_vreg = self.preg_info[&preg]; + let stackloc = self.allocstack(evicted_vreg); + self.edits.push((ProgPoint::after(inst), Edit::Move { + from: stackloc, + to: Allocation::reg(preg), + })); + self.livevregs.get_mut(&evicted_vreg).unwrap().alloc = stackloc; + preg + } + + fn freealloc(&mut self, operand: &Operand) { + let livereg = self.livevregs.get(&operand.vreg()) + .expect("Trying to free an unallocated vreg"); + match livereg.alloc.kind() { + AllocationKind::Reg => { + let preg = livereg.alloc.as_reg().unwrap(); + self.freepregs[operand.class() as usize].push(preg); + self.preg_info.remove(&preg); + self.livevregs.remove(&operand.vreg()); 
+ } + _ => unimplemented!() + }; + } + + /// Allocate a physical register for `operand` at index `idx` in + /// instruction `inst`. + fn allocreg(&mut self, operand: &Operand, inst: Inst, idx: usize) -> PReg { + let freepregs_idx = operand.class() as usize; + let preg = if self.freepregs[freepregs_idx].is_empty() { + self.evictreg(inst) + } else{ + self.freepregs[freepregs_idx].pop().unwrap() + }; + self.lru.poke(preg); + self.livevregs.insert(operand.vreg(), LiveRegInfo { + alloc: Allocation::reg(preg) + }); + self.preg_info.insert(preg, operand.vreg()); + *self.curralloc_mut(inst, idx) = Allocation::reg(preg); + preg + } + + fn alloc_inst(&mut self, inst: Inst) -> Result<(), RegAllocError> { + let operands = self.func.inst_operands(inst); + self.init_operands_allocs(operands, inst); + let mut def_operands = Vec::with_capacity(operands.len()); + let mut use_operands = Vec::with_capacity(operands.len()); + for (i, operand) in operands.iter().enumerate() { + if operand.kind() == OperandKind::Def { + def_operands.push(i); + } else { + use_operands.push(i); + } + } + + for idx in def_operands { + if let Some(preg) = self.assigned_reg(&operands[idx]) { + *self.curralloc_mut(inst, idx) = Allocation::reg(preg); + } else { + self.allocreg(&operands[idx], inst, idx); + } + self.freealloc(&operands[idx]); + } + + for idx in use_operands { + let operand = &operands[idx]; + let prevalloc = if let Some(livereg) = self.livevregs.get(&operand.vreg()) { + Some(livereg.alloc.clone()) + } else { + None + }; + let assigned_reg = self.assigned_reg(operand) + .map(|reg| reg.clone()); + let preg = if let Some(preg) = assigned_reg { + *self.curralloc_mut(inst, idx) = Allocation::reg(preg); + self.lru.poke(preg); + preg + } else { + let preg = self.allocreg(operand, inst, idx); + if self.regs_used_in_curr_inst.contains(&preg) { + return Err(RegAllocError::TooManyLiveRegs); + } + self.regs_used_in_curr_inst.insert(preg); + preg + }; + if let Some(prevalloc) = prevalloc { + if 
prevalloc.is_stack() { + self.edits.push((ProgPoint::before(inst), Edit::Move { + from: Allocation::reg(preg), + to: Allocation::stack(prevalloc.as_stack().unwrap()), + })); + } + } + } + self.regs_used_in_curr_inst.clear(); + Ok(()) + } + + /// Allocates instructions in reverse order. + fn alloc_basic_block(&mut self, block: Block) -> Result<(), RegAllocError> { + for inst in self.func.block_insns(block).iter().rev() { + self.alloc_inst(inst)?; + } + // Reversing the result to conform to the external API + self.reverse_results(); + Ok(()) + } + + fn reverse_results(&mut self) { + let mut offset = 0; + let mut prev_end: u32 = self.allocs.len().try_into().unwrap(); + for i in 0..self.inst_alloc_offsets.len() - 1 { + let diff = prev_end as u32 - self.inst_alloc_offsets[i]; + prev_end = self.inst_alloc_offsets[i]; + self.inst_alloc_offsets[i] = offset; + offset += diff; + } + *self.inst_alloc_offsets.last_mut().unwrap() = offset; + self.allocs.reverse(); + self.edits.reverse(); + } + + fn run(&mut self) -> Result<(), RegAllocError> { + for blocknum in 0..self.func.num_blocks() { + self.alloc_basic_block(Block::new(blocknum))?; + } + Ok(()) + } +} + +pub fn run( + func: &F, + mach_env: &MachineEnv, + enable_annotations: bool, + enable_ssa_checker: bool, +) -> Result { + let cfginfo = CFGInfo::new(func)?; + + if enable_ssa_checker { + validate_ssa(func, &cfginfo)?; + } + + let mut env = Env::new(func, mach_env); + env.run()?; + + Ok(Output { + edits: env.edits, + allocs: env.allocs, + inst_alloc_offsets: env.inst_alloc_offsets, + num_spillslots: env.num_spillslots as usize, + debug_locations: Vec::new(), + safepoint_slots: Vec::new(), + stats: env.stats, + }) +} diff --git a/src/lib.rs b/src/lib.rs index 3fe8267b..1fc6bb26 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -46,6 +46,7 @@ pub(crate) mod ion; pub mod moves; pub(crate) mod postorder; pub mod ssa; +pub(crate) mod fastalloc; #[macro_use] mod index; @@ -1534,7 +1535,11 @@ pub fn run( env: &MachineEnv, options: 
&RegallocOptions, ) -> Result { - ion::run(func, env, options.verbose_log, options.validate_ssa) + if options.use_fastalloc { + fastalloc::run(func, env, options.verbose_log, options.validate_ssa) + } else { + ion::run(func, env, options.verbose_log, options.validate_ssa) + } } /// Options for allocation. @@ -1545,4 +1550,7 @@ pub struct RegallocOptions { /// Run the SSA validator before allocating registers. pub validate_ssa: bool, + + /// Run the SSRA algorithm + pub use_fastalloc: bool, } From 46e38c39b4d997afff1d04da2e82f65645f90920 Mon Sep 17 00:00:00 2001 From: demilade Date: Tue, 19 Mar 2024 15:44:32 +0100 Subject: [PATCH 02/95] extended implementation to account for multiple basic blocks --- src/fastalloc.rs | 207 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 195 insertions(+), 12 deletions(-) diff --git a/src/fastalloc.rs b/src/fastalloc.rs index f5d3410c..01600155 100644 --- a/src/fastalloc.rs +++ b/src/fastalloc.rs @@ -98,10 +98,60 @@ struct OpInfo { vreg: VReg, } +#[derive(Debug, Clone)] +enum MaybeManyAllocation { + Single(Allocation), + /// Allocation in multiple stack locations + /// + /// This is a vector to account for cases where + /// multiple basic blocks have a single predecessor and + /// the branch arguments passed to them are the same. + /// For example, if we have an instruction in some block 1: + /// `if v0 < v1 goto 2 v0 v1 else goto 3 v0 v1`, + /// blocks 2 and 3 are successors. Each of them will have their own + /// stack slots for branch params, so virtual registers v0 and v1 + /// have to be present in both. 
+ Many(Vec), +} + +impl MaybeManyAllocation { + fn as_reg(&self) -> Option { + match self { + Self::Single(alloc) => alloc.as_reg(), + _ => None + } + } + + fn kind(&self) -> AllocationKind { + match self { + Self::Single(alloc) => alloc.kind(), + _ => AllocationKind::Stack, + } + } + + fn reg(preg: PReg) -> Self { + Self::Single(Allocation::reg(preg)) + } + + fn is_stack(&self) -> bool { + match self { + Self::Single(alloc) => alloc.is_stack(), + _ => true + } + } + + fn as_single(&self) -> Option { + match self { + Self::Single(alloc) => Some(*alloc), + _ => None, + } + } +} + #[derive(Debug)] struct LiveRegInfo { /// The current allocation. - alloc: Allocation, + alloc: MaybeManyAllocation, } #[derive(Debug)] @@ -123,6 +173,9 @@ pub struct Env<'a, F: Function> { allocd_stackslots: HashMap, /// Offset for the next stack slot allocation. next_freestack_offset: usize, + /// A mapping from a block to the allocations of its block params + /// at the beginning of the block. + blockparam_allocs: HashMap>, // Output. 
allocs: Vec, @@ -158,6 +211,7 @@ impl<'a, F: Function> Env<'a, F> { allocd_stackslots: HashMap::new(), preg_info: HashMap::new(), next_freestack_offset: 0, + blockparam_allocs: HashMap::with_capacity(func.num_blocks()), } } @@ -212,7 +266,7 @@ impl<'a, F: Function> Env<'a, F> { from: stackloc, to: Allocation::reg(preg), })); - self.livevregs.get_mut(&evicted_vreg).unwrap().alloc = stackloc; + self.livevregs.get_mut(&evicted_vreg).unwrap().alloc = MaybeManyAllocation::Single(stackloc); preg } @@ -241,14 +295,84 @@ impl<'a, F: Function> Env<'a, F> { }; self.lru.poke(preg); self.livevregs.insert(operand.vreg(), LiveRegInfo { - alloc: Allocation::reg(preg) + alloc: MaybeManyAllocation::reg(preg) }); self.preg_info.insert(preg, operand.vreg()); *self.curralloc_mut(inst, idx) = Allocation::reg(preg); preg } - fn alloc_inst(&mut self, inst: Inst) -> Result<(), RegAllocError> { + fn alloc_inst(&mut self, block: Block, inst: Inst) -> Result<(), RegAllocError> { + if self.func.is_branch(inst) { + let succs = self.func.block_succs(block); + if succs.len() > 1 { + // Mapping from branch args to all the stack slots they need to be in + // to account for all successors. + let mut alloc_map: HashMap> = HashMap::new(); + for succ_idx in 0..succs.len() { + let branchargs = self.func.branch_blockparams(block, inst, succ_idx); + // Each branch arg will be mapped to multiple stack slots + // for each of the spaces reserved for the block params in each successor. + if let Some(blockparam_allocs) = self.blockparam_allocs.get(&succs[succ_idx]) { + assert_eq!(branchargs.len(), blockparam_allocs.len()); + for (i, brancharg) in branchargs.iter().enumerate() { + if let Some(slots) = alloc_map.get_mut(brancharg) { + slots.push(blockparam_allocs[i]); + } else { + let mut allocs = Vec::with_capacity(succs.len()); + allocs.push(blockparam_allocs[i]); + alloc_map.insert(*brancharg, allocs); + } + } + } else { + // Branching to a block that has not yet been allocated. + // Can happen with loops. 
+ // Make the allocations for the block params now + let mut blockparam_allocs = Vec::with_capacity(branchargs.len()); + for brancharg in branchargs { + let stackloc = self.allocstack(*brancharg); + blockparam_allocs.push(stackloc); + if let Some(slots) = alloc_map.get_mut(brancharg) { + slots.push(stackloc); + } else { + let mut allocs = Vec::with_capacity(succs.len()); + allocs.push(stackloc); + alloc_map.insert(*brancharg, allocs); + } + } + } + } + for (vreg, allocs) in alloc_map.into_iter() { + self.livevregs.insert(vreg, LiveRegInfo { + alloc: MaybeManyAllocation::Many(allocs) + }); + } + } else if succs.len() == 1 { + let succ_idx = 0; + let branchargs = self.func.branch_blockparams(block, inst, succ_idx); + // Block has already been allocated. + if let Some(blockparam_allocs) = self.blockparam_allocs.get(&succs[succ_idx]) { + assert_eq!(branchargs.len(), blockparam_allocs.len()); + for (i, brancharg) in branchargs.iter().enumerate() { + self.livevregs.insert(*brancharg, LiveRegInfo { + alloc: MaybeManyAllocation::Single(blockparam_allocs[i]) + }); + } + } else { + // Branching to a block that has not yet been allocated. + // Can happen with loops. 
+ // Make the allocations for the block params now + let mut blockparam_allocs = Vec::with_capacity(branchargs.len()); + for brancharg in branchargs { + let stackloc = self.allocstack(*brancharg); + blockparam_allocs.push(stackloc); + self.livevregs.insert(*brancharg, LiveRegInfo { + alloc: MaybeManyAllocation::Single(stackloc), + }); + } + } + } + } let operands = self.func.inst_operands(inst); self.init_operands_allocs(operands, inst); let mut def_operands = Vec::with_capacity(operands.len()); @@ -293,10 +417,22 @@ impl<'a, F: Function> Env<'a, F> { }; if let Some(prevalloc) = prevalloc { if prevalloc.is_stack() { - self.edits.push((ProgPoint::before(inst), Edit::Move { - from: Allocation::reg(preg), - to: Allocation::stack(prevalloc.as_stack().unwrap()), - })); + match prevalloc { + MaybeManyAllocation::Single(alloc) => self.edits.push((ProgPoint::before(inst), Edit::Move { + from: Allocation::reg(preg), + to: Allocation::stack(alloc.as_stack().unwrap()), + })), + MaybeManyAllocation::Many(allocs) => { + println!("multiple moves here {:?}", allocs); + for alloc in allocs.iter() { + self.edits.push((ProgPoint::before(inst), Edit::Move { + from: Allocation::reg(preg), + to: Allocation::stack(alloc.as_stack().unwrap()), + })); + } + } + } + } } } @@ -307,17 +443,62 @@ impl<'a, F: Function> Env<'a, F> { /// Allocates instructions in reverse order. fn alloc_basic_block(&mut self, block: Block) -> Result<(), RegAllocError> { for inst in self.func.block_insns(block).iter().rev() { - self.alloc_inst(inst)?; + self.alloc_inst(block, inst)?; } - // Reversing the result to conform to the external API - self.reverse_results(); + self.reload_at_begin(block); + self.livevregs.clear(); + for preg in self.preg_info.keys() { + self.freepregs[preg.class() as usize].push(*preg); + } + self.preg_info.clear(); Ok(()) + // Now, how do I tell the predecessor blocks that the registers live now + // are their branch args. 
+ } + + /// Insert instructions to load live regs at the beginning of the block. + fn reload_at_begin(&mut self, block: Block) { + // All registers that are still live were not defined in this block. + // So, they should be block params. + println!("{:?} in block {:?}", self.livevregs, block); + assert_eq!(self.livevregs.len(), self.func.block_params(block).len()); + let first_inst = self.func.block_insns(block).first(); + // The block params have already been allocated during processing of a block + // that branches to this one. + if let Some(blockparam_allocs) = self.blockparam_allocs.get(&block) { + for (vreg, reserved) in self.func.block_params(block).iter().zip(blockparam_allocs.iter()) { + self.edits.push((ProgPoint::before(first_inst), Edit::Move { + from: *reserved, + to: self.livevregs[vreg].alloc.as_single().unwrap(), + })); + } + } else { + // A mapping from block param index to current allocation to + // be used to indicate the current location of block args to predecessors. 
+ let mut blockparam_allocs = Vec::with_capacity(self.func.block_params(block).len()); + for vreg in self.func.block_params(block) { + let livereg = self.livevregs.get_mut(vreg).unwrap(); + let alloc = livereg.alloc.as_single().unwrap(); + if alloc.is_reg() { + let stackloc = self.allocstack(*vreg); + self.edits.push((ProgPoint::before(first_inst), Edit::Move { + from: stackloc, + to: alloc + })); + blockparam_allocs.push(stackloc); + } else { + blockparam_allocs.push(alloc); + } + } + self.blockparam_allocs.insert(block, blockparam_allocs); + } } fn reverse_results(&mut self) { let mut offset = 0; let mut prev_end: u32 = self.allocs.len().try_into().unwrap(); for i in 0..self.inst_alloc_offsets.len() - 1 { + //println!("prevend: {:?} inst_alloc_offsets: {:?}", prev_end, self.inst_alloc_offsets[i]); let diff = prev_end as u32 - self.inst_alloc_offsets[i]; prev_end = self.inst_alloc_offsets[i]; self.inst_alloc_offsets[i] = offset; @@ -329,9 +510,11 @@ impl<'a, F: Function> Env<'a, F> { } fn run(&mut self) -> Result<(), RegAllocError> { - for blocknum in 0..self.func.num_blocks() { + for blocknum in (0..self.func.num_blocks()).rev() { self.alloc_basic_block(Block::new(blocknum))?; } + // Reversing the result to conform to the external API + self.reverse_results(); Ok(()) } } From ac293b8fb09d560aed374b187f499be096eeadf6 Mon Sep 17 00:00:00 2001 From: demilade Date: Tue, 19 Mar 2024 16:14:48 +0100 Subject: [PATCH 03/95] updated readme --- README.md | 10 ++++++++++ src/fastalloc.rs | 5 ----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 9758baf3..72bb18c4 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,13 @@ +## fastalloc: a sample implementation of SSRA + +In the `RegallocOptions`, setting `use_fastalloc` will run a sample SSRA +(https://www.mattkeeter.com/blog/2022-10-04-ssra/) implementation. + +It only supports registers of class int and it can handle multiple basic +blocks. 
+ +To test it out on a toy language: https://github.com/d-sonuga/reverse-linear-scan-regalloc-concept-2. + ## regalloc2: another register allocator This is a register allocator that started life as, and is about 50% diff --git a/src/fastalloc.rs b/src/fastalloc.rs index 01600155..8bc7ff35 100644 --- a/src/fastalloc.rs +++ b/src/fastalloc.rs @@ -6,8 +6,6 @@ use crate::{cfg::CFGInfo, RegAllocError, Allocation, ion::Stats}; use alloc::vec::Vec; use hashbrown::{HashMap, HashSet}; -use std::println; - /// A least recently used cache organized as a linked list based on a vector #[derive(Debug)] struct Lru { @@ -423,7 +421,6 @@ impl<'a, F: Function> Env<'a, F> { to: Allocation::stack(alloc.as_stack().unwrap()), })), MaybeManyAllocation::Many(allocs) => { - println!("multiple moves here {:?}", allocs); for alloc in allocs.iter() { self.edits.push((ProgPoint::before(inst), Edit::Move { from: Allocation::reg(preg), @@ -460,7 +457,6 @@ impl<'a, F: Function> Env<'a, F> { fn reload_at_begin(&mut self, block: Block) { // All registers that are still live were not defined in this block. // So, they should be block params. 
- println!("{:?} in block {:?}", self.livevregs, block); assert_eq!(self.livevregs.len(), self.func.block_params(block).len()); let first_inst = self.func.block_insns(block).first(); // The block params have already been allocated during processing of a block @@ -498,7 +494,6 @@ impl<'a, F: Function> Env<'a, F> { let mut offset = 0; let mut prev_end: u32 = self.allocs.len().try_into().unwrap(); for i in 0..self.inst_alloc_offsets.len() - 1 { - //println!("prevend: {:?} inst_alloc_offsets: {:?}", prev_end, self.inst_alloc_offsets[i]); let diff = prev_end as u32 - self.inst_alloc_offsets[i]; prev_end = self.inst_alloc_offsets[i]; self.inst_alloc_offsets[i] = offset; From 1456d2b79c72cc31576bbcdcdd97dd5955c03e7a Mon Sep 17 00:00:00 2001 From: demilade Date: Sun, 19 May 2024 11:19:01 +0100 Subject: [PATCH 04/95] some progress --- README.md | 10 - fuzz/Cargo.toml | 6 + fuzz/fuzz_targets/fastalloc_checker.rs | 45 ++ regalloc2-tool/src/main.rs | 1 + src/fastalloc.rs | 541 ------------------------- src/fastalloc/iter.rs | 174 ++++++++ src/fastalloc/lru.rs | 118 ++++++ src/fastalloc/mod.rs | 462 +++++++++++++++++++++ src/fuzzing/mod.rs | 3 + 9 files changed, 809 insertions(+), 551 deletions(-) create mode 100644 fuzz/fuzz_targets/fastalloc_checker.rs delete mode 100644 src/fastalloc.rs create mode 100644 src/fastalloc/iter.rs create mode 100644 src/fastalloc/lru.rs create mode 100644 src/fastalloc/mod.rs diff --git a/README.md b/README.md index 72bb18c4..9758baf3 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,3 @@ -## fastalloc: a sample implementation of SSRA - -In the `RegallocOptions`, setting `use_fastalloc` will run a sample SSRA -(https://www.mattkeeter.com/blog/2022-10-04-ssra/) implementation. - -It only supports registers of class int and it can handle multiple basic -blocks. - -To test it out on a toy language: https://github.com/d-sonuga/reverse-linear-scan-regalloc-concept-2. 
- ## regalloc2: another register allocator This is a register allocator that started life as, and is about 50% diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 3315d022..8c96ee11 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -49,6 +49,12 @@ path = "fuzz_targets/ion_checker.rs" test = false doc = false +[[bin]] +name = "fastalloc_checker" +path = "fuzz_targets/fastalloc_checker.rs" +test = false +doc = false + # Enable debug assertions and overflow checks when fuzzing [profile.release] debug = true diff --git a/fuzz/fuzz_targets/fastalloc_checker.rs b/fuzz/fuzz_targets/fastalloc_checker.rs new file mode 100644 index 00000000..65dd50f4 --- /dev/null +++ b/fuzz/fuzz_targets/fastalloc_checker.rs @@ -0,0 +1,45 @@ +/* + * Released under the terms of the Apache 2.0 license with LLVM + * exception. See `LICENSE` for details. + */ + +#![no_main] +use regalloc2::fuzzing::arbitrary::{Arbitrary, Result, Unstructured}; +use regalloc2::fuzzing::checker::Checker; +use regalloc2::fuzzing::func::{Func, Options}; +use regalloc2::fuzzing::fuzz_target; + +#[derive(Clone, Debug)] +struct TestCase { + func: Func, +} + +impl Arbitrary<'_> for TestCase { + fn arbitrary(u: &mut Unstructured) -> Result { + Ok(TestCase { + func: Func::arbitrary_with_options( + u, + &Options { + reused_inputs: false, + fixed_regs: false, + fixed_nonallocatable: false, + clobbers: false, + reftypes: false, + }, + )?, + }) + } +} + +fuzz_target!(|testcase: TestCase| { + let func = testcase.func; + let _ = env_logger::try_init(); + log::trace!("func:\n{:?}", func); + let env = regalloc2::fuzzing::func::machine_env(); + let out = + regalloc2::fuzzing::fastalloc::run(&func, &env, true, false).expect("regalloc did not succeed"); + + let mut checker = Checker::new(&func, &env); + checker.prepare(&out); + checker.run().expect("checker failed"); +}); diff --git a/regalloc2-tool/src/main.rs b/regalloc2-tool/src/main.rs index e6d31734..1763456f 100644 --- a/regalloc2-tool/src/main.rs +++ 
b/regalloc2-tool/src/main.rs @@ -32,6 +32,7 @@ fn main() { let options = RegallocOptions { verbose_log: true, validate_ssa: true, + use_fastalloc: true, }; let output = match regalloc2::run(&function, function.machine_env(), &options) { Ok(output) => output, diff --git a/src/fastalloc.rs b/src/fastalloc.rs deleted file mode 100644 index 8bc7ff35..00000000 --- a/src/fastalloc.rs +++ /dev/null @@ -1,541 +0,0 @@ -use core::convert::TryInto; - -use crate::{Block, InstRange, Inst, OperandKind, Operand, PReg, RegClass, VReg, SpillSlot, FxHashMap, AllocationKind}; -use crate::{Function, MachineEnv, ssa::validate_ssa, ProgPoint, Edit, Output}; -use crate::{cfg::CFGInfo, RegAllocError, Allocation, ion::Stats}; -use alloc::vec::Vec; -use hashbrown::{HashMap, HashSet}; - -/// A least recently used cache organized as a linked list based on a vector -#[derive(Debug)] -struct Lru { - /// The list of node information - /// - /// Each node corresponds to a physical register. - /// The index of a node is the `address` from the perspective of the linked list. 
- data: Vec, - /// Index of the most recently used register - head: usize, - class: RegClass, -} - -#[derive(Clone, Copy, Debug)] -struct LruNode { - /// The previous physical register in the list - prev: usize, - /// The next physical register in the list - next: usize, -} - -impl Lru { - fn new(no_of_regs: usize) -> Self { - let mut data = Vec::with_capacity(no_of_regs); - for _ in 0..no_of_regs { - data.push(LruNode { prev: 0, next: 0 }); - } - let mut lru = Self { - head: 0, - data, - class: RegClass::Int - }; - for i in 0..no_of_regs { - lru.data[i].prev = i.checked_sub(1).unwrap_or(no_of_regs - 1); - lru.data[i].next = (i + 1) % no_of_regs; - } - lru - } - - /// Marks the physical register `i` as the most recently used - /// and sets `vreg` as the virtual register it contains - fn poke(&mut self, preg: PReg) { - let prev_newest = self.head; - let i = preg.hw_enc(); - if i == prev_newest { - return; - } - if self.data[prev_newest].prev != i { - self.remove(i); - self.insert_before(i, self.head); - } - self.head = i; - } - - /// Gets the least recently used physical register. - fn pop(&mut self) -> PReg { - let oldest = self.data[self.head].prev; - PReg::new(oldest, self.class) - } - - /// Splices out a node from the list - fn remove(&mut self, i: usize) { - let (iprev, inext) = (self.data[i].prev, self.data[i].next); - self.data[iprev].next = self.data[i].next; - self.data[inext].prev = self.data[i].prev; - } - - /// Insert node `i` before node `j` in the list - fn insert_before(&mut self, i: usize, j: usize) { - let prev = self.data[j].prev; - self.data[prev].next = i; - self.data[j].prev = i; - self.data[i] = LruNode { - next: j, - prev, - }; - } -} - -/// Info about the operand currently in a `PReg`. -#[derive(Debug)] -struct OpInfo { - /// The instruction the operand is in. - inst: Inst, - /// The index of the operand in the instruction. - op_idx: usize, - /// The `VReg` in the `PReg`. 
- vreg: VReg, -} - -#[derive(Debug, Clone)] -enum MaybeManyAllocation { - Single(Allocation), - /// Allocation in multiple stack locations - /// - /// This is a vector to account for cases where - /// multiple basic blocks have a single predecessor and - /// the branch arguments passed to them are the same. - /// For example, if we have an instruction in some block 1: - /// `if v0 < v1 goto 2 v0 v1 else goto 3 v0 v1`, - /// blocks 2 and 3 are successors. Each of them will have their own - /// stack slots for branch params, so virtual registers v0 and v1 - /// have to be present in both. - Many(Vec), -} - -impl MaybeManyAllocation { - fn as_reg(&self) -> Option { - match self { - Self::Single(alloc) => alloc.as_reg(), - _ => None - } - } - - fn kind(&self) -> AllocationKind { - match self { - Self::Single(alloc) => alloc.kind(), - _ => AllocationKind::Stack, - } - } - - fn reg(preg: PReg) -> Self { - Self::Single(Allocation::reg(preg)) - } - - fn is_stack(&self) -> bool { - match self { - Self::Single(alloc) => alloc.is_stack(), - _ => true - } - } - - fn as_single(&self) -> Option { - match self { - Self::Single(alloc) => Some(*alloc), - _ => None, - } - } -} - -#[derive(Debug)] -struct LiveRegInfo { - /// The current allocation. - alloc: MaybeManyAllocation, -} - -#[derive(Debug)] -pub struct Env<'a, F: Function> { - func: &'a F, - env: &'a MachineEnv, - /// What do we know about the live registers? - livevregs: HashMap, - /// Which virtual registers are held in physical registers. - preg_info: HashMap, - /// Free registers for each register class: Int, Float and Vector, - /// respectively. - freepregs: [Vec; 3], - /// Least recently used cache, for eviction. - lru: Lru, - /// All registers used in the current instruction being allocated. - regs_used_in_curr_inst: HashSet, - /// Spillslots allocated to virtual registers. - allocd_stackslots: HashMap, - /// Offset for the next stack slot allocation. 
- next_freestack_offset: usize, - /// A mapping from a block to the allocations of its block params - /// at the beginning of the block. - blockparam_allocs: HashMap>, - - // Output. - allocs: Vec, - inst_alloc_offsets: Vec, - edits: Vec<(ProgPoint, Edit)>, - num_spillslots: u32, - - stats: Stats, -} - -impl<'a, F: Function> Env<'a, F> { - fn new(func: &'a F, env: &'a MachineEnv) -> Self { - let freepregs = [ - env.preferred_regs_by_class[RegClass::Int as usize].clone(), - env.preferred_regs_by_class[RegClass::Float as usize].clone(), - env.preferred_regs_by_class[RegClass::Vector as usize].clone(), - ]; - let lru = Lru::new(freepregs[RegClass::Int as usize].len()); - let inst_alloc_offsets: Vec = (0..func.num_insts()) - .map(|_| 0).collect(); - Self { - func, - env, - allocs: Vec::with_capacity(func.num_vregs()), - edits: Vec::new(), - inst_alloc_offsets, - num_spillslots: 0, - stats: Stats::default(), - livevregs: HashMap::with_capacity(func.num_vregs()), - freepregs, - lru, - regs_used_in_curr_inst: HashSet::new(), - allocd_stackslots: HashMap::new(), - preg_info: HashMap::new(), - next_freestack_offset: 0, - blockparam_allocs: HashMap::with_capacity(func.num_blocks()), - } - } - - fn init_operands_allocs(&mut self, operands: &[Operand], inst: Inst) { - let no_of_operands: u32 = operands.len().try_into().unwrap(); - let offset = self.allocs.len(); - for _ in 0..no_of_operands { - self.allocs.push(Allocation::none()); - } - self.inst_alloc_offsets[inst.index()] = offset.try_into().unwrap(); - } - - fn assigned_reg(&self, operand: &Operand) -> Option { - self.livevregs.get(&operand.vreg()) - .and_then(|info| info.alloc.as_reg().map(|reg| reg)) - } - - fn no_of_operands(&self, inst: Inst) -> usize { - self.func.inst_operands(inst).len() - } - - fn curralloc_mut(&mut self, inst: Inst, op_idx: usize) -> &mut Allocation { - let inst_offset = self.inst_alloc_offsets[inst.index()]; - let inst_offset: usize = inst_offset.try_into().unwrap(); - let no_of_operands = 
self.no_of_operands(inst); - // The end results will be reversed - // So, the operands have to be put in reverse order to - // avoid breaking the external API. - &mut self.allocs[inst_offset + (no_of_operands - op_idx - 1)] - } - - fn allocstack(&mut self, vreg: VReg) -> Allocation { - let ss = if let Some(ss) = self.allocd_stackslots.get(&vreg) { - *ss - } else { - let size = self.func.spillslot_size(vreg.class()); - let offset = self.next_freestack_offset; - let slot = (offset + size - 1) & !(size - 1); - self.next_freestack_offset = offset + size; - let ss = SpillSlot::new(slot); - self.allocd_stackslots.insert(vreg, ss); - ss - }; - Allocation::stack(ss) - } - - fn evictreg(&mut self, inst: Inst) -> PReg { - let preg = self.lru.pop(); - let evicted_vreg = self.preg_info[&preg]; - let stackloc = self.allocstack(evicted_vreg); - self.edits.push((ProgPoint::after(inst), Edit::Move { - from: stackloc, - to: Allocation::reg(preg), - })); - self.livevregs.get_mut(&evicted_vreg).unwrap().alloc = MaybeManyAllocation::Single(stackloc); - preg - } - - fn freealloc(&mut self, operand: &Operand) { - let livereg = self.livevregs.get(&operand.vreg()) - .expect("Trying to free an unallocated vreg"); - match livereg.alloc.kind() { - AllocationKind::Reg => { - let preg = livereg.alloc.as_reg().unwrap(); - self.freepregs[operand.class() as usize].push(preg); - self.preg_info.remove(&preg); - self.livevregs.remove(&operand.vreg()); - } - _ => unimplemented!() - }; - } - - /// Allocate a physical register for `operand` at index `idx` in - /// instruction `inst`. 
- fn allocreg(&mut self, operand: &Operand, inst: Inst, idx: usize) -> PReg { - let freepregs_idx = operand.class() as usize; - let preg = if self.freepregs[freepregs_idx].is_empty() { - self.evictreg(inst) - } else{ - self.freepregs[freepregs_idx].pop().unwrap() - }; - self.lru.poke(preg); - self.livevregs.insert(operand.vreg(), LiveRegInfo { - alloc: MaybeManyAllocation::reg(preg) - }); - self.preg_info.insert(preg, operand.vreg()); - *self.curralloc_mut(inst, idx) = Allocation::reg(preg); - preg - } - - fn alloc_inst(&mut self, block: Block, inst: Inst) -> Result<(), RegAllocError> { - if self.func.is_branch(inst) { - let succs = self.func.block_succs(block); - if succs.len() > 1 { - // Mapping from branch args to all the stack slots they need to be in - // to account for all successors. - let mut alloc_map: HashMap> = HashMap::new(); - for succ_idx in 0..succs.len() { - let branchargs = self.func.branch_blockparams(block, inst, succ_idx); - // Each branch arg will be mapped to multiple stack slots - // for each of the spaces reserved for the block params in each successor. - if let Some(blockparam_allocs) = self.blockparam_allocs.get(&succs[succ_idx]) { - assert_eq!(branchargs.len(), blockparam_allocs.len()); - for (i, brancharg) in branchargs.iter().enumerate() { - if let Some(slots) = alloc_map.get_mut(brancharg) { - slots.push(blockparam_allocs[i]); - } else { - let mut allocs = Vec::with_capacity(succs.len()); - allocs.push(blockparam_allocs[i]); - alloc_map.insert(*brancharg, allocs); - } - } - } else { - // Branching to a block that has not yet been allocated. - // Can happen with loops. 
- // Make the allocations for the block params now - let mut blockparam_allocs = Vec::with_capacity(branchargs.len()); - for brancharg in branchargs { - let stackloc = self.allocstack(*brancharg); - blockparam_allocs.push(stackloc); - if let Some(slots) = alloc_map.get_mut(brancharg) { - slots.push(stackloc); - } else { - let mut allocs = Vec::with_capacity(succs.len()); - allocs.push(stackloc); - alloc_map.insert(*brancharg, allocs); - } - } - } - } - for (vreg, allocs) in alloc_map.into_iter() { - self.livevregs.insert(vreg, LiveRegInfo { - alloc: MaybeManyAllocation::Many(allocs) - }); - } - } else if succs.len() == 1 { - let succ_idx = 0; - let branchargs = self.func.branch_blockparams(block, inst, succ_idx); - // Block has already been allocated. - if let Some(blockparam_allocs) = self.blockparam_allocs.get(&succs[succ_idx]) { - assert_eq!(branchargs.len(), blockparam_allocs.len()); - for (i, brancharg) in branchargs.iter().enumerate() { - self.livevregs.insert(*brancharg, LiveRegInfo { - alloc: MaybeManyAllocation::Single(blockparam_allocs[i]) - }); - } - } else { - // Branching to a block that has not yet been allocated. - // Can happen with loops. 
- // Make the allocations for the block params now - let mut blockparam_allocs = Vec::with_capacity(branchargs.len()); - for brancharg in branchargs { - let stackloc = self.allocstack(*brancharg); - blockparam_allocs.push(stackloc); - self.livevregs.insert(*brancharg, LiveRegInfo { - alloc: MaybeManyAllocation::Single(stackloc), - }); - } - } - } - } - let operands = self.func.inst_operands(inst); - self.init_operands_allocs(operands, inst); - let mut def_operands = Vec::with_capacity(operands.len()); - let mut use_operands = Vec::with_capacity(operands.len()); - for (i, operand) in operands.iter().enumerate() { - if operand.kind() == OperandKind::Def { - def_operands.push(i); - } else { - use_operands.push(i); - } - } - - for idx in def_operands { - if let Some(preg) = self.assigned_reg(&operands[idx]) { - *self.curralloc_mut(inst, idx) = Allocation::reg(preg); - } else { - self.allocreg(&operands[idx], inst, idx); - } - self.freealloc(&operands[idx]); - } - - for idx in use_operands { - let operand = &operands[idx]; - let prevalloc = if let Some(livereg) = self.livevregs.get(&operand.vreg()) { - Some(livereg.alloc.clone()) - } else { - None - }; - let assigned_reg = self.assigned_reg(operand) - .map(|reg| reg.clone()); - let preg = if let Some(preg) = assigned_reg { - *self.curralloc_mut(inst, idx) = Allocation::reg(preg); - self.lru.poke(preg); - preg - } else { - let preg = self.allocreg(operand, inst, idx); - if self.regs_used_in_curr_inst.contains(&preg) { - return Err(RegAllocError::TooManyLiveRegs); - } - self.regs_used_in_curr_inst.insert(preg); - preg - }; - if let Some(prevalloc) = prevalloc { - if prevalloc.is_stack() { - match prevalloc { - MaybeManyAllocation::Single(alloc) => self.edits.push((ProgPoint::before(inst), Edit::Move { - from: Allocation::reg(preg), - to: Allocation::stack(alloc.as_stack().unwrap()), - })), - MaybeManyAllocation::Many(allocs) => { - for alloc in allocs.iter() { - self.edits.push((ProgPoint::before(inst), Edit::Move { - 
from: Allocation::reg(preg), - to: Allocation::stack(alloc.as_stack().unwrap()), - })); - } - } - } - - } - } - } - self.regs_used_in_curr_inst.clear(); - Ok(()) - } - - /// Allocates instructions in reverse order. - fn alloc_basic_block(&mut self, block: Block) -> Result<(), RegAllocError> { - for inst in self.func.block_insns(block).iter().rev() { - self.alloc_inst(block, inst)?; - } - self.reload_at_begin(block); - self.livevregs.clear(); - for preg in self.preg_info.keys() { - self.freepregs[preg.class() as usize].push(*preg); - } - self.preg_info.clear(); - Ok(()) - // Now, how do I tell the predecessor blocks that the registers live now - // are their branch args. - } - - /// Insert instructions to load live regs at the beginning of the block. - fn reload_at_begin(&mut self, block: Block) { - // All registers that are still live were not defined in this block. - // So, they should be block params. - assert_eq!(self.livevregs.len(), self.func.block_params(block).len()); - let first_inst = self.func.block_insns(block).first(); - // The block params have already been allocated during processing of a block - // that branches to this one. - if let Some(blockparam_allocs) = self.blockparam_allocs.get(&block) { - for (vreg, reserved) in self.func.block_params(block).iter().zip(blockparam_allocs.iter()) { - self.edits.push((ProgPoint::before(first_inst), Edit::Move { - from: *reserved, - to: self.livevregs[vreg].alloc.as_single().unwrap(), - })); - } - } else { - // A mapping from block param index to current allocation to - // be used to indicate the current location of block args to predecessors. 
- let mut blockparam_allocs = Vec::with_capacity(self.func.block_params(block).len()); - for vreg in self.func.block_params(block) { - let livereg = self.livevregs.get_mut(vreg).unwrap(); - let alloc = livereg.alloc.as_single().unwrap(); - if alloc.is_reg() { - let stackloc = self.allocstack(*vreg); - self.edits.push((ProgPoint::before(first_inst), Edit::Move { - from: stackloc, - to: alloc - })); - blockparam_allocs.push(stackloc); - } else { - blockparam_allocs.push(alloc); - } - } - self.blockparam_allocs.insert(block, blockparam_allocs); - } - } - - fn reverse_results(&mut self) { - let mut offset = 0; - let mut prev_end: u32 = self.allocs.len().try_into().unwrap(); - for i in 0..self.inst_alloc_offsets.len() - 1 { - let diff = prev_end as u32 - self.inst_alloc_offsets[i]; - prev_end = self.inst_alloc_offsets[i]; - self.inst_alloc_offsets[i] = offset; - offset += diff; - } - *self.inst_alloc_offsets.last_mut().unwrap() = offset; - self.allocs.reverse(); - self.edits.reverse(); - } - - fn run(&mut self) -> Result<(), RegAllocError> { - for blocknum in (0..self.func.num_blocks()).rev() { - self.alloc_basic_block(Block::new(blocknum))?; - } - // Reversing the result to conform to the external API - self.reverse_results(); - Ok(()) - } -} - -pub fn run( - func: &F, - mach_env: &MachineEnv, - enable_annotations: bool, - enable_ssa_checker: bool, -) -> Result { - let cfginfo = CFGInfo::new(func)?; - - if enable_ssa_checker { - validate_ssa(func, &cfginfo)?; - } - - let mut env = Env::new(func, mach_env); - env.run()?; - - Ok(Output { - edits: env.edits, - allocs: env.allocs, - inst_alloc_offsets: env.inst_alloc_offsets, - num_spillslots: env.num_spillslots as usize, - debug_locations: Vec::new(), - safepoint_slots: Vec::new(), - stats: env.stats, - }) -} diff --git a/src/fastalloc/iter.rs b/src/fastalloc/iter.rs new file mode 100644 index 00000000..f911b1f9 --- /dev/null +++ b/src/fastalloc/iter.rs @@ -0,0 +1,174 @@ +use crate::{Operand, OperandKind, OperandPos, 
OperandConstraint}; +use std::println; + +/// Looking for operands with this particular constraint. +#[derive(Clone, Copy, PartialEq)] +enum LookingFor { + FixedReg, + Others +} + +/// Iterate over operands in position `pos` and kind +/// `kind` in no particular order. +struct ByKindAndPosOperands<'a> { + operands: &'a [Operand], + idx: usize, + kind: OperandKind, + pos: OperandPos, +} + +impl<'a> ByKindAndPosOperands<'a> { + fn new(operands: &'a [Operand], kind: OperandKind, pos: OperandPos) -> Self { + Self { operands, idx: 0, kind, pos } + } +} + +impl<'a> Iterator for ByKindAndPosOperands<'a> { + type Item = (usize, Operand); + + fn next(&mut self) -> Option { + while self.idx < self.operands.len() && (self.operands[self.idx].kind() != self.kind + || self.operands[self.idx].pos() != self.pos) { + self.idx += 1; + } + if self.idx >= self.operands.len() { + None + } else { + self.idx += 1; + Some((self.idx - 1, self.operands[self.idx - 1])) + } + } +} + +/// Iterate over operands with position `pos` starting from the ones with +/// fixed registers, then the rest. 
+struct ByPosOperands<'a> { + operands: &'a [Operand], + idx: usize, + looking_for: LookingFor, + pos: OperandPos, +} + +impl<'a> ByPosOperands<'a> { + fn new(operands: &'a [Operand], pos: OperandPos) -> Self { + Self { operands, idx: 0, looking_for: LookingFor::FixedReg, pos } + } +} + +impl<'a> ByPosOperands<'a> { + fn next_fixed_reg(&mut self) -> Option<(usize, Operand)> { + while self.idx < self.operands.len() && (self.operands[self.idx].pos() != self.pos + || !matches!(self.operands[self.idx].constraint(), OperandConstraint::FixedReg(_))) { + self.idx += 1; + } + if self.idx >= self.operands.len() { + None + } else { + self.idx += 1; + Some((self.idx - 1, self.operands[self.idx - 1])) + } + } + + fn next_others(&mut self) -> Option<(usize, Operand)> { + while self.idx < self.operands.len() && (self.operands[self.idx].pos() != self.pos + || matches!(self.operands[self.idx].constraint(), OperandConstraint::FixedReg(_))) { + self.idx += 1; + } + if self.idx >= self.operands.len() { + None + } else { + self.idx += 1; + Some((self.idx - 1, self.operands[self.idx - 1])) + } + } +} + +impl<'a> Iterator for ByPosOperands<'a> { + type Item = (usize, Operand); + + fn next(&mut self) -> Option { + if self.idx >= self.operands.len() { + if self.looking_for == LookingFor::FixedReg { + self.idx = 0; + self.looking_for = LookingFor::Others; + } else { + return None; + } + } + match self.looking_for { + LookingFor::FixedReg => { + let next = self.next_fixed_reg(); + if next.is_none() { + self.next() + } else { + next + } + }, + LookingFor::Others => self.next_others(), + } + } +} + +pub struct LateOperands<'a>(ByPosOperands<'a>); + +impl<'a> LateOperands<'a> { + pub fn new(operands: &'a [Operand]) -> Self { + Self(ByPosOperands::new(operands, OperandPos::Late)) + } +} + +impl<'a> Iterator for LateOperands<'a> { + type Item = (usize, Operand); + + fn next(&mut self) -> Option { + self.0.next() + } +} + +pub struct EarlyOperands<'a>(ByPosOperands<'a>); + +impl<'a> 
EarlyOperands<'a> { + pub fn new(operands: &'a [Operand]) -> Self { + Self(ByPosOperands::new(operands, OperandPos::Early)) + } +} + +impl<'a> Iterator for EarlyOperands<'a> { + type Item = (usize, Operand); + + fn next(&mut self) -> Option { + self.0.next() + } +} + +pub struct LateDefOperands<'a>(ByKindAndPosOperands<'a>); + +impl<'a> LateDefOperands<'a> { + pub fn new(operands: &'a [Operand]) -> Self { + Self(ByKindAndPosOperands::new(operands, OperandKind::Def, OperandPos::Late)) + } +} + +impl<'a> Iterator for LateDefOperands<'a> { + type Item = (usize, Operand); + + fn next(&mut self) -> Option { + self.0.next() + } +} + +pub struct EarlyDefOperands<'a>(ByKindAndPosOperands<'a>); + +impl<'a> EarlyDefOperands<'a> { + pub fn new(operands: &'a [Operand]) -> Self { + Self(ByKindAndPosOperands::new(operands, OperandKind::Def, OperandPos::Early)) + } +} + +impl<'a> Iterator for EarlyDefOperands<'a> { + type Item = (usize, Operand); + + fn next(&mut self) -> Option { + self.0.next() + } +} diff --git a/src/fastalloc/lru.rs b/src/fastalloc/lru.rs new file mode 100644 index 00000000..add850dc --- /dev/null +++ b/src/fastalloc/lru.rs @@ -0,0 +1,118 @@ +use alloc::vec::Vec; +use core::ops::IndexMut; +use std::ops::Index; +use crate::{RegClass, PReg}; + +/// A least-recently-used cache organized as a linked list based on a vector. +#[derive(Debug)] +pub struct Lru { + /// The list of node information. + /// + /// Each node corresponds to a physical register. + /// The index of a node is the `address` from the perspective of the linked list. + pub data: Vec, + /// Index of the most recently used register. + pub head: usize, + /// Class of registers in the cache. + pub regclass: RegClass, +} + +#[derive(Clone, Copy, Debug)] +pub struct LruNode { + /// The previous physical register in the list. + pub prev: usize, + /// The next physical register in the list. 
+ pub next: usize, +} + +impl Lru { + pub fn new(regclass: RegClass, no_of_regs: usize) -> Self { + let mut data = Vec::with_capacity(no_of_regs); + for _ in 0..no_of_regs { + data.push(LruNode { prev: 0, next: 0 }); + } + let mut lru = Self { + head: 0, + data, + regclass, + }; + for i in 0..no_of_regs { + lru.data[i].prev = i.checked_sub(1).unwrap_or(no_of_regs - 1); + lru.data[i].next = (i + 1) % no_of_regs; + } + lru + } + + /// Marks the physical register `i` as the most recently used + /// and sets `vreg` as the virtual register it contains. + pub fn poke(&mut self, preg: PReg) { + let prev_newest = self.head; + let i = preg.hw_enc(); + if i == prev_newest { + return; + } + if self.data[prev_newest].prev != i { + self.remove(i); + self.insert_before(i, self.head); + } + self.head = i; + } + + /// Gets the least recently used physical register. + pub fn pop(&mut self) -> PReg { + let oldest = self.data[self.head].prev; + PReg::new(oldest, self.regclass) + } + + /// Splices out a node from the list. + pub fn remove(&mut self, i: usize) { + let (iprev, inext) = (self.data[i].prev, self.data[i].next); + self.data[iprev].next = self.data[i].next; + self.data[inext].prev = self.data[i].prev; + } + + /// Insert node `i` before node `j` in the list. + pub fn insert_before(&mut self, i: usize, j: usize) { + let prev = self.data[j].prev; + self.data[prev].next = i; + self.data[j].prev = i; + self.data[i] = LruNode { + next: j, + prev, + }; + } +} + +#[derive(Debug)] +pub struct PartedByRegClass { + pub items: [T; 3], +} + +impl Index for PartedByRegClass { + type Output = T; + + fn index(&self, index: RegClass) -> &Self::Output { + &self.items[index as usize] + } +} + +impl IndexMut for PartedByRegClass { + fn index_mut(&mut self, index: RegClass) -> &mut Self::Output { + &mut self.items[index as usize] + } +} + +/// Least-recently-used caches for register classes Int, Float, and Vector, respectively. 
+pub type Lrus = PartedByRegClass; + +impl Lrus { + pub fn new(no_of_int_regs: usize, no_of_float_regs: usize, no_of_vec_regs: usize) -> Self { + Self { + items: [ + Lru::new(RegClass::Int, no_of_int_regs), + Lru::new(RegClass::Float, no_of_float_regs), + Lru::new(RegClass::Vector, no_of_vec_regs), + ] + } + } +} diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs new file mode 100644 index 00000000..202c7185 --- /dev/null +++ b/src/fastalloc/mod.rs @@ -0,0 +1,462 @@ +use core::convert::TryInto; +use core::ops::{Index, IndexMut}; + +use crate::{Block, Inst, OperandKind, Operand, PReg, RegClass, VReg, SpillSlot, AllocationKind, OperandConstraint, InstPosition}; +use crate::{Function, MachineEnv, ssa::validate_ssa, ProgPoint, Edit, Output}; +use crate::{cfg::CFGInfo, RegAllocError, Allocation, ion::Stats}; +use alloc::vec::Vec; +use hashbrown::HashSet; +use std::println; + +mod lru; +mod iter; +use lru::*; +use iter::*; + +#[derive(Debug)] +struct Allocs { + allocs: Vec, + /// `inst_alloc_offsets[i]` is the offset into `allocs` for the allocations of + /// instruction `i`'s operands. + inst_alloc_offsets: Vec, +} + +impl Allocs { + fn new(func: &F, env: &MachineEnv) -> Self { + // The number of operands is <= number of virtual registers + // It can be lesser in the case where virtual registers are used multiple + // times in a single instruction. 
+ let mut allocs = Vec::with_capacity(func.num_vregs()); + let mut inst_alloc_offsets = Vec::with_capacity(func.num_vregs()); + for inst in 0..func.num_insts() { + let operands_len = func.inst_operands(Inst::new(inst)).len() as u32; + inst_alloc_offsets.push(allocs.len() as u32); + for _ in 0..operands_len { + allocs.push(Allocation::none()); + } + } + Self { + allocs, + inst_alloc_offsets, + } + } +} + +impl Index<(usize, usize)> for Allocs { + type Output = Allocation; + + /// Retrieve the allocation for operand `idx.1` at instruction `idx.0` + fn index(&self, idx: (usize, usize)) -> &Allocation { + &self.allocs[self.inst_alloc_offsets[idx.0] as usize + idx.1] + } +} + +impl IndexMut<(usize, usize)> for Allocs { + fn index_mut(&mut self, idx: (usize, usize)) -> &mut Allocation { + &mut self.allocs[self.inst_alloc_offsets[idx.0] as usize + idx.1] + } +} + +#[derive(Debug)] +pub struct Env<'a, F: Function> { + func: &'a F, + + /// The current allocations for all virtual registers. + vreg_allocs: Vec, + /// Spillslots for all virtual registers. + /// `vreg_spillslots[i]` is the spillslot for virtual register `i`. + vreg_spillslots: Vec, + /// The virtual registers that are currently live. + live_vregs: HashSet, + /// Free physical registers for classes Int, Float, and Vector, respectively. + freepregs: PartedByRegClass>,//[Vec; 3], + /// Least-recently-used caches for register classes Int, Float, and Vector, respectively. + lrus: Lrus, + /// `vreg_in_preg[class][i]` is the virtual register currently in physical register `i` + /// in register class `class`. + vreg_in_preg: PartedByRegClass>, + + next_spillslot_idx: usize, + + // Output. 
+ allocs: Allocs, + edits: Vec<(ProgPoint, Edit)>, + num_spillslots: u32, + stats: Stats, +} + +impl<'a, F: Function> Env<'a, F> { + fn new(func: &'a F, env: &'a MachineEnv) -> Self { + println!("multispillslots_named_by_last_slot: {:?}", func.multi_spillslot_named_by_last_slot()); + let regs = [ + env.preferred_regs_by_class[RegClass::Int as usize].clone(), + env.preferred_regs_by_class[RegClass::Float as usize].clone(), + env.preferred_regs_by_class[RegClass::Vector as usize].clone(), + ]; + use alloc::vec; + Self { + func, + vreg_allocs: vec![Allocation::none(); func.num_vregs()], + vreg_spillslots: vec![SpillSlot::invalid(); func.num_vregs()], + live_vregs: HashSet::with_capacity(func.num_vregs()), + freepregs: PartedByRegClass { items: regs.clone() }, + lrus: Lrus::new( + regs[0].len(), + regs[1].len(), + regs[2].len() + ), + vreg_in_preg: PartedByRegClass { items: [ + vec![VReg::invalid(); regs[0].len()], + vec![VReg::invalid(); regs[1].len()], + vec![VReg::invalid(); regs[2].len()], + ] }, + next_spillslot_idx: 0, + allocs: Allocs::new(func, env), + edits: Vec::new(), + num_spillslots: 0, + stats: Stats::default(), + } + } + + fn add_move(&mut self, inst: Inst, vreg: VReg, to: Allocation, pos: InstPosition) { + let from = self.vreg_allocs[vreg.vreg()]; + if from.is_stack() && to.is_stack() { + let mut evicted = false; + let scratch_reg = if self.freepregs[vreg.class()].is_empty() { + evicted = true; + self.evictreg(inst, vreg.class()) + } else { + *self.freepregs[vreg.class()].last().unwrap() + }; + if evicted { + self.freepregs[vreg.class()].push(scratch_reg); + } + let scratch_alloc = Allocation::reg(scratch_reg); + // Edits are added in reverse order because the edits + // will be reversed when all allocation is completed. 
+ println!("Edit: {:?}", (ProgPoint::new(inst, pos), Edit::Move { + from: scratch_alloc, + to, + })); + self.edits.push((ProgPoint::new(inst, pos), Edit::Move { + from: scratch_alloc, + to, + })); + println!("Edit: {:?}", (ProgPoint::new(inst, pos), Edit::Move { + from, + to: scratch_alloc, + })); + self.edits.push((ProgPoint::new(inst, pos), Edit::Move { + from, + to: scratch_alloc, + })) + + } else { + println!("Edit: {:?}", (ProgPoint::new(inst, pos), Edit::Move { + from, + to, + })); + self.edits.push((ProgPoint::new(inst, pos), Edit::Move { + from, + to, + })); + } + } + + fn move_after_inst(&mut self, inst: Inst, vreg: VReg, to: Allocation) { + self.add_move(inst, vreg, to, InstPosition::After); + } + + fn move_before_inst(&mut self, inst: Inst, vreg: VReg, to: Allocation) { + self.add_move(inst, vreg, to, InstPosition::Before); + } + + fn allocd_within_constraint(&self, op: Operand) -> bool { + let curr_alloc = self.vreg_allocs[op.vreg().vreg()]; + match op.constraint() { + OperandConstraint::Any => curr_alloc.is_some(), + OperandConstraint::Reg => curr_alloc.is_reg() && curr_alloc.as_reg().unwrap().class() == op.class(), + OperandConstraint::Stack => curr_alloc.is_stack(), + OperandConstraint::FixedReg(preg) => curr_alloc.is_reg() && + curr_alloc.as_reg().unwrap() == preg, + OperandConstraint::Reuse(_) => { + // TODO: Come back here!!! + true + } + } + } + + fn evictreg(&mut self, inst: Inst, regclass: RegClass) -> PReg { + let preg = self.lrus[regclass].pop(); + // TODO: Check if the preg has already been allocated for this + // instruction. If it has, then there are too many stuff to + // allocate, making allocation impossible. + // Remember that for this to be true, the fixed registers must have + // be allocated already. Why? 
Because if some register p0 has been allocated + // and some fixed constraint register is encountered that needs p0, then + // allocation will fail regardless of whether or not there are other free registers + let evicted_vreg = self.vreg_in_preg[regclass][preg.hw_enc()]; + let slot = self.allocstack(&evicted_vreg); + self.vreg_allocs[evicted_vreg.vreg()] = Allocation::stack(slot); + println!("Move reason: eviction"); + self.move_after_inst(inst, evicted_vreg, Allocation::reg(preg)); + preg + } + + fn freealloc(&mut self, vreg: VReg) { + let alloc = self.vreg_allocs[vreg.vreg()]; + match alloc.kind() { + AllocationKind::Reg => { + let preg = alloc.as_reg().unwrap(); + self.freepregs[vreg.class()].push(preg); + self.vreg_in_preg[vreg.class()][preg.hw_enc()] = VReg::invalid(); + } + AllocationKind::Stack => { + // Do nothing. + // I think it the allocation will be cheaper this way. + } + AllocationKind::None => panic!("Attempting to free an unallocated operand!") + } + self.vreg_allocs[vreg.vreg()] = Allocation::none(); + self.live_vregs.remove(&vreg); + } + + /// Allocates a spill slot on the stack for `vreg` + fn allocstack(&mut self, vreg: &VReg) -> SpillSlot { + let size: u32 = self.func.spillslot_size(vreg.class()).try_into().unwrap(); + // Rest of this function was copied verbatim + // from `Env::allocate_spillslot` in src/ion/spill.rs. + let mut offset = self.num_spillslots; + // Align up to `size`. + debug_assert!(size.is_power_of_two()); + offset = (offset + size - 1) & !(size - 1); + let slot = if self.func.multi_spillslot_named_by_last_slot() { + offset + size - 1 + } else { + offset + }; + offset += size; + self.num_spillslots = offset; + SpillSlot::new(slot as usize) + } + + /// Allocates a physical register for the operand `op` + /// which should have a constraint of either + /// `OperandConstraint::Any` or `OperandConstraint::Reg`. 
+ fn alloc_reg_for_operand(&mut self, inst: Inst, op: Operand) { + assert!(op.constraint() == OperandConstraint::Any || + op.constraint() == OperandConstraint::Reg); + let preg = if self.freepregs[op.class()].is_empty() { + self.evictreg(inst, op.class()) + } else { + self.freepregs[op.class()].pop().unwrap() + }; + self.lrus[op.class()].poke(preg); + self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(preg); + self.vreg_in_preg[op.class()][preg.hw_enc()] = op.vreg(); + } + + /// Allocates for the operand `op` with index `op_idx` into the + /// vector of instruction `inst`'s operands. + fn alloc_operand(&mut self, inst: Inst, op: Operand, op_idx: usize) { + match op.constraint() { + OperandConstraint::Any => { + self.alloc_reg_for_operand(inst, op); + } + OperandConstraint::Reg => { + self.alloc_reg_for_operand(inst, op); + } + OperandConstraint::Stack => { + panic!("Stack only allocations aren't supported yet"); + } + OperandConstraint::FixedReg(preg) => { + panic!("Fixed reg allocations aren't supported yet"); + } + OperandConstraint::Reuse(_) => { + // We need to allocate a register for the operand, + // then remember that it must have the same allocation + // as the input when processing the use operands. + panic!("Reuse input allocations aren't supported yet"); + } + } + self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[op.vreg().vreg()]; + } + + fn process_operand_allocation(&mut self, inst: Inst, op: Operand, op_idx: usize) { + self.live_vregs.insert(op.vreg()); + if !self.allocd_within_constraint(op) { + let prev_alloc = self.vreg_allocs[op.vreg().vreg()]; + self.alloc_operand(inst, op, op_idx); + // Need to insert a move to propagate flow from the current + // allocation to the subsequent places where the value was + // used (in `prev_alloc`, that is). 
+ if prev_alloc.is_some() { + println!("Move reason: Prev allocation doesn't meet constraints"); + if op.kind() == OperandKind::Def { + // In the case where `op` is a def, + // the allocation of `op` will not be holding the value + // of `op` before the instruction. Since it's a def, + // it will only hold the value after. So, the move + // has to be done after. + self.move_after_inst(inst, op.vreg(), prev_alloc); + } else { + // In the case where `op` is a use, the defined value could + // have the same allocation as the `op` allocation. This + // is due to the fact that def operands are allocated and freed before + // use operands. Because of this, `op`'s allocation could be + // overwritten by the defined value's. And after the instruction, + // the defined value could be in `op`'s allocation, resulting in + // an incorrect value being moved into `prev_alloc`. + // Since, it's a use, the correct `op` value will already be in + // the `op` allocation before the instruction. + // Because of this, the move is done before, not after, `inst`. 
+ self.move_before_inst(inst, op.vreg(), prev_alloc); + } + } + println!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); + } else { + self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[op.vreg().vreg()]; + println!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); + } + } + + fn alloc_slots_for_block_params(&mut self, block: Block, inst: Inst, succ: Block, succ_idx: usize) { + for vreg in self.func.block_params(succ) { + if self.vreg_spillslots[vreg.vreg()].is_invalid() { + self.vreg_spillslots[vreg.vreg()] = self.allocstack(vreg); + println!("Block param {:?} is in {:?}", vreg, Allocation::stack(self.vreg_spillslots[vreg.vreg()])); + } + } + } + + /// If instruction `inst` is a branch from block `block` to block `succ` and + /// `succ_idx` is the successor index of `succ`, then this function sets the current + /// allocation for the branch args to be the spill slots where `succ` is expecting its + /// branch params to be. 
+ fn place_branch_args_in_stack_allocs(&mut self, block: Block, inst: Inst, succ: Block, succ_idx: usize) { + let succ_params = self.func.block_params(succ); + for (pos, vreg) in self.func.branch_blockparams(block, inst, succ_idx).iter().enumerate() { + self.live_vregs.insert(*vreg); + let prev_alloc = self.vreg_allocs[vreg.vreg()]; + let succ_param_vreg = succ_params[pos]; + self.vreg_allocs[vreg.vreg()] = Allocation::stack(self.vreg_spillslots[succ_param_vreg.vreg()]); + println!(" --- Placing branch arg {:?} in {:?}", vreg, Allocation::stack(self.vreg_spillslots[succ_param_vreg.vreg()])); + if prev_alloc.is_some() { + println!("{:?} which is now in {:?} inserting move to {:?}", vreg, self.vreg_allocs[vreg.vreg()], prev_alloc); + self.move_before_inst(inst, *vreg, prev_alloc); + } else { + println!("{:?} prev alloc is none, so no moving here", vreg); + } + } + } + + fn alloc_inst(&mut self, block: Block, inst: Inst) { + if self.func.is_branch(inst) { + for (succ_idx, succ) in self.func.block_succs(block).iter().enumerate() { + self.alloc_slots_for_block_params(block, inst, *succ, succ_idx); + self.place_branch_args_in_stack_allocs(block, inst, *succ, succ_idx); + } + } + let operands = self.func.inst_operands(inst); + for (op_idx, op) in LateOperands::new(operands) { + self.process_operand_allocation(inst, op, op_idx); + } + for (_, op) in LateDefOperands::new(operands) { + self.freealloc(op.vreg()); + } + for (op_idx, op) in EarlyOperands::new(operands) { + self.process_operand_allocation(inst, op, op_idx); + } + for (_, op) in EarlyDefOperands::new(operands) { + self.freealloc(op.vreg()); + } + } + + fn reload_at_begin(&mut self, block: Block) { + // We need to check for the registers that are still live. + // These registers are livein and they should be stack-allocated. + // TODO: Get rid of this clone!!!!!!! 
+ let live_vregs = self.live_vregs.clone(); + for vreg in live_vregs.into_iter() { + if self.vreg_spillslots[vreg.vreg()].is_invalid() { + self.vreg_spillslots[vreg.vreg()] = self.allocstack(&vreg); + } + let prev_alloc = self.vreg_allocs[vreg.vreg()]; + if prev_alloc.is_reg() { + self.freealloc(vreg); + } + self.vreg_allocs[vreg.vreg()] = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); + println!("Move reason: reload {:?} at begin - move into its spillslot", vreg); + self.move_before_inst( + self.func.block_insns(block).first(), + vreg, + prev_alloc, + ); + } + } + + fn alloc_block(&mut self, block: Block) { + println!("{:?} start", block); + for inst in self.func.block_insns(block).iter().rev() { + self.alloc_inst(block, inst); + } + self.reload_at_begin(block); + self.live_vregs.clear(); + println!("{:?} end\n", block); + } + + fn run(&mut self) -> Result<(), RegAllocError> { + assert_eq!(self.func.entry_block().index(), 0); + for block in (0..self.func.num_blocks()).rev() { + self.alloc_block(Block::new(block)); + } + self.edits.reverse(); + + ///////////////////////////////////////////////////////////////////////////////////// + println!("Done!"); + // Debugging that dastardly bug! 
+ struct Z(usize); + impl std::fmt::Debug for Z { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "v{}", self.0) + } + } + let mut v = Vec::new(); + for i in 0..self.func.num_vregs() { + if self.vreg_spillslots[i].is_valid() { + v.push((Z(i), Allocation::stack(self.vreg_spillslots[i]))); + } + } + println!("{:?}", v); + ///////////////////////////////////////////////////////////////////////////////////// + + Ok(()) + } +} + +pub fn run( + func: &F, + mach_env: &MachineEnv, + enable_annotations: bool, + enable_ssa_checker: bool, +) -> Result { + let cfginfo = CFGInfo::new(func)?; + + if enable_ssa_checker { + validate_ssa(func, &cfginfo)?; + } + + let mut env = Env::new(func, mach_env); + env.run()?; +use alloc::vec; +println!("Final edits: {:?}", env.edits); + Ok(Output { + edits: env.edits, + allocs: env.allocs.allocs, + inst_alloc_offsets: env.allocs.inst_alloc_offsets, + num_spillslots: env.num_spillslots as usize, + debug_locations: Vec::new(), + safepoint_slots: Vec::new(), + stats: env.stats, + }) +} diff --git a/src/fuzzing/mod.rs b/src/fuzzing/mod.rs index 1b05994f..6dd9073b 100644 --- a/src/fuzzing/mod.rs +++ b/src/fuzzing/mod.rs @@ -21,6 +21,9 @@ pub mod cfg { pub mod ion { pub use crate::ion::*; } +pub mod fastalloc { + pub use crate::fastalloc::*; +} pub mod checker { pub use crate::checker::*; } From fe31f41263989e5b9dce7be1396eb63c544aa52f Mon Sep 17 00:00:00 2001 From: demilade Date: Wed, 29 May 2024 07:06:26 +0100 Subject: [PATCH 05/95] basic algorithm correct --- src/fastalloc/mod.rs | 281 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 235 insertions(+), 46 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 202c7185..f7e418d3 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -69,14 +69,14 @@ pub struct Env<'a, F: Function> { /// The virtual registers that are currently live. 
live_vregs: HashSet, /// Free physical registers for classes Int, Float, and Vector, respectively. - freepregs: PartedByRegClass>,//[Vec; 3], + freepregs: PartedByRegClass>, /// Least-recently-used caches for register classes Int, Float, and Vector, respectively. lrus: Lrus, /// `vreg_in_preg[class][i]` is the virtual register currently in physical register `i` /// in register class `class`. vreg_in_preg: PartedByRegClass>, - - next_spillslot_idx: usize, + /// For parallel moves from branch args to block paeam spillslots. + temp_spillslots: PartedByRegClass>, // Output. allocs: Allocs, @@ -87,7 +87,7 @@ pub struct Env<'a, F: Function> { impl<'a, F: Function> Env<'a, F> { fn new(func: &'a F, env: &'a MachineEnv) -> Self { - println!("multispillslots_named_by_last_slot: {:?}", func.multi_spillslot_named_by_last_slot()); + trace!("multispillslots_named_by_last_slot: {:?}", func.multi_spillslot_named_by_last_slot()); let regs = [ env.preferred_regs_by_class[RegClass::Int as usize].clone(), env.preferred_regs_by_class[RegClass::Float as usize].clone(), @@ -110,7 +110,11 @@ impl<'a, F: Function> Env<'a, F> { vec![VReg::invalid(); regs[1].len()], vec![VReg::invalid(); regs[2].len()], ] }, - next_spillslot_idx: 0, + temp_spillslots: PartedByRegClass { items: [ + Vec::with_capacity(func.num_vregs()), + Vec::with_capacity(func.num_vregs()), + Vec::with_capacity(func.num_vregs()), + ] }, allocs: Allocs::new(func, env), edits: Vec::new(), num_spillslots: 0, @@ -118,23 +122,22 @@ impl<'a, F: Function> Env<'a, F> { } } - fn add_move(&mut self, inst: Inst, vreg: VReg, to: Allocation, pos: InstPosition) { - let from = self.vreg_allocs[vreg.vreg()]; + fn add_move(&mut self, inst: Inst, from: Allocation, to: Allocation, class: RegClass, pos: InstPosition) { if from.is_stack() && to.is_stack() { let mut evicted = false; - let scratch_reg = if self.freepregs[vreg.class()].is_empty() { + let scratch_reg = if self.freepregs[class].is_empty() { evicted = true; - self.evictreg(inst, 
vreg.class()) + self.evictreg(inst, class) } else { - *self.freepregs[vreg.class()].last().unwrap() + *self.freepregs[class].last().unwrap() }; if evicted { - self.freepregs[vreg.class()].push(scratch_reg); + self.freepregs[class].push(scratch_reg); } let scratch_alloc = Allocation::reg(scratch_reg); // Edits are added in reverse order because the edits // will be reversed when all allocation is completed. - println!("Edit: {:?}", (ProgPoint::new(inst, pos), Edit::Move { + trace!("Edit: {:?}", (ProgPoint::new(inst, pos), Edit::Move { from: scratch_alloc, to, })); @@ -142,7 +145,7 @@ impl<'a, F: Function> Env<'a, F> { from: scratch_alloc, to, })); - println!("Edit: {:?}", (ProgPoint::new(inst, pos), Edit::Move { + trace!("Edit: {:?}", (ProgPoint::new(inst, pos), Edit::Move { from, to: scratch_alloc, })); @@ -152,7 +155,7 @@ impl<'a, F: Function> Env<'a, F> { })) } else { - println!("Edit: {:?}", (ProgPoint::new(inst, pos), Edit::Move { + trace!("Edit: {:?}", (ProgPoint::new(inst, pos), Edit::Move { from, to, })); @@ -164,11 +167,11 @@ impl<'a, F: Function> Env<'a, F> { } fn move_after_inst(&mut self, inst: Inst, vreg: VReg, to: Allocation) { - self.add_move(inst, vreg, to, InstPosition::After); + self.add_move(inst, self.vreg_allocs[vreg.vreg()], to, vreg.class(), InstPosition::After); } fn move_before_inst(&mut self, inst: Inst, vreg: VReg, to: Allocation) { - self.add_move(inst, vreg, to, InstPosition::Before); + self.add_move(inst, self.vreg_allocs[vreg.vreg()], to, vreg.class(), InstPosition::Before); } fn allocd_within_constraint(&self, op: Operand) -> bool { @@ -198,7 +201,7 @@ impl<'a, F: Function> Env<'a, F> { let evicted_vreg = self.vreg_in_preg[regclass][preg.hw_enc()]; let slot = self.allocstack(&evicted_vreg); self.vreg_allocs[evicted_vreg.vreg()] = Allocation::stack(slot); - println!("Move reason: eviction"); + trace!("Move reason: eviction"); self.move_after_inst(inst, evicted_vreg, Allocation::reg(preg)); preg } @@ -244,7 +247,7 @@ impl<'a, F: 
Function> Env<'a, F> { /// which should have a constraint of either /// `OperandConstraint::Any` or `OperandConstraint::Reg`. fn alloc_reg_for_operand(&mut self, inst: Inst, op: Operand) { - assert!(op.constraint() == OperandConstraint::Any || + debug_assert!(op.constraint() == OperandConstraint::Any || op.constraint() == OperandConstraint::Reg); let preg = if self.freepregs[op.class()].is_empty() { self.evictreg(inst, op.class()) @@ -291,7 +294,7 @@ impl<'a, F: Function> Env<'a, F> { // allocation to the subsequent places where the value was // used (in `prev_alloc`, that is). if prev_alloc.is_some() { - println!("Move reason: Prev allocation doesn't meet constraints"); + trace!("Move reason: Prev allocation doesn't meet constraints"); if op.kind() == OperandKind::Def { // In the case where `op` is a def, // the allocation of `op` will not be holding the value @@ -313,10 +316,10 @@ impl<'a, F: Function> Env<'a, F> { self.move_before_inst(inst, op.vreg(), prev_alloc); } } - println!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); + trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); } else { self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[op.vreg().vreg()]; - println!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); + trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); } } @@ -324,38 +327,211 @@ impl<'a, F: Function> Env<'a, F> { for vreg in self.func.block_params(succ) { if self.vreg_spillslots[vreg.vreg()].is_invalid() { self.vreg_spillslots[vreg.vreg()] = self.allocstack(vreg); - println!("Block param {:?} is in {:?}", vreg, Allocation::stack(self.vreg_spillslots[vreg.vreg()])); + trace!("Block param {:?} is in {:?}", vreg, Allocation::stack(self.vreg_spillslots[vreg.vreg()])); } } } - /// If instruction `inst` is a branch 
from block `block` to block `succ` and - /// `succ_idx` is the successor index of `succ`, then this function sets the current - /// allocation for the branch args to be the spill slots where `succ` is expecting its - /// branch params to be. fn place_branch_args_in_stack_allocs(&mut self, block: Block, inst: Inst, succ: Block, succ_idx: usize) { let succ_params = self.func.block_params(succ); - for (pos, vreg) in self.func.branch_blockparams(block, inst, succ_idx).iter().enumerate() { + + // Used to know which temporary spillslot should be used next. + let mut next_temp_idx = PartedByRegClass { items: [0, 0, 0] }; + + fn reset_temp_idx(next_temp_idx: &mut PartedByRegClass) { + next_temp_idx[RegClass::Int] = 0; + next_temp_idx[RegClass::Float] = 0; + next_temp_idx[RegClass::Vector] = 0; + } + + // Move from temporaries to post block locations. + for vreg in self.func.branch_blockparams(block, inst, succ_idx).iter() { self.live_vregs.insert(*vreg); + if self.temp_spillslots[vreg.class()].len() == next_temp_idx[vreg.class()] { + let newslot = self.allocstack(vreg); + self.temp_spillslots[vreg.class()].push(newslot); + } + let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; + let temp = Allocation::stack(temp_slot); + next_temp_idx[vreg.class()] += 1; let prev_alloc = self.vreg_allocs[vreg.vreg()]; - let succ_param_vreg = succ_params[pos]; - self.vreg_allocs[vreg.vreg()] = Allocation::stack(self.vreg_spillslots[succ_param_vreg.vreg()]); - println!(" --- Placing branch arg {:?} in {:?}", vreg, Allocation::stack(self.vreg_spillslots[succ_param_vreg.vreg()])); if prev_alloc.is_some() { - println!("{:?} which is now in {:?} inserting move to {:?}", vreg, self.vreg_allocs[vreg.vreg()], prev_alloc); - self.move_before_inst(inst, *vreg, prev_alloc); + trace!("{:?} which is going to be in {:?} inserting move to {:?}", vreg, temp, prev_alloc); + self.add_move(inst, temp, prev_alloc, vreg.class(), InstPosition::Before); + 
//self.move_before_inst(inst, *vreg, prev_alloc); } else { - println!("{:?} prev alloc is none, so no moving here", vreg); + trace!("{:?} prev alloc is none, so no moving here", vreg); + } + } + + reset_temp_idx(&mut next_temp_idx); + + // Move from temporaries to block param spillslots. + for (pos, vreg) in self.func.branch_blockparams(block, inst, succ_idx).iter().enumerate() { + let succ_param_vreg = succ_params[pos]; + let param_alloc = Allocation::stack(self.vreg_spillslots[succ_param_vreg.vreg()]); + let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; + let temp = Allocation::stack(temp_slot); + next_temp_idx[vreg.class()] += 1; + trace!(" --- Placing branch arg {:?} in {:?}", vreg, temp); + trace!("{:?} which is now in {:?} inserting move to {:?}", vreg, temp, param_alloc); + //self.move_before_inst(inst, *vreg, param_alloc); + self.add_move(inst, temp, param_alloc, vreg.class(), InstPosition::Before); + } + + reset_temp_idx(&mut next_temp_idx); + + // Move from branch args spillslots to temporaries. + for vreg in self.func.branch_blockparams(block, inst, succ_idx).iter() { + if self.vreg_spillslots[vreg.vreg()].is_invalid() { + self.vreg_spillslots[vreg.vreg()] = self.allocstack(vreg); + trace!("Block arg {:?} is going to be in {:?}", vreg, Allocation::stack(self.vreg_spillslots[vreg.vreg()])); + } + let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; + let temp = Allocation::stack(temp_slot); + next_temp_idx[vreg.class()] += 1; + let vreg_spill = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); + self.vreg_allocs[vreg.vreg()] = vreg_spill; + trace!("{:?} which is going to be in {:?} inserting move to {:?}", vreg, vreg_spill, temp); + //self.move_before_inst(inst, *vreg, temp); + self.add_move(inst, vreg_spill, temp, vreg.class(), InstPosition::Before); + } + + /*// Set the current allocations to be their respective spillslots. 
+ for vreg in self.func.branch_blockparams(block, inst, succ_idx).iter() { + self.vreg_allocs[vreg.vreg()] = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); + }*/ + } + + /// If instruction `inst` is a branch in `block`, + /// this function places branch arguments in the spillslots + /// expected by the destination blocks. + /// + /// The process used to do this is as follows: + /// + /// 1. Move all branch arguments into corresponding temporary spillslots. + /// 2. Move values from the temporary spillslots to corresponding block param spillslots. + /// 3. Move values from the temporary spillslots to post-block locations, if any, for + /// non-block-param arguments. + /// + /// These temporaries are used because the moves have to be parallel in the case where + /// a block parameter of the successor block is a branch argument. + fn process_branch(&mut self, block: Block, inst: Inst) { + + // Used to know which temporary spillslot should be used next. + let mut next_temp_idx = PartedByRegClass { items: [0, 0, 0] }; + + fn reset_temp_idx(next_temp_idx: &mut PartedByRegClass) { + next_temp_idx[RegClass::Int] = 0; + next_temp_idx[RegClass::Float] = 0; + next_temp_idx[RegClass::Vector] = 0; + } + + // In the case where the block param of a successor is also a branch arg, + // the reading of all the block params must be done before the writing. + // This is necessary to prevent overwriting the branch arg's value before + // placing it in the corresponding branch param spillslot. + // And because edits are inserted in reverse, the algorithm has to process + // the branch args which are not branch params first. This will result in the + // output code processing branch args which are params before the others. 
+ + for (succ_idx, succ) in self.func.block_succs(block).iter().enumerate() { + self.alloc_slots_for_block_params(block, inst, *succ, succ_idx); + } + + for (succ_idx, succ) in self.func.block_succs(block).iter().enumerate() { + let succ_params = self.func.block_params(*succ); + + // Move from temporaries to post block locations. + for vreg in self.func.branch_blockparams(block, inst, succ_idx).iter() { + self.live_vregs.insert(*vreg); + if self.temp_spillslots[vreg.class()].len() == next_temp_idx[vreg.class()] { + let newslot = self.allocstack(vreg); + self.temp_spillslots[vreg.class()].push(newslot); + } + let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; + let temp = Allocation::stack(temp_slot); + next_temp_idx[vreg.class()] += 1; + if succ_params.contains(vreg) { + // Skip to avoid overwriting the new value for the block param, + // which will be moved into its spillslot from its temporary. + continue; + } + let prev_alloc = self.vreg_allocs[vreg.vreg()]; + if prev_alloc.is_some() { + trace!("{:?} which is going to be in {:?} inserting move to {:?}", vreg, temp, prev_alloc); + self.add_move(inst, temp, prev_alloc, vreg.class(), InstPosition::Before); + } else { + trace!("{:?} prev alloc is none, so no moving here", vreg); + } + } + } + + reset_temp_idx(&mut next_temp_idx); + + for (succ_idx, succ) in self.func.block_succs(block).iter().enumerate() { + let succ_params = self.func.block_params(*succ); + + // Move from temporaries to block param spillslots. 
+ for (pos, vreg) in self.func.branch_blockparams(block, inst, succ_idx).iter().enumerate() { + let succ_param_vreg = succ_params[pos]; + let param_alloc = Allocation::stack(self.vreg_spillslots[succ_param_vreg.vreg()]); + let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; + let temp = Allocation::stack(temp_slot); + self.vreg_allocs[vreg.vreg()] = temp; + next_temp_idx[vreg.class()] += 1; + trace!(" --- Placing branch arg {:?} in {:?}", vreg, temp); + trace!("{:?} which is now in {:?} inserting move to {:?}", vreg, temp, param_alloc); + self.add_move(inst, temp, param_alloc, vreg.class(), InstPosition::Before); + } + } + + reset_temp_idx(&mut next_temp_idx); + + for (succ_idx, _) in self.func.block_succs(block).iter().enumerate() { + // Move from branch args spillslots to temporaries. + // + // Consider a scenario: block X branches to block Y and block Y branches to block X. + // Block Y has block param vp and block X uses virtual register va as the branch arg for vp. + // Block X has an instruction that uses vp. + // In the case where branch arg va is defined in a predecessor, there is a possibility + // that, at the beginning of the block, during the reload, that va will always overwrite vp. + // This could happen because at the end of the block, va is allocated to be in vp's + // spillslot. If va isn't used throughout the block (or if all its use constraints allow it to be + // in vp's spillslot), then during reload, it will still be allocated to vp's spillslot. + // This will mean that at the beginning of the block, both va and vp will be expected to be + // in vp's spillslot. An edit will be inserted to move from va's spillslot to vp's. + // And depending on the constraints of vp's use, an edit may or may not be inserted to move + // from vp's spillslot to somewhere else. + // Either way, the correctness of the dataflow will depend on the order of edits. 
+ // If vp is required to be on the stack, then no edit will be inserted for it (it's already on + // the stack, in its spillslot). But an edit will be inserted to move from va's spillslot + // to vp's. + // If block Y has other predecessors that define vp to be other values, then this dataflow + // is clearly wrong. + // + // To avoid this scenario, branch args are placed into their own spillslots here + // so that if they aren't moved at all throughout the block, they will not be expected to + // be in another vreg's spillslot at the block beginning. + for vreg in self.func.branch_blockparams(block, inst, succ_idx).iter() { + if self.vreg_spillslots[vreg.vreg()].is_invalid() { + self.vreg_spillslots[vreg.vreg()] = self.allocstack(vreg); + trace!("Block arg {:?} is going to be in {:?}", vreg, Allocation::stack(self.vreg_spillslots[vreg.vreg()])); + } + let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; + let temp = Allocation::stack(temp_slot); + next_temp_idx[vreg.class()] += 1; + let vreg_spill = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); + self.vreg_allocs[vreg.vreg()] = vreg_spill; + trace!("{:?} which is going to be in {:?} inserting move to {:?}", vreg, vreg_spill, temp); + self.add_move(inst, vreg_spill, temp, vreg.class(), InstPosition::Before); + } + } + } + fn alloc_inst(&mut self, block: Block, inst: Inst) { if self.func.is_branch(inst) { - for (succ_idx, succ) in self.func.block_succs(block).iter().enumerate() { - self.alloc_slots_for_block_params(block, inst, *succ, succ_idx); - self.place_branch_args_in_stack_allocs(block, inst, *succ, succ_idx); - } + self.process_branch(block, inst); } let operands = self.func.inst_operands(inst); for (op_idx, op) in LateOperands::new(operands) { @@ -372,6 +548,12 @@ impl<'a, F: Function> Env<'a, F> { } } + /// At the beginning of every block, all virtual registers that are + /// livein are expected to be in their respective spillslots. 
+ /// This function sets the current allocations of livein registers + /// to their spillslots and inserts the edits to flow livein values to + /// the allocations where they are expected to be before the first + /// instruction. fn reload_at_begin(&mut self, block: Block) { // We need to check for the registers that are still live. // These registers are livein and they should be stack-allocated. @@ -381,12 +563,19 @@ impl<'a, F: Function> Env<'a, F> { if self.vreg_spillslots[vreg.vreg()].is_invalid() { self.vreg_spillslots[vreg.vreg()] = self.allocstack(&vreg); } + // The allocation where the vreg is expected to be before + // the first instruction. let prev_alloc = self.vreg_allocs[vreg.vreg()]; if prev_alloc.is_reg() { self.freealloc(vreg); } self.vreg_allocs[vreg.vreg()] = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); - println!("Move reason: reload {:?} at begin - move into its spillslot", vreg); + if self.vreg_allocs[vreg.vreg()] == prev_alloc { + // No need to do any movements if the spillslot is where the vreg is expected to be. 
+ trace!("No need to reload {:?} because it's already in its expected allocation", vreg); + continue; + } + trace!("Move reason: reload {:?} at begin - move from its spillslot", vreg); self.move_before_inst( self.func.block_insns(block).first(), vreg, @@ -396,25 +585,24 @@ impl<'a, F: Function> Env<'a, F> { } fn alloc_block(&mut self, block: Block) { - println!("{:?} start", block); + trace!("{:?} start", block); for inst in self.func.block_insns(block).iter().rev() { self.alloc_inst(block, inst); } self.reload_at_begin(block); self.live_vregs.clear(); - println!("{:?} end\n", block); + trace!("{:?} end\n", block); } fn run(&mut self) -> Result<(), RegAllocError> { - assert_eq!(self.func.entry_block().index(), 0); + debug_assert_eq!(self.func.entry_block().index(), 0); for block in (0..self.func.num_blocks()).rev() { self.alloc_block(Block::new(block)); } self.edits.reverse(); ///////////////////////////////////////////////////////////////////////////////////// - println!("Done!"); - // Debugging that dastardly bug! 
+ trace!("Done!"); struct Z(usize); impl std::fmt::Debug for Z { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { @@ -427,7 +615,8 @@ impl<'a, F: Function> Env<'a, F> { v.push((Z(i), Allocation::stack(self.vreg_spillslots[i]))); } } - println!("{:?}", v); + trace!("{:?}", v); + trace!("\nTemp spillslots: {:?}", self.temp_spillslots); ///////////////////////////////////////////////////////////////////////////////////// Ok(()) @@ -448,8 +637,8 @@ pub fn run( let mut env = Env::new(func, mach_env); env.run()?; -use alloc::vec; -println!("Final edits: {:?}", env.edits); + +trace!("Final edits: {:?}", env.edits); Ok(Output { edits: env.edits, allocs: env.allocs.allocs, From 1ce61693b2ab9e8a1f42f3fa805b4cbf42989f3a Mon Sep 17 00:00:00 2001 From: demilade Date: Thu, 18 Jul 2024 15:50:40 +0100 Subject: [PATCH 06/95] can now handle reused inputs --- fuzz/fuzz_targets/fastalloc_checker.rs | 2 +- src/fastalloc/iter.rs | 301 +++++++++++++++++++++++-- src/fastalloc/mod.rs | 197 ++++++++++++---- 3 files changed, 439 insertions(+), 61 deletions(-) diff --git a/fuzz/fuzz_targets/fastalloc_checker.rs b/fuzz/fuzz_targets/fastalloc_checker.rs index 65dd50f4..459e1904 100644 --- a/fuzz/fuzz_targets/fastalloc_checker.rs +++ b/fuzz/fuzz_targets/fastalloc_checker.rs @@ -20,7 +20,7 @@ impl Arbitrary<'_> for TestCase { func: Func::arbitrary_with_options( u, &Options { - reused_inputs: false, + reused_inputs: true, fixed_regs: false, fixed_nonallocatable: false, clobbers: false, diff --git a/src/fastalloc/iter.rs b/src/fastalloc/iter.rs index f911b1f9..26ca6f2d 100644 --- a/src/fastalloc/iter.rs +++ b/src/fastalloc/iter.rs @@ -1,14 +1,96 @@ use crate::{Operand, OperandKind, OperandPos, OperandConstraint}; -use std::println; -/// Looking for operands with this particular constraint. 
#[derive(Clone, Copy, PartialEq)] -enum LookingFor { +enum OperandConstraintKind { + Any, + Reg, + Stack, FixedReg, - Others + Reuse, } -/// Iterate over operands in position `pos` and kind +impl PartialEq for OperandConstraintKind { + fn eq(&self, other: &OperandConstraint) -> bool { + match other { + OperandConstraint::Any => *self == Self::Any, + OperandConstraint::Reg => *self == Self::Reg, + OperandConstraint::Stack => *self == Self::Stack, + OperandConstraint::FixedReg(_) => *self == Self::FixedReg, + OperandConstraint::Reuse(_) => *self == Self::Reuse, + } + } +} + +#[derive(Clone, Copy, PartialEq)] +struct SearchConstraint { + kind: Option, + pos: Option, + must_not_have_constraint: Option, + must_have_constraint: Option, +} + +impl SearchConstraint { + fn meets_constraint(&self, op: Operand) -> bool { + match self.pos { + None => (), + Some(expected_pos) => if op.pos() != expected_pos { + return false; + } + }; + match self.kind { + None => (), + Some(expected_kind) => if op.kind() != expected_kind { + return false; + } + }; + match self.must_not_have_constraint { + None => (), + Some(should_not_be_constraint) => if should_not_be_constraint == op.constraint() { + return false; + } + } + match self.must_have_constraint { + None => (), + Some(should_be_constraint) => if should_be_constraint != op.constraint() { + return false; + } + } + true + } +} + +struct Operands<'a> { + operands: &'a [Operand], + idx: usize, + search_constraint: SearchConstraint, +} + +impl<'a> Operands<'a> { + fn new(operands: &'a [Operand], search_constraint: SearchConstraint) -> Self { + Self { operands, search_constraint, idx: 0 } + } +} + +impl<'a> Iterator for Operands<'a> { + type Item = (usize, Operand); + + fn next(&mut self) -> Option { + while self.idx < self.operands.len() + && !self.search_constraint.meets_constraint(self.operands[self.idx]) + { + self.idx += 1; + } + if self.idx >= self.operands.len() { + None + } else { + self.idx += 1; + Some((self.idx - 1, 
self.operands[self.idx - 1])) + } + } +} + + +/*/// Iterate over operands in position `pos` and kind /// `kind` in no particular order. struct ByKindAndPosOperands<'a> { operands: &'a [Operand], @@ -107,17 +189,43 @@ impl<'a> Iterator for ByPosOperands<'a> { LookingFor::Others => self.next_others(), } } +}*/ + +pub struct NonReuseLateOperands<'a>(Operands<'a>); + +impl<'a> NonReuseLateOperands<'a> { + pub fn new(operands: &'a [Operand]) -> Self { + Self(Operands::new(operands, SearchConstraint { + pos: Some(OperandPos::Late), + kind: None, + must_not_have_constraint: Some(OperandConstraintKind::Reuse), + must_have_constraint: None, + })) + } } -pub struct LateOperands<'a>(ByPosOperands<'a>); +impl<'a> Iterator for NonReuseLateOperands<'a> { + type Item = (usize, Operand); + + fn next(&mut self) -> Option { + self.0.next() + } +} -impl<'a> LateOperands<'a> { +pub struct NonReuseEarlyOperands<'a>(Operands<'a>); + +impl<'a> NonReuseEarlyOperands<'a> { pub fn new(operands: &'a [Operand]) -> Self { - Self(ByPosOperands::new(operands, OperandPos::Late)) + Self(Operands::new(operands, SearchConstraint { + pos: Some(OperandPos::Early), + kind: None, + must_not_have_constraint: Some(OperandConstraintKind::Reuse), + must_have_constraint: None, + })) } } -impl<'a> Iterator for LateOperands<'a> { +impl<'a> Iterator for NonReuseEarlyOperands<'a> { type Item = (usize, Operand); fn next(&mut self) -> Option { @@ -125,15 +233,20 @@ impl<'a> Iterator for LateOperands<'a> { } } -pub struct EarlyOperands<'a>(ByPosOperands<'a>); +pub struct NonReuseLateDefOperands<'a>(Operands<'a>); -impl<'a> EarlyOperands<'a> { +impl<'a> NonReuseLateDefOperands<'a> { pub fn new(operands: &'a [Operand]) -> Self { - Self(ByPosOperands::new(operands, OperandPos::Early)) + Self(Operands::new(operands, SearchConstraint { + kind: Some(OperandKind::Def), + pos: Some(OperandPos::Late), + must_not_have_constraint: Some(OperandConstraintKind::Reuse), + must_have_constraint: None, + })) } } -impl<'a> Iterator 
for EarlyOperands<'a> { +impl<'a> Iterator for NonReuseLateDefOperands<'a> { type Item = (usize, Operand); fn next(&mut self) -> Option { @@ -141,15 +254,20 @@ impl<'a> Iterator for EarlyOperands<'a> { } } -pub struct LateDefOperands<'a>(ByKindAndPosOperands<'a>); +pub struct NonReuseEarlyDefOperands<'a>(Operands<'a>); -impl<'a> LateDefOperands<'a> { +impl<'a> NonReuseEarlyDefOperands<'a> { pub fn new(operands: &'a [Operand]) -> Self { - Self(ByKindAndPosOperands::new(operands, OperandKind::Def, OperandPos::Late)) + Self(Operands::new(operands, SearchConstraint { + kind: Some(OperandKind::Def), + pos: Some(OperandPos::Early), + must_have_constraint: None, + must_not_have_constraint: Some(OperandConstraintKind::Reuse), + })) } } -impl<'a> Iterator for LateDefOperands<'a> { +impl<'a> Iterator for NonReuseEarlyDefOperands<'a> { type Item = (usize, Operand); fn next(&mut self) -> Option { @@ -157,18 +275,159 @@ impl<'a> Iterator for EarlyDefOperands<'a> { } } -pub struct EarlyDefOperands<'a>(ByKindAndPosOperands<'a>); +/// Operands that reuse an input allocation. +/// They are all expected to be def operands. 
+pub struct ReuseOperands<'a>(Operands<'a>); -impl<'a> EarlyDefOperands<'a> { +impl<'a> ReuseOperands<'a> { pub fn new(operands: &'a [Operand]) -> Self { - Self(ByKindAndPosOperands::new(operands, OperandKind::Def, OperandPos::Early)) + Self(Operands::new(operands, SearchConstraint { + kind: None, + pos: None, + must_have_constraint: Some(OperandConstraintKind::Reuse), + must_not_have_constraint: None, + })) } } -impl<'a> Iterator for EarlyDefOperands<'a> { +impl<'a> Iterator for ReuseOperands<'a> { type Item = (usize, Operand); fn next(&mut self) -> Option { self.0.next() } } + +#[cfg(test)] +mod tests { + use alloc::vec::Vec; + use alloc::vec; + use crate::RegClass; + use super::*; + + // Using a new function because Operand::new isn't a const function + const fn operand(vreg_no: u32, constraint: OperandConstraint, kind: OperandKind, pos: OperandPos) -> Operand { + let constraint_field = match constraint { + OperandConstraint::Any => 0, + OperandConstraint::Reg => 1, + OperandConstraint::Stack => 2, + OperandConstraint::FixedReg(preg) => { + 0b1000000 | preg.hw_enc() as u32 + } + OperandConstraint::Reuse(which) => { + 0b0100000 | which as u32 + } + }; + let class_field = RegClass::Int as u8 as u32; + let pos_field = pos as u8 as u32; + let kind_field = kind as u8 as u32; + Operand { + bits: vreg_no + | (class_field << 21) + | (pos_field << 23) + | (kind_field << 24) + | (constraint_field << 25), + } + } + + const fn late_reuse_def_operand(vreg_no: u32) -> Operand { + operand(vreg_no, OperandConstraint::Reuse(0), OperandKind::Def, OperandPos::Late) + } + + const fn early_reuse_def_operand(vreg_no: u32) -> Operand { + operand(vreg_no, OperandConstraint::Reuse(0), OperandKind::Def, OperandPos::Early) + } + + const fn early_reuse_use_operand(vreg_no: u32) -> Operand { + operand(vreg_no, OperandConstraint::Reuse(0), OperandKind::Use, OperandPos::Early) + } + + const fn late_reuse_use_operand(vreg_no: u32) -> Operand { + operand(vreg_no, OperandConstraint::Reuse(0), 
OperandKind::Use, OperandPos::Late) + } + + const fn late_def_operand(vreg_no: u32) -> Operand { + operand(vreg_no, OperandConstraint::Any, OperandKind::Def, OperandPos::Late) + } + + const fn late_use_operand(vreg_no: u32) -> Operand { + operand(vreg_no, OperandConstraint::Any, OperandKind::Use, OperandPos::Late) + } + + const fn early_use_operand(vreg_no: u32) -> Operand { + operand(vreg_no, OperandConstraint::Any, OperandKind::Use, OperandPos::Early) + } + + const fn early_def_operand(vreg_no: u32) -> Operand { + operand(vreg_no, OperandConstraint::Any, OperandKind::Def, OperandPos::Early) + } + + static OPERANDS: [Operand; 10] = [ + late_reuse_def_operand(0), + late_def_operand(1), + early_reuse_def_operand(2), + early_use_operand(3), + early_def_operand(4), + late_reuse_def_operand(5), + late_use_operand(6), + late_reuse_use_operand(7), + early_def_operand(8), + early_use_operand(9), + ]; + + #[test] + fn late() { + let late_operands: Vec = NonReuseLateOperands::new(&OPERANDS) + .map(|(_, op)| op) + .collect(); + assert_eq!(late_operands, vec![ + late_def_operand(1), + late_use_operand(6), + ]); + } + + #[test] + fn late_def() { + let late_def_operands: Vec = NonReuseLateDefOperands::new(&OPERANDS) + .map(|(_, op)| op) + .collect(); + assert_eq!(late_def_operands, vec![ late_def_operand(1) ]); + } + + #[test] + fn early() { + let early_operands: Vec = NonReuseEarlyOperands::new(&OPERANDS) + .map(|(_, op)| op) + .collect(); + assert_eq!(early_operands, vec![ + early_use_operand(3), + early_def_operand(4), + early_def_operand(8), + early_use_operand(9), + ]); + } + + #[test] + fn early_def() { + let early_def_operands: Vec = NonReuseEarlyDefOperands::new(&OPERANDS) + .map(|(_, op)| op) + .collect(); + assert_eq!(early_def_operands, vec![ + early_def_operand(4), + early_def_operand(8), + ]); + } + + #[test] + fn reuse() { + let reuse_operands: Vec = ReuseOperands::new(&OPERANDS) + .map(|(_, op)| op) + .collect(); + assert_eq!(reuse_operands, vec![ + 
late_reuse_def_operand(0), + early_reuse_def_operand(2), + late_reuse_def_operand(5), + late_reuse_use_operand(7), + ]); + } +} diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index f7e418d3..296cff45 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1,12 +1,10 @@ use core::convert::TryInto; use core::ops::{Index, IndexMut}; - use crate::{Block, Inst, OperandKind, Operand, PReg, RegClass, VReg, SpillSlot, AllocationKind, OperandConstraint, InstPosition}; use crate::{Function, MachineEnv, ssa::validate_ssa, ProgPoint, Edit, Output}; use crate::{cfg::CFGInfo, RegAllocError, Allocation, ion::Stats}; use alloc::vec::Vec; use hashbrown::HashSet; -use std::println; mod lru; mod iter; @@ -75,8 +73,10 @@ pub struct Env<'a, F: Function> { /// `vreg_in_preg[class][i]` is the virtual register currently in physical register `i` /// in register class `class`. vreg_in_preg: PartedByRegClass>, - /// For parallel moves from branch args to block paeam spillslots. + /// For parallel moves from branch args to block param spillslots. temp_spillslots: PartedByRegClass>, + /// The edits to be inserted before the currently processed instruction. + pre_edits: Vec<(ProgPoint, Edit)>, // Output. allocs: Allocs, @@ -115,6 +115,7 @@ impl<'a, F: Function> Env<'a, F> { Vec::with_capacity(func.num_vregs()), Vec::with_capacity(func.num_vregs()), ] }, + pre_edits: Vec::new(), allocs: Allocs::new(func, env), edits: Vec::new(), num_spillslots: 0, @@ -135,13 +136,17 @@ impl<'a, F: Function> Env<'a, F> { self.freepregs[class].push(scratch_reg); } let scratch_alloc = Allocation::reg(scratch_reg); + let mut target_edits = &mut self.edits; + if pos == InstPosition::Before { + target_edits = &mut self.pre_edits; + } // Edits are added in reverse order because the edits // will be reversed when all allocation is completed. 
trace!("Edit: {:?}", (ProgPoint::new(inst, pos), Edit::Move { from: scratch_alloc, to, })); - self.edits.push((ProgPoint::new(inst, pos), Edit::Move { + target_edits.push((ProgPoint::new(inst, pos), Edit::Move { from: scratch_alloc, to, })); @@ -149,17 +154,21 @@ impl<'a, F: Function> Env<'a, F> { from, to: scratch_alloc, })); - self.edits.push((ProgPoint::new(inst, pos), Edit::Move { + target_edits.push((ProgPoint::new(inst, pos), Edit::Move { from, to: scratch_alloc, })) } else { + let mut target_edits = &mut self.edits; + if pos == InstPosition::Before { + target_edits = &mut self.pre_edits; + } trace!("Edit: {:?}", (ProgPoint::new(inst, pos), Edit::Move { from, to, })); - self.edits.push((ProgPoint::new(inst, pos), Edit::Move { + target_edits.push((ProgPoint::new(inst, pos), Edit::Move { from, to, })); @@ -176,15 +185,18 @@ impl<'a, F: Function> Env<'a, F> { fn allocd_within_constraint(&self, op: Operand) -> bool { let curr_alloc = self.vreg_allocs[op.vreg().vreg()]; - match op.constraint() { - OperandConstraint::Any => curr_alloc.is_some(), - OperandConstraint::Reg => curr_alloc.is_reg() && curr_alloc.as_reg().unwrap().class() == op.class(), - OperandConstraint::Stack => curr_alloc.is_stack(), - OperandConstraint::FixedReg(preg) => curr_alloc.is_reg() && - curr_alloc.as_reg().unwrap() == preg, + self.alloc_meets_op_constraint(curr_alloc, op.class(), op.constraint()) + } + + fn alloc_meets_op_constraint(&self, alloc: Allocation, class: RegClass, constraint: OperandConstraint) -> bool { + match constraint { + OperandConstraint::Any => alloc.is_some(), + OperandConstraint::Reg => alloc.is_reg() && alloc.as_reg().unwrap().class() == class, + OperandConstraint::Stack => alloc.is_stack(), + OperandConstraint::FixedReg(preg) => alloc.is_reg() && + alloc.as_reg().unwrap() == preg, OperandConstraint::Reuse(_) => { - // TODO: Come back here!!! 
- true + unreachable!() } } } @@ -243,12 +255,8 @@ impl<'a, F: Function> Env<'a, F> { SpillSlot::new(slot as usize) } - /// Allocates a physical register for the operand `op` - /// which should have a constraint of either - /// `OperandConstraint::Any` or `OperandConstraint::Reg`. + /// Allocates a physical register for the operand `op`. fn alloc_reg_for_operand(&mut self, inst: Inst, op: Operand) { - debug_assert!(op.constraint() == OperandConstraint::Any || - op.constraint() == OperandConstraint::Reg); let preg = if self.freepregs[op.class()].is_empty() { self.evictreg(inst, op.class()) } else { @@ -261,6 +269,7 @@ impl<'a, F: Function> Env<'a, F> { /// Allocates for the operand `op` with index `op_idx` into the /// vector of instruction `inst`'s operands. + /// Only non reuse-input operands. fn alloc_operand(&mut self, inst: Inst, op: Operand, op_idx: usize) { match op.constraint() { OperandConstraint::Any => { @@ -276,16 +285,16 @@ impl<'a, F: Function> Env<'a, F> { panic!("Fixed reg allocations aren't supported yet"); } OperandConstraint::Reuse(_) => { - // We need to allocate a register for the operand, - // then remember that it must have the same allocation - // as the input when processing the use operands. 
- panic!("Reuse input allocations aren't supported yet"); + // This is handled elsewhere + unreachable!(); } } self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[op.vreg().vreg()]; } + /// Only processes non reuse-input operands fn process_operand_allocation(&mut self, inst: Inst, op: Operand, op_idx: usize) { + debug_assert!(!matches!(op.constraint(), OperandConstraint::Reuse(_))); self.live_vregs.insert(op.vreg()); if !self.allocd_within_constraint(op) { let prev_alloc = self.vreg_allocs[op.vreg().vreg()]; @@ -323,7 +332,116 @@ impl<'a, F: Function> Env<'a, F> { } } - fn alloc_slots_for_block_params(&mut self, block: Block, inst: Inst, succ: Block, succ_idx: usize) { + fn process_reuse_operand_allocation( + &mut self, + inst: Inst, + op: Operand, + op_idx: usize, + reused_op: Operand, + reused_idx: usize + ) { + debug_assert!(matches!(op.constraint(), OperandConstraint::Reuse(_))); + // TODO: Check if reuse operand is not a def and if it's not, return an error. + + // We first need to check if the reuse operand has already been allocated, + // in a previous alloc_inst call. There are 2 cases that need to be considered here: + // + // Case 1: The reuse operand has already been allocated. + // An example: + // inst 1: reuse def v0 (1), use v1 + // inst 2: use v0 + // In the above example, v0 will have already been allocated by the time inst 1 + // is about to be processed. + // After this inst 1, v0 is expected to be in some location l0. + // But because of the constraints, it should be in v1's location. + // To account for this, a move is inserted to move from the reused input's allocation + // to the reuse operand's allocation before the instruction. + // The allocations for both the reused input and the reuse operand are then both set + // to l0, for this current instruction. + // + // Case 2: The reuse operand has not yet been allocated. 
+ // This could happen in a scenario such as: + // inst 1: reuse def v0 (1), use v1 + // inst 2: use v1 + // Since v1 and v0 have to be the same allocation, then one of the following could be done: + // 1. A new location is allocated for v0, v1 is moved into that new location before the + // instruction, and the allocs for both v0 and v1 are set to that location. + // 2. v1 is moved into its spillslot before the instruction, used as the allocation for + // v0, then v1 is moved from its spillslot into its allocation after the instruction. + // + // No 1. is better with respect to moves: only 1 move is generated rather than 2. + // No 2. is better with respect to allocations: no extra allocation is required. Especially + // considering the fact that, since reuse operands are always defs, the allocation will be + // deallocated immediately. + // No 1. may lead to better runtime performance, because less stack movements are required + // (assuming no eviction takes place) while no 2. may lead to better compile time performance + // because less bookkeeping has to be done to effect it. + // We're going with no 2. here. + + // TODO: Ensure that the reused operand is a use. + // TODO: Ensure that the reuse operand and its reused input have the + // same register class. 
+ let reused_alloc = self.allocs[(inst.index(), reused_idx)]; + if self.vreg_allocs[op.vreg().vreg()].is_some() { + if !self.alloc_meets_op_constraint( + self.vreg_allocs[op.vreg().vreg()], + reused_op.class(), + reused_op.constraint() + ) { + // After this instruction, op's location is expected to be `prev_alloc` + let prev_alloc = self.vreg_allocs[op.vreg().vreg()]; + self.alloc_reg_for_operand(inst, op); + self.add_move( + inst, + self.vreg_allocs[op.vreg().vreg()], + prev_alloc, + op.class(), + InstPosition::After, + ); + } + self.add_move( + inst, + reused_alloc, + self.vreg_allocs[op.vreg().vreg()], + op.class(), + InstPosition::Before, + ); + self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[op.vreg().vreg()]; + self.allocs[(inst.index(), reused_idx)] = self.vreg_allocs[op.vreg().vreg()]; + // Deallocate the reuse operand. + self.vreg_allocs[op.vreg().vreg()] = Allocation::none(); + self.live_vregs.remove(&op.vreg()); + trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); + trace!("Allocation for instruction {:?} and operand {:?} is now: {:?}", inst, reused_op, self.allocs[(inst.index(), op_idx)]); + } else { + let reused_op_vreg = reused_op.vreg(); + if self.vreg_spillslots[reused_op_vreg.vreg()].is_invalid() { + self.vreg_spillslots[reused_op_vreg.vreg()] = self.allocstack(&reused_op_vreg); + } + let reused_op_spillslot = self.vreg_spillslots[reused_op.vreg().vreg()]; + // Remember: Edits are inserted in reverse. + // Move back into its allocation. + self.add_move( + inst, + Allocation::stack(reused_op_spillslot), + self.vreg_allocs[reused_op_vreg.vreg()], + op.class(), + InstPosition::After + ); + // Move the reused input into its spillslot before the instruction. 
+ self.add_move( + inst, + self.vreg_allocs[reused_op_vreg.vreg()], + Allocation::stack(reused_op_spillslot), + op.class(), + InstPosition::Before, + ); + self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[reused_op_vreg.vreg()]; + trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); + } + } + + fn alloc_slots_for_block_params(&mut self, succ: Block) { for vreg in self.func.block_params(succ) { if self.vreg_spillslots[vreg.vreg()].is_invalid() { self.vreg_spillslots[vreg.vreg()] = self.allocstack(vreg); @@ -358,7 +476,6 @@ impl<'a, F: Function> Env<'a, F> { if prev_alloc.is_some() { trace!("{:?} which is going to be in {:?} inserting move to {:?}", vreg, temp, prev_alloc); self.add_move(inst, temp, prev_alloc, vreg.class(), InstPosition::Before); - //self.move_before_inst(inst, *vreg, prev_alloc); } else { trace!("{:?} prev alloc is none, so no moving here", vreg); } @@ -375,7 +492,6 @@ impl<'a, F: Function> Env<'a, F> { next_temp_idx[vreg.class()] += 1; trace!(" --- Placing branch arg {:?} in {:?}", vreg, temp); trace!("{:?} which is now in {:?} inserting move to {:?}", vreg, temp, param_alloc); - //self.move_before_inst(inst, *vreg, param_alloc); self.add_move(inst, temp, param_alloc, vreg.class(), InstPosition::Before); } @@ -393,14 +509,8 @@ impl<'a, F: Function> Env<'a, F> { let vreg_spill = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); self.vreg_allocs[vreg.vreg()] = vreg_spill; trace!("{:?} which is going to be in {:?} inserting move to {:?}", vreg, vreg_spill, temp); - //self.move_before_inst(inst, *vreg, temp); self.add_move(inst, vreg_spill, temp, vreg.class(), InstPosition::Before); } - - /*// Set the current allocations to be their respective spillslots. 
- for vreg in self.func.branch_blockparams(block, inst, succ_idx).iter() { - self.vreg_allocs[vreg.vreg()] = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); - }*/ } /// If instruction `inst` is a branch in `block`, @@ -434,9 +544,9 @@ impl<'a, F: Function> Env<'a, F> { // And because edits are inserted in reverse, the algorithm has to process // the branch args which are not branch params first. This will result in the // output code processing branch args which are params before the others. - - for (succ_idx, succ) in self.func.block_succs(block).iter().enumerate() { - self.alloc_slots_for_block_params(block, inst, *succ, succ_idx); + + for succ in self.func.block_succs(block).iter() { + self.alloc_slots_for_block_params(*succ); } for (succ_idx, succ) in self.func.block_succs(block).iter().enumerate() { @@ -534,18 +644,26 @@ impl<'a, F: Function> Env<'a, F> { self.process_branch(block, inst); } let operands = self.func.inst_operands(inst); - for (op_idx, op) in LateOperands::new(operands) { + for (op_idx, op) in NonReuseLateOperands::new(operands) { self.process_operand_allocation(inst, op, op_idx); } - for (_, op) in LateDefOperands::new(operands) { + for (_, op) in NonReuseLateDefOperands::new(operands) { self.freealloc(op.vreg()); } - for (op_idx, op) in EarlyOperands::new(operands) { + for (op_idx, op) in NonReuseEarlyOperands::new(operands) { self.process_operand_allocation(inst, op, op_idx); } - for (_, op) in EarlyDefOperands::new(operands) { + for (_, op) in NonReuseEarlyDefOperands::new(operands) { self.freealloc(op.vreg()); } + for (op_idx, op) in ReuseOperands::new(operands) { + let OperandConstraint::Reuse(reused_idx) = op.constraint() else { + unreachable!() + }; + self.process_reuse_operand_allocation(inst, op, op_idx, operands[reused_idx], reused_idx); + } + self.edits.extend(self.pre_edits.iter().cloned()); + self.pre_edits.clear(); } /// At the beginning of every block, all virtual registers that are @@ -559,6 +677,7 @@ impl<'a, F: Function> 
Env<'a, F> { // These registers are livein and they should be stack-allocated. // TODO: Get rid of this clone!!!!!!! let live_vregs = self.live_vregs.clone(); + trace!("In reloading, live_vregs: {:?}", live_vregs); for vreg in live_vregs.into_iter() { if self.vreg_spillslots[vreg.vreg()].is_invalid() { self.vreg_spillslots[vreg.vreg()] = self.allocstack(&vreg); From ed6c3f231a06dcbf951a8ce6df374f38783ed32a Mon Sep 17 00:00:00 2001 From: demilade Date: Mon, 22 Jul 2024 06:53:12 +0100 Subject: [PATCH 07/95] can now handle fixed registers --- fuzz/fuzz_targets/fastalloc_checker.rs | 2 +- src/fastalloc/iter.rs | 189 ++++- src/fastalloc/lru.rs | 9 + src/fastalloc/mod.rs | 927 +++++++++++++++++++------ 4 files changed, 880 insertions(+), 247 deletions(-) diff --git a/fuzz/fuzz_targets/fastalloc_checker.rs b/fuzz/fuzz_targets/fastalloc_checker.rs index 459e1904..546c30bb 100644 --- a/fuzz/fuzz_targets/fastalloc_checker.rs +++ b/fuzz/fuzz_targets/fastalloc_checker.rs @@ -21,7 +21,7 @@ impl Arbitrary<'_> for TestCase { u, &Options { reused_inputs: true, - fixed_regs: false, + fixed_regs: true, fixed_nonallocatable: false, clobbers: false, reftypes: false, diff --git a/src/fastalloc/iter.rs b/src/fastalloc/iter.rs index 26ca6f2d..aea8236b 100644 --- a/src/fastalloc/iter.rs +++ b/src/fastalloc/iter.rs @@ -25,7 +25,7 @@ impl PartialEq for OperandConstraintKind { struct SearchConstraint { kind: Option, pos: Option, - must_not_have_constraint: Option, + must_not_have_constraints: [Option; 2], must_have_constraint: Option, } @@ -43,10 +43,12 @@ impl SearchConstraint { return false; } }; - match self.must_not_have_constraint { - None => (), - Some(should_not_be_constraint) => if should_not_be_constraint == op.constraint() { - return false; + for must_not_have_constraint in self.must_not_have_constraints.iter().cloned() { + match must_not_have_constraint { + None => (), + Some(should_not_be_constraint) => if should_not_be_constraint == op.constraint() { + return false; + } } } 
match self.must_have_constraint { @@ -191,20 +193,20 @@ impl<'a> Iterator for ByPosOperands<'a> { } }*/ -pub struct NonReuseLateOperands<'a>(Operands<'a>); +pub struct NonFixedNonReuseLateOperands<'a>(Operands<'a>); -impl<'a> NonReuseLateOperands<'a> { +impl<'a> NonFixedNonReuseLateOperands<'a> { pub fn new(operands: &'a [Operand]) -> Self { Self(Operands::new(operands, SearchConstraint { pos: Some(OperandPos::Late), kind: None, - must_not_have_constraint: Some(OperandConstraintKind::Reuse), + must_not_have_constraints: [Some(OperandConstraintKind::Reuse), Some(OperandConstraintKind::FixedReg)], must_have_constraint: None, })) } } -impl<'a> Iterator for NonReuseLateOperands<'a> { +impl<'a> Iterator for NonFixedNonReuseLateOperands<'a> { type Item = (usize, Operand); fn next(&mut self) -> Option { @@ -212,20 +214,20 @@ impl<'a> Iterator for NonReuseLateOperands<'a> { } } -pub struct NonReuseEarlyOperands<'a>(Operands<'a>); +pub struct NonFixedNonReuseEarlyOperands<'a>(Operands<'a>); -impl<'a> NonReuseEarlyOperands<'a> { +impl<'a> NonFixedNonReuseEarlyOperands<'a> { pub fn new(operands: &'a [Operand]) -> Self { Self(Operands::new(operands, SearchConstraint { pos: Some(OperandPos::Early), kind: None, - must_not_have_constraint: Some(OperandConstraintKind::Reuse), + must_not_have_constraints: [Some(OperandConstraintKind::Reuse), Some(OperandConstraintKind::FixedReg)], must_have_constraint: None, })) } } -impl<'a> Iterator for NonReuseEarlyOperands<'a> { +impl<'a> Iterator for NonFixedNonReuseEarlyOperands<'a> { type Item = (usize, Operand); fn next(&mut self) -> Option { @@ -240,7 +242,7 @@ impl<'a> NonReuseLateDefOperands<'a> { Self(Operands::new(operands, SearchConstraint { kind: Some(OperandKind::Def), pos: Some(OperandPos::Late), - must_not_have_constraint: Some(OperandConstraintKind::Reuse), + must_not_have_constraints: [Some(OperandConstraintKind::Reuse), None], must_have_constraint: None, })) } @@ -262,7 +264,7 @@ impl<'a> NonReuseEarlyDefOperands<'a> { kind: 
Some(OperandKind::Def), pos: Some(OperandPos::Early), must_have_constraint: None, - must_not_have_constraint: Some(OperandConstraintKind::Reuse), + must_not_have_constraints: [Some(OperandConstraintKind::Reuse), None], })) } } @@ -285,7 +287,7 @@ impl<'a> ReuseOperands<'a> { kind: None, pos: None, must_have_constraint: Some(OperandConstraintKind::Reuse), - must_not_have_constraint: None, + must_not_have_constraints: [None, None], })) } } @@ -298,11 +300,74 @@ impl<'a> Iterator for ReuseOperands<'a> { } } +pub struct FixedLateOperands<'a>(Operands<'a>); + +impl<'a> FixedLateOperands<'a> { + pub fn new(operands: &'a [Operand]) -> Self { + Self(Operands::new(operands, SearchConstraint { + kind: None, + pos: Some(OperandPos::Late), + must_have_constraint: Some(OperandConstraintKind::FixedReg), + must_not_have_constraints: [None, None], + })) + } +} + +impl<'a> Iterator for FixedLateOperands<'a> { + type Item = (usize, Operand); + + fn next(&mut self) -> Option { + self.0.next() + } +} + +pub struct FixedEarlyOperands<'a>(Operands<'a>); + +impl<'a> FixedEarlyOperands<'a> { + pub fn new(operands: &'a [Operand]) -> Self { + Self(Operands::new(operands, SearchConstraint { + kind: None, + pos: Some(OperandPos::Early), + must_have_constraint: Some(OperandConstraintKind::FixedReg), + must_not_have_constraints: [None, None], + })) + } +} + +impl<'a> Iterator for FixedEarlyOperands<'a> { + type Item = (usize, Operand); + + fn next(&mut self) -> Option { + self.0.next() + } +} + +pub struct NonReuseDefOperands<'a>(Operands<'a>); + +impl<'a> NonReuseDefOperands<'a> { + pub fn new(operands: &'a [Operand]) -> Self { + Self(Operands::new(operands, SearchConstraint { + kind: Some(OperandKind::Def), + pos: None, + must_have_constraint: None, + must_not_have_constraints: [Some(OperandConstraintKind::Reuse), None], + })) + } +} + +impl<'a> Iterator for NonReuseDefOperands<'a> { + type Item = (usize, Operand); + + fn next(&mut self) -> Option { + self.0.next() + } +} + #[cfg(test)] mod 
tests { use alloc::vec::Vec; use alloc::vec; - use crate::RegClass; + use crate::{PReg, RegClass}; use super::*; // Using a new function because Operand::new isn't a const function @@ -362,7 +427,44 @@ mod tests { operand(vreg_no, OperandConstraint::Any, OperandKind::Def, OperandPos::Early) } - static OPERANDS: [Operand; 10] = [ + const fn fixed_late_def_operand(vreg_no: u32) -> Operand { + operand( + vreg_no, + OperandConstraint::FixedReg(PReg::new(1, RegClass::Int)), + OperandKind::Def, + OperandPos::Late, + ) + } + + const fn fixed_early_def_operand(vreg_no: u32) -> Operand { + operand( + vreg_no, + OperandConstraint::FixedReg(PReg::new(1, RegClass::Int)), + OperandKind::Def, + OperandPos::Early, + ) + } + + + const fn fixed_late_use_operand(vreg_no: u32) -> Operand { + operand( + vreg_no, + OperandConstraint::FixedReg(PReg::new(1, RegClass::Int)), + OperandKind::Use, + OperandPos::Late, + ) + } + + const fn fixed_early_use_operand(vreg_no: u32) -> Operand { + operand( + vreg_no, + OperandConstraint::FixedReg(PReg::new(1, RegClass::Int)), + OperandKind::Use, + OperandPos::Early, + ) + } + + static OPERANDS: [Operand; 14] = [ late_reuse_def_operand(0), late_def_operand(1), early_reuse_def_operand(2), @@ -373,11 +475,16 @@ mod tests { late_reuse_use_operand(7), early_def_operand(8), early_use_operand(9), + + fixed_late_def_operand(10), + fixed_early_def_operand(11), + fixed_late_use_operand(12), + fixed_early_use_operand(13), ]; #[test] fn late() { - let late_operands: Vec = NonReuseLateOperands::new(&OPERANDS) + let late_operands: Vec = NonFixedNonReuseLateOperands::new(&OPERANDS) .map(|(_, op)| op) .collect(); assert_eq!(late_operands, vec![ @@ -391,12 +498,15 @@ mod tests { let late_def_operands: Vec = NonReuseLateDefOperands::new(&OPERANDS) .map(|(_, op)| op) .collect(); - assert_eq!(late_def_operands, vec![ late_def_operand(1) ]); + assert_eq!(late_def_operands, vec![ + late_def_operand(1), + fixed_late_def_operand(10), + ]); } #[test] fn early() { - let 
early_operands: Vec = NonReuseEarlyOperands::new(&OPERANDS) + let early_operands: Vec = NonFixedNonReuseEarlyOperands::new(&OPERANDS) .map(|(_, op)| op) .collect(); assert_eq!(early_operands, vec![ @@ -415,6 +525,7 @@ mod tests { assert_eq!(early_def_operands, vec![ early_def_operand(4), early_def_operand(8), + fixed_early_def_operand(11), ]); } @@ -430,4 +541,40 @@ mod tests { late_reuse_use_operand(7), ]); } + + #[test] + fn fixed_late() { + let fixed_late_operands: Vec = FixedLateOperands::new(&OPERANDS) + .map(|(_, op)| op) + .collect(); + assert_eq!(fixed_late_operands, vec![ + fixed_late_def_operand(10), + fixed_late_use_operand(12), + ]); + } + + #[test] + fn fixed_early() { + let fixed_early_operands: Vec = FixedEarlyOperands::new(&OPERANDS) + .map(|(_, op)| op) + .collect(); + assert_eq!(fixed_early_operands, vec![ + fixed_early_def_operand(11), + fixed_early_use_operand(13), + ]); + } + + #[test] + fn def() { + let def_operands: Vec = NonReuseDefOperands::new(&OPERANDS) + .map(|(_, op)| op) + .collect(); + assert_eq!(def_operands, vec![ + late_def_operand(1), + early_def_operand(4), + early_def_operand(8), + fixed_late_def_operand(10), + fixed_early_def_operand(11), + ]); + } } diff --git a/src/fastalloc/lru.rs b/src/fastalloc/lru.rs index add850dc..6f980739 100644 --- a/src/fastalloc/lru.rs +++ b/src/fastalloc/lru.rs @@ -71,6 +71,15 @@ impl Lru { self.data[inext].prev = self.data[i].prev; } + /// Sets the node `i` to the last in the list. + pub fn append(&mut self, i: usize) { + let last_node = self.data[self.head].prev; + self.data[last_node].next = i; + self.data[self.head].prev = i; + self.data[i].prev = last_node; + self.data[i].next = self.head; + } + /// Insert node `i` before node `j` in the list. 
pub fn insert_before(&mut self, i: usize, j: usize) { let prev = self.data[j].prev; diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 296cff45..e6039255 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1,8 +1,10 @@ use core::convert::TryInto; +use core::iter::FromIterator; use core::ops::{Index, IndexMut}; use crate::{Block, Inst, OperandKind, Operand, PReg, RegClass, VReg, SpillSlot, AllocationKind, OperandConstraint, InstPosition}; use crate::{Function, MachineEnv, ssa::validate_ssa, ProgPoint, Edit, Output}; use crate::{cfg::CFGInfo, RegAllocError, Allocation, ion::Stats}; +use alloc::collections::{BTreeSet, VecDeque}; use alloc::vec::Vec; use hashbrown::HashSet; @@ -66,21 +68,66 @@ pub struct Env<'a, F: Function> { vreg_spillslots: Vec, /// The virtual registers that are currently live. live_vregs: HashSet, - /// Free physical registers for classes Int, Float, and Vector, respectively. - freepregs: PartedByRegClass>, + /// Allocatable free physical registers for classes Int, Float, and Vector, respectively. + freepregs: PartedByRegClass>, /// Least-recently-used caches for register classes Int, Float, and Vector, respectively. lrus: Lrus, - /// `vreg_in_preg[class][i]` is the virtual register currently in physical register `i` - /// in register class `class`. - vreg_in_preg: PartedByRegClass>, + /// `vreg_in_preg[i]` is the virtual register currently in the physical register + /// with index `i`. + vreg_in_preg: Vec, /// For parallel moves from branch args to block param spillslots. temp_spillslots: PartedByRegClass>, /// The edits to be inserted before the currently processed instruction. - pre_edits: Vec<(ProgPoint, Edit)>, + inst_pre_edits: VecDeque<(ProgPoint, Edit, RegClass)>, + /// The edits to be inserted after the currently processed instruction. 
+ inst_post_edits: VecDeque<(ProgPoint, Edit, RegClass)>, + /// All the allocatables registers that were used for one thing or the other + /// but need to be freed after the current instruction has completed processing, + /// not immediately, like allocatable registers used as scratch registers. + /// + /// This is used to keep track of them so that they can be marked as free for reallocation + /// after the instruction has completed processing. + free_after_curr_inst: HashSet, + /// The virtual registers of operands that have been allocated in the current instruction. + /// This needs to be kept track of to generate the correct moves in the case where a + /// single virtual register is used multiple times in a single instruction with + /// different constraints. + vregs_allocd_in_curr_inst: HashSet, + /// Physical registers that were used for late def operands and now free to be + /// reused for early operands in the current instruction. + /// + /// After late defs have been allocated, rather than returning their registers to + /// the free register list, it is added here to avoid the registers being used as + /// scratch registers. + /// + /// For example, consider the following: + /// def v0, use v1 + /// If the processing of v1 requires a stack-to-stack move, then a scratch register is + /// used and the instruction becomes: + /// def v0, use v1 + /// move from stack0 to p0 + /// move from p0 to stack1 + /// + /// Since scratch registers may be drawn from the free register list and v0 will be allocated and + /// deallocated before v1, then it's possible for the scratch register p0 to be v0's allocation, + /// which is incorrect because p0 will end up holding whatever is in stack0, not v0. + /// `freed_def_regs` avoids this by allowing the late def registers to be reused without making it + /// possible for this scratch register scenario to happen. 
+ freed_def_pregs: PartedByRegClass>, + /// Used to keep track of which used vregs are being used for the first time + /// in the instruction. This is used to determine whether or not reused operands + /// for reuse-input constraints should be restored after an instruction. + first_use: HashSet, + /// Used to keep track of which allocations have been used by use operands in the + /// current instruction. This is to determine whether or not an allocation + /// for a reuse operand was reused by a use operand, and make decisions on + /// whether or not to free the allocation. + allocs_used_by_use_ops: HashSet, + fixed_stack_slots: Vec, // Output. allocs: Allocs, - edits: Vec<(ProgPoint, Edit)>, + edits: VecDeque<(ProgPoint, Edit)>, num_spillslots: u32, stats: Stats, } @@ -88,53 +135,152 @@ pub struct Env<'a, F: Function> { impl<'a, F: Function> Env<'a, F> { fn new(func: &'a F, env: &'a MachineEnv) -> Self { trace!("multispillslots_named_by_last_slot: {:?}", func.multi_spillslot_named_by_last_slot()); - let regs = [ + let mut regs = [ env.preferred_regs_by_class[RegClass::Int as usize].clone(), env.preferred_regs_by_class[RegClass::Float as usize].clone(), env.preferred_regs_by_class[RegClass::Vector as usize].clone(), ]; + regs[0].extend(env.non_preferred_regs_by_class[RegClass::Int as usize].iter().cloned()); + regs[1].extend(env.non_preferred_regs_by_class[RegClass::Float as usize].iter().cloned()); + regs[2].extend(env.non_preferred_regs_by_class[RegClass::Vector as usize].iter().cloned()); use alloc::vec; + trace!("{:?}", env); Self { func, vreg_allocs: vec![Allocation::none(); func.num_vregs()], vreg_spillslots: vec![SpillSlot::invalid(); func.num_vregs()], live_vregs: HashSet::with_capacity(func.num_vregs()), - freepregs: PartedByRegClass { items: regs.clone() }, + freepregs: PartedByRegClass { + items: [ + BTreeSet::from_iter(regs[0].clone()), + BTreeSet::from_iter(regs[1].clone()), + BTreeSet::from_iter(regs[2].clone()), + ] + }, lrus: Lrus::new( 
regs[0].len(), regs[1].len(), regs[2].len() ), - vreg_in_preg: PartedByRegClass { items: [ - vec![VReg::invalid(); regs[0].len()], - vec![VReg::invalid(); regs[1].len()], - vec![VReg::invalid(); regs[2].len()], - ] }, + vreg_in_preg: vec![VReg::invalid(); PReg::NUM_INDEX], + fixed_stack_slots: env.fixed_stack_slots.clone(), temp_spillslots: PartedByRegClass { items: [ Vec::with_capacity(func.num_vregs()), Vec::with_capacity(func.num_vregs()), Vec::with_capacity(func.num_vregs()), ] }, - pre_edits: Vec::new(), + inst_pre_edits: VecDeque::new(), + inst_post_edits: VecDeque::new(), + free_after_curr_inst: HashSet::new(), + vregs_allocd_in_curr_inst: HashSet::new(), + freed_def_pregs: PartedByRegClass { items: [BTreeSet::new(), BTreeSet::new(), BTreeSet::new()] }, + first_use: HashSet::new(), + allocs_used_by_use_ops: HashSet::new(), allocs: Allocs::new(func, env), - edits: Vec::new(), + edits: VecDeque::new(), num_spillslots: 0, stats: Stats::default(), } } + fn is_stack(&self, alloc: Allocation) -> bool { + if alloc.is_stack() { + return true; + } + if alloc.is_reg() { + return self.fixed_stack_slots.contains(&alloc.as_reg().unwrap()); + } + false + } + + fn process_edit(&mut self, point: ProgPoint, edit: Edit, class: RegClass) { + trace!("Processing edit: {:?}", edit); + let Edit::Move { from, to } = edit; + if self.is_stack(from) && self.is_stack(to) { + let scratch_reg = *self.freepregs[class].last().expect("Allocation impossible?"); + trace!("Edit is stack-to-stack, generating two moves with a scratch register {:?}", scratch_reg); + let scratch_alloc = Allocation::reg(scratch_reg); + trace!("Processed Edit: {:?}", (point, Edit::Move { + from: scratch_alloc, + to, + })); + self.edits.push_front((point, Edit::Move { + from: scratch_alloc, + to, + })); + trace!("Processed Edit: {:?}", (point, Edit::Move { + from, + to: scratch_alloc, + })); + self.edits.push_front((point, Edit::Move { + from, + to: scratch_alloc, + })); + } else { + trace!("Edit is not 
stack-to-stack. Adding it directly:"); + trace!("Processed Edit: {:?}", (point, Edit::Move { + from, + to, + })); + self.edits.push_front((point, Edit::Move { + from, + to, + })); + } + } + + fn add_move_later(&mut self, inst: Inst, from: Allocation, to: Allocation, class: RegClass, pos: InstPosition, prepend: bool) { + let target_edits = match pos { + InstPosition::After => &mut self.inst_post_edits, + InstPosition::Before => &mut self.inst_pre_edits + }; + trace!("Recording edit to add later: {:?}", (ProgPoint::new(inst, pos), Edit::Move { + from, + to + }, class)); + // The sorting out of stack-to-stack moves will be done when the instruction's + // edits are processed after all operands have been allocated. + if prepend { + target_edits.push_front((ProgPoint::new(inst, pos), Edit::Move { + from, + to, + }, class)); + } else { + target_edits.push_back((ProgPoint::new(inst, pos), Edit::Move { + from, + to, + }, class)); + } + } + + /* fn add_move(&mut self, inst: Inst, from: Allocation, to: Allocation, class: RegClass, pos: InstPosition) { - if from.is_stack() && to.is_stack() { - let mut evicted = false; + if self.is_stack(from) && self.is_stack(to) { let scratch_reg = if self.freepregs[class].is_empty() { - evicted = true; - self.evictreg(inst, class) + self.evict_any_reg(inst, class) } else { - *self.freepregs[class].last().unwrap() + // The physical register used as a scratch register here has to be explicitly + // removed from the free registers list. This is to avoid scenarios like the + // following: + // 1. def v0, use v1, use v2 + // If v2 needs to be moved from a stack slot to some other specific stack slot + // for the constraints to be satisified, then some scratch register p0 has to be + // used to avoid stack-to-stack moves. 
If p0 is not a dedicated scratch register, + // then the following sequence is possible: + // First, p0 is used as a scratch register to move from the stack to whatever stackslot + // v2 is supposed to be in and the moves are inserted before the instruction. + // Second, v1 is allocated to p0 (thinking p0 is free). But before the instruction, + // p0 is overwritten by v2, so v2 will be used instead of v1. + // + // So, rather than leave the register in the free list, it's removed from this list + // and added back after the complete processing of all the operands in the instruction. + self.freepregs[class].pop().unwrap() }; - if evicted { - self.freepregs[class].push(scratch_reg); - } + self.free_after_curr_inst.push(scratch_reg); + // Remove the scratch register from the LRU to stop it from + // being used as an allocatable register for the current instruction. + // It should be added back after processing the current instruction. + self.lrus[class].remove(scratch_reg.hw_enc()); let scratch_alloc = Allocation::reg(scratch_reg); let mut target_edits = &mut self.edits; if pos == InstPosition::Before { @@ -173,14 +319,14 @@ impl<'a, F: Function> Env<'a, F> { to, })); } - } + }*/ fn move_after_inst(&mut self, inst: Inst, vreg: VReg, to: Allocation) { - self.add_move(inst, self.vreg_allocs[vreg.vreg()], to, vreg.class(), InstPosition::After); + self.add_move_later(inst, self.vreg_allocs[vreg.vreg()], to, vreg.class(), InstPosition::After, false); } fn move_before_inst(&mut self, inst: Inst, vreg: VReg, to: Allocation) { - self.add_move(inst, self.vreg_allocs[vreg.vreg()], to, vreg.class(), InstPosition::Before); + self.add_move_later(inst, self.vreg_allocs[vreg.vreg()], to, vreg.class(), InstPosition::Before, false); } fn allocd_within_constraint(&self, op: Operand) -> bool { @@ -191,8 +337,11 @@ impl<'a, F: Function> Env<'a, F> { fn alloc_meets_op_constraint(&self, alloc: Allocation, class: RegClass, constraint: OperandConstraint) -> bool { match constraint { 
OperandConstraint::Any => alloc.is_some(), - OperandConstraint::Reg => alloc.is_reg() && alloc.as_reg().unwrap().class() == class, - OperandConstraint::Stack => alloc.is_stack(), + OperandConstraint::Reg => { + alloc.is_reg() && alloc.as_reg().unwrap().class() == class + && !self.is_stack(alloc) + }, + OperandConstraint::Stack => self.is_stack(alloc), OperandConstraint::FixedReg(preg) => alloc.is_reg() && alloc.as_reg().unwrap() == preg, OperandConstraint::Reuse(_) => { @@ -201,7 +350,19 @@ impl<'a, F: Function> Env<'a, F> { } } - fn evictreg(&mut self, inst: Inst, regclass: RegClass) -> PReg { + fn evict_vreg_in_preg(&mut self, inst: Inst, preg: PReg) { + let evicted_vreg = self.vreg_in_preg[preg.index()]; + debug_assert_ne!(evicted_vreg, VReg::invalid()); + if self.vreg_spillslots[evicted_vreg.vreg()].is_invalid() { + self.vreg_spillslots[evicted_vreg.vreg()] = self.allocstack(&evicted_vreg); + } + let slot = self.vreg_spillslots[evicted_vreg.vreg()]; + self.vreg_allocs[evicted_vreg.vreg()] = Allocation::stack(slot); + trace!("Move reason: eviction"); + self.move_after_inst(inst, evicted_vreg, Allocation::reg(preg)); + } + + fn evict_any_reg(&mut self, inst: Inst, regclass: RegClass) -> PReg { let preg = self.lrus[regclass].pop(); // TODO: Check if the preg has already been allocated for this // instruction. If it has, then there are too many stuff to @@ -210,21 +371,26 @@ impl<'a, F: Function> Env<'a, F> { // be allocated already. Why? 
Because if some register p0 has been allocated // and some fixed constraint register is encountered that needs p0, then // allocation will fail regardless of whether or not there are other free registers - let evicted_vreg = self.vreg_in_preg[regclass][preg.hw_enc()]; - let slot = self.allocstack(&evicted_vreg); - self.vreg_allocs[evicted_vreg.vreg()] = Allocation::stack(slot); - trace!("Move reason: eviction"); - self.move_after_inst(inst, evicted_vreg, Allocation::reg(preg)); + self.evict_vreg_in_preg(inst, preg); preg } - fn freealloc(&mut self, vreg: VReg) { + fn freealloc(&mut self, vreg: VReg, add_to_freelist: bool) { + trace!("Freeing vreg {:?} (add_to_freelist: {:?})", vreg, add_to_freelist); let alloc = self.vreg_allocs[vreg.vreg()]; match alloc.kind() { AllocationKind::Reg => { let preg = alloc.as_reg().unwrap(); - self.freepregs[vreg.class()].push(preg); - self.vreg_in_preg[vreg.class()][preg.hw_enc()] = VReg::invalid(); + self.vreg_in_preg[preg.index()] = VReg::invalid(); + // If it's a fixed stack slot, then it's not allocatable. + if !self.is_stack(alloc) { + if add_to_freelist { + // Added to the freed def pregs list, not the free pregs + // list to avoid a def's allocated register being used + // as a scratch register. + self.freed_def_pregs[vreg.class()].insert(preg); + } + } } AllocationKind::Stack => { // Do nothing. @@ -257,14 +423,58 @@ impl<'a, F: Function> Env<'a, F> { /// Allocates a physical register for the operand `op`. fn alloc_reg_for_operand(&mut self, inst: Inst, op: Operand) { - let preg = if self.freepregs[op.class()].is_empty() { - self.evictreg(inst, op.class()) + trace!("freepregs: {:?}", self.freepregs[RegClass::Int]); + trace!("freed_def_pregs: {:?}", self.freed_def_pregs[RegClass::Int]); + trace!(""); + if let Some(freed_def_preg) = self.freed_def_pregs[op.class()].pop_last() { + // Don't poke the LRU because the freed def register is no + // longer in the LRU. 
It's not there so as to avoid getting it + // used as a scratch register. + trace!("Reusing the freed def preg: {:?}", freed_def_preg); + self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(freed_def_preg); + self.vreg_in_preg[freed_def_preg.index()] = op.vreg(); + } else { - self.freepregs[op.class()].pop().unwrap() - }; - self.lrus[op.class()].poke(preg); + let preg = if self.freepregs[op.class()].is_empty() { + trace!("Evicting a register"); + self.evict_any_reg(inst, op.class()) + } else { + trace!("Getting a register from freepregs"); + self.freepregs[op.class()].pop_last().unwrap() + }; + trace!("The allocated register for vreg {:?}: {:?}", preg, op.vreg()); + self.lrus[op.class()].poke(preg); + self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(preg); + self.vreg_in_preg[preg.index()] = op.vreg(); + } + } + + fn alloc_fixed_reg_for_operand(&mut self, inst: Inst, op: Operand, preg: PReg) { + trace!("The fixed preg: {:?}", preg); + let mut preg_is_allocatable = false; + if self.vreg_in_preg[preg.index()] != VReg::invalid() { + // Something is already in that register. Evict it. + self.evict_vreg_in_preg(inst, preg); + } else if self.freed_def_pregs[preg.class()].contains(&preg) { + // Consider the scenario: + // def v0 (fixed: p0), use v1 (fixed: p0) + // In the above, p0 has already been used for v0, and since it's a + // def operand, the register has been freed and kept in `freed_def_pregs`, + // so it can be added back to the free pregs list after the instruction + // has finished processing. + // To avoid the preg being added back to the free list, it must be removed + // from `freed_def_pregs` here. + preg_is_allocatable = true; + self.freed_def_pregs[preg.class()].remove(&preg); + } else { + // Find the register in the list of free registers (if it's there). + // If it's not there, then it must be a fixed stack slot. 
+ preg_is_allocatable = self.freepregs[op.class()].remove(&preg); + } + if preg_is_allocatable { + self.lrus[op.class()].poke(preg); + } self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(preg); - self.vreg_in_preg[op.class()][preg.hw_enc()] = op.vreg(); + self.vreg_in_preg[preg.index()] = op.vreg(); } /// Allocates for the operand `op` with index `op_idx` into the @@ -282,7 +492,7 @@ impl<'a, F: Function> Env<'a, F> { panic!("Stack only allocations aren't supported yet"); } OperandConstraint::FixedReg(preg) => { - panic!("Fixed reg allocations aren't supported yet"); + self.alloc_fixed_reg_for_operand(inst, op, preg); } OperandConstraint::Reuse(_) => { // This is handled elsewhere @@ -310,7 +520,45 @@ impl<'a, F: Function> Env<'a, F> { // of `op` before the instruction. Since it's a def, // it will only hold the value after. So, the move // has to be done after. - self.move_after_inst(inst, op.vreg(), prev_alloc); + // + // The move also has to be prepended. Consider the scenario: + // + // 1. def v0 (any reg), use v1 (fixed: p0) + // 2. use v0 (fixed: p0) + // + // During the processing of the first instruction, v0 is already in + // p0. Since v1 has a fixed register constraint, it's processed + // first and evicts v0 from p0. Edits are inserted to flow v0 from + // its spillslot to p0 after the instruction: + // + // 1. def v0 (any reg), use v1 (fixed: p0) + // move from stack_v0 to p0 + // 2. use v0 (fixed: p0) + // + // When it's time to process v0, it has to be moved again: this time + // because it needs to be in a register, not on the stack. + // Edits are inserted to flow v0 from its spillslot to the newly allocated + // register, say p1. + // + // 1. def v0 (any reg), use v1 (fixed: p0) + // move from stack_v0 to p0 + // move from p1 to stack_v0 + // 2. use v0 (fixed: p0) + // + // The problem here is that the edits are out of order. 
p1, the + // allocation used for v0 in inst 1., is never moved into p0, + // the location v0 is expected to be in after inst 1. + // This messes up the dataflow. + // To avoid this, the moves are prepended. + //self.move_after_inst(inst, op.vreg(), prev_alloc); + self.add_move_later( + inst, + self.vreg_allocs[op.vreg().vreg()], + prev_alloc, + op.class(), + InstPosition::After, + true + ); } else { // In the case where `op` is a use, the defined value could // have the same allocation as the `op` allocation. This @@ -322,14 +570,161 @@ impl<'a, F: Function> Env<'a, F> { // Since, it's a use, the correct `op` value will already be in // the `op` allocation before the instruction. // Because of this, the move is done before, not after, `inst`. - self.move_before_inst(inst, op.vreg(), prev_alloc); + // + // This was handled by a simple move from the operand to its previous + // allocation before the instruction, but this is incorrect. + // Consider the scenario: + // 1. use v0 (fixed: p0), use v1 (fixed: p1) + // 2. use v0 (fixed: p1) + // By the time inst 1 is to be processed, v0 will be in p1. + // But v1 should be in p1, not v0. If v0 is moved to p1 before inst 1, + // then it will overwrite v1 and v0 will be used instead of v1. + // It's also possible that the register used by v0 could be reused + // with a def operand. + // To resolve this, v0 is moved into its spillslot before inst 1. + // Then it's moved from its spillslot into p1 after inst 1, which is the place + // where it's expected to be after the instruction. + // This is to avoid two problems: + // 1. Overwriting a vreg that uses p1 in the current instruction. + // 2. Avoiding a situation where a def reuses the register used by v0 + // and overwrites v0. + // + // It is possible for a virtual register to be used twice in the + // same instruction with different constraints. + // For example: + // 1. use v0 (fixed: stack0), use v0 (fixed: p0) + // 2. 
use v0 (fixed: p1) + // By the time inst 1 is to be processed, v0 will be in p1. + // But it should be in p0 and stack0. If stack0 is processed + // first, moves will be inserted to move from stack0 to v0's + // spillslot before inst 1 and to move from spillslot + // to p1 after the instruction: + // + // move from stack0 to stack_v0 + // 1. use v0 (fixed: stack0), use v0 (fixed: p0) + // move from stack_v0 to p1 + // 2. use v0 (fixed: p1) + // + // But when the second use is encountered, moves will be inserted again + // and mess up the dataflow: + // + // move from p0 to stack_v0 + // move from stack0 to stack_v0 + // 1. use v0 (fixed: stack0), use v0 (fixed: p0) + // move from stack_v0 to p1 + // move from stack_v0 to p1 + // 2. use v0 (fixed: p1) + // + // Assuming that after instruction 1 is processed, v0's + // location is p0, then stack0 will always overwrite it, + // and v0 is not in stack0 (it's in p0, now). + // To avoid this scenario, these moves are only inserted + // for the first encountered constraint in an instruction. + // After this, any other operands with the same virtual register + // but different constraint will simply generate a move from the + // new location to the prev_alloc. This new move is inserted before + // the original one because the new location is now where v0 is + // expected to be before the instruction. + // For example: + // + // move from stack0 to stack_v0 + // 1. use v0 (fixed: stack0), use v0 (fixed: p0) + // move from stack_v0 to p1 + // 2. use v0 (fixed: p1) + // + // When the second use is encountered, the current location for v0 becomes + // p0 and a move from p0 to stack0 is prepended to the edits: + // + // move from p0 to stack0 + // move from stack0 to stack_v0 + // 1. use v0 (fixed: stack0), use v0 (fixed: p0) + // move from stack_v0 to p1 + // 2. 
use v0 (fixed: p1) + + if !self.vregs_allocd_in_curr_inst.contains(&op.vreg()) { + if self.vreg_spillslots[op.vreg().vreg()].is_invalid() { + self.vreg_spillslots[op.vreg().vreg()] = self.allocstack(&op.vreg()); + } + let op_spillslot = Allocation::stack(self.vreg_spillslots[op.vreg().vreg()]); + self.add_move_later( + inst, + self.vreg_allocs[op.vreg().vreg()], + op_spillslot, + op.class(), + InstPosition::Before, + false, + ); + self.add_move_later( + inst, + op_spillslot, + prev_alloc, + op.class(), + InstPosition::After, + false, + ); + } else { + self.add_move_later( + inst, + self.vreg_allocs[op.vreg().vreg()], + prev_alloc, + op.class(), + InstPosition::Before, + true, + ); + } } + if prev_alloc.is_reg() { + // Free the previous allocation so that it can be + // reused. + let preg = prev_alloc.as_reg().unwrap(); + self.vreg_in_preg[preg.index()] = VReg::invalid(); + // If it's a fixed stack slot, then it's not allocatable. + if !self.is_stack(prev_alloc) { + trace!("{:?} is no longer using preg {:?}, so freeing it after instruction", op.vreg(), preg); + self.free_after_curr_inst.insert(preg); + self.lrus[preg.class()].remove(preg.hw_enc()); + } + } + } else if op.kind() == OperandKind::Use { + trace!("{:?}'s first use", op.vreg()); + self.first_use.insert(op.vreg()); } trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); } else { self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[op.vreg().vreg()]; trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); } + let new_alloc = self.vreg_allocs[op.vreg().vreg()]; + if new_alloc.is_reg() { + // If the new allocation was once a freed prev_alloc, remove it + // from the free after current inst list. + // For example: + // + // 1. use v0 (fixed: p0), use v0 (fixed: p1) + // 2. use v0 (fixed: p1) + // + // In the processing of the above, v0 is allocated to p1 at inst 2. 
+ // During the processing of inst 1, v0's allocation is changed to p0 + // and p1 is put on the free after current inst list to make it + // available for later allocation. + // But then, it's reallocated for the second operand. + // To prevent reallocating a register while a live one is still in it, + // this register has to be removed from the list. + let preg = new_alloc.as_reg().unwrap(); + if self.free_after_curr_inst.contains(&preg) { + trace!("{:?} is now using preg {:?}. Removing it from the free after instruction list", op.vreg(), preg); + self.free_after_curr_inst.remove(&preg); + } + // The LRU doesn't need to be modified here because it has already + // been handled during the new allocation. + } + if op.kind() == OperandKind::Use { + // Need to remember that this allocation is used in this instruction + // by a use operand, to make decisions on whether to free a reuse operand's + // allocation during the processing of reuse operands. + self.allocs_used_by_use_ops.insert(new_alloc); + } + self.vregs_allocd_in_curr_inst.insert(op.vreg()); } fn process_reuse_operand_allocation( @@ -354,10 +749,12 @@ impl<'a, F: Function> Env<'a, F> { // is about to be processed. // After this inst 1, v0 is expected to be in some location l0. // But because of the constraints, it should be in v1's location. - // To account for this, a move is inserted to move from the reused input's allocation - // to the reuse operand's allocation before the instruction. - // The allocations for both the reused input and the reuse operand are then both set - // to l0, for this current instruction. + // To account for this, the reused input (v1) is moved into its spillslot before the instruction + // and its allocation is used for both the reuse operand (v0) and the reused input + // (the reused input's allocation is used for both of them, just in case the + // reused input has a fixed register constraint). 
+ // After the instruction, v0 is first moved from v1's allocation to l0, the location it's expected to be + // after the instruction and v1 is moved from its spillslot into its current allocation. // // Case 2: The reuse operand has not yet been allocated. // This could happen in a scenario such as: @@ -365,7 +762,8 @@ impl<'a, F: Function> Env<'a, F> { // inst 2: use v1 // Since v1 and v0 have to be the same allocation, then one of the following could be done: // 1. A new location is allocated for v0, v1 is moved into that new location before the - // instruction, and the allocs for both v0 and v1 are set to that location. + // instruction, and the allocs for both v0 and v1 are set to that location (Not good if + // v1 has a fixed register constraint). // 2. v1 is moved into its spillslot before the instruction, used as the allocation for // v0, then v1 is moved from its spillslot into its allocation after the instruction. // @@ -377,65 +775,142 @@ impl<'a, F: Function> Env<'a, F> { // (assuming no eviction takes place) while no 2. may lead to better compile time performance // because less bookkeeping has to be done to effect it. // We're going with no 2. here. + // + // There is also a problem that could arise when the reused input is the first encountered + // use of a vreg. + // Consider a scenario: + // + // 1. def v12 (reuse: 1), use v7 (fixed: p31) + // 2. def v13, use v12 (fixed: p31) + // v12 is in p31 afterwards + // + // Inst 2 is processed first and after its processing + // v12 is in p31, right before inst 2. + // During the processing of inst 1, because of the way reuse + // operands are handled, v7's allocation is first saved before inst 1, + // then it is restored afterwards. The resulting modifications are: + // + // move from p31 to stack_v7 // v7 is in p31 from this point upwards + // 1. 
def v12 (reuse: 1), use v7 (fixed: p31) // Both are allocated to p31 + // move from p31 to p31 // to flow v12 to the location it's expected to be afterwards + // move from stack_v7 to p31 // to restore v7 + // 2. def v13, use v12 (fixed: p31) + // + // The problem with the above is that the reuse operand handling assumed that vregs + // used in an instruction will be live after, but it isn't in this case. v12 uses p31 + // after inst 1 because it is supposed to be free. Since v7's first use is in inst 1, + // it should not be moved into its allocation afterwards. + // Hence, moves to flow the reused input into its allocation after the instruction + // are inserted only if the input lives past the instruction, that is, its first use + // is not in this instruction. // TODO: Ensure that the reused operand is a use. // TODO: Ensure that the reuse operand and its reused input have the // same register class. - let reused_alloc = self.allocs[(inst.index(), reused_idx)]; + trace!("Move Reason: Reuse constraints"); + + let reused_op_first_use = self.first_use.contains(&reused_op.vreg()); if self.vreg_allocs[op.vreg().vreg()].is_some() { - if !self.alloc_meets_op_constraint( - self.vreg_allocs[op.vreg().vreg()], - reused_op.class(), - reused_op.constraint() - ) { - // After this instruction, op's location is expected to be `prev_alloc` - let prev_alloc = self.vreg_allocs[op.vreg().vreg()]; - self.alloc_reg_for_operand(inst, op); - self.add_move( + let op_prev_alloc = self.vreg_allocs[op.vreg().vreg()]; + let reused_op_vreg = reused_op.vreg(); + if self.vreg_spillslots[reused_op_vreg.vreg()].is_invalid() { + self.vreg_spillslots[reused_op_vreg.vreg()] = self.allocstack(&reused_op_vreg); + } + let reused_op_spillslot = self.vreg_spillslots[reused_op.vreg().vreg()]; + + // If this is the reused operand's first use, then don't + // restore it afterwards, because it doesn't live past this instruction. + if !reused_op_first_use { + // Move the reused input into its spillslot. 
+ self.add_move_later( inst, - self.vreg_allocs[op.vreg().vreg()], - prev_alloc, + self.vreg_allocs[reused_op_vreg.vreg()], + Allocation::stack(reused_op_spillslot), op.class(), - InstPosition::After, + InstPosition::Before, + false, ); } - self.add_move( + + // Move the reuse operand from the reused input's allocation into the location it's + // expected to be in after the current instruction. + self.add_move_later( inst, - reused_alloc, - self.vreg_allocs[op.vreg().vreg()], + self.vreg_allocs[reused_op_vreg.vreg()], + op_prev_alloc, op.class(), - InstPosition::Before, + InstPosition::After, + false, ); - self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[op.vreg().vreg()]; - self.allocs[(inst.index(), reused_idx)] = self.vreg_allocs[op.vreg().vreg()]; + + // If this is the reused operand's first use, then don't + // restore it afterwards, because it doesn't live past this instruction. + if !reused_op_first_use { + // Move the reused input from its spillslot into its current allocation + self.add_move_later( + inst, + Allocation::stack(reused_op_spillslot), + self.vreg_allocs[reused_op_vreg.vreg()], + op.class(), + InstPosition::After, + false, + ); + } + + self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[reused_op_vreg.vreg()]; + self.allocs[(inst.index(), reused_idx)] = self.vreg_allocs[reused_op_vreg.vreg()]; + // Deallocate the reuse operand. - self.vreg_allocs[op.vreg().vreg()] = Allocation::none(); - self.live_vregs.remove(&op.vreg()); + // We can't just deallocate the reuse operand. + // The reason for this is that, since reuse operands are defs + // it is possible for its allocation to be reused by a use operand. + // If it is freed here, then the allocation could be reallocated to another + // vreg while the use it was allocated to is still live. + // For example: + // + // 1. def v0 + // 2. def v1, use v2 + // 3. def v3 (reuse: 1), use v0 + // + // If v0 is allocated to p0, then v3 will also be allocated to p0. 
+ // Since reuse operands are processed last, then if v3 is just freed normally, + // then p0 will be free for allocation to v1 and v2, overwriting whatever + // value was defd in v0 in inst 1. + // To avoid this allocation of a place that has already been allocated to a live vreg, + // the `add_to_freelist` parameter is set to true + // only if the reuse operand's allocation was not reused by any use operands + // in the instruction. + let op_alloc_is_in_use = self.allocs_used_by_use_ops.contains(&op_prev_alloc); + self.freealloc(op.vreg(), !op_alloc_is_in_use); trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); - trace!("Allocation for instruction {:?} and operand {:?} is now: {:?}", inst, reused_op, self.allocs[(inst.index(), op_idx)]); } else { let reused_op_vreg = reused_op.vreg(); - if self.vreg_spillslots[reused_op_vreg.vreg()].is_invalid() { - self.vreg_spillslots[reused_op_vreg.vreg()] = self.allocstack(&reused_op_vreg); + // If this is the reused operand's first use, then don't + // restore it afterwards, because it doesn't live past this instruction. + if !reused_op_first_use { + if self.vreg_spillslots[reused_op_vreg.vreg()].is_invalid() { + self.vreg_spillslots[reused_op_vreg.vreg()] = self.allocstack(&reused_op_vreg); + } + let reused_op_spillslot = self.vreg_spillslots[reused_op.vreg().vreg()]; + // Move the reused input into its spillslot before the instruction. + self.add_move_later( + inst, + self.vreg_allocs[reused_op_vreg.vreg()], + Allocation::stack(reused_op_spillslot), + op.class(), + InstPosition::Before, + false, + ); + // Move back into its allocation. + self.add_move_later( + inst, + Allocation::stack(reused_op_spillslot), + self.vreg_allocs[reused_op_vreg.vreg()], + op.class(), + InstPosition::After, + false, + ); } - let reused_op_spillslot = self.vreg_spillslots[reused_op.vreg().vreg()]; - // Remember: Edits are inserted in reverse. - // Move back into its allocation. 
- self.add_move( - inst, - Allocation::stack(reused_op_spillslot), - self.vreg_allocs[reused_op_vreg.vreg()], - op.class(), - InstPosition::After - ); - // Move the reused input into its spillslot before the instruction. - self.add_move( - inst, - self.vreg_allocs[reused_op_vreg.vreg()], - Allocation::stack(reused_op_spillslot), - op.class(), - InstPosition::Before, - ); self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[reused_op_vreg.vreg()]; trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); } @@ -450,69 +925,6 @@ impl<'a, F: Function> Env<'a, F> { } } - fn place_branch_args_in_stack_allocs(&mut self, block: Block, inst: Inst, succ: Block, succ_idx: usize) { - let succ_params = self.func.block_params(succ); - - // Used to know which temporary spillslot should be used next. - let mut next_temp_idx = PartedByRegClass { items: [0, 0, 0] }; - - fn reset_temp_idx(next_temp_idx: &mut PartedByRegClass) { - next_temp_idx[RegClass::Int] = 0; - next_temp_idx[RegClass::Float] = 0; - next_temp_idx[RegClass::Vector] = 0; - } - - // Move from temporaries to post block locations. 
- for vreg in self.func.branch_blockparams(block, inst, succ_idx).iter() { - self.live_vregs.insert(*vreg); - if self.temp_spillslots[vreg.class()].len() == next_temp_idx[vreg.class()] { - let newslot = self.allocstack(vreg); - self.temp_spillslots[vreg.class()].push(newslot); - } - let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; - let temp = Allocation::stack(temp_slot); - next_temp_idx[vreg.class()] += 1; - let prev_alloc = self.vreg_allocs[vreg.vreg()]; - if prev_alloc.is_some() { - trace!("{:?} which is going to be in {:?} inserting move to {:?}", vreg, temp, prev_alloc); - self.add_move(inst, temp, prev_alloc, vreg.class(), InstPosition::Before); - } else { - trace!("{:?} prev alloc is none, so no moving here", vreg); - } - } - - reset_temp_idx(&mut next_temp_idx); - - // Move from temporaries to block param spillslots. - for (pos, vreg) in self.func.branch_blockparams(block, inst, succ_idx).iter().enumerate() { - let succ_param_vreg = succ_params[pos]; - let param_alloc = Allocation::stack(self.vreg_spillslots[succ_param_vreg.vreg()]); - let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; - let temp = Allocation::stack(temp_slot); - next_temp_idx[vreg.class()] += 1; - trace!(" --- Placing branch arg {:?} in {:?}", vreg, temp); - trace!("{:?} which is now in {:?} inserting move to {:?}", vreg, temp, param_alloc); - self.add_move(inst, temp, param_alloc, vreg.class(), InstPosition::Before); - } - - reset_temp_idx(&mut next_temp_idx); - - // Move from branch args spillslots to temporaries. 
- for vreg in self.func.branch_blockparams(block, inst, succ_idx).iter() { - if self.vreg_spillslots[vreg.vreg()].is_invalid() { - self.vreg_spillslots[vreg.vreg()] = self.allocstack(vreg); - trace!("Block arg {:?} is going to be in {:?}", vreg, Allocation::stack(self.vreg_spillslots[vreg.vreg()])); - } - let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; - let temp = Allocation::stack(temp_slot); - next_temp_idx[vreg.class()] += 1; - let vreg_spill = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); - self.vreg_allocs[vreg.vreg()] = vreg_spill; - trace!("{:?} which is going to be in {:?} inserting move to {:?}", vreg, vreg_spill, temp); - self.add_move(inst, vreg_spill, temp, vreg.class(), InstPosition::Before); - } - } - /// If instruction `inst` is a branch in `block`, /// this function places branch arguments in the spillslots /// expected by the destination blocks. @@ -541,6 +953,8 @@ impl<'a, F: Function> Env<'a, F> { // the reading of all the block params must be done before the writing. // This is necessary to prevent overwriting the branch arg's value before // placing it in the corresponding branch param spillslot. + + // NO LONGER BEING INSERTED IN REVERSE. // And because edits are inserted in reverse, the algorithm has to process // the branch args which are not branch params first. This will result in the // output code processing branch args which are params before the others. @@ -549,55 +963,6 @@ impl<'a, F: Function> Env<'a, F> { self.alloc_slots_for_block_params(*succ); } - for (succ_idx, succ) in self.func.block_succs(block).iter().enumerate() { - let succ_params = self.func.block_params(*succ); - - // Move from temporaries to post block locations. 
- for vreg in self.func.branch_blockparams(block, inst, succ_idx).iter() { - self.live_vregs.insert(*vreg); - if self.temp_spillslots[vreg.class()].len() == next_temp_idx[vreg.class()] { - let newslot = self.allocstack(vreg); - self.temp_spillslots[vreg.class()].push(newslot); - } - let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; - let temp = Allocation::stack(temp_slot); - next_temp_idx[vreg.class()] += 1; - if succ_params.contains(vreg) { - // Skip to avoid overwriting the new value for the block param, - // which will be moved into its spillslot from its temporary. - continue; - } - let prev_alloc = self.vreg_allocs[vreg.vreg()]; - if prev_alloc.is_some() { - trace!("{:?} which is going to be in {:?} inserting move to {:?}", vreg, temp, prev_alloc); - self.add_move(inst, temp, prev_alloc, vreg.class(), InstPosition::Before); - } else { - trace!("{:?} prev alloc is none, so no moving here", vreg); - } - } - } - - reset_temp_idx(&mut next_temp_idx); - - for (succ_idx, succ) in self.func.block_succs(block).iter().enumerate() { - let succ_params = self.func.block_params(*succ); - - // Move from temporaries to block param spillslots. 
- for (pos, vreg) in self.func.branch_blockparams(block, inst, succ_idx).iter().enumerate() { - let succ_param_vreg = succ_params[pos]; - let param_alloc = Allocation::stack(self.vreg_spillslots[succ_param_vreg.vreg()]); - let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; - let temp = Allocation::stack(temp_slot); - self.vreg_allocs[vreg.vreg()] = temp; - next_temp_idx[vreg.class()] += 1; - trace!(" --- Placing branch arg {:?} in {:?}", vreg, temp); - trace!("{:?} which is now in {:?} inserting move to {:?}", vreg, temp, param_alloc); - self.add_move(inst, temp, param_alloc, vreg.class(), InstPosition::Before); - } - } - - reset_temp_idx(&mut next_temp_idx); - for (succ_idx, _) in self.func.block_succs(block).iter().enumerate() { // Move from branch args spillslots to temporaries. // @@ -624,17 +989,72 @@ impl<'a, F: Function> Env<'a, F> { // so that if they aren't moved at all throughout the block, they will not be expected to // be in another vreg's spillslot at the block beginning. 
for vreg in self.func.branch_blockparams(block, inst, succ_idx).iter() { + self.live_vregs.insert(*vreg); if self.vreg_spillslots[vreg.vreg()].is_invalid() { self.vreg_spillslots[vreg.vreg()] = self.allocstack(vreg); trace!("Block arg {:?} is going to be in {:?}", vreg, Allocation::stack(self.vreg_spillslots[vreg.vreg()])); } + if self.temp_spillslots[vreg.class()].len() == next_temp_idx[vreg.class()] { + let newslot = self.allocstack(vreg); + self.temp_spillslots[vreg.class()].push(newslot); + } let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; let temp = Allocation::stack(temp_slot); next_temp_idx[vreg.class()] += 1; let vreg_spill = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); - self.vreg_allocs[vreg.vreg()] = vreg_spill; trace!("{:?} which is going to be in {:?} inserting move to {:?}", vreg, vreg_spill, temp); - self.add_move(inst, vreg_spill, temp, vreg.class(), InstPosition::Before); + self.add_move_later(inst, vreg_spill, temp, vreg.class(), InstPosition::Before, false); + } + } + + reset_temp_idx(&mut next_temp_idx); + + for (succ_idx, succ) in self.func.block_succs(block).iter().enumerate() { + let succ_params = self.func.block_params(*succ); + + // Move from temporaries to block param spillslots. 
+ for (pos, vreg) in self.func.branch_blockparams(block, inst, succ_idx).iter().enumerate() { + let succ_param_vreg = succ_params[pos]; + let param_alloc = Allocation::stack(self.vreg_spillslots[succ_param_vreg.vreg()]); + let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; + let temp = Allocation::stack(temp_slot); + self.vreg_allocs[vreg.vreg()] = temp; + next_temp_idx[vreg.class()] += 1; + trace!(" --- Placing branch arg {:?} in {:?}", vreg, temp); + trace!("{:?} which is now in {:?} inserting move to {:?}", vreg, temp, param_alloc); + self.add_move_later(inst, temp, param_alloc, vreg.class(), InstPosition::Before, false); + } + } + + reset_temp_idx(&mut next_temp_idx); + + /*for (succ_idx, succ) in self.func.block_succs(block).iter().enumerate() { + let succ_params = self.func.block_params(*succ); + + // Move from temporaries to post block locations. + for vreg in self.func.branch_blockparams(block, inst, succ_idx).iter() { + let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; + let temp = Allocation::stack(temp_slot); + next_temp_idx[vreg.class()] += 1; + if succ_params.contains(vreg) { + // Skip to avoid overwriting the new value for the block param, + // which will be moved into its spillslot from its temporary. + continue; + } + let prev_alloc = self.vreg_allocs[vreg.vreg()]; + if prev_alloc.is_some() { + trace!("{:?} which is going to be in {:?} inserting move to {:?}", vreg, temp, prev_alloc); + self.add_move_later(inst, temp, prev_alloc, vreg.class(), InstPosition::Before); + } else { + trace!("{:?} prev alloc is none, so no moving here", vreg); + } + } + }*/ + + for (succ_idx, _) in self.func.block_succs(block).iter().enumerate() { + for vreg in self.func.branch_blockparams(block, inst, succ_idx).iter() { + // All branch arguments should be in their spillslots at the end of the function. 
+ self.vreg_allocs[vreg.vreg()] = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); } } } @@ -644,17 +1064,23 @@ impl<'a, F: Function> Env<'a, F> { self.process_branch(block, inst); } let operands = self.func.inst_operands(inst); - for (op_idx, op) in NonReuseLateOperands::new(operands) { + for (op_idx, op) in FixedLateOperands::new(operands) { + self.process_operand_allocation(inst, op, op_idx); + } + for (op_idx, op) in NonFixedNonReuseLateOperands::new(operands) { self.process_operand_allocation(inst, op, op_idx); } for (_, op) in NonReuseLateDefOperands::new(operands) { - self.freealloc(op.vreg()); + self.freealloc(op.vreg(), true); + } + for (op_idx, op) in FixedEarlyOperands::new(operands) { + self.process_operand_allocation(inst, op, op_idx); } - for (op_idx, op) in NonReuseEarlyOperands::new(operands) { + for (op_idx, op) in NonFixedNonReuseEarlyOperands::new(operands) { self.process_operand_allocation(inst, op, op_idx); } for (_, op) in NonReuseEarlyDefOperands::new(operands) { - self.freealloc(op.vreg()); + self.freealloc(op.vreg(), true); } for (op_idx, op) in ReuseOperands::new(operands) { let OperandConstraint::Reuse(reused_idx) = op.constraint() else { @@ -662,8 +1088,31 @@ impl<'a, F: Function> Env<'a, F> { }; self.process_reuse_operand_allocation(inst, op, op_idx, operands[reused_idx], reused_idx); } - self.edits.extend(self.pre_edits.iter().cloned()); - self.pre_edits.clear(); + for i in (0..self.inst_post_edits.len()).rev() { + let (point, edit, class) = self.inst_post_edits[i].clone(); + self.process_edit(point, edit, class); + } + for i in (0..self.inst_pre_edits.len()).rev() { + let (point, edit, class) = self.inst_pre_edits[i].clone(); + self.process_edit(point, edit, class); + } + self.inst_post_edits.clear(); + self.inst_pre_edits.clear(); + for preg in self.free_after_curr_inst.iter().cloned() { + self.freepregs[preg.class()].insert(preg); + self.lrus[preg.class()].append(preg.hw_enc()); + } + for class in [RegClass::Int, 
RegClass::Float, RegClass::Vector] { + // Add the remaining freed def pregs back to the free list. + for preg in self.freed_def_pregs[class].iter().cloned() { + self.freepregs[class].insert(preg); + } + self.freed_def_pregs[class].clear(); + } + self.free_after_curr_inst.clear(); + self.vregs_allocd_in_curr_inst.clear(); + self.first_use.clear(); + self.allocs_used_by_use_ops.clear(); } /// At the beginning of every block, all virtual registers that are @@ -671,14 +1120,12 @@ impl<'a, F: Function> Env<'a, F> { /// This function sets the current allocations of livein registers /// to their spillslots and inserts the edits to flow livein values to /// the allocations where they are expected to be before the first - /// instruction. + /// instruction. fn reload_at_begin(&mut self, block: Block) { // We need to check for the registers that are still live. // These registers are livein and they should be stack-allocated. - // TODO: Get rid of this clone!!!!!!! let live_vregs = self.live_vregs.clone(); - trace!("In reloading, live_vregs: {:?}", live_vregs); - for vreg in live_vregs.into_iter() { + for vreg in live_vregs.iter().cloned() { if self.vreg_spillslots[vreg.vreg()].is_invalid() { self.vreg_spillslots[vreg.vreg()] = self.allocstack(&vreg); } @@ -686,21 +1133,51 @@ impl<'a, F: Function> Env<'a, F> { // the first instruction. let prev_alloc = self.vreg_allocs[vreg.vreg()]; if prev_alloc.is_reg() { - self.freealloc(vreg); + self.freealloc(vreg, true); } - self.vreg_allocs[vreg.vreg()] = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); - if self.vreg_allocs[vreg.vreg()] == prev_alloc { + let slot = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); + self.vreg_allocs[vreg.vreg()] = slot; + if slot == prev_alloc { // No need to do any movements if the spillslot is where the vreg is expected to be. 
trace!("No need to reload {:?} because it's already in its expected allocation", vreg); continue; } trace!("Move reason: reload {:?} at begin - move from its spillslot", vreg); - self.move_before_inst( + self.add_move_later( self.func.block_insns(block).first(), - vreg, + slot, prev_alloc, + vreg.class(), + InstPosition::Before, + true ); + /*self.move_before_inst( + self.func.block_insns(block).first(), + vreg, + prev_alloc, + );*/ + } + for i in (0..self.inst_post_edits.len()).rev() { + let (point, edit, class) = self.inst_post_edits[i].clone(); + self.process_edit(point, edit, class); + } + for i in (0..self.inst_pre_edits.len()).rev() { + let (point, edit, class) = self.inst_pre_edits[i].clone(); + self.process_edit(point, edit, class); + } + self.inst_post_edits.clear(); + self.inst_pre_edits.clear(); + for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { + for preg in self.freed_def_pregs[class].iter().cloned() { + self.freepregs[class].insert(preg); + } + self.freed_def_pregs[class].clear(); + } + for preg in self.free_after_curr_inst.iter().cloned() { + self.freepregs[preg.class()].insert(preg); + self.lrus[preg.class()].append(preg.hw_enc()); } + self.free_after_curr_inst.clear(); } fn alloc_block(&mut self, block: Block) { @@ -718,7 +1195,7 @@ impl<'a, F: Function> Env<'a, F> { for block in (0..self.func.num_blocks()).rev() { self.alloc_block(Block::new(block)); } - self.edits.reverse(); + //self.edits.reverse(); ///////////////////////////////////////////////////////////////////////////////////// trace!("Done!"); @@ -759,7 +1236,7 @@ pub fn run( trace!("Final edits: {:?}", env.edits); Ok(Output { - edits: env.edits, + edits: env.edits.make_contiguous().to_vec(), allocs: env.allocs.allocs, inst_alloc_offsets: env.allocs.inst_alloc_offsets, num_spillslots: env.num_spillslots as usize, From fc7bfed15c12d67c552698ae617025535e647548 Mon Sep 17 00:00:00 2001 From: demilade Date: Mon, 22 Jul 2024 11:47:03 +0100 Subject: [PATCH 08/95] can now handle 
fixed non-allocatable constraints --- fuzz/fuzz_targets/fastalloc_checker.rs | 2 +- src/fastalloc/mod.rs | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fuzz/fuzz_targets/fastalloc_checker.rs b/fuzz/fuzz_targets/fastalloc_checker.rs index 546c30bb..880baa78 100644 --- a/fuzz/fuzz_targets/fastalloc_checker.rs +++ b/fuzz/fuzz_targets/fastalloc_checker.rs @@ -22,7 +22,7 @@ impl Arbitrary<'_> for TestCase { &Options { reused_inputs: true, fixed_regs: true, - fixed_nonallocatable: false, + fixed_nonallocatable: true, clobbers: false, reftypes: false, }, diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index e6039255..3d2abaab 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -505,6 +505,11 @@ impl<'a, F: Function> Env<'a, F> { /// Only processes non reuse-input operands fn process_operand_allocation(&mut self, inst: Inst, op: Operand, op_idx: usize) { debug_assert!(!matches!(op.constraint(), OperandConstraint::Reuse(_))); + if let Some(preg) = op.as_fixed_nonallocatable() { + self.allocs[(inst.index(), op_idx)] = Allocation::reg(preg); + trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); + return; + } self.live_vregs.insert(op.vreg()); if !self.allocd_within_constraint(op) { let prev_alloc = self.vreg_allocs[op.vreg().vreg()]; From 90d3c4f83c9f1c9def97fd0603b1a6e4c0fe8354 Mon Sep 17 00:00:00 2001 From: demilade Date: Tue, 23 Jul 2024 05:34:34 +0100 Subject: [PATCH 09/95] can now handle clobbers --- fuzz/fuzz_targets/fastalloc_checker.rs | 2 +- src/fastalloc/mod.rs | 102 ++++++++++++++++--------- 2 files changed, 69 insertions(+), 35 deletions(-) diff --git a/fuzz/fuzz_targets/fastalloc_checker.rs b/fuzz/fuzz_targets/fastalloc_checker.rs index 880baa78..b099c27a 100644 --- a/fuzz/fuzz_targets/fastalloc_checker.rs +++ b/fuzz/fuzz_targets/fastalloc_checker.rs @@ -23,7 +23,7 @@ impl Arbitrary<'_> for TestCase { reused_inputs: true, fixed_regs: true, 
fixed_nonallocatable: true, - clobbers: false, + clobbers: true, reftypes: false, }, )?, diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 3d2abaab..8f1ac4a7 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -123,6 +123,10 @@ pub struct Env<'a, F: Function> { /// for a reuse operand was reused by a use operand, and make decisions on /// whether or not to free the allocation. allocs_used_by_use_ops: HashSet, + /// Used to check if a clobbered register in the current instruction is an + /// allocatable register, to make decisions on whether or not is should be returned to + /// the free register list after allocation of the instruction's operands. + clobbered_reg_is_allocatable: HashSet, fixed_stack_slots: Vec, // Output. @@ -176,6 +180,7 @@ impl<'a, F: Function> Env<'a, F> { freed_def_pregs: PartedByRegClass { items: [BTreeSet::new(), BTreeSet::new(), BTreeSet::new()] }, first_use: HashSet::new(), allocs_used_by_use_ops: HashSet::new(), + clobbered_reg_is_allocatable: HashSet::new(), allocs: Allocs::new(func, env), edits: VecDeque::new(), num_spillslots: 0, @@ -329,12 +334,17 @@ impl<'a, F: Function> Env<'a, F> { self.add_move_later(inst, self.vreg_allocs[vreg.vreg()], to, vreg.class(), InstPosition::Before, false); } - fn allocd_within_constraint(&self, op: Operand) -> bool { + fn allocd_within_constraint(&self, inst: Inst, op: Operand) -> bool { let curr_alloc = self.vreg_allocs[op.vreg().vreg()]; - self.alloc_meets_op_constraint(curr_alloc, op.class(), op.constraint()) + self.alloc_meets_op_constraint(inst, curr_alloc, op.class(), op.constraint()) } - fn alloc_meets_op_constraint(&self, alloc: Allocation, class: RegClass, constraint: OperandConstraint) -> bool { + fn alloc_meets_op_constraint(&self, inst: Inst, alloc: Allocation, class: RegClass, constraint: OperandConstraint) -> bool { + if let Some(preg) = alloc.as_reg() { + if self.func.inst_clobbers(inst).contains(preg) { + return false; + } + } match constraint { 
OperandConstraint::Any => alloc.is_some(), OperandConstraint::Reg => { @@ -511,7 +521,7 @@ impl<'a, F: Function> Env<'a, F> { return; } self.live_vregs.insert(op.vreg()); - if !self.allocd_within_constraint(op) { + if !self.allocd_within_constraint(inst, op) { let prev_alloc = self.vreg_allocs[op.vreg().vreg()]; self.alloc_operand(inst, op, op_idx); // Need to insert a move to propagate flow from the current @@ -813,7 +823,7 @@ impl<'a, F: Function> Env<'a, F> { // TODO: Ensure that the reuse operand and its reused input have the // same register class. trace!("Move Reason: Reuse constraints"); - + let reused_op_first_use = self.first_use.contains(&reused_op.vreg()); if self.vreg_allocs[op.vreg().vreg()].is_some() { let op_prev_alloc = self.vreg_allocs[op.vreg().vreg()]; @@ -1033,29 +1043,6 @@ impl<'a, F: Function> Env<'a, F> { reset_temp_idx(&mut next_temp_idx); - /*for (succ_idx, succ) in self.func.block_succs(block).iter().enumerate() { - let succ_params = self.func.block_params(*succ); - - // Move from temporaries to post block locations. - for vreg in self.func.branch_blockparams(block, inst, succ_idx).iter() { - let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; - let temp = Allocation::stack(temp_slot); - next_temp_idx[vreg.class()] += 1; - if succ_params.contains(vreg) { - // Skip to avoid overwriting the new value for the block param, - // which will be moved into its spillslot from its temporary. 
- continue; - } - let prev_alloc = self.vreg_allocs[vreg.vreg()]; - if prev_alloc.is_some() { - trace!("{:?} which is going to be in {:?} inserting move to {:?}", vreg, temp, prev_alloc); - self.add_move_later(inst, temp, prev_alloc, vreg.class(), InstPosition::Before); - } else { - trace!("{:?} prev alloc is none, so no moving here", vreg); - } - } - }*/ - for (succ_idx, _) in self.func.block_succs(block).iter().enumerate() { for vreg in self.func.branch_blockparams(block, inst, succ_idx).iter() { // All branch arguments should be in their spillslots at the end of the function. @@ -1069,6 +1056,11 @@ impl<'a, F: Function> Env<'a, F> { self.process_branch(block, inst); } let operands = self.func.inst_operands(inst); + for preg in self.func.inst_clobbers(inst) { + if self.freepregs[preg.class()].remove(&preg) { + self.clobbered_reg_is_allocatable.insert(preg); + } + } for (op_idx, op) in FixedLateOperands::new(operands) { self.process_operand_allocation(inst, op, op_idx); } @@ -1093,6 +1085,54 @@ impl<'a, F: Function> Env<'a, F> { }; self.process_reuse_operand_allocation(inst, op, op_idx, operands[reused_idx], reused_idx); } + for clobbered_preg in self.func.inst_clobbers(inst) { + // If the instruction clobbers a register holding a live vreg, + // insert edits to save the live reg and restore it + // after the instruction. + // For example: + // + // 1. def v2 + // 2. use v0, use v1 - clobbers p0 + // 3. use v2 (fixed: p0) + // + // In the above, v2 is assigned to p0 first. During the processing of inst 2, + // p0 is clobbered, so v2 is no longer in it and p0 no longer contains v2 at inst 2. + // p0 is allocated to the v2 def operand in inst 1. The flow ends up wrong because of + // the clobbering. 
+ let vreg = self.vreg_in_preg[clobbered_preg.index()]; + if vreg != VReg::invalid() { + let preg_alloc = Allocation::reg(clobbered_preg); + let slot = if self.vreg_spillslots[vreg.vreg()].is_valid() { + self.vreg_spillslots[vreg.vreg()] + } else { + self.vreg_spillslots[vreg.vreg()] = self.allocstack(&vreg); + self.vreg_spillslots[vreg.vreg()] + }; + let slot_alloc = Allocation::stack(slot); + self.add_move_later( + inst, + preg_alloc, + slot_alloc, + vreg.class(), + InstPosition::Before, + true + ); + self.add_move_later( + inst, + slot_alloc, + preg_alloc, + vreg.class(), + InstPosition::After, + false, + ); + } + } + for preg in self.func.inst_clobbers(inst) { + if self.clobbered_reg_is_allocatable.contains(&preg) { + self.freepregs[preg.class()].insert(preg); + } + } + self.clobbered_reg_is_allocatable.clear(); for i in (0..self.inst_post_edits.len()).rev() { let (point, edit, class) = self.inst_post_edits[i].clone(); self.process_edit(point, edit, class); @@ -1156,11 +1196,6 @@ impl<'a, F: Function> Env<'a, F> { InstPosition::Before, true ); - /*self.move_before_inst( - self.func.block_insns(block).first(), - vreg, - prev_alloc, - );*/ } for i in (0..self.inst_post_edits.len()).rev() { let (point, edit, class) = self.inst_post_edits[i].clone(); @@ -1200,7 +1235,6 @@ impl<'a, F: Function> Env<'a, F> { for block in (0..self.func.num_blocks()).rev() { self.alloc_block(Block::new(block)); } - //self.edits.reverse(); ///////////////////////////////////////////////////////////////////////////////////// trace!("Done!"); From 7bf8e094f4e19f32cc29d3f7bb76e75cc373131a Mon Sep 17 00:00:00 2001 From: demilade Date: Thu, 25 Jul 2024 07:10:53 +0100 Subject: [PATCH 10/95] can now handle safepoint instructions --- fuzz/fuzz_targets/fastalloc_checker.rs | 2 +- src/fastalloc/mod.rs | 370 ++++++++++++++++--------- 2 files changed, 245 insertions(+), 127 deletions(-) diff --git a/fuzz/fuzz_targets/fastalloc_checker.rs b/fuzz/fuzz_targets/fastalloc_checker.rs index 
b099c27a..b8b7e467 100644 --- a/fuzz/fuzz_targets/fastalloc_checker.rs +++ b/fuzz/fuzz_targets/fastalloc_checker.rs @@ -24,7 +24,7 @@ impl Arbitrary<'_> for TestCase { fixed_regs: true, fixed_nonallocatable: true, clobbers: true, - reftypes: false, + reftypes: true, }, )?, }) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 8f1ac4a7..722c093a 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1,7 +1,8 @@ use core::convert::TryInto; use core::iter::FromIterator; use core::ops::{Index, IndexMut}; -use crate::{Block, Inst, OperandKind, Operand, PReg, RegClass, VReg, SpillSlot, AllocationKind, OperandConstraint, InstPosition}; +use crate::domtree::dominates; +use crate::{domtree, postorder, AllocationKind, Block, Inst, InstPosition, Operand, OperandConstraint, OperandKind, PReg, RegClass, SpillSlot, VReg}; use crate::{Function, MachineEnv, ssa::validate_ssa, ProgPoint, Edit, Output}; use crate::{cfg::CFGInfo, RegAllocError, Allocation, ion::Stats}; use alloc::collections::{BTreeSet, VecDeque}; @@ -127,11 +128,33 @@ pub struct Env<'a, F: Function> { /// allocatable register, to make decisions on whether or not is should be returned to /// the free register list after allocation of the instruction's operands. clobbered_reg_is_allocatable: HashSet, + /// All the safepoint instructions encountered during allocation and their blocks. + /// When allocation is completed, this contains all the safepoint instructions + /// in the function. + /// This is used to build the stackmap after allocation is complete. + safepoint_insts: Vec<(Block, Inst)>, + /// All the liveout vregs encountered during allocation. + /// When allocation is completed, this contains all the liveout vregs in + /// the function. + /// This is used to build the stackmap after allocation is complete. + liveout_vregs: HashSet, + /// When allocation is completed, `liveout_vreg_def_inst[i]` holds the block + /// and instruction in which liveout vreg `i` is defined. 
If vreg `i` is not liveout, + /// then the block and instruction will be invalid. + /// This is used to build the stackmap after allocation is complete. + liveout_vreg_def_inst: Vec<(Block, Inst)>, + /// When allocation is completed, this holds all the reftype vregs that + /// already have a slot in the stackmap. + /// This is used while building the stackmap after allocation is completed, + /// to avoid adding duplicate entries for liveout vregs. + slot_is_in_stackmap: HashSet<(Inst, VReg)>, + fixed_stack_slots: Vec, // Output. allocs: Allocs, edits: VecDeque<(ProgPoint, Edit)>, + safepoint_slots: Vec<(ProgPoint, Allocation)>, num_spillslots: u32, stats: Stats, } @@ -181,8 +204,13 @@ impl<'a, F: Function> Env<'a, F> { first_use: HashSet::new(), allocs_used_by_use_ops: HashSet::new(), clobbered_reg_is_allocatable: HashSet::new(), + safepoint_insts: Vec::new(), + liveout_vregs: HashSet::new(), + liveout_vreg_def_inst: vec![(Block::invalid(), Inst::invalid()); func.num_vregs()], + slot_is_in_stackmap: HashSet::new(), allocs: Allocs::new(func, env), edits: VecDeque::new(), + safepoint_slots: Vec::new(), num_spillslots: 0, stats: Stats::default(), } @@ -258,74 +286,6 @@ impl<'a, F: Function> Env<'a, F> { } } - /* - fn add_move(&mut self, inst: Inst, from: Allocation, to: Allocation, class: RegClass, pos: InstPosition) { - if self.is_stack(from) && self.is_stack(to) { - let scratch_reg = if self.freepregs[class].is_empty() { - self.evict_any_reg(inst, class) - } else { - // The physical register used as a scratch register here has to be explicitly - // removed from the free registers list. This is to avoid scenarios like the - // following: - // 1. def v0, use v1, use v2 - // If v2 needs to be moved from a stack slot to some other specific stack slot - // for the constraints to be satisified, then some scratch register p0 has to be - // used to avoid stack-to-stack moves. 
If p0 is not a dedicated scratch register, - // then the following sequence is possible: - // First, p0 is used as a scratch register to move from the stack to whatever stackslot - // v2 is supposed to be in and the moves are inserted before the instruction. - // Second, v1 is allocated to p0 (thinking p0 is free). But before the instruction, - // p0 is overwritten by v2, so v2 will be used instead of v1. - // - // So, rather than leave the register in the free list, it's removed from this list - // and added back after the complete processing of all the operands in the instruction. - self.freepregs[class].pop().unwrap() - }; - self.free_after_curr_inst.push(scratch_reg); - // Remove the scratch register from the LRU to stop it from - // being used as an allocatable register for the current instruction. - // It should be added back after processing the current instruction. - self.lrus[class].remove(scratch_reg.hw_enc()); - let scratch_alloc = Allocation::reg(scratch_reg); - let mut target_edits = &mut self.edits; - if pos == InstPosition::Before { - target_edits = &mut self.pre_edits; - } - // Edits are added in reverse order because the edits - // will be reversed when all allocation is completed. 
- trace!("Edit: {:?}", (ProgPoint::new(inst, pos), Edit::Move { - from: scratch_alloc, - to, - })); - target_edits.push((ProgPoint::new(inst, pos), Edit::Move { - from: scratch_alloc, - to, - })); - trace!("Edit: {:?}", (ProgPoint::new(inst, pos), Edit::Move { - from, - to: scratch_alloc, - })); - target_edits.push((ProgPoint::new(inst, pos), Edit::Move { - from, - to: scratch_alloc, - })) - - } else { - let mut target_edits = &mut self.edits; - if pos == InstPosition::Before { - target_edits = &mut self.pre_edits; - } - trace!("Edit: {:?}", (ProgPoint::new(inst, pos), Edit::Move { - from, - to, - })); - target_edits.push((ProgPoint::new(inst, pos), Edit::Move { - from, - to, - })); - } - }*/ - fn move_after_inst(&mut self, inst: Inst, vreg: VReg, to: Allocation) { self.add_move_later(inst, self.vreg_allocs[vreg.vreg()], to, vreg.class(), InstPosition::After, false); } @@ -410,6 +370,7 @@ impl<'a, F: Function> Env<'a, F> { } self.vreg_allocs[vreg.vreg()] = Allocation::none(); self.live_vregs.remove(&vreg); + trace!("{:?} curr alloc is now {:?}", vreg, self.vreg_allocs[vreg.vreg()]); } /// Allocates a spill slot on the stack for `vreg` @@ -433,8 +394,12 @@ impl<'a, F: Function> Env<'a, F> { /// Allocates a physical register for the operand `op`. 
fn alloc_reg_for_operand(&mut self, inst: Inst, op: Operand) { - trace!("freepregs: {:?}", self.freepregs[RegClass::Int]); - trace!("freed_def_pregs: {:?}", self.freed_def_pregs[RegClass::Int]); + trace!("freepregs int: {:?}", self.freepregs[RegClass::Int]); + trace!("freepregs vector: {:?}", self.freepregs[RegClass::Vector]); + trace!("freepregs float: {:?}", self.freepregs[RegClass::Float]); + trace!("freed_def_pregs int: {:?}", self.freed_def_pregs[RegClass::Int]); + trace!("freed_def_pregs vector: {:?}", self.freed_def_pregs[RegClass::Vector]); + trace!("freed_def_pregs float: {:?}", self.freed_def_pregs[RegClass::Float]); trace!(""); if let Some(freed_def_preg) = self.freed_def_pregs[op.class()].pop_last() { // Don't poke the LRU because the freed def register is no @@ -459,7 +424,7 @@ impl<'a, F: Function> Env<'a, F> { } fn alloc_fixed_reg_for_operand(&mut self, inst: Inst, op: Operand, preg: PReg) { - trace!("The fixed preg: {:?}", preg); + trace!("The fixed preg: {:?} for operand {:?}", preg, op); let mut preg_is_allocatable = false; if self.vreg_in_preg[preg.index()] != VReg::invalid() { // Something is already in that register. Evict it. @@ -940,6 +905,51 @@ impl<'a, F: Function> Env<'a, F> { } } + fn save_and_restore_clobbered_registers(&mut self, inst: Inst) { + for clobbered_preg in self.func.inst_clobbers(inst) { + // If the instruction clobbers a register holding a live vreg, + // insert edits to save the live reg and restore it + // after the instruction. + // For example: + // + // 1. def v2 + // 2. use v0, use v1 - clobbers p0 + // 3. use v2 (fixed: p0) + // + // In the above, v2 is assigned to p0 first. During the processing of inst 2, + // p0 is clobbered, so v2 is no longer in it and p0 no longer contains v2 at inst 2. + // p0 is allocated to the v2 def operand in inst 1. The flow ends up wrong because of + // the clobbering. 
+ let vreg = self.vreg_in_preg[clobbered_preg.index()]; + if vreg != VReg::invalid() { + let preg_alloc = Allocation::reg(clobbered_preg); + let slot = if self.vreg_spillslots[vreg.vreg()].is_valid() { + self.vreg_spillslots[vreg.vreg()] + } else { + self.vreg_spillslots[vreg.vreg()] = self.allocstack(&vreg); + self.vreg_spillslots[vreg.vreg()] + }; + let slot_alloc = Allocation::stack(slot); + self.add_move_later( + inst, + preg_alloc, + slot_alloc, + vreg.class(), + InstPosition::Before, + true + ); + self.add_move_later( + inst, + slot_alloc, + preg_alloc, + vreg.class(), + InstPosition::After, + false, + ); + } + } + } + /// If instruction `inst` is a branch in `block`, /// this function places branch arguments in the spillslots /// expected by the destination blocks. @@ -968,11 +978,6 @@ impl<'a, F: Function> Env<'a, F> { // the reading of all the block params must be done before the writing. // This is necessary to prevent overwriting the branch arg's value before // placing it in the corresponding branch param spillslot. - - // NO LONGER BEING INSERTED IN REVERSE. - // And because edits are inserted in reverse, the algorithm has to process - // the branch args which are not branch params first. This will result in the - // output code processing branch args which are params before the others. for succ in self.func.block_succs(block).iter() { self.alloc_slots_for_block_params(*succ); @@ -1004,7 +1009,6 @@ impl<'a, F: Function> Env<'a, F> { // so that if they aren't moved at all throughout the block, they will not be expected to // be in another vreg's spillslot at the block beginning. 
for vreg in self.func.branch_blockparams(block, inst, succ_idx).iter() { - self.live_vregs.insert(*vreg); if self.vreg_spillslots[vreg.vreg()].is_invalid() { self.vreg_spillslots[vreg.vreg()] = self.allocstack(vreg); trace!("Block arg {:?} is going to be in {:?}", vreg, Allocation::stack(self.vreg_spillslots[vreg.vreg()])); @@ -1047,6 +1051,7 @@ impl<'a, F: Function> Env<'a, F> { for vreg in self.func.branch_blockparams(block, inst, succ_idx).iter() { // All branch arguments should be in their spillslots at the end of the function. self.vreg_allocs[vreg.vreg()] = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); + self.live_vregs.insert(*vreg); } } } @@ -1068,6 +1073,11 @@ impl<'a, F: Function> Env<'a, F> { self.process_operand_allocation(inst, op, op_idx); } for (_, op) in NonReuseLateDefOperands::new(operands) { + if self.liveout_vregs.contains(&op.vreg()) { + // Need to remember the instruction in which a liveout + // vreg was defined when adding reftype vregs to the stackmap. + self.liveout_vreg_def_inst[op.vreg().vreg()] = (block, inst); + } self.freealloc(op.vreg(), true); } for (op_idx, op) in FixedEarlyOperands::new(operands) { @@ -1077,54 +1087,75 @@ impl<'a, F: Function> Env<'a, F> { self.process_operand_allocation(inst, op, op_idx); } for (_, op) in NonReuseEarlyDefOperands::new(operands) { + if self.liveout_vregs.contains(&op.vreg()) { + // Need to remember the instruction in which a liveout + // vreg was defined when adding reftype vregs to the stackmap. + self.liveout_vreg_def_inst[op.vreg().vreg()] = (block, inst); + } self.freealloc(op.vreg(), true); } for (op_idx, op) in ReuseOperands::new(operands) { let OperandConstraint::Reuse(reused_idx) = op.constraint() else { unreachable!() }; + if self.liveout_vregs.contains(&op.vreg()) { + // Need to remember the instruction in which a liveout + // vreg was defined when adding reftype vregs to the stackmap. 
+ self.liveout_vreg_def_inst[op.vreg().vreg()] = (block, inst); + } self.process_reuse_operand_allocation(inst, op, op_idx, operands[reused_idx], reused_idx); } - for clobbered_preg in self.func.inst_clobbers(inst) { - // If the instruction clobbers a register holding a live vreg, - // insert edits to save the live reg and restore it - // after the instruction. - // For example: - // - // 1. def v2 - // 2. use v0, use v1 - clobbers p0 - // 3. use v2 (fixed: p0) - // - // In the above, v2 is assigned to p0 first. During the processing of inst 2, - // p0 is clobbered, so v2 is no longer in it and p0 no longer contains v2 at inst 2. - // p0 is allocated to the v2 def operand in inst 1. The flow ends up wrong because of - // the clobbering. - let vreg = self.vreg_in_preg[clobbered_preg.index()]; - if vreg != VReg::invalid() { - let preg_alloc = Allocation::reg(clobbered_preg); - let slot = if self.vreg_spillslots[vreg.vreg()].is_valid() { - self.vreg_spillslots[vreg.vreg()] - } else { - self.vreg_spillslots[vreg.vreg()] = self.allocstack(&vreg); - self.vreg_spillslots[vreg.vreg()] - }; - let slot_alloc = Allocation::stack(slot); - self.add_move_later( - inst, - preg_alloc, - slot_alloc, - vreg.class(), - InstPosition::Before, - true - ); - self.add_move_later( - inst, - slot_alloc, - preg_alloc, - vreg.class(), - InstPosition::After, - false, - ); + self.save_and_restore_clobbered_registers(inst); + if self.func.requires_refs_on_stack(inst) { + trace!("{:?} is a safepoint instruction. Need to move reftypes to stack", inst); + // Need to remember that this is a safepoint instruction when adding reftype + // liveout vregs to the stackmap. + self.safepoint_insts.push((block, inst)); + // Insert edits to save and restore live reftype vregs + // not already on the stack. 
+ for reftype_vreg in self.func.reftype_vregs() { + trace!("{:?} is a reftype vreg and needs to be on the stack", reftype_vreg); + let curr_alloc = self.vreg_allocs[reftype_vreg.vreg()]; + trace!("curr_alloc: {:?}", curr_alloc); + if let Some(_preg) = curr_alloc.as_reg() { + trace!("{:?} is currently in a preg. Inserting moves to save and restore it", reftype_vreg); + let slot = if self.vreg_spillslots[reftype_vreg.vreg()].is_valid() { + self.vreg_spillslots[reftype_vreg.vreg()] + } else { + self.vreg_spillslots[reftype_vreg.vreg()] = self.allocstack(&reftype_vreg); + self.vreg_spillslots[reftype_vreg.vreg()] + }; + let slot_alloc = Allocation::stack(slot); + self.add_move_later( + inst, + curr_alloc, + slot_alloc, + reftype_vreg.class(), + InstPosition::Before, + true + ); + self.add_move_later( + inst, + slot_alloc, + curr_alloc, + reftype_vreg.class(), + InstPosition::After, + false + ); + self.safepoint_slots.push((ProgPoint::new(inst, InstPosition::Before), slot_alloc)); + // Need to remember that this reftype's slot is already in the stackmap to + // avoid adding duplicated entries when adding entries for liveout reftype vregs. + self.slot_is_in_stackmap.insert((inst, *reftype_vreg)); + } else if let Some(slot) = curr_alloc.as_stack() { + trace!("{:?} is already on the stack.", reftype_vreg); + self.safepoint_slots.push(( + ProgPoint::new(inst, InstPosition::Before), + Allocation::stack(slot) + )); + // Need to remember that this reftype's slot is already in the stackmap to + // avoid adding duplicated entries when adding entries for liveout reftype vregs. 
+ self.slot_is_in_stackmap.insert((inst, *reftype_vreg)); + } } } for preg in self.func.inst_clobbers(inst) { @@ -1141,12 +1172,13 @@ impl<'a, F: Function> Env<'a, F> { let (point, edit, class) = self.inst_pre_edits[i].clone(); self.process_edit(point, edit, class); } - self.inst_post_edits.clear(); - self.inst_pre_edits.clear(); for preg in self.free_after_curr_inst.iter().cloned() { self.freepregs[preg.class()].insert(preg); self.lrus[preg.class()].append(preg.hw_enc()); } + self.free_after_curr_inst.clear(); + self.inst_post_edits.clear(); + self.inst_pre_edits.clear(); for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { // Add the remaining freed def pregs back to the free list. for preg in self.freed_def_pregs[class].iter().cloned() { @@ -1154,7 +1186,6 @@ impl<'a, F: Function> Env<'a, F> { } self.freed_def_pregs[class].clear(); } - self.free_after_curr_inst.clear(); self.vregs_allocd_in_curr_inst.clear(); self.first_use.clear(); self.allocs_used_by_use_ops.clear(); @@ -1167,6 +1198,9 @@ impl<'a, F: Function> Env<'a, F> { /// the allocations where they are expected to be before the first /// instruction. fn reload_at_begin(&mut self, block: Block) { + trace!("Reloading live registers at the beginning of block {:?}", block); + trace!("Live registers at the beginning of block {:?}: {:?}", block, self.live_vregs); + trace!("Block params at block {:?} beginning: {:?}", block, self.func.block_params(block)); // We need to check for the registers that are still live. // These registers are livein and they should be stack-allocated. let live_vregs = self.live_vregs.clone(); @@ -1177,11 +1211,37 @@ impl<'a, F: Function> Env<'a, F> { // The allocation where the vreg is expected to be before // the first instruction. 
let prev_alloc = self.vreg_allocs[vreg.vreg()]; - if prev_alloc.is_reg() { + let slot = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); + if self.func.block_params(block).contains(&vreg) { + // A block's block param is not live before the block. + // And `vreg_allocs[i]` of a virtual register i is none for + // dead vregs. self.freealloc(vreg, true); + if self.func.reftype_vregs().contains(&vreg) { + // This marks the definition of the block param. + // Record this information which will be used while building + // the stackmap later. + self.liveout_vreg_def_inst[vreg.vreg()] = (block, self.func.block_insns(block).first()); + } + } else { + // It is a liveout vreg from a predecessor. + self.vreg_allocs[vreg.vreg()] = slot; + // Need to remember that this is a liveout vreg so that its + // spillslot, if it's a reftype, can be recorded in the stackmap + // later. + self.liveout_vregs.insert(vreg); + if let Some(preg) = prev_alloc.as_reg() { + // Nothing is in that preg anymore. Return it to + // the free preg list. + self.vreg_in_preg[preg.index()] = VReg::invalid(); + if !self.is_stack(prev_alloc) { + // Using this instead of directly adding it to + // freepregs to prevent allocated registers from being + // used as scratch registers. + self.freed_def_pregs[preg.class()].insert(preg); + } + } } - let slot = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); - self.vreg_allocs[vreg.vreg()] = slot; if slot == prev_alloc { // No need to do any movements if the spillslot is where the vreg is expected to be. 
trace!("No need to reload {:?} because it's already in its expected allocation", vreg); @@ -1197,6 +1257,9 @@ impl<'a, F: Function> Env<'a, F> { true ); } + for block_param_vreg in self.func.block_params(block) { + self.live_vregs.remove(block_param_vreg); + } for i in (0..self.inst_post_edits.len()).rev() { let (point, edit, class) = self.inst_post_edits[i].clone(); self.process_edit(point, edit, class); @@ -1220,13 +1283,66 @@ impl<'a, F: Function> Env<'a, F> { self.free_after_curr_inst.clear(); } + fn build_safepoint_stackmap(&mut self) { + let postorder = postorder::calculate(self.func.num_blocks(), self.func.entry_block(), |block| { + self.func.block_succs(block) + }); + let domtree = domtree::calculate( + self.func.num_blocks(), + |block| self.func.block_preds(block), + &postorder[..], + self.func.entry_block(), + ); + // Check if the liveout vreg was defined before the safepoint + // instruction. If it was defined before it, then record the liveout + // with its spillslot in the stackmap (because the liveout vreg's first + // use hasn't been encountered yet. It is possible that a loop could). + for (safepoint_block, safepoint_inst) in self.safepoint_insts.iter() { + for liveout_vreg in self.liveout_vregs.iter() { + let (liveout_vreg_def_block, liveout_vreg_def_inst) = self.liveout_vreg_def_inst[liveout_vreg.vreg()]; + if self.func.reftype_vregs().contains(liveout_vreg) + && !self.slot_is_in_stackmap.contains(&(*safepoint_inst, *liveout_vreg)) + && dominates(&domtree, liveout_vreg_def_block, *safepoint_block) + { + if self.func.block_params(liveout_vreg_def_block).contains(liveout_vreg) { + // Since block params aren't explicitly defined, they are marked as defined + // in the first instruction in the block, even though they are actually + // defined just before that. + // This is the reason why <= is used here instead of just <. 
+ if liveout_vreg_def_inst <= *safepoint_inst { + trace!("Liveout vreg inst: {:?}", self.liveout_vreg_def_inst[liveout_vreg.vreg()]); + trace!("Safepoint inst: {:?}", safepoint_inst); + trace!("Adding a stackmap slot for liveout vreg {:?}", liveout_vreg); + self.safepoint_slots.push(( + ProgPoint::before(*safepoint_inst), + Allocation::stack(self.vreg_spillslots[liveout_vreg.vreg()]) + )); + } + } + // The definition of the vreg must come before the safepoint instruction + // This is necessary because, while the `dominates` call checks for different + // blocks, in the case where the vreg definition and the safepoint instructions + // are in the same block, we need to make this check. + else if liveout_vreg_def_inst < *safepoint_inst { + self.safepoint_slots.push(( + ProgPoint::before(*safepoint_inst), + Allocation::stack(self.vreg_spillslots[liveout_vreg.vreg()]) + )); + } + } + } + } + self.safepoint_slots.sort_by( + |slot0, slot1| slot0.0.cmp(&slot1.0) + ); + } + fn alloc_block(&mut self, block: Block) { trace!("{:?} start", block); for inst in self.func.block_insns(block).iter().rev() { self.alloc_inst(block, inst); } self.reload_at_begin(block); - self.live_vregs.clear(); trace!("{:?} end\n", block); } @@ -1235,6 +1351,7 @@ impl<'a, F: Function> Env<'a, F> { for block in (0..self.func.num_blocks()).rev() { self.alloc_block(Block::new(block)); } + self.build_safepoint_stackmap(); ///////////////////////////////////////////////////////////////////////////////////// trace!("Done!"); @@ -1274,13 +1391,14 @@ pub fn run( env.run()?; trace!("Final edits: {:?}", env.edits); +trace!("safepoint_slots: {:?}", env.safepoint_slots); Ok(Output { edits: env.edits.make_contiguous().to_vec(), allocs: env.allocs.allocs, inst_alloc_offsets: env.allocs.inst_alloc_offsets, num_spillslots: env.num_spillslots as usize, debug_locations: Vec::new(), - safepoint_slots: Vec::new(), + safepoint_slots: env.safepoint_slots, stats: env.stats, }) } From 
b93cb6a4b99f64d451ee7bcb6e659202b776ef5b Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Fri, 26 Jul 2024 16:53:54 +0100 Subject: [PATCH 11/95] remove stack only alloc panic --- src/fastalloc/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 722c093a..d4312a77 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -464,7 +464,10 @@ impl<'a, F: Function> Env<'a, F> { self.alloc_reg_for_operand(inst, op); } OperandConstraint::Stack => { - panic!("Stack only allocations aren't supported yet"); + if self.vreg_spillslots[op.vreg().vreg()].is_invalid() { + self.vreg_spillslots[op.vreg().vreg()] = self.allocstack(&op.vreg()); + } + self.vreg_allocs[op.vreg().vreg()] = Allocation::stack(self.vreg_spillslots[op.vreg().vreg()]); } OperandConstraint::FixedReg(preg) => { self.alloc_fixed_reg_for_operand(inst, op, preg); From 2fbb2518be4e2e74068844a1b52cc1d81b795078 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 1 Aug 2024 14:01:53 +0100 Subject: [PATCH 12/95] fixed bug with the LRU's PReg handling --- src/fastalloc/lru.rs | 33 ++++++++++++++++++--------------- src/fastalloc/mod.rs | 6 +++--- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/src/fastalloc/lru.rs b/src/fastalloc/lru.rs index 6f980739..00c691ac 100644 --- a/src/fastalloc/lru.rs +++ b/src/fastalloc/lru.rs @@ -1,4 +1,5 @@ use alloc::vec::Vec; +use alloc::vec; use core::ops::IndexMut; use std::ops::Index; use crate::{RegClass, PReg}; @@ -26,21 +27,23 @@ pub struct LruNode { } impl Lru { - pub fn new(regclass: RegClass, no_of_regs: usize) -> Self { - let mut data = Vec::with_capacity(no_of_regs); - for _ in 0..no_of_regs { - data.push(LruNode { prev: 0, next: 0 }); + pub fn new(regclass: RegClass, regs: &[PReg]) -> Self { + let mut data = vec![LruNode { prev: 0, next: 0 }; PReg::MAX + 1]; + let no_of_regs = regs.len(); + for i in 0..no_of_regs { + let (reg, prev_reg, next_reg) = ( + regs[i], + 
regs[i.checked_sub(1).unwrap_or(no_of_regs - 1)], + regs[if i >= no_of_regs - 1 { 0 } else { i + 1 }] + ); + data[reg.hw_enc()].prev = prev_reg.hw_enc(); + data[reg.hw_enc()].next = next_reg.hw_enc(); } - let mut lru = Self { - head: 0, + Self { + head: if regs.is_empty() { usize::MAX } else { regs[0].hw_enc() }, data, regclass, - }; - for i in 0..no_of_regs { - lru.data[i].prev = i.checked_sub(1).unwrap_or(no_of_regs - 1); - lru.data[i].next = (i + 1) % no_of_regs; } - lru } /// Marks the physical register `i` as the most recently used @@ -115,12 +118,12 @@ impl IndexMut for PartedByRegClass { pub type Lrus = PartedByRegClass; impl Lrus { - pub fn new(no_of_int_regs: usize, no_of_float_regs: usize, no_of_vec_regs: usize) -> Self { + pub fn new(int_regs: &[PReg], float_regs: &[PReg], vec_regs: &[PReg]) -> Self { Self { items: [ - Lru::new(RegClass::Int, no_of_int_regs), - Lru::new(RegClass::Float, no_of_float_regs), - Lru::new(RegClass::Vector, no_of_vec_regs), + Lru::new(RegClass::Int, int_regs), + Lru::new(RegClass::Float, float_regs), + Lru::new(RegClass::Vector, vec_regs), ] } } diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index d4312a77..9408157a 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -185,9 +185,9 @@ impl<'a, F: Function> Env<'a, F> { ] }, lrus: Lrus::new( - regs[0].len(), - regs[1].len(), - regs[2].len() + ®s[0], + ®s[1], + ®s[2] ), vreg_in_preg: vec![VReg::invalid(); PReg::NUM_INDEX], fixed_stack_slots: env.fixed_stack_slots.clone(), From d1a868e4638c953e85041f88c4b9c3f70fee0eed Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Fri, 2 Aug 2024 14:27:54 +0100 Subject: [PATCH 13/95] fix: registers can now be evicted to use as scratch registers --- src/fastalloc/mod.rs | 149 ++++++++++++++++++++++++++++++------------- 1 file changed, 104 insertions(+), 45 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 9408157a..1803119c 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -148,6 +148,10 
@@ pub struct Env<'a, F: Function> { /// This is used while building the stackmap after allocation is completed, /// to avoid adding duplicate entries for liveout vregs. slot_is_in_stackmap: HashSet<(Inst, VReg)>, + /// Used to determine if a scratch register is needed for an + /// instruction's moves during the `process_edit` calls. + inst_needs_scratch_reg: PartedByRegClass, + dedicated_scratch_regs: PartedByRegClass>, fixed_stack_slots: Vec, @@ -207,6 +211,12 @@ impl<'a, F: Function> Env<'a, F> { safepoint_insts: Vec::new(), liveout_vregs: HashSet::new(), liveout_vreg_def_inst: vec![(Block::invalid(), Inst::invalid()); func.num_vregs()], + inst_needs_scratch_reg: PartedByRegClass { items: [false, false, false] }, + dedicated_scratch_regs: PartedByRegClass { items: [ + env.scratch_by_class[0], + env.scratch_by_class[1], + env.scratch_by_class[2], + ] }, slot_is_in_stackmap: HashSet::new(), allocs: Allocs::new(func, env), edits: VecDeque::new(), @@ -226,11 +236,83 @@ impl<'a, F: Function> Env<'a, F> { false } - fn process_edit(&mut self, point: ProgPoint, edit: Edit, class: RegClass) { + fn add_freed_regs_to_freelist(&mut self) { + for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { + for preg in self.freed_def_pregs[class].iter().cloned() { + self.freepregs[class].insert(preg); + } + self.freed_def_pregs[class].clear(); + } + for preg in self.free_after_curr_inst.iter().cloned() { + self.freepregs[preg.class()].insert(preg); + self.lrus[preg.class()].append(preg.hw_enc()); + } + self.free_after_curr_inst.clear(); + } + + /// The scratch registers needed for processing the edits generated + /// during a `reload_at_begin` call. + /// + /// This function is only called when all instructions in a block have + /// already been processed. The only edits being processed will be for the + /// ones to move a liveout vreg or block param from its spillslot to its + /// expected allocation. 
+ fn get_scratch_regs_for_reloading(&self) -> PartedByRegClass> { + let mut scratch_regs = PartedByRegClass{ items: [None, None, None] }; + for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { + if self.inst_needs_scratch_reg[class] { + if self.dedicated_scratch_regs[class].is_some() { + scratch_regs[class] = self.dedicated_scratch_regs[class]; + } else { + scratch_regs[class] = Some(*self.freepregs[class].last().expect("Allocation impossible?")); + } + } + } + scratch_regs + } + + /// The scratch registers needed for processing edits generated while + /// processing instructions. + fn get_scratch_regs(&mut self, inst: Inst) -> PartedByRegClass> { + let mut scratch_regs = PartedByRegClass { items: [None, None, None] }; + for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { + if self.inst_needs_scratch_reg[class] { + if let Some(reg) = self.dedicated_scratch_regs[class] { + scratch_regs[class] = Some(reg); + } else { + let reg = if let Some(preg) = self.freepregs[class].last() { + *preg + } else { + self.evict_any_reg(inst, class) + }; + scratch_regs[class] = Some(reg); + } + } + } + scratch_regs + } + + fn process_edits(&mut self, scratch_regs: PartedByRegClass>) { + for i in (0..self.inst_post_edits.len()).rev() { + let (point, edit, class) = self.inst_post_edits[i].clone(); + self.process_edit(point, edit, scratch_regs[class]); + } + for i in (0..self.inst_pre_edits.len()).rev() { + let (point, edit, class) = self.inst_pre_edits[i].clone(); + self.process_edit(point, edit, scratch_regs[class]); + } + for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { + self.inst_needs_scratch_reg[class] = false; + } + self.inst_post_edits.clear(); + self.inst_pre_edits.clear(); + } + + fn process_edit(&mut self, point: ProgPoint, edit: Edit, scratch_reg: Option) { trace!("Processing edit: {:?}", edit); let Edit::Move { from, to } = edit; if self.is_stack(from) && self.is_stack(to) { - let scratch_reg = 
*self.freepregs[class].last().expect("Allocation impossible?"); + let scratch_reg = scratch_reg.unwrap(); trace!("Edit is stack-to-stack, generating two moves with a scratch register {:?}", scratch_reg); let scratch_alloc = Allocation::reg(scratch_reg); trace!("Processed Edit: {:?}", (point, Edit::Move { @@ -263,6 +345,9 @@ impl<'a, F: Function> Env<'a, F> { } fn add_move_later(&mut self, inst: Inst, from: Allocation, to: Allocation, class: RegClass, pos: InstPosition, prepend: bool) { + if self.is_stack(from) && self.is_stack(to) { + self.inst_needs_scratch_reg[class] = true; + } let target_edits = match pos { InstPosition::After => &mut self.inst_post_edits, InstPosition::Before => &mut self.inst_pre_edits @@ -1167,28 +1252,9 @@ impl<'a, F: Function> Env<'a, F> { } } self.clobbered_reg_is_allocatable.clear(); - for i in (0..self.inst_post_edits.len()).rev() { - let (point, edit, class) = self.inst_post_edits[i].clone(); - self.process_edit(point, edit, class); - } - for i in (0..self.inst_pre_edits.len()).rev() { - let (point, edit, class) = self.inst_pre_edits[i].clone(); - self.process_edit(point, edit, class); - } - for preg in self.free_after_curr_inst.iter().cloned() { - self.freepregs[preg.class()].insert(preg); - self.lrus[preg.class()].append(preg.hw_enc()); - } - self.free_after_curr_inst.clear(); - self.inst_post_edits.clear(); - self.inst_pre_edits.clear(); - for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { - // Add the remaining freed def pregs back to the free list. 
- for preg in self.freed_def_pregs[class].iter().cloned() { - self.freepregs[class].insert(preg); - } - self.freed_def_pregs[class].clear(); - } + let scratch_regs = self.get_scratch_regs(inst); + self.process_edits(scratch_regs); + self.add_freed_regs_to_freelist(); self.vregs_allocd_in_curr_inst.clear(); self.first_use.clear(); self.allocs_used_by_use_ops.clear(); @@ -1263,27 +1329,8 @@ impl<'a, F: Function> Env<'a, F> { for block_param_vreg in self.func.block_params(block) { self.live_vregs.remove(block_param_vreg); } - for i in (0..self.inst_post_edits.len()).rev() { - let (point, edit, class) = self.inst_post_edits[i].clone(); - self.process_edit(point, edit, class); - } - for i in (0..self.inst_pre_edits.len()).rev() { - let (point, edit, class) = self.inst_pre_edits[i].clone(); - self.process_edit(point, edit, class); - } - self.inst_post_edits.clear(); - self.inst_pre_edits.clear(); - for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { - for preg in self.freed_def_pregs[class].iter().cloned() { - self.freepregs[class].insert(preg); - } - self.freed_def_pregs[class].clear(); - } - for preg in self.free_after_curr_inst.iter().cloned() { - self.freepregs[preg.class()].insert(preg); - self.lrus[preg.class()].append(preg.hw_enc()); - } - self.free_after_curr_inst.clear(); + self.process_edits(self.get_scratch_regs_for_reloading()); + self.add_freed_regs_to_freelist(); } fn build_safepoint_stackmap(&mut self) { @@ -1390,6 +1437,18 @@ pub fn run( validate_ssa(func, &cfginfo)?; } + trace!("Processing a new function"); + for block in 0..func.num_blocks() { + let block = Block::new(block); + trace!("Block {:?}. preds: {:?}. 
succs: {:?}", + block, func.block_preds(block), func.block_succs(block) + ); + for inst in func.block_insns(block).iter() { + trace!("inst{:?}: {:?}", inst.index(), func.inst_operands(inst)); + } + trace!(""); + } + let mut env = Env::new(func, mach_env); env.run()?; From 8e88c40e570298ee432d24a94baac0f0a78c7343 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Sun, 4 Aug 2024 09:47:42 +0100 Subject: [PATCH 14/95] fixed bugs --- src/fastalloc/iter.rs | 63 ++++++ src/fastalloc/lru.rs | 65 +++++- src/fastalloc/mod.rs | 458 ++++++++++++++++++++++++++++++------------ 3 files changed, 448 insertions(+), 138 deletions(-) diff --git a/src/fastalloc/iter.rs b/src/fastalloc/iter.rs index aea8236b..dbbd2e34 100644 --- a/src/fastalloc/iter.rs +++ b/src/fastalloc/iter.rs @@ -235,6 +235,48 @@ impl<'a> Iterator for NonFixedNonReuseEarlyOperands<'a> { } } +pub struct NonFixedNonReuseLateDefOperands<'a>(Operands<'a>); + +impl<'a> NonFixedNonReuseLateDefOperands<'a> { + pub fn new(operands: &'a [Operand]) -> Self { + Self(Operands::new(operands, SearchConstraint { + pos: Some(OperandPos::Late), + kind: Some(OperandKind::Def), + must_not_have_constraints: [Some(OperandConstraintKind::Reuse), Some(OperandConstraintKind::FixedReg)], + must_have_constraint: None, + })) + } +} + +impl<'a> Iterator for NonFixedNonReuseLateDefOperands<'a> { + type Item = (usize, Operand); + + fn next(&mut self) -> Option { + self.0.next() + } +} + +pub struct NonFixedNonReuseLateUseOperands<'a>(Operands<'a>); + +impl<'a> NonFixedNonReuseLateUseOperands<'a> { + pub fn new(operands: &'a [Operand]) -> Self { + Self(Operands::new(operands, SearchConstraint { + pos: Some(OperandPos::Late), + kind: Some(OperandKind::Use), + must_not_have_constraints: [Some(OperandConstraintKind::Reuse), Some(OperandConstraintKind::FixedReg)], + must_have_constraint: None, + })) + } +} + +impl<'a> Iterator for NonFixedNonReuseLateUseOperands<'a> { + type Item = (usize, Operand); + + fn next(&mut self) -> Option { + self.0.next() + } 
+} + pub struct NonReuseLateDefOperands<'a>(Operands<'a>); impl<'a> NonReuseLateDefOperands<'a> { @@ -577,4 +619,25 @@ mod tests { fixed_early_def_operand(11), ]); } + + + #[test] + fn non_fixed_non_reuse_late_def() { + let def_operands: Vec = NonFixedNonReuseLateDefOperands::new(&OPERANDS) + .map(|(_, op)| op) + .collect(); + assert_eq!(def_operands, vec![ + late_def_operand(1), + ]); + } + + #[test] + fn non_fixed_non_reuse_late_use() { + let late_operands: Vec = NonFixedNonReuseLateUseOperands::new(&OPERANDS) + .map(|(_, op)| op) + .collect(); + assert_eq!(late_operands, vec![ + late_use_operand(6), + ]); + } } diff --git a/src/fastalloc/lru.rs b/src/fastalloc/lru.rs index 00c691ac..a1dd2977 100644 --- a/src/fastalloc/lru.rs +++ b/src/fastalloc/lru.rs @@ -1,11 +1,10 @@ use alloc::vec::Vec; use alloc::vec; -use core::ops::IndexMut; +use core::{fmt, ops::IndexMut}; use std::ops::Index; use crate::{RegClass, PReg}; /// A least-recently-used cache organized as a linked list based on a vector. -#[derive(Debug)] pub struct Lru { /// The list of node information. /// @@ -28,7 +27,7 @@ pub struct LruNode { impl Lru { pub fn new(regclass: RegClass, regs: &[PReg]) -> Self { - let mut data = vec![LruNode { prev: 0, next: 0 }; PReg::MAX + 1]; + let mut data = vec![LruNode { prev: usize::MAX, next: usize::MAX }; PReg::MAX + 1]; let no_of_regs = regs.len(); for i in 0..no_of_regs { let (reg, prev_reg, next_reg) = ( @@ -63,6 +62,9 @@ impl Lru { /// Gets the least recently used physical register. 
pub fn pop(&mut self) -> PReg { + if self.is_empty() { + panic!("LRU is empty"); + } let oldest = self.data[self.head].prev; PReg::new(oldest, self.regclass) } @@ -72,15 +74,36 @@ impl Lru { let (iprev, inext) = (self.data[i].prev, self.data[i].next); self.data[iprev].next = self.data[i].next; self.data[inext].prev = self.data[i].prev; + self.data[i].prev = usize::MAX; + self.data[i].next = usize::MAX; + if i == self.head { + if i == inext { + // There are no regs in the LRU + self.head = usize::MAX; + } else { + self.head = inext; + } + } } /// Sets the node `i` to the last in the list. pub fn append(&mut self, i: usize) { - let last_node = self.data[self.head].prev; - self.data[last_node].next = i; - self.data[self.head].prev = i; - self.data[i].prev = last_node; - self.data[i].next = self.head; + if self.head != usize::MAX { + let last_node = self.data[self.head].prev; + self.data[last_node].next = i; + self.data[self.head].prev = i; + self.data[i].prev = last_node; + self.data[i].next = self.head; + } else { + self.head = i; + self.data[i].prev = i; + self.data[i].next = i; + } + } + + pub fn append_and_poke(&mut self, preg: PReg) { + self.append(preg.hw_enc()); + self.poke(preg); } /// Insert node `i` before node `j` in the list. 
@@ -93,6 +116,32 @@ impl Lru { prev, }; } + + pub fn is_empty(&self) -> bool { + self.head == usize::MAX + } +} + +impl fmt::Debug for Lru { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use std::format; + let data_str = if self.head == usize::MAX { + format!("") + } else { + let mut data_str = format!("p{}", self.head); + let mut node = self.data[self.head].next; + while node != self.head { + data_str += &format!(" -> p{}", node); + node = self.data[node].next; + } + data_str + }; + f.debug_struct("Lru") + .field("head", if self.is_empty() { &"none" } else { &self.head }) + .field("class", &self.regclass) + .field("data", &data_str) + .finish() + } } #[derive(Debug)] diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 1803119c..eec73bca 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -2,12 +2,14 @@ use core::convert::TryInto; use core::iter::FromIterator; use core::ops::{Index, IndexMut}; use crate::domtree::dominates; -use crate::{domtree, postorder, AllocationKind, Block, Inst, InstPosition, Operand, OperandConstraint, OperandKind, PReg, RegClass, SpillSlot, VReg}; +use crate::{domtree, postorder, AllocationKind, Block, Inst, InstPosition, Operand, OperandConstraint, OperandKind, OperandPos, PReg, PRegSet, RegClass, SpillSlot, VReg}; use crate::{Function, MachineEnv, ssa::validate_ssa, ProgPoint, Edit, Output}; use crate::{cfg::CFGInfo, RegAllocError, Allocation, ion::Stats}; use alloc::collections::{BTreeSet, VecDeque}; use alloc::vec::Vec; -use hashbrown::HashSet; +use hashbrown::{HashSet, HashMap}; + +use std::format; mod lru; mod iter; @@ -89,11 +91,14 @@ pub struct Env<'a, F: Function> { /// This is used to keep track of them so that they can be marked as free for reallocation /// after the instruction has completed processing. free_after_curr_inst: HashSet, - /// The virtual registers of operands that have been allocated in the current instruction. 
+ /// The virtual registers of use operands that have been allocated in the current instruction + /// and for which edits had to be inserted to save and restore them because their constraint + /// doesn't allow the allocation they are expected to be in after the instruction. + /// /// This needs to be kept track of to generate the correct moves in the case where a /// single virtual register is used multiple times in a single instruction with /// different constraints. - vregs_allocd_in_curr_inst: HashSet, + use_vregs_saved_and_restored_in_curr_inst: HashSet, /// Physical registers that were used for late def operands and now free to be /// reused for early operands in the current instruction. /// @@ -115,10 +120,17 @@ pub struct Env<'a, F: Function> { /// `freed_def_regs` avoids this by allowing the late def registers to be reused without making it /// possible for this scratch register scenario to happen. freed_def_pregs: PartedByRegClass>, - /// Used to keep track of which used vregs are being used for the first time - /// in the instruction. This is used to determine whether or not reused operands + /// Used to keep track of which used vregs are seen for the first time + /// in the instruction, that is, if the vregs live past the current instruction. + /// This is used to determine whether or not reused operands /// for reuse-input constraints should be restored after an instruction. - first_use: HashSet, + /// It's also used to determine if an early operand can reuse a freed def operand's + /// allocation. And it's also used to determine the edits to be inserted when + /// allocating a use operand. + vregs_first_seen_in_curr_inst: HashSet, + /// Used to keep track of which vregs have been allocated in the current instruction. + /// This is used to determine which edits to insert when allocating a use operand. + vregs_allocd_in_curr_inst: HashSet, /// Used to keep track of which allocations have been used by use operands in the /// current instruction. 
This is to determine whether or not an allocation /// for a reuse operand was reused by a use operand, and make decisions on @@ -151,7 +163,13 @@ pub struct Env<'a, F: Function> { /// Used to determine if a scratch register is needed for an /// instruction's moves during the `process_edit` calls. inst_needs_scratch_reg: PartedByRegClass, + /// The operand indexes of the inputs reused for reuse operands. + /// This is used to avoid assigning a freed def preg to a reused input. + reused_inputs_in_curr_inst: Vec, + /// The vregs defined or used in the current instruction. + vregs_in_curr_inst: HashSet, dedicated_scratch_regs: PartedByRegClass>, + preg_index_to_class_and_hw_enc: HashMap, fixed_stack_slots: Vec, @@ -178,6 +196,22 @@ impl<'a, F: Function> Env<'a, F> { trace!("{:?}", env); Self { func, + preg_index_to_class_and_hw_enc: { + let mut map = HashMap::new(); + for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { + let class = class as usize; + for reg in env.preferred_regs_by_class[class].iter().cloned() { + map.insert(reg.index(), reg); + } + for reg in env.non_preferred_regs_by_class[class].iter().cloned() { + map.insert(reg.index(), reg); + } + for reg in env.fixed_stack_slots.iter().cloned() { + map.insert(reg.index(), reg); + } + } + map + }, vreg_allocs: vec![Allocation::none(); func.num_vregs()], vreg_spillslots: vec![SpillSlot::invalid(); func.num_vregs()], live_vregs: HashSet::with_capacity(func.num_vregs()), @@ -204,14 +238,17 @@ impl<'a, F: Function> Env<'a, F> { inst_post_edits: VecDeque::new(), free_after_curr_inst: HashSet::new(), vregs_allocd_in_curr_inst: HashSet::new(), + use_vregs_saved_and_restored_in_curr_inst: HashSet::new(), freed_def_pregs: PartedByRegClass { items: [BTreeSet::new(), BTreeSet::new(), BTreeSet::new()] }, - first_use: HashSet::new(), + vregs_first_seen_in_curr_inst: HashSet::new(), allocs_used_by_use_ops: HashSet::new(), clobbered_reg_is_allocatable: HashSet::new(), safepoint_insts: Vec::new(), liveout_vregs: 
HashSet::new(), liveout_vreg_def_inst: vec![(Block::invalid(), Inst::invalid()); func.num_vregs()], inst_needs_scratch_reg: PartedByRegClass { items: [false, false, false] }, + reused_inputs_in_curr_inst: Vec::new(), + vregs_in_curr_inst: HashSet::new(), dedicated_scratch_regs: PartedByRegClass { items: [ env.scratch_by_class[0], env.scratch_by_class[1], @@ -240,6 +277,7 @@ impl<'a, F: Function> Env<'a, F> { for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { for preg in self.freed_def_pregs[class].iter().cloned() { self.freepregs[class].insert(preg); + self.lrus[class].append(preg.hw_enc()); } self.freed_def_pregs[class].clear(); } @@ -258,12 +296,16 @@ impl<'a, F: Function> Env<'a, F> { /// ones to move a liveout vreg or block param from its spillslot to its /// expected allocation. fn get_scratch_regs_for_reloading(&self) -> PartedByRegClass> { + trace!("Getting scratch registers for reload_at_begin"); let mut scratch_regs = PartedByRegClass{ items: [None, None, None] }; for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { if self.inst_needs_scratch_reg[class] { + trace!("{:?} class needs a scratch register", class); if self.dedicated_scratch_regs[class].is_some() { + trace!("Using the dedicated scratch register for class {:?}", class); scratch_regs[class] = self.dedicated_scratch_regs[class]; } else { + trace!("No dedicated scratch register for class {:?}. Using the last free register", class); scratch_regs[class] = Some(*self.freepregs[class].last().expect("Allocation impossible?")); } } @@ -274,19 +316,27 @@ impl<'a, F: Function> Env<'a, F> { /// The scratch registers needed for processing edits generated while /// processing instructions. 
fn get_scratch_regs(&mut self, inst: Inst) -> PartedByRegClass> { + trace!("Getting scratch registers for instruction {:?}", inst); let mut scratch_regs = PartedByRegClass { items: [None, None, None] }; for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { if self.inst_needs_scratch_reg[class] { + trace!("{:?} class needs a scratch register", class); if let Some(reg) = self.dedicated_scratch_regs[class] { + trace!("Using the dedicated scratch register for class {:?}", class); scratch_regs[class] = Some(reg); } else { + trace!("class {:?} has no dedicated scratch register", class); let reg = if let Some(preg) = self.freepregs[class].last() { + trace!("Using the last free {:?} register for scratch", class); *preg } else { + trace!("No free {:?} registers. Evicting a register", class); self.evict_any_reg(inst, class) }; scratch_regs[class] = Some(reg); } + } else { + trace!("{:?} class does not need a scratch register", class); } } scratch_regs @@ -356,6 +406,8 @@ impl<'a, F: Function> Env<'a, F> { from, to }, class)); + // TODO: Check if the source and destination are the same. And if they are, + // don't add the edit. // The sorting out of stack-to-stack moves will be done when the instruction's // edits are processed after all operands have been allocated. 
if prepend { @@ -380,23 +432,32 @@ impl<'a, F: Function> Env<'a, F> { } fn allocd_within_constraint(&self, inst: Inst, op: Operand) -> bool { - let curr_alloc = self.vreg_allocs[op.vreg().vreg()]; - self.alloc_meets_op_constraint(inst, curr_alloc, op.class(), op.constraint()) - } - - fn alloc_meets_op_constraint(&self, inst: Inst, alloc: Allocation, class: RegClass, constraint: OperandConstraint) -> bool { - if let Some(preg) = alloc.as_reg() { - if self.func.inst_clobbers(inst).contains(preg) { - return false; - } - } - match constraint { - OperandConstraint::Any => alloc.is_some(), + let alloc = self.vreg_allocs[op.vreg().vreg()]; + let alloc_is_clobber = if let Some(preg) = alloc.as_reg() { + self.func.inst_clobbers(inst).contains(preg) + } else { + false + }; + match op.constraint() { + OperandConstraint::Any => { + // Completely avoid assigning clobbers, if possible. + // Assigning a clobber to a def operand that lives past the + // current instruction makes it impossible to restore + // the vreg. + // And assigning a clobber to a use operand that is reused + // by a def operand with a reuse constraint will end up + // assigning the clobber to that def, and if it lives past + // the current instruction, then restoration will be impossible. + alloc.is_some() && !alloc_is_clobber + }, OperandConstraint::Reg => { - alloc.is_reg() && alloc.as_reg().unwrap().class() == class - && !self.is_stack(alloc) + let alloc_is_reg = alloc.is_reg() && alloc.as_reg().unwrap().class() == op.class() + && !self.is_stack(alloc); + alloc_is_reg && !alloc_is_clobber }, OperandConstraint::Stack => self.is_stack(alloc), + // It is possible for an operand to have a fixed register constraint to + // a clobber. 
OperandConstraint::FixedReg(preg) => alloc.is_reg() && alloc.as_reg().unwrap() == preg, OperandConstraint::Reuse(_) => { @@ -406,7 +467,9 @@ impl<'a, F: Function> Env<'a, F> { } fn evict_vreg_in_preg(&mut self, inst: Inst, preg: PReg) { + trace!("Removing the vreg in preg {:?} for eviction", preg); let evicted_vreg = self.vreg_in_preg[preg.index()]; + trace!("The removed vreg: {:?}", evicted_vreg); debug_assert_ne!(evicted_vreg, VReg::invalid()); if self.vreg_spillslots[evicted_vreg.vreg()].is_invalid() { self.vreg_spillslots[evicted_vreg.vreg()] = self.allocstack(&evicted_vreg); @@ -418,7 +481,9 @@ impl<'a, F: Function> Env<'a, F> { } fn evict_any_reg(&mut self, inst: Inst, regclass: RegClass) -> PReg { + trace!("Evicting a register in evict_any_reg for class {:?}", regclass); let preg = self.lrus[regclass].pop(); + trace!("Selected register from lru: {:?}", preg); // TODO: Check if the preg has already been allocated for this // instruction. If it has, then there are too many stuff to // allocate, making allocation impossible. @@ -430,7 +495,7 @@ impl<'a, F: Function> Env<'a, F> { preg } - fn freealloc(&mut self, vreg: VReg, add_to_freelist: bool) { + fn freealloc(&mut self, vreg: VReg, clobbers: PRegSet, add_to_freelist: bool) { trace!("Freeing vreg {:?} (add_to_freelist: {:?})", vreg, add_to_freelist); let alloc = self.vreg_allocs[vreg.vreg()]; match alloc.kind() { @@ -440,10 +505,27 @@ impl<'a, F: Function> Env<'a, F> { // If it's a fixed stack slot, then it's not allocatable. if !self.is_stack(alloc) { if add_to_freelist { - // Added to the freed def pregs list, not the free pregs - // list to avoid a def's allocated register being used - // as a scratch register. - self.freed_def_pregs[vreg.class()].insert(preg); + if clobbers.contains(preg) { + // For a defined vreg to be restored to the location it's expected to + // be in after the instruction, it cannot be allocated to a clobber because that + // will make the restoration impossible. 
+ // In the case where a reuse operand reuses an input allocated to a clobber, + // the defined vreg will be allocated to a clobber + // and if the vreg lives past the instruction, restoration will be impossible. + // To avoid this, simply make it impossible for a clobber to be allocated to + // a vreg with "any" or "any reg" constraints. + // By adding it to this list, instead of freed_def_pregs, the only way + // a clobber can be newly allocated to a vreg in the instruction is to + // use a fixed register constraint. + self.free_after_curr_inst.insert(preg); + } else { + // Added to the freed def pregs list, not the free pregs + // list to avoid a def's allocated register being used + // as a scratch register. + self.freed_def_pregs[vreg.class()].insert(preg); + } + // Don't allow this register to be evicted. + self.lrus[vreg.class()].remove(preg.hw_enc()); } } } @@ -478,7 +560,7 @@ impl<'a, F: Function> Env<'a, F> { } /// Allocates a physical register for the operand `op`. - fn alloc_reg_for_operand(&mut self, inst: Inst, op: Operand) { + fn alloc_reg_for_operand(&mut self, inst: Inst, op: Operand, op_idx: usize) { trace!("freepregs int: {:?}", self.freepregs[RegClass::Int]); trace!("freepregs vector: {:?}", self.freepregs[RegClass::Vector]); trace!("freepregs float: {:?}", self.freepregs[RegClass::Float]); @@ -486,14 +568,34 @@ impl<'a, F: Function> Env<'a, F> { trace!("freed_def_pregs vector: {:?}", self.freed_def_pregs[RegClass::Vector]); trace!("freed_def_pregs float: {:?}", self.freed_def_pregs[RegClass::Float]); trace!(""); - if let Some(freed_def_preg) = self.freed_def_pregs[op.class()].pop_last() { - // Don't poke the LRU because the freed def register is no - // longer in the LRU. It's not there so as to avoid getting it - // used as a scratch register. 
- trace!("Reusing the freed def preg: {:?}", freed_def_preg); - self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(freed_def_preg); - self.vreg_in_preg[freed_def_preg.index()] = op.vreg(); - } else { + let mut allocd = false; + // The only way a freed def preg can be reused for an operand is if + // the operand uses or defines a vreg in the early phase and the vreg doesn't + // live past the instruction. If the vreg lives past the instruction, then the + // possible defined value will overwrite it. + if op.pos() == OperandPos::Early && self.vregs_first_seen_in_curr_inst.contains(&op.vreg()) + // A reused input should not have the same allocation as a non-reused def operand. + // For example: + // 1. dev v0 (fixed: p0), def v1 (reuse: 2), use v2 + // 2. use v0, use v1 + // In the above, p0 is assigned to v0 in inst 1. Say, after v0 is freed, + // p0 is re-assigned to v2. Then, when reuse operands are processed, p0 + // will also end up being v1's allocation in inst 1. + // The end result will be allocating two defined vregs, v0 and v1, to the + // same allocation p0. + // To avoid this, don't allow a reused input to have the same allocation as + // a freed def operand. 
+ && !self.reused_inputs_in_curr_inst.contains(&op_idx) + { + if let Some(freed_def_preg) = self.freed_def_pregs[op.class()].pop_last() { + trace!("Reusing the freed def preg: {:?}", freed_def_preg); + self.lrus[freed_def_preg.class()].append_and_poke(freed_def_preg); + self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(freed_def_preg); + self.vreg_in_preg[freed_def_preg.index()] = op.vreg(); + allocd = true; + } + } + if !allocd { let preg = if self.freepregs[op.class()].is_empty() { trace!("Evicting a register"); self.evict_any_reg(inst, op.class()) @@ -510,9 +612,19 @@ impl<'a, F: Function> Env<'a, F> { fn alloc_fixed_reg_for_operand(&mut self, inst: Inst, op: Operand, preg: PReg) { trace!("The fixed preg: {:?} for operand {:?}", preg, op); + + // It is an error for a fixed register clobber to be used for a defined vreg + // that outlives the instruction, because it will be impossible to restore it. + if self.func.inst_clobbers(inst).contains(preg) && op.kind() == OperandKind::Def + && self.vregs_first_seen_in_curr_inst.contains(&op.vreg()) + { + panic!("Invalid input"); + } let mut preg_is_allocatable = false; if self.vreg_in_preg[preg.index()] != VReg::invalid() { // Something is already in that register. Evict it. + // TODO: Check if the evicted register is a register in the + // current instruction. If it is, then there's a problem. self.evict_vreg_in_preg(inst, preg); } else if self.freed_def_pregs[preg.class()].contains(&preg) { // Consider the scenario: @@ -525,10 +637,34 @@ impl<'a, F: Function> Env<'a, F> { // from `freed_def_pregs` here. preg_is_allocatable = true; self.freed_def_pregs[preg.class()].remove(&preg); + self.lrus[preg.class()].append(preg.hw_enc()); + } else if self.free_after_curr_inst.contains(&preg) { + // If the new allocation was once a freed prev_alloc, remove it + // from the free after current inst list. + // For example: + // + // 1. use v0 (fixed: p0), use v0 (fixed: p1) + // 2. 
use v0 (fixed: p1) + // + // In the processing of the above, v0 is allocated to p1 at inst 2. + // During the processing of inst 1, v0's allocation is changed to p0 + // and p1 is put on the free after current inst list to make it + // available for later allocation. + // But then, it's reallocated for the second operand. + // To prevent reallocating a register while a live one is still in it, + // this register has to be removed from the list. + trace!("{:?} is now using preg {:?}. Removing it from the free after instruction list", op.vreg(), preg); + preg_is_allocatable = true; + self.free_after_curr_inst.remove(&preg); + self.lrus[preg.class()].append(preg.hw_enc()); } else { // Find the register in the list of free registers (if it's there). - // If it's not there, then it must be be a fixed stack slot. + // If it's not there, then it must be be a fixed stack slot or + // a clobber, since clobbers are removed from the free preg list before allocation begins. preg_is_allocatable = self.freepregs[op.class()].remove(&preg); + if !preg_is_allocatable { + preg_is_allocatable = self.clobbered_reg_is_allocatable.contains(&preg); + } } if preg_is_allocatable { self.lrus[op.class()].poke(preg); @@ -543,10 +679,10 @@ impl<'a, F: Function> Env<'a, F> { fn alloc_operand(&mut self, inst: Inst, op: Operand, op_idx: usize) { match op.constraint() { OperandConstraint::Any => { - self.alloc_reg_for_operand(inst, op); + self.alloc_reg_for_operand(inst, op, op_idx); } OperandConstraint::Reg => { - self.alloc_reg_for_operand(inst, op); + self.alloc_reg_for_operand(inst, op, op_idx); } OperandConstraint::Stack => { if self.vreg_spillslots[op.vreg().vreg()].is_invalid() { @@ -573,9 +709,13 @@ impl<'a, F: Function> Env<'a, F> { trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); return; } + self.vregs_in_curr_inst.insert(op.vreg()); self.live_vregs.insert(op.vreg()); if !self.allocd_within_constraint(inst, op) { let 
prev_alloc = self.vreg_allocs[op.vreg().vreg()]; + if prev_alloc.is_none() { + self.vregs_first_seen_in_curr_inst.insert(op.vreg()); + } self.alloc_operand(inst, op, op_idx); // Need to insert a move to propagate flow from the current // allocation to the subsequent places where the value was @@ -628,17 +768,6 @@ impl<'a, F: Function> Env<'a, F> { true ); } else { - // In the case where `op` is a use, the defined value could - // have the same allocation as the `op` allocation. This - // is due to the fact that def operands are allocated and freed before - // use operands. Because of this, `op`'s allocation could be - // overwritten by the defined value's. And after the instruction, - // the defined value could be in `op`'s allocation, resulting in - // an incorrect value being moved into `prev_alloc`. - // Since, it's a use, the correct `op` value will already be in - // the `op` allocation before the instruction. - // Because of this, the move is done before, not after, `inst`. - // // This was handled by a simple move from the operand to its previous // allocation before the instruction, but this is incorrect. // Consider the scenario: @@ -709,7 +838,12 @@ impl<'a, F: Function> Env<'a, F> { // move from stack_v0 to p1 // 2. use v0 (fixed: p1) - if !self.vregs_allocd_in_curr_inst.contains(&op.vreg()) { + if !self.use_vregs_saved_and_restored_in_curr_inst.contains(&op.vreg()) + && !self.vregs_allocd_in_curr_inst.contains(&op.vreg()) + // Don't restore after the instruction if it doesn't live past + // this instruction. 
+ && !self.vregs_first_seen_in_curr_inst.contains(&op.vreg()) + { if self.vreg_spillslots[op.vreg().vreg()].is_invalid() { self.vreg_spillslots[op.vreg().vreg()] = self.allocstack(&op.vreg()); } @@ -728,8 +862,9 @@ impl<'a, F: Function> Env<'a, F> { prev_alloc, op.class(), InstPosition::After, - false, + true, ); + self.use_vregs_saved_and_restored_in_curr_inst.insert(op.vreg()); } else { self.add_move_later( inst, @@ -742,55 +877,31 @@ impl<'a, F: Function> Env<'a, F> { } } if prev_alloc.is_reg() { - // Free the previous allocation so that it can be - // reused. + // Free the previous allocation so that it can be reused. let preg = prev_alloc.as_reg().unwrap(); self.vreg_in_preg[preg.index()] = VReg::invalid(); // If it's a fixed stack slot, then it's not allocatable. - if !self.is_stack(prev_alloc) { + if !self.is_stack(prev_alloc) + // If it's a clobber, then only fixed register constraints + // can make it get allocated. + && !self.func.inst_clobbers(inst).contains(preg) + { trace!("{:?} is no longer using preg {:?}, so freeing it after instruction", op.vreg(), preg); self.free_after_curr_inst.insert(preg); self.lrus[preg.class()].remove(preg.hw_enc()); } } - } else if op.kind() == OperandKind::Use { - trace!("{:?}'s first use", op.vreg()); - self.first_use.insert(op.vreg()); } trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); } else { self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[op.vreg().vreg()]; trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); } - let new_alloc = self.vreg_allocs[op.vreg().vreg()]; - if new_alloc.is_reg() { - // If the new allocation was once a freed prev_alloc, remove it - // from the free after current inst list. - // For example: - // - // 1. use v0 (fixed: p0), use v0 (fixed: p1) - // 2. use v0 (fixed: p1) - // - // In the processing of the above, v0 is allocated to p1 at inst 2. 
- // During the processing of inst 1, v0's allocation is changed to p0 - // and p1 is put on the free after current inst list to make it - // available for later allocation. - // But then, it's reallocated for the second operand. - // To prevent reallocating a register while a live one is still in it, - // this register has to be removed from the list. - let preg = new_alloc.as_reg().unwrap(); - if self.free_after_curr_inst.contains(&preg) { - trace!("{:?} is now using preg {:?}. Removing it from the free after instruction list", op.vreg(), preg); - self.free_after_curr_inst.remove(&preg); - } - // The LRU doesn't need to be modified here because it has already - // been handled during the new allocation. - } if op.kind() == OperandKind::Use { // Need to remember that this allocation is used in this instruction // by a use operand, to make decisions on whether to free a reuse operand's // allocation during the processing of reuse operands. - self.allocs_used_by_use_ops.insert(new_alloc); + self.allocs_used_by_use_ops.insert(self.vreg_allocs[op.vreg().vreg()]); } self.vregs_allocd_in_curr_inst.insert(op.vreg()); } @@ -801,11 +912,11 @@ impl<'a, F: Function> Env<'a, F> { op: Operand, op_idx: usize, reused_op: Operand, - reused_idx: usize + reused_idx: usize, + clobbers: PRegSet, ) { debug_assert!(matches!(op.constraint(), OperandConstraint::Reuse(_))); - // TODO: Check if reuse operand is not a def and if it's not, return an error. - + self.vregs_in_curr_inst.insert(op.vreg()); // We first need to check if the reuse operand has already been allocated, // in a previous alloc_inst call. There are 2 cases that need to be considered here: // @@ -872,20 +983,34 @@ impl<'a, F: Function> Env<'a, F> { // are inserted only if the input lives past the instruction, that is, its first use // is not in this instruction. - // TODO: Ensure that the reused operand is a use. - // TODO: Ensure that the reuse operand and its reused input have the - // same register class. 
trace!("Move Reason: Reuse constraints"); - let reused_op_first_use = self.first_use.contains(&reused_op.vreg()); + if reused_op.kind() != OperandKind::Use || op.kind() != OperandKind::Def + || reused_op.pos() != OperandPos::Early || op.pos() != OperandPos::Late + || reused_op.class() != op.class() + { + panic!("Invalid input"); + } + let reused_op_first_use = self.vregs_first_seen_in_curr_inst.contains(&reused_op.vreg()); if self.vreg_allocs[op.vreg().vreg()].is_some() { - let op_prev_alloc = self.vreg_allocs[op.vreg().vreg()]; let reused_op_vreg = reused_op.vreg(); + // The only way that a vreg can be assigned a clobber is if a fixed register + // constraint demands it. + // Reusing an input assigned to a clobber will result in a clobber being assigned + // to the vreg being defined. Since the vreg is live after this instruction, this + // must be an error, because it will be impossible to restore the defined vreg + // afterwards. + if let Some(preg) = self.vreg_allocs[reused_op_vreg.vreg()].as_reg() { + if clobbers.contains(preg) { + panic!("Invalid input"); + } + } + let op_prev_alloc = self.vreg_allocs[op.vreg().vreg()]; if self.vreg_spillslots[reused_op_vreg.vreg()].is_invalid() { self.vreg_spillslots[reused_op_vreg.vreg()] = self.allocstack(&reused_op_vreg); } let reused_op_spillslot = self.vreg_spillslots[reused_op.vreg().vreg()]; - + // If this is the reused operand's first use, then don't // restore it afterwards, because it doesn't live past this instruction. 
if !reused_op_first_use { @@ -896,7 +1021,7 @@ impl<'a, F: Function> Env<'a, F> { Allocation::stack(reused_op_spillslot), op.class(), InstPosition::Before, - false, + true, ); } @@ -923,10 +1048,10 @@ impl<'a, F: Function> Env<'a, F> { InstPosition::After, false, ); + self.use_vregs_saved_and_restored_in_curr_inst.insert(reused_op.vreg()); } self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[reused_op_vreg.vreg()]; - self.allocs[(inst.index(), reused_idx)] = self.vreg_allocs[reused_op_vreg.vreg()]; // Deallocate the reuse operand. // We can't just deallocate the reuse operand. @@ -949,7 +1074,7 @@ impl<'a, F: Function> Env<'a, F> { // only if the reuse operand's allocation was not reused by any use operands // in the instruction. let op_alloc_is_in_use = self.allocs_used_by_use_ops.contains(&op_prev_alloc); - self.freealloc(op.vreg(), !op_alloc_is_in_use); + self.freealloc(op.vreg(), clobbers, !op_alloc_is_in_use); trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); } else { let reused_op_vreg = reused_op.vreg(); @@ -967,7 +1092,7 @@ impl<'a, F: Function> Env<'a, F> { Allocation::stack(reused_op_spillslot), op.class(), InstPosition::Before, - false, + true, ); // Move back into its allocation. self.add_move_later( @@ -978,6 +1103,7 @@ impl<'a, F: Function> Env<'a, F> { InstPosition::After, false, ); + self.use_vregs_saved_and_restored_in_curr_inst.insert(reused_op.vreg()); } self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[reused_op_vreg.vreg()]; trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); @@ -1008,32 +1134,61 @@ impl<'a, F: Function> Env<'a, F> { // p0 is clobbered, so v2 is no longer in it and p0 no longer contains v2 at inst 2. // p0 is allocated to the v2 def operand in inst 1. The flow ends up wrong because of // the clobbering. 
+ // + // + // It is also possible for a clobbered register to be allocated to an operand + // in an instruction. In this case, edits only need to be inserted if the + // following conditions are met: + // + // 1. All the operands assigned the clobber are all uses of the same vreg + // with the same constraint (no defs should be assigned the clobber). + // 2. No other operand in the instruction uses that vreg with a different constraint. + // 3. The used vreg lives past the instruction. + // 4. The expected allocation of the vreg after the instruction is the clobber. + // + // Because of the way operand allocation works, edits to save and restore a vreg + // will have already been inserted during operand allocation if any of the following + // conditions are met: + // 1. The expected allocation afterwards is not a clobber. + // 2. There are multiple operands using the vreg with different constraints. + // 3. A def operand has the same clobber allocation assigned to it and + // the vreg lives past the instruction. + // Therefore, the presence of the vreg in `use_vregs_saved_and_restored` + // implies that it violates one of the conditions for the edits to be inserted. 
+ let vreg = self.vreg_in_preg[clobbered_preg.index()]; if vreg != VReg::invalid() { - let preg_alloc = Allocation::reg(clobbered_preg); - let slot = if self.vreg_spillslots[vreg.vreg()].is_valid() { - self.vreg_spillslots[vreg.vreg()] - } else { - self.vreg_spillslots[vreg.vreg()] = self.allocstack(&vreg); - self.vreg_spillslots[vreg.vreg()] - }; - let slot_alloc = Allocation::stack(slot); - self.add_move_later( - inst, - preg_alloc, - slot_alloc, - vreg.class(), - InstPosition::Before, - true - ); - self.add_move_later( - inst, - slot_alloc, - preg_alloc, - vreg.class(), - InstPosition::After, - false, - ); + let vreg_isnt_mentioned_in_curr_inst = !self.vregs_in_curr_inst.contains(&vreg); + let vreg_lives_past_curr_inst = !self.vregs_first_seen_in_curr_inst.contains(&vreg); + if vreg_isnt_mentioned_in_curr_inst + || (!self.use_vregs_saved_and_restored_in_curr_inst.contains(&vreg) + && vreg_lives_past_curr_inst) + { + let preg_alloc = Allocation::reg(clobbered_preg); + let slot = if self.vreg_spillslots[vreg.vreg()].is_valid() { + self.vreg_spillslots[vreg.vreg()] + } else { + self.vreg_spillslots[vreg.vreg()] = self.allocstack(&vreg); + self.vreg_spillslots[vreg.vreg()] + }; + let slot_alloc = Allocation::stack(slot); + self.add_move_later( + inst, + preg_alloc, + slot_alloc, + vreg.class(), + InstPosition::Before, + true + ); + self.add_move_later( + inst, + slot_alloc, + preg_alloc, + vreg.class(), + InstPosition::After, + false, + ); + } } } } @@ -1149,15 +1304,23 @@ impl<'a, F: Function> Env<'a, F> { self.process_branch(block, inst); } let operands = self.func.inst_operands(inst); - for preg in self.func.inst_clobbers(inst) { + let clobbers = self.func.inst_clobbers(inst); + for preg in clobbers { if self.freepregs[preg.class()].remove(&preg) { + self.lrus[preg.class()].remove(preg.hw_enc()); self.clobbered_reg_is_allocatable.insert(preg); } } + for (_, op) in ReuseOperands::new(operands) { + let OperandConstraint::Reuse(reused_idx) = op.constraint() else { + 
unreachable!() + }; + self.reused_inputs_in_curr_inst.push(reused_idx); + } for (op_idx, op) in FixedLateOperands::new(operands) { self.process_operand_allocation(inst, op, op_idx); } - for (op_idx, op) in NonFixedNonReuseLateOperands::new(operands) { + for (op_idx, op) in NonFixedNonReuseLateDefOperands::new(operands) { self.process_operand_allocation(inst, op, op_idx); } for (_, op) in NonReuseLateDefOperands::new(operands) { @@ -1166,11 +1329,14 @@ impl<'a, F: Function> Env<'a, F> { // vreg was defined when adding reftype vregs to the stackmap. self.liveout_vreg_def_inst[op.vreg().vreg()] = (block, inst); } - self.freealloc(op.vreg(), true); + self.freealloc(op.vreg(), clobbers, true); } for (op_idx, op) in FixedEarlyOperands::new(operands) { self.process_operand_allocation(inst, op, op_idx); } + for (op_idx, op) in NonFixedNonReuseLateUseOperands::new(operands) { + self.process_operand_allocation(inst, op, op_idx); + } for (op_idx, op) in NonFixedNonReuseEarlyOperands::new(operands) { self.process_operand_allocation(inst, op, op_idx); } @@ -1180,7 +1346,7 @@ impl<'a, F: Function> Env<'a, F> { // vreg was defined when adding reftype vregs to the stackmap. self.liveout_vreg_def_inst[op.vreg().vreg()] = (block, inst); } - self.freealloc(op.vreg(), true); + self.freealloc(op.vreg(), clobbers, true); } for (op_idx, op) in ReuseOperands::new(operands) { let OperandConstraint::Reuse(reused_idx) = op.constraint() else { @@ -1191,7 +1357,7 @@ impl<'a, F: Function> Env<'a, F> { // vreg was defined when adding reftype vregs to the stackmap. 
self.liveout_vreg_def_inst[op.vreg().vreg()] = (block, inst); } - self.process_reuse_operand_allocation(inst, op, op_idx, operands[reused_idx], reused_idx); + self.process_reuse_operand_allocation(inst, op, op_idx, operands[reused_idx], reused_idx, clobbers); } self.save_and_restore_clobbered_registers(inst); if self.func.requires_refs_on_stack(inst) { @@ -1248,6 +1414,7 @@ impl<'a, F: Function> Env<'a, F> { } for preg in self.func.inst_clobbers(inst) { if self.clobbered_reg_is_allocatable.contains(&preg) { + self.lrus[preg.class()].append(preg.hw_enc()); self.freepregs[preg.class()].insert(preg); } } @@ -1255,9 +1422,39 @@ impl<'a, F: Function> Env<'a, F> { let scratch_regs = self.get_scratch_regs(inst); self.process_edits(scratch_regs); self.add_freed_regs_to_freelist(); - self.vregs_allocd_in_curr_inst.clear(); - self.first_use.clear(); + self.use_vregs_saved_and_restored_in_curr_inst.clear(); + self.vregs_first_seen_in_curr_inst.clear(); self.allocs_used_by_use_ops.clear(); + self.vregs_allocd_in_curr_inst.clear(); + self.reused_inputs_in_curr_inst.clear(); + self.vregs_in_curr_inst.clear(); + + // After instruction + trace!(""); + trace!("State after instruction {:?}", inst); + let mut map = HashMap::new(); + for (vreg_idx, alloc) in self.vreg_allocs.iter().enumerate() { + if *alloc != Allocation::none() { + map.insert(format!("vreg{vreg_idx}"), alloc); + } + } + trace!("vreg_allocs: {:?}", map); + let mut map = HashMap::new(); + for i in 0..self.vreg_in_preg.len() { + if self.vreg_in_preg[i] != VReg::invalid() { + map.insert(self.preg_index_to_class_and_hw_enc[&i], self.vreg_in_preg[i]); + } + } + trace!("vreg_in_preg: {:?}", map); + let clobbers = self.func.inst_clobbers(inst); + let mut cls = Vec::new(); + for c in clobbers { + cls.push(c); + } + trace!("Clobbers: {:?}", cls); + trace!("Int LRU: {:?}", self.lrus[RegClass::Int]); + trace!("Float LRU: {:?}", self.lrus[RegClass::Float]); + trace!("Vector LRU: {:?}", self.lrus[RegClass::Vector]); } /// At the 
beginning of every block, all virtual registers that are @@ -1285,7 +1482,7 @@ impl<'a, F: Function> Env<'a, F> { // A block's block param is not live before the block. // And `vreg_allocs[i]` of a virtual register i is none for // dead vregs. - self.freealloc(vreg, true); + self.freealloc(vreg, PRegSet::empty(), true); if self.func.reftype_vregs().contains(&vreg) { // This marks the definition of the block param. // Record this information which will be used while building @@ -1308,6 +1505,7 @@ impl<'a, F: Function> Env<'a, F> { // freepregs to prevent allocated registers from being // used as scratch registers. self.freed_def_pregs[preg.class()].insert(preg); + self.lrus[preg.class()].remove(preg.hw_enc()); } } } @@ -1440,9 +1638,9 @@ pub fn run( trace!("Processing a new function"); for block in 0..func.num_blocks() { let block = Block::new(block); - trace!("Block {:?}. preds: {:?}. succs: {:?}", + /*trace!("Block {:?}. preds: {:?}. succs: {:?}", block, func.block_preds(block), func.block_succs(block) - ); + );*/ for inst in func.block_insns(block).iter() { trace!("inst{:?}: {:?}", inst.index(), func.inst_operands(inst)); } From 06b7bfc0ba22c84829604ef9a53851b0441018a0 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Sun, 4 Aug 2024 17:10:00 +0100 Subject: [PATCH 15/95] fixed bug with lru management --- src/fastalloc/mod.rs | 74 ++++++++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 19 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index eec73bca..170ff810 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -616,11 +616,13 @@ impl<'a, F: Function> Env<'a, F> { // It is an error for a fixed register clobber to be used for a defined vreg // that outlives the instruction, because it will be impossible to restore it. 
if self.func.inst_clobbers(inst).contains(preg) && op.kind() == OperandKind::Def - && self.vregs_first_seen_in_curr_inst.contains(&op.vreg()) + && (!self.vregs_first_seen_in_curr_inst.contains(&op.vreg()) + || self.liveout_vregs.contains(&op.vreg())) { panic!("Invalid input"); } - let mut preg_is_allocatable = false; + let is_allocatable = !self.is_stack(Allocation::reg(preg)) + && !self.func.inst_clobbers(inst).contains(preg); if self.vreg_in_preg[preg.index()] != VReg::invalid() { // Something is already in that register. Evict it. // TODO: Check if the evicted register is a register in the @@ -635,7 +637,6 @@ impl<'a, F: Function> Env<'a, F> { // has finished processing. // To avoid the preg being added back to the free list, it must be removed // from `freed_def_pregs` here. - preg_is_allocatable = true; self.freed_def_pregs[preg.class()].remove(&preg); self.lrus[preg.class()].append(preg.hw_enc()); } else if self.free_after_curr_inst.contains(&preg) { @@ -654,23 +655,25 @@ impl<'a, F: Function> Env<'a, F> { // To prevent reallocating a register while a live one is still in it, // this register has to be removed from the list. trace!("{:?} is now using preg {:?}. Removing it from the free after instruction list", op.vreg(), preg); - preg_is_allocatable = true; self.free_after_curr_inst.remove(&preg); - self.lrus[preg.class()].append(preg.hw_enc()); + if is_allocatable { + self.lrus[preg.class()].append(preg.hw_enc()); + } } else { // Find the register in the list of free registers (if it's there). // If it's not there, then it must be be a fixed stack slot or // a clobber, since clobbers are removed from the free preg list before allocation begins. 
- preg_is_allocatable = self.freepregs[op.class()].remove(&preg); - if !preg_is_allocatable { - preg_is_allocatable = self.clobbered_reg_is_allocatable.contains(&preg); + self.freepregs[op.class()].remove(&preg); + if is_allocatable { + self.lrus[preg.class()].append(preg.hw_enc()); } } - if preg_is_allocatable { + if is_allocatable { self.lrus[op.class()].poke(preg); } self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(preg); self.vreg_in_preg[preg.index()] = op.vreg(); + trace!("vreg {:?} is now in preg {:?}", op.vreg(), preg); } /// Allocates for the operand `op` with index `op_idx` into the @@ -881,14 +884,13 @@ impl<'a, F: Function> Env<'a, F> { let preg = prev_alloc.as_reg().unwrap(); self.vreg_in_preg[preg.index()] = VReg::invalid(); // If it's a fixed stack slot, then it's not allocatable. - if !self.is_stack(prev_alloc) - // If it's a clobber, then only fixed register constraints - // can make it get allocated. - && !self.func.inst_clobbers(inst).contains(preg) - { + if !self.is_stack(prev_alloc) { trace!("{:?} is no longer using preg {:?}, so freeing it after instruction", op.vreg(), preg); self.free_after_curr_inst.insert(preg); - self.lrus[preg.class()].remove(preg.hw_enc()); + // Clobbers have already been removed from the LRU. 
+ if !self.func.inst_clobbers(inst).contains(preg) { + self.lrus[preg.class()].remove(preg.hw_enc()); + } } } } @@ -1120,6 +1122,7 @@ impl<'a, F: Function> Env<'a, F> { } fn save_and_restore_clobbered_registers(&mut self, inst: Inst) { + trace!("Adding save and restore edits for vregs in clobbered registers"); for clobbered_preg in self.func.inst_clobbers(inst) { // If the instruction clobbers a register holding a live vreg, // insert edits to save the live reg and restore it @@ -1164,6 +1167,7 @@ impl<'a, F: Function> Env<'a, F> { || (!self.use_vregs_saved_and_restored_in_curr_inst.contains(&vreg) && vreg_lives_past_curr_inst) { + trace!("Adding save and restore edits for {:?}", vreg); let preg_alloc = Allocation::reg(clobbered_preg); let slot = if self.vreg_spillslots[vreg.vreg()].is_valid() { self.vreg_spillslots[vreg.vreg()] @@ -1191,6 +1195,7 @@ impl<'a, F: Function> Env<'a, F> { } } } + trace!("Done adding edits for clobbered registers"); } /// If instruction `inst` is a branch in `block`, @@ -1300,6 +1305,7 @@ impl<'a, F: Function> Env<'a, F> { } fn alloc_inst(&mut self, block: Block, inst: Inst) { + trace!("Allocating instruction {:?}", inst); if self.func.is_branch(inst) { self.process_branch(block, inst); } @@ -1307,6 +1313,7 @@ impl<'a, F: Function> Env<'a, F> { let clobbers = self.func.inst_clobbers(inst); for preg in clobbers { if self.freepregs[preg.class()].remove(&preg) { + trace!("Removing {:?} from the freelist because it's a clobber", preg); self.lrus[preg.class()].remove(preg.hw_enc()); self.clobbered_reg_is_allocatable.insert(preg); } @@ -1414,10 +1421,21 @@ impl<'a, F: Function> Env<'a, F> { } for preg in self.func.inst_clobbers(inst) { if self.clobbered_reg_is_allocatable.contains(&preg) { + if self.vreg_in_preg[preg.index()] == VReg::invalid() { + // In the case where the clobbered register is allocated to + // something, don't add the register to the freelist, cause + // it isn't free. 
+ trace!("Adding clobbered {:?} back to free list", preg); + self.freepregs[preg.class()].insert(preg); + } + // TODO: Append and poke instead. self.lrus[preg.class()].append(preg.hw_enc()); - self.freepregs[preg.class()].insert(preg); } } + trace!("After the allocation:"); + trace!("freed_def_pregs: {:?}", self.freed_def_pregs); + trace!("free after curr inst: {:?}", self.free_after_curr_inst); + trace!(""); self.clobbered_reg_is_allocatable.clear(); let scratch_regs = self.get_scratch_regs(inst); self.process_edits(scratch_regs); @@ -1455,6 +1473,9 @@ impl<'a, F: Function> Env<'a, F> { trace!("Int LRU: {:?}", self.lrus[RegClass::Int]); trace!("Float LRU: {:?}", self.lrus[RegClass::Float]); trace!("Vector LRU: {:?}", self.lrus[RegClass::Vector]); + trace!("Free int pregs: {:?}", self.freepregs[RegClass::Int]); + trace!("Free float pregs: {:?}", self.freepregs[RegClass::Float]); + trace!("Free vector pregs: {:?}", self.freepregs[RegClass::Vector]); } /// At the beginning of every block, all virtual registers that are @@ -1471,6 +1492,7 @@ impl<'a, F: Function> Env<'a, F> { // These registers are livein and they should be stack-allocated. let live_vregs = self.live_vregs.clone(); for vreg in live_vregs.iter().cloned() { + trace!("Processing {:?}", vreg); if self.vreg_spillslots[vreg.vreg()].is_invalid() { self.vreg_spillslots[vreg.vreg()] = self.allocstack(&vreg); } @@ -1479,28 +1501,35 @@ impl<'a, F: Function> Env<'a, F> { let prev_alloc = self.vreg_allocs[vreg.vreg()]; let slot = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); if self.func.block_params(block).contains(&vreg) { + trace!("{:?} is a block param. Freeing it", vreg); // A block's block param is not live before the block. // And `vreg_allocs[i]` of a virtual register i is none for // dead vregs. self.freealloc(vreg, PRegSet::empty(), true); if self.func.reftype_vregs().contains(&vreg) { + trace!("{:?} is a reftype. 
Recording it's definition instruction", vreg); // This marks the definition of the block param. // Record this information which will be used while building // the stackmap later. self.liveout_vreg_def_inst[vreg.vreg()] = (block, self.func.block_insns(block).first()); } } else { + trace!("{:?} is not a block param. It's a liveout vreg from some predecessor", vreg); + trace!("Setting {:?}'s current allocation to its spillslot", vreg); // It is a liveout vreg from a predecessor. self.vreg_allocs[vreg.vreg()] = slot; + trace!("Recording that {:?} is a liveout", vreg); // Need to remember that this is a liveout vreg so that its // spillslot, if it's a reftype, can be recorded in the stackmap // later. self.liveout_vregs.insert(vreg); if let Some(preg) = prev_alloc.as_reg() { + trace!("{:?} was in {:?}. Removing it", preg, vreg); // Nothing is in that preg anymore. Return it to // the free preg list. self.vreg_in_preg[preg.index()] = VReg::invalid(); if !self.is_stack(prev_alloc) { + trace!("{:?} is not a fixed stack slot. Recording it in the freed def pregs list", prev_alloc); // Using this instead of directly adding it to // freepregs to prevent allocated registers from being // used as scratch registers. @@ -1525,6 +1554,7 @@ impl<'a, F: Function> Env<'a, F> { ); } for block_param_vreg in self.func.block_params(block) { + trace!("Removing block param {:?} from the live regs set", block_param_vreg); self.live_vregs.remove(block_param_vreg); } self.process_edits(self.get_scratch_regs_for_reloading()); @@ -1638,11 +1668,16 @@ pub fn run( trace!("Processing a new function"); for block in 0..func.num_blocks() { let block = Block::new(block); - /*trace!("Block {:?}. preds: {:?}. succs: {:?}", + trace!("Block {:?}. preds: {:?}. 
succs: {:?}", block, func.block_preds(block), func.block_succs(block) - );*/ + ); for inst in func.block_insns(block).iter() { - trace!("inst{:?}: {:?}", inst.index(), func.inst_operands(inst)); + let clobbers = func.inst_clobbers(inst); + let mut cls = Vec::new(); + for c in clobbers { + cls.push(c); + } + trace!("inst{:?}: {:?}. Clobbers: {:?}", inst.index(), func.inst_operands(inst), cls); } trace!(""); } @@ -1652,6 +1687,7 @@ pub fn run( trace!("Final edits: {:?}", env.edits); trace!("safepoint_slots: {:?}", env.safepoint_slots); +trace!("\n\n\n\n\n\n\n"); Ok(Output { edits: env.edits.make_contiguous().to_vec(), allocs: env.allocs.allocs, From 28278dd61c6d47c20b5ea6718c3f0e4073ce8f6b Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Sun, 4 Aug 2024 22:53:58 +0100 Subject: [PATCH 16/95] fixed problems with LRU management --- src/fastalloc/lru.rs | 64 ++++++++++++++++++++++++++++++++++++++++++-- src/fastalloc/mod.rs | 61 ++++++++++++++++++++++++++++++----------- 2 files changed, 108 insertions(+), 17 deletions(-) diff --git a/src/fastalloc/lru.rs b/src/fastalloc/lru.rs index a1dd2977..e8613b8e 100644 --- a/src/fastalloc/lru.rs +++ b/src/fastalloc/lru.rs @@ -1,8 +1,11 @@ use alloc::vec::Vec; use alloc::vec; +use hashbrown::HashSet; use core::{fmt, ops::IndexMut}; -use std::ops::Index; -use crate::{RegClass, PReg}; +use std::{ops::Index, print}; +use crate::{PReg, PRegSet, RegClass}; + +use std::{println, format}; /// A least-recently-used cache organized as a linked list based on a vector. pub struct Lru { @@ -48,6 +51,8 @@ impl Lru { /// Marks the physical register `i` as the most recently used /// and sets `vreg` as the virtual register it contains. pub fn poke(&mut self, preg: PReg) { + trace!("Before poking: {:?} LRU. 
head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); + trace!("About to poke {:?} in {:?} LRU", preg, self.regclass); let prev_newest = self.head; let i = preg.hw_enc(); if i == prev_newest { @@ -58,19 +63,27 @@ impl Lru { self.insert_before(i, self.head); } self.head = i; + trace!("Poked {:?} in {:?} LRU", preg, self.regclass); + self.check_for_cycle(); } /// Gets the least recently used physical register. pub fn pop(&mut self) -> PReg { + trace!("Before popping: {:?} LRU. head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); + trace!("Popping {:?} LRU", self.regclass); if self.is_empty() { panic!("LRU is empty"); } let oldest = self.data[self.head].prev; + trace!("Popped p{oldest} in {:?} LRU", self.regclass); + self.check_for_cycle(); PReg::new(oldest, self.regclass) } /// Splices out a node from the list. pub fn remove(&mut self, i: usize) { + trace!("Before removing: {:?} LRU. head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); + trace!("Removing p{i} from {:?} LRU", self.regclass); let (iprev, inext) = (self.data[i].prev, self.data[i].next); self.data[iprev].next = self.data[i].next; self.data[inext].prev = self.data[i].prev; @@ -84,10 +97,14 @@ impl Lru { self.head = inext; } } + trace!("Removed p{i} from {:?} LRU", self.regclass); + self.check_for_cycle(); } /// Sets the node `i` to the last in the list. pub fn append(&mut self, i: usize) { + trace!("Before appending: {:?} LRU. head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); + trace!("Appending p{i} to the {:?} LRU", self.regclass); if self.head != usize::MAX { let last_node = self.data[self.head].prev; self.data[last_node].next = i; @@ -99,6 +116,8 @@ impl Lru { self.data[i].prev = i; self.data[i].next = i; } + trace!("Appended p{i} to the {:?} LRU", self.regclass); + self.check_for_cycle(); } pub fn append_and_poke(&mut self, preg: PReg) { @@ -108,6 +127,8 @@ impl Lru { /// Insert node `i` before node `j` in the list. 
pub fn insert_before(&mut self, i: usize, j: usize) { + trace!("Before inserting: {:?} LRU. head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); + trace!("Inserting p{i} before {j} in {:?} LRU", self.regclass); let prev = self.data[j].prev; self.data[prev].next = i; self.data[j].prev = i; @@ -115,11 +136,44 @@ impl Lru { next: j, prev, }; + trace!("Done inserting p{i} before {j} in {:?} LRU", self.regclass); + self.check_for_cycle(); } pub fn is_empty(&self) -> bool { self.head == usize::MAX } + + fn check_for_cycle(&self) { + trace!("{:?} LRU. head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); + if self.head != usize::MAX { + let mut node = self.data[self.head].next; + let mut seen = HashSet::new(); + while node != self.head { + if seen.contains(&node) { + panic!("Cycle detected in {:?} LRU.\n + head: {:?}, actual data: {:?}", self.regclass, self.head, self.data); + } + seen.insert(node); + node = self.data[node].next; + } + for i in 0..self.data.len() { + if self.data[i].prev == usize::MAX && self.data[i].next == usize::MAX { + // Removed + continue; + } + if self.data[i].prev == usize::MAX || self.data[i].next == usize::MAX { + panic!("Invalid LRU. p{} next or previous is an invalid value, but not both", i); + } + if self.data[self.data[i].prev].next != i { + panic!("Invalid LRU. p{i} prev is p{:?}, but p{:?} next is {:?}", self.data[i].prev, self.data[i].prev, self.data[self.data[i].prev].next); + } + if self.data[self.data[i].next].prev != i { + panic!("Invalid LRU. 
p{i} next is p{:?}, but p{:?} prev is p{:?}", self.data[i].next, self.data[i].next, self.data[self.data[i].next].prev); + } + } + } + } } impl fmt::Debug for Lru { @@ -130,7 +184,13 @@ impl fmt::Debug for Lru { } else { let mut data_str = format!("p{}", self.head); let mut node = self.data[self.head].next; + let mut seen = HashSet::new(); while node != self.head { + if seen.contains(&node) { + panic!("The {:?} LRU is messed up: + head: {:?}, {:?} -> p{node}, actual data: {:?}", self.regclass, self.head, data_str, self.data); + } + seen.insert(node); data_str += &format!(" -> p{}", node); node = self.data[node].next; } diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 170ff810..c568f69c 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -9,6 +9,7 @@ use alloc::collections::{BTreeSet, VecDeque}; use alloc::vec::Vec; use hashbrown::{HashSet, HashMap}; +use std::println; use std::format; mod lru; @@ -518,14 +519,16 @@ impl<'a, F: Function> Env<'a, F> { // a clobber can be newly allocated to a vreg in the instruction is to // use a fixed register constraint. self.free_after_curr_inst.insert(preg); + // No need to remove the preg from the LRU because clobbers + // have already been removed from the LRU. } else { // Added to the freed def pregs list, not the free pregs // list to avoid a def's allocated register being used // as a scratch register. self.freed_def_pregs[vreg.class()].insert(preg); + // Don't allow this register to be evicted. + self.lrus[vreg.class()].remove(preg.hw_enc()); } - // Don't allow this register to be evicted. - self.lrus[vreg.class()].remove(preg.hw_enc()); } } } @@ -664,9 +667,6 @@ impl<'a, F: Function> Env<'a, F> { // If it's not there, then it must be be a fixed stack slot or // a clobber, since clobbers are removed from the free preg list before allocation begins. 
self.freepregs[op.class()].remove(&preg); - if is_allocatable { - self.lrus[preg.class()].append(preg.hw_enc()); - } } if is_allocatable { self.lrus[op.class()].poke(preg); @@ -886,10 +886,14 @@ impl<'a, F: Function> Env<'a, F> { // If it's a fixed stack slot, then it's not allocatable. if !self.is_stack(prev_alloc) { trace!("{:?} is no longer using preg {:?}, so freeing it after instruction", op.vreg(), preg); - self.free_after_curr_inst.insert(preg); - // Clobbers have already been removed from the LRU. + // A clobber will have already been removed from the LRU + // and will be freed after the instruction has completed processing + // if no vreg is still present in it. if !self.func.inst_clobbers(inst).contains(preg) { + self.free_after_curr_inst.insert(preg); self.lrus[preg.class()].remove(preg.hw_enc()); + } else { + trace!("{:?} is a clobber, so not bothering with the state update", preg); } } } @@ -1464,12 +1468,6 @@ impl<'a, F: Function> Env<'a, F> { } } trace!("vreg_in_preg: {:?}", map); - let clobbers = self.func.inst_clobbers(inst); - let mut cls = Vec::new(); - for c in clobbers { - cls.push(c); - } - trace!("Clobbers: {:?}", cls); trace!("Int LRU: {:?}", self.lrus[RegClass::Int]); trace!("Float LRU: {:?}", self.lrus[RegClass::Float]); trace!("Vector LRU: {:?}", self.lrus[RegClass::Vector]); @@ -1559,6 +1557,30 @@ impl<'a, F: Function> Env<'a, F> { } self.process_edits(self.get_scratch_regs_for_reloading()); self.add_freed_regs_to_freelist(); + + // After reload_at_begin + trace!(""); + trace!("State after instruction reload_at_begin of {:?}", block); + let mut map = HashMap::new(); + for (vreg_idx, alloc) in self.vreg_allocs.iter().enumerate() { + if *alloc != Allocation::none() { + map.insert(format!("vreg{vreg_idx}"), alloc); + } + } + trace!("vreg_allocs: {:?}", map); + let mut map = HashMap::new(); + for i in 0..self.vreg_in_preg.len() { + if self.vreg_in_preg[i] != VReg::invalid() { + map.insert(self.preg_index_to_class_and_hw_enc[&i], 
self.vreg_in_preg[i]); + } + } + trace!("vreg_in_preg: {:?}", map); + trace!("Int LRU: {:?}", self.lrus[RegClass::Int]); + trace!("Float LRU: {:?}", self.lrus[RegClass::Float]); + trace!("Vector LRU: {:?}", self.lrus[RegClass::Vector]); + trace!("Free int pregs: {:?}", self.freepregs[RegClass::Int]); + trace!("Free float pregs: {:?}", self.freepregs[RegClass::Float]); + trace!("Free vector pregs: {:?}", self.freepregs[RegClass::Vector]); } fn build_safepoint_stackmap(&mut self) { @@ -1668,8 +1690,9 @@ pub fn run( trace!("Processing a new function"); for block in 0..func.num_blocks() { let block = Block::new(block); - trace!("Block {:?}. preds: {:?}. succs: {:?}", - block, func.block_preds(block), func.block_succs(block) + trace!("Block {:?}. preds: {:?}. succs: {:?}, params: {:?}", + block, func.block_preds(block), func.block_succs(block), + func.block_params(block) ); for inst in func.block_insns(block).iter() { let clobbers = func.inst_clobbers(inst); @@ -1677,7 +1700,15 @@ pub fn run( for c in clobbers { cls.push(c); } + use std::print; trace!("inst{:?}: {:?}. 
Clobbers: {:?}", inst.index(), func.inst_operands(inst), cls); + if func.is_branch(inst) { + trace!("Block args: "); + for (succ_idx, _succ) in func.block_succs(block).iter().enumerate() { + trace!(" {:?}", func.branch_blockparams(block, inst, succ_idx)); + } + } + trace!(""); } trace!(""); } From a909242354edd1f87d639c81a94b1b708a930e0c Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Mon, 5 Aug 2024 00:48:54 +0100 Subject: [PATCH 17/95] fixed register leak of clobbered regs --- src/fastalloc/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index c568f69c..dd6c8bec 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1316,8 +1316,10 @@ impl<'a, F: Function> Env<'a, F> { let operands = self.func.inst_operands(inst); let clobbers = self.func.inst_clobbers(inst); for preg in clobbers { - if self.freepregs[preg.class()].remove(&preg) { + //if self.freepregs[preg.class()].remove(&preg) { + if !self.is_stack(Allocation::reg(preg)) { trace!("Removing {:?} from the freelist because it's a clobber", preg); + self.freepregs[preg.class()].remove(&preg); self.lrus[preg.class()].remove(preg.hw_enc()); self.clobbered_reg_is_allocatable.insert(preg); } From 747d9751f5e21d4d1dc68bcf9a111ad302d5bc15 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Mon, 5 Aug 2024 11:46:14 +0100 Subject: [PATCH 18/95] rethought reuse operand handling --- src/fastalloc/mod.rs | 312 +++++++++++++++++++++++-------------------- 1 file changed, 167 insertions(+), 145 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index dd6c8bec..d9c132e5 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -132,11 +132,6 @@ pub struct Env<'a, F: Function> { /// Used to keep track of which vregs have been allocated in the current instruction. /// This is used to determine which edits to insert when allocating a use operand. 
vregs_allocd_in_curr_inst: HashSet, - /// Used to keep track of which allocations have been used by use operands in the - /// current instruction. This is to determine whether or not an allocation - /// for a reuse operand was reused by a use operand, and make decisions on - /// whether or not to free the allocation. - allocs_used_by_use_ops: HashSet, /// Used to check if a clobbered register in the current instruction is an /// allocatable register, to make decisions on whether or not is should be returned to /// the free register list after allocation of the instruction's operands. @@ -242,7 +237,6 @@ impl<'a, F: Function> Env<'a, F> { use_vregs_saved_and_restored_in_curr_inst: HashSet::new(), freed_def_pregs: PartedByRegClass { items: [BTreeSet::new(), BTreeSet::new(), BTreeSet::new()] }, vregs_first_seen_in_curr_inst: HashSet::new(), - allocs_used_by_use_ops: HashSet::new(), clobbered_reg_is_allocatable: HashSet::new(), safepoint_insts: Vec::new(), liveout_vregs: HashSet::new(), @@ -407,10 +401,10 @@ impl<'a, F: Function> Env<'a, F> { from, to }, class)); - // TODO: Check if the source and destination are the same. And if they are, - // don't add the edit. - // The sorting out of stack-to-stack moves will be done when the instruction's - // edits are processed after all operands have been allocated. 
+ if from == to { + trace!("Deciding not to record the edit, since the source and dest are the same"); + return; + } if prepend { target_edits.push_front((ProgPoint::new(inst, pos), Edit::Move { from, @@ -496,8 +490,8 @@ impl<'a, F: Function> Env<'a, F> { preg } - fn freealloc(&mut self, vreg: VReg, clobbers: PRegSet, add_to_freelist: bool) { - trace!("Freeing vreg {:?} (add_to_freelist: {:?})", vreg, add_to_freelist); + fn freealloc(&mut self, vreg: VReg, clobbers: PRegSet) { + trace!("Freeing vreg {:?}", vreg); let alloc = self.vreg_allocs[vreg.vreg()]; match alloc.kind() { AllocationKind::Reg => { @@ -505,37 +499,32 @@ impl<'a, F: Function> Env<'a, F> { self.vreg_in_preg[preg.index()] = VReg::invalid(); // If it's a fixed stack slot, then it's not allocatable. if !self.is_stack(alloc) { - if add_to_freelist { - if clobbers.contains(preg) { - // For a defined vreg to be restored to the location it's expected to - // be in after the instruction, it cannot be allocated to a clobber because that - // will make the restoration impossible. - // In the case where a reuse operand reuses an input allocated to a clobber, - // the defined vreg will be allocated to a clobber - // and if the vreg lives past the instruction, restoration will be impossible. - // To avoid this, simply make it impossible for a clobber to be allocated to - // a vreg with "any" or "any reg" constraints. - // By adding it to this list, instead of freed_def_pregs, the only way - // a clobber can be newly allocated to a vreg in the instruction is to - // use a fixed register constraint. - self.free_after_curr_inst.insert(preg); - // No need to remove the preg from the LRU because clobbers - // have already been removed from the LRU. - } else { - // Added to the freed def pregs list, not the free pregs - // list to avoid a def's allocated register being used - // as a scratch register. - self.freed_def_pregs[vreg.class()].insert(preg); - // Don't allow this register to be evicted. 
- self.lrus[vreg.class()].remove(preg.hw_enc()); - } + if clobbers.contains(preg) { + // For a defined vreg to be restored to the location it's expected to + // be in after the instruction, it cannot be allocated to a clobber because that + // will make the restoration impossible. + // In the case where a reuse operand reuses an input allocated to a clobber, + // the defined vreg will be allocated to a clobber + // and if the vreg lives past the instruction, restoration will be impossible. + // To avoid this, simply make it impossible for a clobber to be allocated to + // a vreg with "any" or "any reg" constraints. + // By adding it to this list, instead of freed_def_pregs, the only way + // a clobber can be newly allocated to a vreg in the instruction is to + // use a fixed register constraint. + self.free_after_curr_inst.insert(preg); + // No need to remove the preg from the LRU because clobbers + // have already been removed from the LRU. + } else { + // Added to the freed def pregs list, not the free pregs + // list to avoid a def's allocated register being used + // as a scratch register. + self.freed_def_pregs[vreg.class()].insert(preg); + // Don't allow this register to be evicted. + self.lrus[vreg.class()].remove(preg.hw_enc()); } } } - AllocationKind::Stack => { - // Do nothing. - // I think it the allocation will be cheaper this way. 
- } + AllocationKind::Stack => (), AllocationKind::None => panic!("Attempting to free an unallocated operand!") } self.vreg_allocs[vreg.vreg()] = Allocation::none(); @@ -903,12 +892,6 @@ impl<'a, F: Function> Env<'a, F> { self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[op.vreg().vreg()]; trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); } - if op.kind() == OperandKind::Use { - // Need to remember that this allocation is used in this instruction - // by a use operand, to make decisions on whether to free a reuse operand's - // allocation during the processing of reuse operands. - self.allocs_used_by_use_ops.insert(self.vreg_allocs[op.vreg().vreg()]); - } self.vregs_allocd_in_curr_inst.insert(op.vreg()); } @@ -918,77 +901,148 @@ impl<'a, F: Function> Env<'a, F> { op: Operand, op_idx: usize, reused_op: Operand, - reused_idx: usize, clobbers: PRegSet, ) { debug_assert!(matches!(op.constraint(), OperandConstraint::Reuse(_))); self.vregs_in_curr_inst.insert(op.vreg()); - // We first need to check if the reuse operand has already been allocated, + // To handle reuse operands, the reused input's allocation is always used for + // both operands. This is to avoid having to think about how to handle fixed + // register constraints. For example: + // 1. def v0 (reuse: 1), use v1 (fixed: p0) + // p0 is simply used for both operands, regardless of where v0 is expected to be + // after the instruction. + // + // + // A check has to be made to see if the reuse operand has already been allocated, // in a previous alloc_inst call. There are 2 cases that need to be considered here: // // Case 1: The reuse operand has already been allocated. + // Case 2: The reuse operand has no allocation. 
+ // + // For case 1: // An example: - // inst 1: reuse def v0 (1), use v1 + // + // inst 1: def v0 (reuse: 1), use v1 // inst 2: use v0 // In the above example, v0 will have already been allocated by the time inst 1 // is about to be processed. // After this inst 1, v0 is expected to be in some location l0. - // But because of the constraints, it should be in v1's location. - // To account for this, the reused input (v1) is moved into its spillslot before the instruction - // and its allocation is used for both the reuse operand (v0) and the reused input - // (the reused input's allocation is used for both of them, just in case the - // reused input has a fixed register constraint). - // After the instruction, v0 is first moved from v1's allocation to l0, the location it's expected to be - // after the instruction and v1 is moved from its spillslot into its current allocation. + // Depending on whether v1 lives past this instruction, there are also two subcases: + // Case 1.1: The reused input, v1, lives past the instruction and is expected to be in some + // location l1 afterwards. + // Case 1.2: The reused input v1, doesn't live past the instruction. // - // Case 2: The reuse operand has not yet been allocated. - // This could happen in a scenario such as: - // inst 1: reuse def v0 (1), use v1 - // inst 2: use v1 - // Since v1 and v0 have to be the same allocation, then one of the following could be done: - // 1. A new location is allocated for v0, v1 is moved into that new location before the - // instruction, and the allocs for both v0 and v1 are set to that location (Not good if - // v1 has a fixed register constraint). - // 2. v1 is moved into its spillslot before the instruction, used as the allocation for - // v0, then v1 is moved from its spillslot into its allocation after the instruction. + // In case 1.1, both v0 and v1 live past the instruction, so edits have to be inserted to + // ensure that both end up in their expected after instruction locations. 
+ // Again, depending on whether or not l0 is used by another operand in the current instruction, + // there are two subcases here: + // Case 1.1.1: l0 is used by another operand in the instruction. + // Case 1.1.2: l0 isn't use by another operand in the instruction. // - // No 1. is better with respect to moves: only 1 move is generated rather than 2. - // No 2. is better with respect to allocations: no extra allocation is required. Especially - // considering the fact that, since reuse operands are always defs, the allocation will be - // deallocated immediately. - // No 1. may lead to better runtime performance, because less stack movements are required - // (assuming no eviction takes place) while no 2. may lead to better compile time performance - // because less bookkeeping has to be done to effect it. - // We're going with no 2. here. + // In case 1.1.1: // - // There is also a problem that could arise when the reused input is the first encountered - // use of a vreg. - // Consider a scenario: - // - // 1. def v12 (reuse: 1), use v7 (fixed: p31) - // 2. def v13, use v12 (fixed: p31) - // v12 is in p31 afterwards + // 1. def v0 (reuse: 1), use v1, use v2 + // 2. use v0, use v1 // - // Inst 2 is processed first and after its processing - // v12 is in p31, right before inst 2. - // During the processing of inst 1, because of the way reuse - // operands are handled, v7's allocation is first saved before inst 1, - // then it is restored afterwards. The resulting modifications are: + // At inst 2, v0 is expected to be at l0 and v1 is expected to be at l1. + // During the processing of inst 1, the v1 and v2 operands are processed before the reuse operand + // v0. If by some fixed register constraint either v1 or v2 get l0 as an allocation, then v0 + // will be evicted and an edit will be added after inst 1 to move from v0's spillslot to l0 + // by the eviction procedure. 
Then we'll have: // - // move from p31 to stack_v7 // v7 is in p31 from this point upwards - // 1. def v12 (reuse: 1), use v7 (fixed: p31) // Both are allocated to p31 - // move from p31 to p31 // to flow v12 to the location it's expected to be afterwards - // move from stack_v7 to p31 // to restore v7 - // 2. def v13, use v12 (fixed: p31) + // 1. def v0 (reuse: 1), use v1, use v2 + // move from stack_v0 to l0 // Added during eviction. + // 2. use v0, use v1 + // + // And v0's current allocation, at the point of invoking this reuse operand allocation + // procedure will be stack_v0 (vreg_allocs[v0] == stack_v0). + // Suppose v1 is allocated to p0. + // For the flow to be correct, two things have to occur: + // 1. The reused input v1 has to be saved and restored, because the definition of v0 + // overwrites it in inst 1. + // 2. v0 has to be moved into its current allocation (stack_v0) before the eviction edit. + // + // The edits added by this procedure will be like so: + // + // move from p0 to stack_v1 // Saving v1. + // 1. def v0 (reuse: 1), use v1, use v2 // p0 is used for both v1 and v0. + // move from p0 to vreg_allocs[v0] (== stack_v0) // Move into v0's current allocation. + // move from stack_v0 to l0 // Added during eviction. + // move from stack_v1 to l1 // Restoring v1. + // 2. use v0, use v1 + // + // Hence, the edits have to be inserted in the following order: + // 1. The edit to save the reused input is preprended to the other edits before the instruction. + // 2. The edit to move v0 from its reused input into its current allocation (spillslot) + // will be prepended to the edits after the instruction. + // 3. The edit to restore v1 will be appended to the edits after the instruction. + // + // In the case 1.1.2, l0 isn't used by any other operand in the instruction, so no eviction + // occurs and vreg_allocs[v0] at this point == l0. 
+ // Here, the only edits that needs to be added is the move from the reused input allocation + // into vreg_allocs[v0] (which is l0) and the save and restore of v1. + // For example: + // + // 1. def v0 (reuse: 1), use v1, use v2 // Both v0 and v1 use p0 + // 2. use v0, use v1 + // + // Becomes: + // + // move from p0 to stack_v1 // Save v1. + // 1. def v0 (reuse: 1), use v1, use v2 + // move from p0 to vreg_allocs[v0] (== l0) // Move to v0's current allocation. + // move from stack_v1 to l1 // Restore v1. + // 2. use v0, use v1 + // + // So, the procedure for case 1.1.1 is the same for case 1.1.2. + // + // In case 1.2, the reused input doesn't live past the instruction. Only v0 + // does. In this case, the only edit added is the one to move v0 from the reused + // input allocation p0 to l0. The edits to save and restore v1 are omitted. + // For example: + // + // 1. def v0 (reuse: 1), use v1, use v2 // Both v0 and v1 use p0 + // 2. use v0 // Only v0 lives past inst 1 + // + // The only edit inserted is the one to move from p0 to vreg_allocs[v0] (l0): + // + // 1. def v0 (reuse: 1), use v1, use v2 // Both v0 and v1 use p0 + // move from p0 to vreg_allocs[v0] // Move to v0's current allocation + // 2. use v0, use v1 + // + // In case 2, v0 has no allocation, meaning it doesn't live past the instruction. + // There are two subcases here: + // Case 2.1: The reused input v1 lives past the instruction. + // Case 2.2: The reused input v1 doesn't live past the instruction. + // + // In case 2.1, the only edits that need to be added are the ones to save and restore v1. + // For example: + // + // 1. def v0 (reuse: 1), use v1, use v2 // Both v0 and v1 use p0 + // 2. use v1 + // + // Becomes: + // + // move from p0 to stack_v1 // Save v1. + // 1. def v0 (reuse: 1), use v1, use v2 // Both v0 and v1 use p0. + // move from stack_v1 to p0 // Restore v1. + // 2. use v1 + // + // In case 2.2, no edits should be inserted at all since none of them live past the + // instruction. 
+ // + // From these cases, it can be concluded that edits to save and restore the reused input + // should be inserted whenever the reused input lives past the current instruction. + // And the edit to move the reuse operand into its after-instruction-allocation should + // only be inserted if it lives past the current instruction. + // And because of interactions with other edits relating to eviction, the edits, + // when inserted, should be in the following order: + // + // 1. The edit to save the reused input is preprended to the other edits before the instruction. + // 2. The edit to move v0 from its reused input into its current allocation (spillslot) + // will be prepended to the edits after the instruction. + // 3. The edit to restore v1 will be appended to the edits after the instruction. // - // The problem with the above is that the reuse operand handling assumed that vregs - // used in an instruction will be live after, but it isn't in this case. v12 uses p31 - // after inst 1 because it is supposed to be free. Since v7's first use is in inst 1, - // it should not be moved into its allocation afterwards. - // Hence, moves to flow the reused input into its allocation after the instruction - // are inserted only if the input lives past the instruction, that is, its first use - // is not in this instruction. 
- trace!("Move Reason: Reuse constraints"); if reused_op.kind() != OperandKind::Use || op.kind() != OperandKind::Def @@ -997,7 +1051,7 @@ impl<'a, F: Function> Env<'a, F> { { panic!("Invalid input"); } - let reused_op_first_use = self.vregs_first_seen_in_curr_inst.contains(&reused_op.vreg()); + let reused_input_lives_past_curr_inst = !self.vregs_first_seen_in_curr_inst.contains(&reused_op.vreg()); if self.vreg_allocs[op.vreg().vreg()].is_some() { let reused_op_vreg = reused_op.vreg(); // The only way that a vreg can be assigned a clobber is if a fixed register @@ -1017,10 +1071,8 @@ impl<'a, F: Function> Env<'a, F> { } let reused_op_spillslot = self.vreg_spillslots[reused_op.vreg().vreg()]; - // If this is the reused operand's first use, then don't - // restore it afterwards, because it doesn't live past this instruction. - if !reused_op_first_use { - // Move the reused input into its spillslot. + // Save the reused input only if it lives past the current instruction. + if reused_input_lives_past_curr_inst { self.add_move_later( inst, self.vreg_allocs[reused_op_vreg.vreg()], @@ -1039,13 +1091,11 @@ impl<'a, F: Function> Env<'a, F> { op_prev_alloc, op.class(), InstPosition::After, - false, + true, ); - // If this is the reused operand's first use, then don't - // restore it afterwards, because it doesn't live past this instruction. - if !reused_op_first_use { - // Move the reused input from its spillslot into its current allocation + // Restore the reused input only if it lives past the current instruction. + if reused_input_lives_past_curr_inst { self.add_move_later( inst, Allocation::stack(reused_op_spillslot), @@ -1058,40 +1108,16 @@ impl<'a, F: Function> Env<'a, F> { } self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[reused_op_vreg.vreg()]; - - // Deallocate the reuse operand. - // We can't just deallocate the reuse operand. - // The reason for this is that, since reuse operands are defs - // it is possible for its allocation to be reused by a use operand. 
- // If it is freed here, then the allocation could be reallocated to another - // vreg while the use it was allocated to is still live. - // For example: - // - // 1. def v0 - // 2. def v1, use v2 - // 3. def v3 (reuse: 1), use v0 - // - // If v0 is allocated to p0, then v3 will also be allocated to p0. - // Since reuse operands are processed last, then if v3 is just freed normally, - // then p0 will be free for allocation to v1 and v2, overwriting whatever - // value was defd in v0 in inst 1. - // To avoid this allocation of a place that has already been allocated to a live vreg, - // the `add_to_freelist` parameter is set to true - // only if the reuse operand's allocation was not reused by any use operands - // in the instruction. - let op_alloc_is_in_use = self.allocs_used_by_use_ops.contains(&op_prev_alloc); - self.freealloc(op.vreg(), clobbers, !op_alloc_is_in_use); + self.freealloc(op.vreg(), clobbers); trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); } else { let reused_op_vreg = reused_op.vreg(); - // If this is the reused operand's first use, then don't - // restore it afterwards, because it doesn't live past this instruction. - if !reused_op_first_use { + if reused_input_lives_past_curr_inst { if self.vreg_spillslots[reused_op_vreg.vreg()].is_invalid() { self.vreg_spillslots[reused_op_vreg.vreg()] = self.allocstack(&reused_op_vreg); } let reused_op_spillslot = self.vreg_spillslots[reused_op.vreg().vreg()]; - // Move the reused input into its spillslot before the instruction. + // Save the reused input to its spillslot before the instruction. self.add_move_later( inst, self.vreg_allocs[reused_op_vreg.vreg()], @@ -1100,7 +1126,7 @@ impl<'a, F: Function> Env<'a, F> { InstPosition::Before, true, ); - // Move back into its allocation. + // Restore the reused input. 
self.add_move_later( inst, Allocation::stack(reused_op_spillslot), @@ -1210,8 +1236,6 @@ impl<'a, F: Function> Env<'a, F> { /// /// 1. Move all branch arguments into corresponding temporary spillslots. /// 2. Move values from the temporary spillslots to corresponding block param spillslots. - /// 3. Move values from the temporary spillslots to post-block locatioks, if any, for - /// non-block-param arguments. /// /// These temporaries are used because the moves have to be parallel in the case where /// a block parameter of the successor block is a branch argument. @@ -1316,7 +1340,6 @@ impl<'a, F: Function> Env<'a, F> { let operands = self.func.inst_operands(inst); let clobbers = self.func.inst_clobbers(inst); for preg in clobbers { - //if self.freepregs[preg.class()].remove(&preg) { if !self.is_stack(Allocation::reg(preg)) { trace!("Removing {:?} from the freelist because it's a clobber", preg); self.freepregs[preg.class()].remove(&preg); @@ -1342,7 +1365,7 @@ impl<'a, F: Function> Env<'a, F> { // vreg was defined when adding reftype vregs to the stackmap. self.liveout_vreg_def_inst[op.vreg().vreg()] = (block, inst); } - self.freealloc(op.vreg(), clobbers, true); + self.freealloc(op.vreg(), clobbers); } for (op_idx, op) in FixedEarlyOperands::new(operands) { self.process_operand_allocation(inst, op, op_idx); @@ -1359,7 +1382,7 @@ impl<'a, F: Function> Env<'a, F> { // vreg was defined when adding reftype vregs to the stackmap. self.liveout_vreg_def_inst[op.vreg().vreg()] = (block, inst); } - self.freealloc(op.vreg(), clobbers, true); + self.freealloc(op.vreg(), clobbers); } for (op_idx, op) in ReuseOperands::new(operands) { let OperandConstraint::Reuse(reused_idx) = op.constraint() else { @@ -1370,7 +1393,7 @@ impl<'a, F: Function> Env<'a, F> { // vreg was defined when adding reftype vregs to the stackmap. 
self.liveout_vreg_def_inst[op.vreg().vreg()] = (block, inst); } - self.process_reuse_operand_allocation(inst, op, op_idx, operands[reused_idx], reused_idx, clobbers); + self.process_reuse_operand_allocation(inst, op, op_idx, operands[reused_idx], clobbers); } self.save_and_restore_clobbered_registers(inst); if self.func.requires_refs_on_stack(inst) { @@ -1448,7 +1471,6 @@ impl<'a, F: Function> Env<'a, F> { self.add_freed_regs_to_freelist(); self.use_vregs_saved_and_restored_in_curr_inst.clear(); self.vregs_first_seen_in_curr_inst.clear(); - self.allocs_used_by_use_ops.clear(); self.vregs_allocd_in_curr_inst.clear(); self.reused_inputs_in_curr_inst.clear(); self.vregs_in_curr_inst.clear(); @@ -1505,7 +1527,7 @@ impl<'a, F: Function> Env<'a, F> { // A block's block param is not live before the block. // And `vreg_allocs[i]` of a virtual register i is none for // dead vregs. - self.freealloc(vreg, PRegSet::empty(), true); + self.freealloc(vreg, PRegSet::empty()); if self.func.reftype_vregs().contains(&vreg) { trace!("{:?} is a reftype. Recording it's definition instruction", vreg); // This marks the definition of the block param. From 792bd18746ba0be4a8081657a4cdc9ff7d4a2748 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Mon, 5 Aug 2024 12:52:28 +0100 Subject: [PATCH 19/95] condition to remove clobber from free list changed --- src/fastalloc/mod.rs | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index d9c132e5..582ed708 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -132,10 +132,6 @@ pub struct Env<'a, F: Function> { /// Used to keep track of which vregs have been allocated in the current instruction. /// This is used to determine which edits to insert when allocating a use operand. 
vregs_allocd_in_curr_inst: HashSet, - /// Used to check if a clobbered register in the current instruction is an - /// allocatable register, to make decisions on whether or not is should be returned to - /// the free register list after allocation of the instruction's operands. - clobbered_reg_is_allocatable: HashSet, /// All the safepoint instructions encountered during allocation and their blocks. /// When allocation is completed, this contains all the safepoint instructions /// in the function. @@ -192,6 +188,7 @@ impl<'a, F: Function> Env<'a, F> { trace!("{:?}", env); Self { func, + // Just using this for debugging preg_index_to_class_and_hw_enc: { let mut map = HashMap::new(); for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { @@ -237,7 +234,6 @@ impl<'a, F: Function> Env<'a, F> { use_vregs_saved_and_restored_in_curr_inst: HashSet::new(), freed_def_pregs: PartedByRegClass { items: [BTreeSet::new(), BTreeSet::new(), BTreeSet::new()] }, vregs_first_seen_in_curr_inst: HashSet::new(), - clobbered_reg_is_allocatable: HashSet::new(), safepoint_insts: Vec::new(), liveout_vregs: HashSet::new(), liveout_vreg_def_inst: vec![(Block::invalid(), Inst::invalid()); func.num_vregs()], @@ -390,13 +386,6 @@ impl<'a, F: Function> Env<'a, F> { } fn add_move_later(&mut self, inst: Inst, from: Allocation, to: Allocation, class: RegClass, pos: InstPosition, prepend: bool) { - if self.is_stack(from) && self.is_stack(to) { - self.inst_needs_scratch_reg[class] = true; - } - let target_edits = match pos { - InstPosition::After => &mut self.inst_post_edits, - InstPosition::Before => &mut self.inst_pre_edits - }; trace!("Recording edit to add later: {:?}", (ProgPoint::new(inst, pos), Edit::Move { from, to @@ -405,6 +394,13 @@ impl<'a, F: Function> Env<'a, F> { trace!("Deciding not to record the edit, since the source and dest are the same"); return; } + if self.is_stack(from) && self.is_stack(to) { + self.inst_needs_scratch_reg[class] = true; + } + let target_edits = match 
pos { + InstPosition::After => &mut self.inst_post_edits, + InstPosition::Before => &mut self.inst_pre_edits + }; if prepend { target_edits.push_front((ProgPoint::new(inst, pos), Edit::Move { from, @@ -564,7 +560,7 @@ impl<'a, F: Function> Env<'a, F> { // The only way a freed def preg can be reused for an operand is if // the operand uses or defines a vreg in the early phase and the vreg doesn't // live past the instruction. If the vreg lives past the instruction, then the - // possible defined value will overwrite it. + // defined value will overwrite it. if op.pos() == OperandPos::Early && self.vregs_first_seen_in_curr_inst.contains(&op.vreg()) // A reused input should not have the same allocation as a non-reused def operand. // For example: @@ -1340,11 +1336,17 @@ impl<'a, F: Function> Env<'a, F> { let operands = self.func.inst_operands(inst); let clobbers = self.func.inst_clobbers(inst); for preg in clobbers { + // To avoid allocating clobbers, they are removed from the + // free register list. To also avoid a clobber being evicted, + // it's also removed from the LRU. + // The only way a clobber can be marked as the allocation of + // an operand is through a fixed register constraint to the clobber + // or a reused input constraint of an operand with a fixed register + // constraint to use a clobber. 
if !self.is_stack(Allocation::reg(preg)) { trace!("Removing {:?} from the freelist because it's a clobber", preg); self.freepregs[preg.class()].remove(&preg); self.lrus[preg.class()].remove(preg.hw_enc()); - self.clobbered_reg_is_allocatable.insert(preg); } } for (_, op) in ReuseOperands::new(operands) { @@ -1449,7 +1451,7 @@ impl<'a, F: Function> Env<'a, F> { } } for preg in self.func.inst_clobbers(inst) { - if self.clobbered_reg_is_allocatable.contains(&preg) { + if !self.is_stack(Allocation::reg(preg)) { if self.vreg_in_preg[preg.index()] == VReg::invalid() { // In the case where the clobbered register is allocated to // something, don't add the register to the freelist, cause @@ -1465,7 +1467,6 @@ impl<'a, F: Function> Env<'a, F> { trace!("freed_def_pregs: {:?}", self.freed_def_pregs); trace!("free after curr inst: {:?}", self.free_after_curr_inst); trace!(""); - self.clobbered_reg_is_allocatable.clear(); let scratch_regs = self.get_scratch_regs(inst); self.process_edits(scratch_regs); self.add_freed_regs_to_freelist(); @@ -1748,6 +1749,7 @@ trace!("\n\n\n\n\n\n\n"); allocs: env.allocs.allocs, inst_alloc_offsets: env.allocs.inst_alloc_offsets, num_spillslots: env.num_spillslots as usize, + // TODO: Handle debug locations. debug_locations: Vec::new(), safepoint_slots: env.safepoint_slots, stats: env.stats, From 5f3b355993c99c6d80f4b289b2da5c9a7e971b34 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Mon, 5 Aug 2024 14:02:51 +0100 Subject: [PATCH 20/95] fixed bug with adding clobbers back to the free list --- src/fastalloc/lru.rs | 1 + src/fastalloc/mod.rs | 40 +++++++++++++++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/src/fastalloc/lru.rs b/src/fastalloc/lru.rs index e8613b8e..8a9b5d43 100644 --- a/src/fastalloc/lru.rs +++ b/src/fastalloc/lru.rs @@ -144,6 +144,7 @@ impl Lru { self.head == usize::MAX } + // Using this to debug. fn check_for_cycle(&self) { trace!("{:?} LRU. 
head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); if self.head != usize::MAX { diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 582ed708..5c7d8d97 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1456,11 +1456,41 @@ impl<'a, F: Function> Env<'a, F> { // In the case where the clobbered register is allocated to // something, don't add the register to the freelist, cause // it isn't free. - trace!("Adding clobbered {:?} back to free list", preg); - self.freepregs[preg.class()].insert(preg); + trace!("Adding clobbered {:?} to free after inst list", preg); + // Consider a scenario: + // + // 1. use v0 (fixed: p1). Clobbers: [p0] + // 2. use v0 (fixed: p0) + // + // In the above, v0 is first allocated to p0 at inst 2. + // At inst 1, v0's allocation is changed to p1 and edits are inserted + // to save and restore v0: + // + // move from p1 to stack_v0 + // 1. use v0 (fixed: p1). Clobbers: [p0] + // move from stack_v0 to p0 + // 2. use v0 (fixed: p0) + // + // Suppose some other edits need to be inserted before/after inst 1 + // and scratch registers are needed. + // If the clobber p0 is added back to the free list directly, + // p0 may end up be being used as a scratch register and get overwritten + // before inst 2 is reached. This could happen if inst 1 is a safepoint and + // edits to save and restore reftypes are prepended before the inst + // and after resulting in the following scenario: + // + // --- p0 is overwritten --- + // move from p1 to stack_v0 + // 1. use v0 (fixed: p1). Clobbers: [p0] + // move from stack_v0 to p0 + // --- p0 is overwritten --- + // 2. use v0 (fixed: p0) + // + // To avoid this scenario, the registers are added to the + // `free_after_curr_inst` instead, to ensure that it isn't used as + // a scratch register. + self.free_after_curr_inst.insert(preg); } - // TODO: Append and poke instead. 
- self.lrus[preg.class()].append(preg.hw_enc()); } } trace!("After the allocation:"); @@ -1530,7 +1560,7 @@ impl<'a, F: Function> Env<'a, F> { // dead vregs. self.freealloc(vreg, PRegSet::empty()); if self.func.reftype_vregs().contains(&vreg) { - trace!("{:?} is a reftype. Recording it's definition instruction", vreg); + trace!("{:?} is a reftype. Recording its definition instruction", vreg); // This marks the definition of the block param. // Record this information which will be used while building // the stackmap later. From 7f26dc644b03dc8513b594ccf275917e05968830 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Mon, 5 Aug 2024 14:02:51 +0100 Subject: [PATCH 21/95] fixed bug with adding clobbers back to the free list --- src/fastalloc/lru.rs | 1 + src/fastalloc/mod.rs | 45 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/src/fastalloc/lru.rs b/src/fastalloc/lru.rs index e8613b8e..8a9b5d43 100644 --- a/src/fastalloc/lru.rs +++ b/src/fastalloc/lru.rs @@ -144,6 +144,7 @@ impl Lru { self.head == usize::MAX } + // Using this to debug. fn check_for_cycle(&self) { trace!("{:?} LRU. head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); if self.head != usize::MAX { diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 582ed708..cad3b89d 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1456,11 +1456,46 @@ impl<'a, F: Function> Env<'a, F> { // In the case where the clobbered register is allocated to // something, don't add the register to the freelist, cause // it isn't free. - trace!("Adding clobbered {:?} back to free list", preg); - self.freepregs[preg.class()].insert(preg); + trace!("Adding clobbered {:?} to free after inst list", preg); + // Consider a scenario: + // + // 1. use v0 (fixed: p1). Clobbers: [p0] + // 2. use v0 (fixed: p0) + // + // In the above, v0 is first allocated to p0 at inst 2. 
+ // At inst 1, v0's allocation is changed to p1 and edits are inserted + // to save and restore v0: + // + // move from p1 to stack_v0 + // 1. use v0 (fixed: p1). Clobbers: [p0] + // move from stack_v0 to p0 + // 2. use v0 (fixed: p0) + // + // Suppose some other edits need to be inserted before/after inst 1 + // and scratch registers are needed. + // If the clobber p0 is added back to the free list directly, + // p0 may end up be being used as a scratch register and get overwritten + // before inst 2 is reached. This could happen if inst 1 is a safepoint and + // edits to save and restore reftypes are prepended before the inst + // and after resulting in the following scenario: + // + // --- p0 is overwritten --- + // move from p1 to stack_v0 + // 1. use v0 (fixed: p1). Clobbers: [p0] + // move from stack_v0 to p0 + // --- p0 is overwritten --- + // 2. use v0 (fixed: p0) + // + // To avoid this scenario, the registers are added to the + // `free_after_curr_inst` instead, to ensure that it isn't used as + // a scratch register. + self.free_after_curr_inst.insert(preg); + } else { + // Something is still in the clobber. + // After this instruction, it's no longer a clobber. + // Add it back to the LRU. + self.lrus[preg.class()].append_and_poke(preg); } - // TODO: Append and poke instead. - self.lrus[preg.class()].append(preg.hw_enc()); } } trace!("After the allocation:"); @@ -1530,7 +1565,7 @@ impl<'a, F: Function> Env<'a, F> { // dead vregs. self.freealloc(vreg, PRegSet::empty()); if self.func.reftype_vregs().contains(&vreg) { - trace!("{:?} is a reftype. Recording it's definition instruction", vreg); + trace!("{:?} is a reftype. Recording its definition instruction", vreg); // This marks the definition of the block param. // Record this information which will be used while building // the stackmap later. 
From 958864c77a01438b0bf6ec8df8fd4049fc31ac6b Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Tue, 6 Aug 2024 19:15:22 +0000 Subject: [PATCH 22/95] change condition to remove clobber from free list --- src/fastalloc/mod.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index e4205c56..ce95be8e 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -160,6 +160,7 @@ pub struct Env<'a, F: Function> { reused_inputs_in_curr_inst: Vec, /// The vregs defined or used in the current instruction. vregs_in_curr_inst: HashSet, + allocatable_regs: PRegSet, dedicated_scratch_regs: PartedByRegClass>, preg_index_to_class_and_hw_enc: HashMap, @@ -188,6 +189,7 @@ impl<'a, F: Function> Env<'a, F> { trace!("{:?}", env); Self { func, + allocatable_regs: PRegSet::from(env), // Just using this for debugging preg_index_to_class_and_hw_enc: { let mut map = HashMap::new(); @@ -1343,7 +1345,7 @@ impl<'a, F: Function> Env<'a, F> { // an operand is through a fixed register constraint to the clobber // or a reused input constraint of an operand with a fixed register // constraint to use a clobber. 
- if !self.is_stack(Allocation::reg(preg)) { + if self.allocatable_regs.contains(preg) { trace!("Removing {:?} from the freelist because it's a clobber", preg); self.freepregs[preg.class()].remove(&preg); self.lrus[preg.class()].remove(preg.hw_enc()); @@ -1451,7 +1453,7 @@ impl<'a, F: Function> Env<'a, F> { } } for preg in self.func.inst_clobbers(inst) { - if !self.is_stack(Allocation::reg(preg)) { + if self.allocatable_regs.contains(preg) { if self.vreg_in_preg[preg.index()] == VReg::invalid() { // In the case where the clobbered register is allocated to // something, don't add the register to the freelist, cause From 70d6fdee5a4d8ea7b5bcb9ce807056756f69405b Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Wed, 7 Aug 2024 15:45:53 +0100 Subject: [PATCH 23/95] added cfgs to logging and validation function calls --- src/fastalloc/lru.rs | 17 +++-- src/fastalloc/mod.rs | 156 ++++++++++++++++++++----------------------- 2 files changed, 85 insertions(+), 88 deletions(-) diff --git a/src/fastalloc/lru.rs b/src/fastalloc/lru.rs index 8a9b5d43..267359a7 100644 --- a/src/fastalloc/lru.rs +++ b/src/fastalloc/lru.rs @@ -64,7 +64,8 @@ impl Lru { } self.head = i; trace!("Poked {:?} in {:?} LRU", preg, self.regclass); - self.check_for_cycle(); + #[cfg(debug_assertions)] + self.validate_lru(); } /// Gets the least recently used physical register. @@ -76,7 +77,8 @@ impl Lru { } let oldest = self.data[self.head].prev; trace!("Popped p{oldest} in {:?} LRU", self.regclass); - self.check_for_cycle(); + #[cfg(debug_assertions)] + self.validate_lru(); PReg::new(oldest, self.regclass) } @@ -98,7 +100,8 @@ impl Lru { } } trace!("Removed p{i} from {:?} LRU", self.regclass); - self.check_for_cycle(); + #[cfg(debug_assertions)] + self.validate_lru(); } /// Sets the node `i` to the last in the list. 
@@ -117,7 +120,8 @@ impl Lru { self.data[i].next = i; } trace!("Appended p{i} to the {:?} LRU", self.regclass); - self.check_for_cycle(); + #[cfg(debug_assertions)] + self.validate_lru(); } pub fn append_and_poke(&mut self, preg: PReg) { @@ -137,7 +141,8 @@ impl Lru { prev, }; trace!("Done inserting p{i} before {j} in {:?} LRU", self.regclass); - self.check_for_cycle(); + #[cfg(debug_assertions)] + self.validate_lru(); } pub fn is_empty(&self) -> bool { @@ -145,7 +150,7 @@ impl Lru { } // Using this to debug. - fn check_for_cycle(&self) { + fn validate_lru(&self) { trace!("{:?} LRU. head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); if self.head != usize::MAX { let mut node = self.data[self.head].next; diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index ce95be8e..b58150cc 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -27,11 +27,9 @@ struct Allocs { impl Allocs { fn new(func: &F, env: &MachineEnv) -> Self { - // The number of operands is <= number of virtual registers - // It can be lesser in the case where virtual registers are used multiple - // times in a single instruction. 
- let mut allocs = Vec::with_capacity(func.num_vregs()); - let mut inst_alloc_offsets = Vec::with_capacity(func.num_vregs()); + let operand_no_guess = func.num_vregs() * 3; + let mut allocs = Vec::with_capacity(operand_no_guess); + let mut inst_alloc_offsets = Vec::with_capacity(operand_no_guess); for inst in 0..func.num_insts() { let operands_len = func.inst_operands(Inst::new(inst)).len() as u32; inst_alloc_offsets.push(allocs.len() as u32); @@ -162,7 +160,6 @@ pub struct Env<'a, F: Function> { vregs_in_curr_inst: HashSet, allocatable_regs: PRegSet, dedicated_scratch_regs: PartedByRegClass>, - preg_index_to_class_and_hw_enc: HashMap, fixed_stack_slots: Vec, @@ -190,23 +187,6 @@ impl<'a, F: Function> Env<'a, F> { Self { func, allocatable_regs: PRegSet::from(env), - // Just using this for debugging - preg_index_to_class_and_hw_enc: { - let mut map = HashMap::new(); - for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { - let class = class as usize; - for reg in env.preferred_regs_by_class[class].iter().cloned() { - map.insert(reg.index(), reg); - } - for reg in env.non_preferred_regs_by_class[class].iter().cloned() { - map.insert(reg.index(), reg); - } - for reg in env.fixed_stack_slots.iter().cloned() { - map.insert(reg.index(), reg); - } - } - map - }, vreg_allocs: vec![Allocation::none(); func.num_vregs()], vreg_spillslots: vec![SpillSlot::invalid(); func.num_vregs()], live_vregs: HashSet::with_capacity(func.num_vregs()), @@ -1514,29 +1494,8 @@ impl<'a, F: Function> Env<'a, F> { self.reused_inputs_in_curr_inst.clear(); self.vregs_in_curr_inst.clear(); - // After instruction - trace!(""); - trace!("State after instruction {:?}", inst); - let mut map = HashMap::new(); - for (vreg_idx, alloc) in self.vreg_allocs.iter().enumerate() { - if *alloc != Allocation::none() { - map.insert(format!("vreg{vreg_idx}"), alloc); - } - } - trace!("vreg_allocs: {:?}", map); - let mut map = HashMap::new(); - for i in 0..self.vreg_in_preg.len() { - if 
self.vreg_in_preg[i] != VReg::invalid() { - map.insert(self.preg_index_to_class_and_hw_enc[&i], self.vreg_in_preg[i]); - } - } - trace!("vreg_in_preg: {:?}", map); - trace!("Int LRU: {:?}", self.lrus[RegClass::Int]); - trace!("Float LRU: {:?}", self.lrus[RegClass::Float]); - trace!("Vector LRU: {:?}", self.lrus[RegClass::Vector]); - trace!("Free int pregs: {:?}", self.freepregs[RegClass::Int]); - trace!("Free float pregs: {:?}", self.freepregs[RegClass::Float]); - trace!("Free vector pregs: {:?}", self.freepregs[RegClass::Vector]); + #[cfg(feature = "trace-log")] + self.log_post_inst_processing_state(block, inst); } /// At the beginning of every block, all virtual registers that are @@ -1621,7 +1580,11 @@ impl<'a, F: Function> Env<'a, F> { self.process_edits(self.get_scratch_regs_for_reloading()); self.add_freed_regs_to_freelist(); - // After reload_at_begin + #[cfg(feature = "trace-log")] + self.log_post_reload_at_begin_state(block); + } + + fn log_post_reload_at_begin_state(&self, block: Block) { trace!(""); trace!("State after instruction reload_at_begin of {:?}", block); let mut map = HashMap::new(); @@ -1634,7 +1597,32 @@ impl<'a, F: Function> Env<'a, F> { let mut map = HashMap::new(); for i in 0..self.vreg_in_preg.len() { if self.vreg_in_preg[i] != VReg::invalid() { - map.insert(self.preg_index_to_class_and_hw_enc[&i], self.vreg_in_preg[i]); + map.insert(PReg::from_index(i), self.vreg_in_preg[i]); + } + } + trace!("vreg_in_preg: {:?}", map); + trace!("Int LRU: {:?}", self.lrus[RegClass::Int]); + trace!("Float LRU: {:?}", self.lrus[RegClass::Float]); + trace!("Vector LRU: {:?}", self.lrus[RegClass::Vector]); + trace!("Free int pregs: {:?}", self.freepregs[RegClass::Int]); + trace!("Free float pregs: {:?}", self.freepregs[RegClass::Float]); + trace!("Free vector pregs: {:?}", self.freepregs[RegClass::Vector]); + } + + fn log_post_inst_processing_state(&self, block: Block, inst: Inst) { + trace!(""); + trace!("State after instruction {:?}", inst); + let mut map 
= HashMap::new(); + for (vreg_idx, alloc) in self.vreg_allocs.iter().enumerate() { + if *alloc != Allocation::none() { + map.insert(format!("vreg{vreg_idx}"), alloc); + } + } + trace!("vreg_allocs: {:?}", map); + let mut map = HashMap::new(); + for i in 0..self.vreg_in_preg.len() { + if self.vreg_in_preg[i] != VReg::invalid() { + map.insert(PReg::from_index(i), self.vreg_in_preg[i]); } } trace!("vreg_in_preg: {:?}", map); @@ -1716,40 +1704,11 @@ impl<'a, F: Function> Env<'a, F> { } self.build_safepoint_stackmap(); - ///////////////////////////////////////////////////////////////////////////////////// - trace!("Done!"); - struct Z(usize); - impl std::fmt::Debug for Z { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "v{}", self.0) - } - } - let mut v = Vec::new(); - for i in 0..self.func.num_vregs() { - if self.vreg_spillslots[i].is_valid() { - v.push((Z(i), Allocation::stack(self.vreg_spillslots[i]))); - } - } - trace!("{:?}", v); - trace!("\nTemp spillslots: {:?}", self.temp_spillslots); - ///////////////////////////////////////////////////////////////////////////////////// - Ok(()) } } -pub fn run( - func: &F, - mach_env: &MachineEnv, - enable_annotations: bool, - enable_ssa_checker: bool, -) -> Result { - let cfginfo = CFGInfo::new(func)?; - - if enable_ssa_checker { - validate_ssa(func, &cfginfo)?; - } - +fn log_function(func: &F) { trace!("Processing a new function"); for block in 0..func.num_blocks() { let block = Block::new(block); @@ -1763,7 +1722,6 @@ pub fn run( for c in clobbers { cls.push(c); } - use std::print; trace!("inst{:?}: {:?}. 
Clobbers: {:?}", inst.index(), func.inst_operands(inst), cls); if func.is_branch(inst) { trace!("Block args: "); @@ -1775,13 +1733,47 @@ pub fn run( } trace!(""); } +} + +fn log_output<'a, F: Function>(env: &Env<'a, F>) { + trace!("Done!"); + let mut v = Vec::new(); + for i in 0..env.func.num_vregs() { + if env.vreg_spillslots[i].is_valid() { + v.push(( + VReg::new(i, RegClass::Int), + Allocation::stack(env.vreg_spillslots[i]) + )); + } + } + trace!("VReg spillslots: {:?}", v); + trace!("\nTemp spillslots: {:?}", env.temp_spillslots); + trace!("Final edits: {:?}", env.edits); + trace!("safepoint_slots: {:?}", env.safepoint_slots); + trace!("\n\n\n\n\n\n\n"); +} + +pub fn run( + func: &F, + mach_env: &MachineEnv, + enable_annotations: bool, + enable_ssa_checker: bool, +) -> Result { + let cfginfo = CFGInfo::new(func)?; + + if enable_ssa_checker { + validate_ssa(func, &cfginfo)?; + } + + #[cfg(feature = "trace-log")] + log_function(func); let mut env = Env::new(func, mach_env); env.run()?; -trace!("Final edits: {:?}", env.edits); -trace!("safepoint_slots: {:?}", env.safepoint_slots); -trace!("\n\n\n\n\n\n\n"); + #[cfg(feature = "trace-log")] + log_output(&env); + Ok(Output { edits: env.edits.make_contiguous().to_vec(), allocs: env.allocs.allocs, From c3cf1e416c7071a4c13c81cf790721ade06fb9c7 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Wed, 7 Aug 2024 16:07:28 +0100 Subject: [PATCH 24/95] remove support for safepoints --- fuzz/fuzz_targets/fastalloc_checker.rs | 2 +- src/fastalloc/mod.rs | 152 +------------------------ 2 files changed, 4 insertions(+), 150 deletions(-) diff --git a/fuzz/fuzz_targets/fastalloc_checker.rs b/fuzz/fuzz_targets/fastalloc_checker.rs index b8b7e467..b099c27a 100644 --- a/fuzz/fuzz_targets/fastalloc_checker.rs +++ b/fuzz/fuzz_targets/fastalloc_checker.rs @@ -24,7 +24,7 @@ impl Arbitrary<'_> for TestCase { fixed_regs: true, fixed_nonallocatable: true, clobbers: true, - reftypes: true, + reftypes: false, }, )?, }) diff --git 
a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index b58150cc..bfd9b303 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -8,6 +8,7 @@ use crate::{cfg::CFGInfo, RegAllocError, Allocation, ion::Stats}; use alloc::collections::{BTreeSet, VecDeque}; use alloc::vec::Vec; use hashbrown::{HashSet, HashMap}; +use log::warn; use std::println; use std::format; @@ -130,26 +131,11 @@ pub struct Env<'a, F: Function> { /// Used to keep track of which vregs have been allocated in the current instruction. /// This is used to determine which edits to insert when allocating a use operand. vregs_allocd_in_curr_inst: HashSet, - /// All the safepoint instructions encountered during allocation and their blocks. - /// When allocation is completed, this contains all the safepoint instructions - /// in the function. - /// This is used to build the stackmap after allocation is complete. - safepoint_insts: Vec<(Block, Inst)>, /// All the liveout vregs encountered during allocation. /// When allocation is completed, this contains all the liveout vregs in /// the function. /// This is used to build the stackmap after allocation is complete. liveout_vregs: HashSet, - /// When allocation is completed, `liveout_vreg_def_inst[i]` holds the block - /// and instruction in which liveout vreg `i` is defined. If vreg `i` is not liveout, - /// then the block and instruction will be invalid. - /// This is used to build the stackmap after allocation is complete. - liveout_vreg_def_inst: Vec<(Block, Inst)>, - /// When allocation is completed, this holds all the reftype vregs that - /// already have a slot in the stackmap. - /// This is used while building the stackmap after allocation is completed, - /// to avoid adding duplicate entries for liveout vregs. - slot_is_in_stackmap: HashSet<(Inst, VReg)>, /// Used to determine if a scratch register is needed for an /// instruction's moves during the `process_edit` calls. 
inst_needs_scratch_reg: PartedByRegClass, @@ -216,9 +202,7 @@ impl<'a, F: Function> Env<'a, F> { use_vregs_saved_and_restored_in_curr_inst: HashSet::new(), freed_def_pregs: PartedByRegClass { items: [BTreeSet::new(), BTreeSet::new(), BTreeSet::new()] }, vregs_first_seen_in_curr_inst: HashSet::new(), - safepoint_insts: Vec::new(), liveout_vregs: HashSet::new(), - liveout_vreg_def_inst: vec![(Block::invalid(), Inst::invalid()); func.num_vregs()], inst_needs_scratch_reg: PartedByRegClass { items: [false, false, false] }, reused_inputs_in_curr_inst: Vec::new(), vregs_in_curr_inst: HashSet::new(), @@ -227,7 +211,6 @@ impl<'a, F: Function> Env<'a, F> { env.scratch_by_class[1], env.scratch_by_class[2], ] }, - slot_is_in_stackmap: HashSet::new(), allocs: Allocs::new(func, env), edits: VecDeque::new(), safepoint_slots: Vec::new(), @@ -1344,11 +1327,6 @@ impl<'a, F: Function> Env<'a, F> { self.process_operand_allocation(inst, op, op_idx); } for (_, op) in NonReuseLateDefOperands::new(operands) { - if self.liveout_vregs.contains(&op.vreg()) { - // Need to remember the instruction in which a liveout - // vreg was defined when adding reftype vregs to the stackmap. - self.liveout_vreg_def_inst[op.vreg().vreg()] = (block, inst); - } self.freealloc(op.vreg(), clobbers); } for (op_idx, op) in FixedEarlyOperands::new(operands) { @@ -1361,77 +1339,15 @@ impl<'a, F: Function> Env<'a, F> { self.process_operand_allocation(inst, op, op_idx); } for (_, op) in NonReuseEarlyDefOperands::new(operands) { - if self.liveout_vregs.contains(&op.vreg()) { - // Need to remember the instruction in which a liveout - // vreg was defined when adding reftype vregs to the stackmap. 
- self.liveout_vreg_def_inst[op.vreg().vreg()] = (block, inst); - } self.freealloc(op.vreg(), clobbers); } for (op_idx, op) in ReuseOperands::new(operands) { let OperandConstraint::Reuse(reused_idx) = op.constraint() else { unreachable!() }; - if self.liveout_vregs.contains(&op.vreg()) { - // Need to remember the instruction in which a liveout - // vreg was defined when adding reftype vregs to the stackmap. - self.liveout_vreg_def_inst[op.vreg().vreg()] = (block, inst); - } self.process_reuse_operand_allocation(inst, op, op_idx, operands[reused_idx], clobbers); } self.save_and_restore_clobbered_registers(inst); - if self.func.requires_refs_on_stack(inst) { - trace!("{:?} is a safepoint instruction. Need to move reftypes to stack", inst); - // Need to remember that this is a safepoint instruction when adding reftype - // liveout vregs to the stackmap. - self.safepoint_insts.push((block, inst)); - // Insert edits to save and restore live reftype vregs - // not already on the stack. - for reftype_vreg in self.func.reftype_vregs() { - trace!("{:?} is a reftype vreg and needs to be on the stack", reftype_vreg); - let curr_alloc = self.vreg_allocs[reftype_vreg.vreg()]; - trace!("curr_alloc: {:?}", curr_alloc); - if let Some(_preg) = curr_alloc.as_reg() { - trace!("{:?} is currently in a preg. 
Inserting moves to save and restore it", reftype_vreg); - let slot = if self.vreg_spillslots[reftype_vreg.vreg()].is_valid() { - self.vreg_spillslots[reftype_vreg.vreg()] - } else { - self.vreg_spillslots[reftype_vreg.vreg()] = self.allocstack(&reftype_vreg); - self.vreg_spillslots[reftype_vreg.vreg()] - }; - let slot_alloc = Allocation::stack(slot); - self.add_move_later( - inst, - curr_alloc, - slot_alloc, - reftype_vreg.class(), - InstPosition::Before, - true - ); - self.add_move_later( - inst, - slot_alloc, - curr_alloc, - reftype_vreg.class(), - InstPosition::After, - false - ); - self.safepoint_slots.push((ProgPoint::new(inst, InstPosition::Before), slot_alloc)); - // Need to remember that this reftype's slot is already in the stackmap to - // avoid adding duplicated entries when adding entries for liveout reftype vregs. - self.slot_is_in_stackmap.insert((inst, *reftype_vreg)); - } else if let Some(slot) = curr_alloc.as_stack() { - trace!("{:?} is already on the stack.", reftype_vreg); - self.safepoint_slots.push(( - ProgPoint::new(inst, InstPosition::Before), - Allocation::stack(slot) - )); - // Need to remember that this reftype's slot is already in the stackmap to - // avoid adding duplicated entries when adding entries for liveout reftype vregs. - self.slot_is_in_stackmap.insert((inst, *reftype_vreg)); - } - } - } for preg in self.func.inst_clobbers(inst) { if self.allocatable_regs.contains(preg) { if self.vreg_in_preg[preg.index()] == VReg::invalid() { @@ -1495,7 +1411,7 @@ impl<'a, F: Function> Env<'a, F> { self.vregs_in_curr_inst.clear(); #[cfg(feature = "trace-log")] - self.log_post_inst_processing_state(block, inst); + self.log_post_inst_processing_state(inst); } /// At the beginning of every block, all virtual registers that are @@ -1526,13 +1442,6 @@ impl<'a, F: Function> Env<'a, F> { // And `vreg_allocs[i]` of a virtual register i is none for // dead vregs. 
self.freealloc(vreg, PRegSet::empty()); - if self.func.reftype_vregs().contains(&vreg) { - trace!("{:?} is a reftype. Recording its definition instruction", vreg); - // This marks the definition of the block param. - // Record this information which will be used while building - // the stackmap later. - self.liveout_vreg_def_inst[vreg.vreg()] = (block, self.func.block_insns(block).first()); - } } else { trace!("{:?} is not a block param. It's a liveout vreg from some predecessor", vreg); trace!("Setting {:?}'s current allocation to its spillslot", vreg); @@ -1609,7 +1518,7 @@ impl<'a, F: Function> Env<'a, F> { trace!("Free vector pregs: {:?}", self.freepregs[RegClass::Vector]); } - fn log_post_inst_processing_state(&self, block: Block, inst: Inst) { + fn log_post_inst_processing_state(&self, inst: Inst) { trace!(""); trace!("State after instruction {:?}", inst); let mut map = HashMap::new(); @@ -1634,60 +1543,6 @@ impl<'a, F: Function> Env<'a, F> { trace!("Free vector pregs: {:?}", self.freepregs[RegClass::Vector]); } - fn build_safepoint_stackmap(&mut self) { - let postorder = postorder::calculate(self.func.num_blocks(), self.func.entry_block(), |block| { - self.func.block_succs(block) - }); - let domtree = domtree::calculate( - self.func.num_blocks(), - |block| self.func.block_preds(block), - &postorder[..], - self.func.entry_block(), - ); - // Check if the liveout vreg was defined before the safepoint - // instruction. If it was defined before it, then record the liveout - // with its spillslot in the stackmap (because the liveout vreg's first - // use hasn't been encountered yet. It is possible that a loop could). 
- for (safepoint_block, safepoint_inst) in self.safepoint_insts.iter() { - for liveout_vreg in self.liveout_vregs.iter() { - let (liveout_vreg_def_block, liveout_vreg_def_inst) = self.liveout_vreg_def_inst[liveout_vreg.vreg()]; - if self.func.reftype_vregs().contains(liveout_vreg) - && !self.slot_is_in_stackmap.contains(&(*safepoint_inst, *liveout_vreg)) - && dominates(&domtree, liveout_vreg_def_block, *safepoint_block) - { - if self.func.block_params(liveout_vreg_def_block).contains(liveout_vreg) { - // Since block params aren't explicitly defined, they are marked as defined - // in the first instruction in the block, even though they are actually - // defined just before that. - // This is the reason why <= is used here instead of just <. - if liveout_vreg_def_inst <= *safepoint_inst { - trace!("Liveout vreg inst: {:?}", self.liveout_vreg_def_inst[liveout_vreg.vreg()]); - trace!("Safepoint inst: {:?}", safepoint_inst); - trace!("Adding a stackmap slot for liveout vreg {:?}", liveout_vreg); - self.safepoint_slots.push(( - ProgPoint::before(*safepoint_inst), - Allocation::stack(self.vreg_spillslots[liveout_vreg.vreg()]) - )); - } - } - // The definition of the vreg must come before the safepoint instruction - // This is necessary because, while the `dominates` call checks for different - // blocks, in the case where the vreg definition and the safepoint instructions - // are in the same block, we need to make this check. 
- else if liveout_vreg_def_inst < *safepoint_inst { - self.safepoint_slots.push(( - ProgPoint::before(*safepoint_inst), - Allocation::stack(self.vreg_spillslots[liveout_vreg.vreg()]) - )); - } - } - } - } - self.safepoint_slots.sort_by( - |slot0, slot1| slot0.0.cmp(&slot1.0) - ); - } - fn alloc_block(&mut self, block: Block) { trace!("{:?} start", block); for inst in self.func.block_insns(block).iter().rev() { @@ -1702,7 +1557,6 @@ impl<'a, F: Function> Env<'a, F> { for block in (0..self.func.num_blocks()).rev() { self.alloc_block(Block::new(block)); } - self.build_safepoint_stackmap(); Ok(()) } From 0824a1834011cb3899070cbad8d0dd64a0589492 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Wed, 7 Aug 2024 16:52:25 +0100 Subject: [PATCH 25/95] added FromIterator implementation to PRegSet --- src/lib.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 4f2620c2..3b0c4c72 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,6 +35,7 @@ macro_rules! trace_enabled { } use core::hash::BuildHasherDefault; +use std::iter::FromIterator; use rustc_hash::FxHasher; type FxHashMap = hashbrown::HashMap>; type FxHashSet = hashbrown::HashSet>; @@ -305,6 +306,16 @@ impl From<&MachineEnv> for PRegSet { } } +impl FromIterator for PRegSet { + fn from_iter>(iter: T) -> Self { + let mut set = Self::default(); + for preg in iter { + set.add(preg); + } + set + } +} + /// A virtual register. Contains a virtual register number and a /// class. 
/// From 0fa2735f8783e69492195aa293fede8029fc8628 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Wed, 7 Aug 2024 16:52:50 +0100 Subject: [PATCH 26/95] now using PRegSet for sets of physical registers --- src/fastalloc/mod.rs | 78 +++++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 34 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index bfd9b303..b9a2acd9 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1,14 +1,12 @@ use core::convert::TryInto; use core::iter::FromIterator; use core::ops::{Index, IndexMut}; -use crate::domtree::dominates; -use crate::{domtree, postorder, AllocationKind, Block, Inst, InstPosition, Operand, OperandConstraint, OperandKind, OperandPos, PReg, PRegSet, RegClass, SpillSlot, VReg}; +use crate::{AllocationKind, Block, Inst, InstPosition, Operand, OperandConstraint, OperandKind, OperandPos, PReg, PRegSet, RegClass, SpillSlot, VReg}; use crate::{Function, MachineEnv, ssa::validate_ssa, ProgPoint, Edit, Output}; use crate::{cfg::CFGInfo, RegAllocError, Allocation, ion::Stats}; use alloc::collections::{BTreeSet, VecDeque}; use alloc::vec::Vec; use hashbrown::{HashSet, HashMap}; -use log::warn; use std::println; use std::format; @@ -60,6 +58,15 @@ impl IndexMut<(usize, usize)> for Allocs { } } +fn remove_any_from_pregset(set: &mut PRegSet) -> Option { + if let Some(preg) = set.into_iter().next() { + set.remove(preg); + Some(preg) + } else { + None + } +} + #[derive(Debug)] pub struct Env<'a, F: Function> { func: &'a F, @@ -72,7 +79,7 @@ pub struct Env<'a, F: Function> { /// The virtual registers that are currently live. live_vregs: HashSet, /// Allocatable free physical registers for classes Int, Float, and Vector, respectively. - freepregs: PartedByRegClass>, + freepregs: PartedByRegClass, /// Least-recently-used caches for register classes Int, Float, and Vector, respectively. 
lrus: Lrus, /// `vreg_in_preg[i]` is the virtual register currently in the physical register @@ -90,7 +97,7 @@ pub struct Env<'a, F: Function> { /// /// This is used to keep track of them so that they can be marked as free for reallocation /// after the instruction has completed processing. - free_after_curr_inst: HashSet, + free_after_curr_inst: PRegSet, /// The virtual registers of use operands that have been allocated in the current instruction /// and for which edits had to be inserted to save and restore them because their constraint /// doesn't allow the allocation they are expected to be in after the instruction. @@ -119,7 +126,7 @@ pub struct Env<'a, F: Function> { /// which is incorrect because p0 will end up holding whatever is in stack0, not v0. /// `freed_def_regs` avoids this by allowing the late def registers to be reused without making it /// possible for this scratch register scenario to happen. - freed_def_pregs: PartedByRegClass>, + freed_def_pregs: PartedByRegClass, /// Used to keep track of which used vregs are seen for the first time /// in the instruction, that is, if the vregs live past the current instruction. 
/// This is used to determine whether or not reused operands @@ -178,9 +185,9 @@ impl<'a, F: Function> Env<'a, F> { live_vregs: HashSet::with_capacity(func.num_vregs()), freepregs: PartedByRegClass { items: [ - BTreeSet::from_iter(regs[0].clone()), - BTreeSet::from_iter(regs[1].clone()), - BTreeSet::from_iter(regs[2].clone()), + PRegSet::from_iter(regs[0].iter().cloned()), + PRegSet::from_iter(regs[1].iter().cloned()), + PRegSet::from_iter(regs[2].iter().cloned()), ] }, lrus: Lrus::new( @@ -197,10 +204,10 @@ impl<'a, F: Function> Env<'a, F> { ] }, inst_pre_edits: VecDeque::new(), inst_post_edits: VecDeque::new(), - free_after_curr_inst: HashSet::new(), + free_after_curr_inst: PRegSet::empty(), vregs_allocd_in_curr_inst: HashSet::new(), use_vregs_saved_and_restored_in_curr_inst: HashSet::new(), - freed_def_pregs: PartedByRegClass { items: [BTreeSet::new(), BTreeSet::new(), BTreeSet::new()] }, + freed_def_pregs: PartedByRegClass { items: [PRegSet::empty(), PRegSet::empty(), PRegSet::empty()] }, vregs_first_seen_in_curr_inst: HashSet::new(), liveout_vregs: HashSet::new(), inst_needs_scratch_reg: PartedByRegClass { items: [false, false, false] }, @@ -231,17 +238,17 @@ impl<'a, F: Function> Env<'a, F> { fn add_freed_regs_to_freelist(&mut self) { for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { - for preg in self.freed_def_pregs[class].iter().cloned() { - self.freepregs[class].insert(preg); + for preg in self.freed_def_pregs[class] { + self.freepregs[class].add(preg); self.lrus[class].append(preg.hw_enc()); } - self.freed_def_pregs[class].clear(); + self.freed_def_pregs[class] = PRegSet::empty(); } - for preg in self.free_after_curr_inst.iter().cloned() { - self.freepregs[preg.class()].insert(preg); + for preg in self.free_after_curr_inst { + self.freepregs[preg.class()].add(preg); self.lrus[preg.class()].append(preg.hw_enc()); } - self.free_after_curr_inst.clear(); + self.free_after_curr_inst = PRegSet::empty(); } /// The scratch registers needed for 
processing the edits generated @@ -262,7 +269,10 @@ impl<'a, F: Function> Env<'a, F> { scratch_regs[class] = self.dedicated_scratch_regs[class]; } else { trace!("No dedicated scratch register for class {:?}. Using the last free register", class); - scratch_regs[class] = Some(*self.freepregs[class].last().expect("Allocation impossible?")); + scratch_regs[class] = Some(self.freepregs[class] + .into_iter() + .next() + .expect("Allocation impossible?")); } } } @@ -282,9 +292,9 @@ impl<'a, F: Function> Env<'a, F> { scratch_regs[class] = Some(reg); } else { trace!("class {:?} has no dedicated scratch register", class); - let reg = if let Some(preg) = self.freepregs[class].last() { + let reg = if let Some(preg) = self.freepregs[class].into_iter().next() { trace!("Using the last free {:?} register for scratch", class); - *preg + preg } else { trace!("No free {:?} registers. Evicting a register", class); self.evict_any_reg(inst, class) @@ -472,14 +482,14 @@ impl<'a, F: Function> Env<'a, F> { // By adding it to this list, instead of freed_def_pregs, the only way // a clobber can be newly allocated to a vreg in the instruction is to // use a fixed register constraint. - self.free_after_curr_inst.insert(preg); + self.free_after_curr_inst.add(preg); // No need to remove the preg from the LRU because clobbers // have already been removed from the LRU. } else { // Added to the freed def pregs list, not the free pregs // list to avoid a def's allocated register being used // as a scratch register. - self.freed_def_pregs[vreg.class()].insert(preg); + self.freed_def_pregs[vreg.class()].add(preg); // Don't allow this register to be evicted. self.lrus[vreg.class()].remove(preg.hw_enc()); } @@ -540,7 +550,7 @@ impl<'a, F: Function> Env<'a, F> { // a freed def operand. 
&& !self.reused_inputs_in_curr_inst.contains(&op_idx) { - if let Some(freed_def_preg) = self.freed_def_pregs[op.class()].pop_last() { + if let Some(freed_def_preg) = remove_any_from_pregset(&mut self.freed_def_pregs[op.class()]) { trace!("Reusing the freed def preg: {:?}", freed_def_preg); self.lrus[freed_def_preg.class()].append_and_poke(freed_def_preg); self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(freed_def_preg); @@ -549,12 +559,12 @@ impl<'a, F: Function> Env<'a, F> { } } if !allocd { - let preg = if self.freepregs[op.class()].is_empty() { + let preg = if self.freepregs[op.class()] == PRegSet::empty() { trace!("Evicting a register"); self.evict_any_reg(inst, op.class()) } else { trace!("Getting a register from freepregs"); - self.freepregs[op.class()].pop_last().unwrap() + remove_any_from_pregset(&mut self.freepregs[op.class()]).unwrap() }; trace!("The allocated register for vreg {:?}: {:?}", preg, op.vreg()); self.lrus[op.class()].poke(preg); @@ -581,7 +591,7 @@ impl<'a, F: Function> Env<'a, F> { // TODO: Check if the evicted register is a register in the // current instruction. If it is, then there's a problem. self.evict_vreg_in_preg(inst, preg); - } else if self.freed_def_pregs[preg.class()].contains(&preg) { + } else if self.freed_def_pregs[preg.class()].contains(preg) { // Consider the scenario: // def v0 (fixed: p0), use v1 (fixed: p0) // In the above, p0 has already been used for v0, and since it's a @@ -590,9 +600,9 @@ impl<'a, F: Function> Env<'a, F> { // has finished processing. // To avoid the preg being added back to the free list, it must be removed // from `freed_def_pregs` here. 
- self.freed_def_pregs[preg.class()].remove(&preg); + self.freed_def_pregs[preg.class()].remove(preg); self.lrus[preg.class()].append(preg.hw_enc()); - } else if self.free_after_curr_inst.contains(&preg) { + } else if self.free_after_curr_inst.contains(preg) { // If the new allocation was once a freed prev_alloc, remove it // from the free after current inst list. // For example: @@ -608,7 +618,7 @@ impl<'a, F: Function> Env<'a, F> { // To prevent reallocating a register while a live one is still in it, // this register has to be removed from the list. trace!("{:?} is now using preg {:?}. Removing it from the free after instruction list", op.vreg(), preg); - self.free_after_curr_inst.remove(&preg); + self.free_after_curr_inst.remove(preg); if is_allocatable { self.lrus[preg.class()].append(preg.hw_enc()); } @@ -616,7 +626,7 @@ impl<'a, F: Function> Env<'a, F> { // Find the register in the list of free registers (if it's there). // If it's not there, then it must be be a fixed stack slot or // a clobber, since clobbers are removed from the free preg list before allocation begins. - self.freepregs[op.class()].remove(&preg); + self.freepregs[op.class()].remove(preg); } if is_allocatable { self.lrus[op.class()].poke(preg); @@ -840,7 +850,7 @@ impl<'a, F: Function> Env<'a, F> { // and will be freed after the instruction has completed processing // if no vreg is still present in it. if !self.func.inst_clobbers(inst).contains(preg) { - self.free_after_curr_inst.insert(preg); + self.free_after_curr_inst.add(preg); self.lrus[preg.class()].remove(preg.hw_enc()); } else { trace!("{:?} is a clobber, so not bothering with the state update", preg); @@ -1310,7 +1320,7 @@ impl<'a, F: Function> Env<'a, F> { // constraint to use a clobber. 
if self.allocatable_regs.contains(preg) { trace!("Removing {:?} from the freelist because it's a clobber", preg); - self.freepregs[preg.class()].remove(&preg); + self.freepregs[preg.class()].remove(preg); self.lrus[preg.class()].remove(preg.hw_enc()); } } @@ -1387,7 +1397,7 @@ impl<'a, F: Function> Env<'a, F> { // To avoid this scenario, the registers are added to the // `free_after_curr_inst` instead, to ensure that it isn't used as // a scratch register. - self.free_after_curr_inst.insert(preg); + self.free_after_curr_inst.add(preg); } else { // Something is still in the clobber. // After this instruction, it's no longer a clobber. @@ -1462,7 +1472,7 @@ impl<'a, F: Function> Env<'a, F> { // Using this instead of directly adding it to // freepregs to prevent allocated registers from being // used as scratch registers. - self.freed_def_pregs[preg.class()].insert(preg); + self.freed_def_pregs[preg.class()].add(preg); self.lrus[preg.class()].remove(preg.hw_enc()); } } From 2ea4e2d2b6b5572dc94575451ad27c2df56727c3 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Wed, 7 Aug 2024 17:29:38 +0100 Subject: [PATCH 27/95] now using u8s in the LRU. moved unnecessary cfg calculations --- src/fastalloc/lru.rs | 118 ++++++++++++++++++++++--------------------- src/fastalloc/mod.rs | 4 +- 2 files changed, 63 insertions(+), 59 deletions(-) diff --git a/src/fastalloc/lru.rs b/src/fastalloc/lru.rs index 267359a7..5ff08b4c 100644 --- a/src/fastalloc/lru.rs +++ b/src/fastalloc/lru.rs @@ -15,7 +15,7 @@ pub struct Lru { /// The index of a node is the `address` from the perspective of the linked list. pub data: Vec, /// Index of the most recently used register. - pub head: usize, + pub head: u8, /// Class of registers in the cache. pub regclass: RegClass, } @@ -23,14 +23,14 @@ pub struct Lru { #[derive(Clone, Copy, Debug)] pub struct LruNode { /// The previous physical register in the list. - pub prev: usize, + pub prev: u8, /// The next physical register in the list. 
- pub next: usize, + pub next: u8, } impl Lru { pub fn new(regclass: RegClass, regs: &[PReg]) -> Self { - let mut data = vec![LruNode { prev: usize::MAX, next: usize::MAX }; PReg::MAX + 1]; + let mut data = vec![LruNode { prev: u8::MAX, next: u8::MAX }; PReg::MAX + 1]; let no_of_regs = regs.len(); for i in 0..no_of_regs { let (reg, prev_reg, next_reg) = ( @@ -38,11 +38,11 @@ impl Lru { regs[i.checked_sub(1).unwrap_or(no_of_regs - 1)], regs[if i >= no_of_regs - 1 { 0 } else { i + 1 }] ); - data[reg.hw_enc()].prev = prev_reg.hw_enc(); - data[reg.hw_enc()].next = next_reg.hw_enc(); + data[reg.hw_enc()].prev = prev_reg.hw_enc() as u8; + data[reg.hw_enc()].next = next_reg.hw_enc() as u8; } Self { - head: if regs.is_empty() { usize::MAX } else { regs[0].hw_enc() }, + head: if regs.is_empty() { u8::MAX } else { regs[0].hw_enc() as u8 }, data, regclass, } @@ -54,15 +54,15 @@ impl Lru { trace!("Before poking: {:?} LRU. head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); trace!("About to poke {:?} in {:?} LRU", preg, self.regclass); let prev_newest = self.head; - let i = preg.hw_enc(); - if i == prev_newest { + let hw_enc = preg.hw_enc() as u8; + if hw_enc == prev_newest { return; } - if self.data[prev_newest].prev != i { - self.remove(i); - self.insert_before(i, self.head); + if self.data[prev_newest as usize].prev != hw_enc { + self.remove(hw_enc as usize); + self.insert_before(hw_enc, self.head); } - self.head = i; + self.head = hw_enc; trace!("Poked {:?} in {:?} LRU", preg, self.regclass); #[cfg(debug_assertions)] self.validate_lru(); @@ -75,51 +75,55 @@ impl Lru { if self.is_empty() { panic!("LRU is empty"); } - let oldest = self.data[self.head].prev; + let oldest = self.data[self.head as usize].prev; trace!("Popped p{oldest} in {:?} LRU", self.regclass); #[cfg(debug_assertions)] self.validate_lru(); - PReg::new(oldest, self.regclass) + PReg::new(oldest as usize, self.regclass) } /// Splices out a node from the list. 
- pub fn remove(&mut self, i: usize) { + pub fn remove(&mut self, hw_enc: usize) { trace!("Before removing: {:?} LRU. head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); - trace!("Removing p{i} from {:?} LRU", self.regclass); - let (iprev, inext) = (self.data[i].prev, self.data[i].next); - self.data[iprev].next = self.data[i].next; - self.data[inext].prev = self.data[i].prev; - self.data[i].prev = usize::MAX; - self.data[i].next = usize::MAX; - if i == self.head { - if i == inext { + trace!("Removing p{hw_enc} from {:?} LRU", self.regclass); + let (iprev, inext) = ( + self.data[hw_enc].prev as usize, + self.data[hw_enc].next as usize + ); + self.data[iprev].next = self.data[hw_enc].next; + self.data[inext].prev = self.data[hw_enc].prev; + self.data[hw_enc].prev = u8::MAX; + self.data[hw_enc].next = u8::MAX; + if hw_enc == self.head as usize { + if hw_enc == inext { // There are no regs in the LRU - self.head = usize::MAX; + self.head = u8::MAX; } else { - self.head = inext; + self.head = inext as u8; } } - trace!("Removed p{i} from {:?} LRU", self.regclass); + trace!("Removed p{hw_enc} from {:?} LRU", self.regclass); #[cfg(debug_assertions)] self.validate_lru(); } /// Sets the node `i` to the last in the list. - pub fn append(&mut self, i: usize) { + pub fn append(&mut self, hw_enc: usize) { trace!("Before appending: {:?} LRU. 
head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); - trace!("Appending p{i} to the {:?} LRU", self.regclass); - if self.head != usize::MAX { - let last_node = self.data[self.head].prev; - self.data[last_node].next = i; - self.data[self.head].prev = i; - self.data[i].prev = last_node; - self.data[i].next = self.head; + trace!("Appending p{hw_enc} to the {:?} LRU", self.regclass); + if self.head != u8::MAX { + let head = self.head as usize; + let last_node = self.data[head].prev; + self.data[last_node as usize].next = hw_enc as u8; + self.data[head].prev = hw_enc as u8; + self.data[hw_enc].prev = last_node; + self.data[hw_enc].next = self.head; } else { - self.head = i; - self.data[i].prev = i; - self.data[i].next = i; + self.head = hw_enc as u8; + self.data[hw_enc].prev = hw_enc as u8; + self.data[hw_enc].next = hw_enc as u8; } - trace!("Appended p{i} to the {:?} LRU", self.regclass); + trace!("Appended p{hw_enc} to the {:?} LRU", self.regclass); #[cfg(debug_assertions)] self.validate_lru(); } @@ -130,13 +134,13 @@ impl Lru { } /// Insert node `i` before node `j` in the list. - pub fn insert_before(&mut self, i: usize, j: usize) { + pub fn insert_before(&mut self, i: u8, j: u8) { trace!("Before inserting: {:?} LRU. head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); trace!("Inserting p{i} before {j} in {:?} LRU", self.regclass); - let prev = self.data[j].prev; - self.data[prev].next = i; - self.data[j].prev = i; - self.data[i] = LruNode { + let prev = self.data[j as usize].prev; + self.data[prev as usize].next = i; + self.data[j as usize].prev = i; + self.data[i as usize] = LruNode { next: j, prev, }; @@ -146,14 +150,14 @@ impl Lru { } pub fn is_empty(&self) -> bool { - self.head == usize::MAX + self.head == u8::MAX } // Using this to debug. fn validate_lru(&self) { trace!("{:?} LRU. 
head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); - if self.head != usize::MAX { - let mut node = self.data[self.head].next; + if self.head != u8::MAX { + let mut node = self.data[self.head as usize].next; let mut seen = HashSet::new(); while node != self.head { if seen.contains(&node) { @@ -161,21 +165,21 @@ impl Lru { head: {:?}, actual data: {:?}", self.regclass, self.head, self.data); } seen.insert(node); - node = self.data[node].next; + node = self.data[node as usize].next; } for i in 0..self.data.len() { - if self.data[i].prev == usize::MAX && self.data[i].next == usize::MAX { + if self.data[i].prev == u8::MAX && self.data[i].next == u8::MAX { // Removed continue; } - if self.data[i].prev == usize::MAX || self.data[i].next == usize::MAX { + if self.data[i].prev == u8::MAX || self.data[i].next == u8::MAX { panic!("Invalid LRU. p{} next or previous is an invalid value, but not both", i); } - if self.data[self.data[i].prev].next != i { - panic!("Invalid LRU. p{i} prev is p{:?}, but p{:?} next is {:?}", self.data[i].prev, self.data[i].prev, self.data[self.data[i].prev].next); + if self.data[self.data[i].prev as usize].next != i as u8 { + panic!("Invalid LRU. p{i} prev is p{:?}, but p{:?} next is {:?}", self.data[i].prev, self.data[i].prev, self.data[self.data[i].prev as usize].next); } - if self.data[self.data[i].next].prev != i { - panic!("Invalid LRU. p{i} next is p{:?}, but p{:?} prev is p{:?}", self.data[i].next, self.data[i].next, self.data[self.data[i].next].prev); + if self.data[self.data[i].next as usize].prev != i as u8 { + panic!("Invalid LRU. 
p{i} next is p{:?}, but p{:?} prev is p{:?}", self.data[i].next, self.data[i].next, self.data[self.data[i].next as usize].prev); } } } @@ -185,11 +189,11 @@ impl Lru { impl fmt::Debug for Lru { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { use std::format; - let data_str = if self.head == usize::MAX { + let data_str = if self.head == u8::MAX { format!("") } else { let mut data_str = format!("p{}", self.head); - let mut node = self.data[self.head].next; + let mut node = self.data[self.head as usize].next; let mut seen = HashSet::new(); while node != self.head { if seen.contains(&node) { @@ -198,7 +202,7 @@ impl fmt::Debug for Lru { } seen.insert(node); data_str += &format!(" -> p{}", node); - node = self.data[node].next; + node = self.data[node as usize].next; } data_str }; diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index b9a2acd9..bf00b943 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -4,7 +4,7 @@ use core::ops::{Index, IndexMut}; use crate::{AllocationKind, Block, Inst, InstPosition, Operand, OperandConstraint, OperandKind, OperandPos, PReg, PRegSet, RegClass, SpillSlot, VReg}; use crate::{Function, MachineEnv, ssa::validate_ssa, ProgPoint, Edit, Output}; use crate::{cfg::CFGInfo, RegAllocError, Allocation, ion::Stats}; -use alloc::collections::{BTreeSet, VecDeque}; +use alloc::collections::VecDeque; use alloc::vec::Vec; use hashbrown::{HashSet, HashMap}; @@ -1623,9 +1623,9 @@ pub fn run( enable_annotations: bool, enable_ssa_checker: bool, ) -> Result { - let cfginfo = CFGInfo::new(func)?; if enable_ssa_checker { + let cfginfo = CFGInfo::new(func)?; validate_ssa(func, &cfginfo)?; } From 2dd493fcf02dd4ffe96e6a6d4e141e70c20b49f7 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Wed, 7 Aug 2024 20:26:23 +0100 Subject: [PATCH 28/95] refactor operand iterators --- src/fastalloc/iter.rs | 516 +++++++++--------------------------------- src/fastalloc/mod.rs | 27 ++- 2 files changed, 119 insertions(+), 424 deletions(-) diff --git 
a/src/fastalloc/iter.rs b/src/fastalloc/iter.rs index dbbd2e34..8041afff 100644 --- a/src/fastalloc/iter.rs +++ b/src/fastalloc/iter.rs @@ -9,399 +9,105 @@ enum OperandConstraintKind { Reuse, } -impl PartialEq for OperandConstraintKind { - fn eq(&self, other: &OperandConstraint) -> bool { - match other { - OperandConstraint::Any => *self == Self::Any, - OperandConstraint::Reg => *self == Self::Reg, - OperandConstraint::Stack => *self == Self::Stack, - OperandConstraint::FixedReg(_) => *self == Self::FixedReg, - OperandConstraint::Reuse(_) => *self == Self::Reuse, +impl From for OperandConstraintKind { + fn from(constraint: OperandConstraint) -> Self { + match constraint { + OperandConstraint::Any => Self::Any, + OperandConstraint::Reg => Self::Reg, + OperandConstraint::Stack => Self::Stack, + OperandConstraint::FixedReg(_) => Self::FixedReg, + OperandConstraint::Reuse(_) => Self::Reuse, } } } -#[derive(Clone, Copy, PartialEq)] -struct SearchConstraint { - kind: Option, - pos: Option, - must_not_have_constraints: [Option; 2], - must_have_constraint: Option, -} - -impl SearchConstraint { - fn meets_constraint(&self, op: Operand) -> bool { - match self.pos { - None => (), - Some(expected_pos) => if op.pos() != expected_pos { - return false; - } - }; - match self.kind { - None => (), - Some(expected_kind) => if op.kind() != expected_kind { - return false; - } - }; - for must_not_have_constraint in self.must_not_have_constraints.iter().cloned() { - match must_not_have_constraint { - None => (), - Some(should_not_be_constraint) => if should_not_be_constraint == op.constraint() { - return false; - } - } - } - match self.must_have_constraint { - None => (), - Some(should_be_constraint) => if should_be_constraint != op.constraint() { - return false; - } - } - true - } -} - -struct Operands<'a> { - operands: &'a [Operand], - idx: usize, - search_constraint: SearchConstraint, -} +pub struct Operands<'a>(pub &'a [Operand]); impl<'a> Operands<'a> { - fn new(operands: &'a 
[Operand], search_constraint: SearchConstraint) -> Self { - Self { operands, search_constraint, idx: 0 } - } -} - -impl<'a> Iterator for Operands<'a> { - type Item = (usize, Operand); - - fn next(&mut self) -> Option { - while self.idx < self.operands.len() - && !self.search_constraint.meets_constraint(self.operands[self.idx]) - { - self.idx += 1; - } - if self.idx >= self.operands.len() { - None - } else { - self.idx += 1; - Some((self.idx - 1, self.operands[self.idx - 1])) - } - } -} - - -/*/// Iterate over operands in position `pos` and kind -/// `kind` in no particular order. -struct ByKindAndPosOperands<'a> { - operands: &'a [Operand], - idx: usize, - kind: OperandKind, - pos: OperandPos, -} - -impl<'a> ByKindAndPosOperands<'a> { - fn new(operands: &'a [Operand], kind: OperandKind, pos: OperandPos) -> Self { - Self { operands, idx: 0, kind, pos } - } -} - -impl<'a> Iterator for ByKindAndPosOperands<'a> { - type Item = (usize, Operand); - - fn next(&mut self) -> Option { - while self.idx < self.operands.len() && (self.operands[self.idx].kind() != self.kind - || self.operands[self.idx].pos() != self.pos) { - self.idx += 1; - } - if self.idx >= self.operands.len() { - None - } else { - self.idx += 1; - Some((self.idx - 1, self.operands[self.idx - 1])) - } - } -} - -/// Iterate over operands with position `pos` starting from the ones with -/// fixed registers, then the rest. 
-struct ByPosOperands<'a> { - operands: &'a [Operand], - idx: usize, - looking_for: LookingFor, - pos: OperandPos, -} - -impl<'a> ByPosOperands<'a> { - fn new(operands: &'a [Operand], pos: OperandPos) -> Self { - Self { operands, idx: 0, looking_for: LookingFor::FixedReg, pos } - } -} - -impl<'a> ByPosOperands<'a> { - fn next_fixed_reg(&mut self) -> Option<(usize, Operand)> { - while self.idx < self.operands.len() && (self.operands[self.idx].pos() != self.pos - || !matches!(self.operands[self.idx].constraint(), OperandConstraint::FixedReg(_))) { - self.idx += 1; - } - if self.idx >= self.operands.len() { - None - } else { - self.idx += 1; - Some((self.idx - 1, self.operands[self.idx - 1])) - } - } - - fn next_others(&mut self) -> Option<(usize, Operand)> { - while self.idx < self.operands.len() && (self.operands[self.idx].pos() != self.pos - || matches!(self.operands[self.idx].constraint(), OperandConstraint::FixedReg(_))) { - self.idx += 1; - } - if self.idx >= self.operands.len() { - None - } else { - self.idx += 1; - Some((self.idx - 1, self.operands[self.idx - 1])) - } - } -} - -impl<'a> Iterator for ByPosOperands<'a> { - type Item = (usize, Operand); - - fn next(&mut self) -> Option { - if self.idx >= self.operands.len() { - if self.looking_for == LookingFor::FixedReg { - self.idx = 0; - self.looking_for = LookingFor::Others; - } else { - return None; - } - } - match self.looking_for { - LookingFor::FixedReg => { - let next = self.next_fixed_reg(); - if next.is_none() { - self.next() - } else { - next - } - }, - LookingFor::Others => self.next_others(), - } - } -}*/ - -pub struct NonFixedNonReuseLateOperands<'a>(Operands<'a>); - -impl<'a> NonFixedNonReuseLateOperands<'a> { - pub fn new(operands: &'a [Operand]) -> Self { - Self(Operands::new(operands, SearchConstraint { - pos: Some(OperandPos::Late), - kind: None, - must_not_have_constraints: [Some(OperandConstraintKind::Reuse), Some(OperandConstraintKind::FixedReg)], - must_have_constraint: None, - })) - } -} 
- -impl<'a> Iterator for NonFixedNonReuseLateOperands<'a> { - type Item = (usize, Operand); - - fn next(&mut self) -> Option { - self.0.next() - } -} - -pub struct NonFixedNonReuseEarlyOperands<'a>(Operands<'a>); - -impl<'a> NonFixedNonReuseEarlyOperands<'a> { - pub fn new(operands: &'a [Operand]) -> Self { - Self(Operands::new(operands, SearchConstraint { - pos: Some(OperandPos::Early), - kind: None, - must_not_have_constraints: [Some(OperandConstraintKind::Reuse), Some(OperandConstraintKind::FixedReg)], - must_have_constraint: None, - })) - } -} - -impl<'a> Iterator for NonFixedNonReuseEarlyOperands<'a> { - type Item = (usize, Operand); - - fn next(&mut self) -> Option { - self.0.next() - } -} - -pub struct NonFixedNonReuseLateDefOperands<'a>(Operands<'a>); - -impl<'a> NonFixedNonReuseLateDefOperands<'a> { - pub fn new(operands: &'a [Operand]) -> Self { - Self(Operands::new(operands, SearchConstraint { - pos: Some(OperandPos::Late), - kind: Some(OperandKind::Def), - must_not_have_constraints: [Some(OperandConstraintKind::Reuse), Some(OperandConstraintKind::FixedReg)], - must_have_constraint: None, - })) - } -} - -impl<'a> Iterator for NonFixedNonReuseLateDefOperands<'a> { - type Item = (usize, Operand); - - fn next(&mut self) -> Option { - self.0.next() - } -} - -pub struct NonFixedNonReuseLateUseOperands<'a>(Operands<'a>); - -impl<'a> NonFixedNonReuseLateUseOperands<'a> { - pub fn new(operands: &'a [Operand]) -> Self { - Self(Operands::new(operands, SearchConstraint { - pos: Some(OperandPos::Late), - kind: Some(OperandKind::Use), - must_not_have_constraints: [Some(OperandConstraintKind::Reuse), Some(OperandConstraintKind::FixedReg)], - must_have_constraint: None, - })) - } -} - -impl<'a> Iterator for NonFixedNonReuseLateUseOperands<'a> { - type Item = (usize, Operand); - - fn next(&mut self) -> Option { - self.0.next() - } -} - -pub struct NonReuseLateDefOperands<'a>(Operands<'a>); - -impl<'a> NonReuseLateDefOperands<'a> { pub fn new(operands: &'a [Operand]) -> 
Self { - Self(Operands::new(operands, SearchConstraint { - kind: Some(OperandKind::Def), - pos: Some(OperandPos::Late), - must_not_have_constraints: [Some(OperandConstraintKind::Reuse), None], - must_have_constraint: None, - })) + Self(operands) } -} - -impl<'a> Iterator for NonReuseLateDefOperands<'a> { - type Item = (usize, Operand); - fn next(&mut self) -> Option { - self.0.next() + pub fn matches bool + 'a>(&self, predicate: F) -> impl Iterator + 'a { + self.0.iter() + .cloned() + .enumerate() + .filter(move |(_, op)| predicate(*op)) } -} - -pub struct NonReuseEarlyDefOperands<'a>(Operands<'a>); -impl<'a> NonReuseEarlyDefOperands<'a> { - pub fn new(operands: &'a [Operand]) -> Self { - Self(Operands::new(operands, SearchConstraint { - kind: Some(OperandKind::Def), - pos: Some(OperandPos::Early), - must_have_constraint: None, - must_not_have_constraints: [Some(OperandConstraintKind::Reuse), None], - })) + pub fn non_fixed_non_reuse_late(&self) -> impl Iterator + 'a { + self.matches(|op| + OperandConstraintKind::FixedReg != op.constraint().into() + && OperandConstraintKind::Reuse != op.constraint().into() + && op.pos() == OperandPos::Late + ) } -} - -impl<'a> Iterator for NonReuseEarlyDefOperands<'a> { - type Item = (usize, Operand); - fn next(&mut self) -> Option { - self.0.next() + pub fn non_reuse_late_def(&self) -> impl Iterator + 'a { + self.matches(|op| + OperandConstraintKind::Reuse != op.constraint().into() + && op.pos() == OperandPos::Late + && op.kind() == OperandKind::Def + ) } -} -/// Operands that reuse and input allocation. -/// They are all expected to be def operands. 
-pub struct ReuseOperands<'a>(Operands<'a>); - -impl<'a> ReuseOperands<'a> { - pub fn new(operands: &'a [Operand]) -> Self { - Self(Operands::new(operands, SearchConstraint { - kind: None, - pos: None, - must_have_constraint: Some(OperandConstraintKind::Reuse), - must_not_have_constraints: [None, None], - })) + pub fn non_fixed_non_reuse_early(&self) -> impl Iterator + 'a { + self.matches(|op| + OperandConstraintKind::FixedReg != op.constraint().into() + && OperandConstraintKind::Reuse != op.constraint().into() + && op.pos() == OperandPos::Early + ) } -} -impl<'a> Iterator for ReuseOperands<'a> { - type Item = (usize, Operand); - - fn next(&mut self) -> Option { - self.0.next() + pub fn reuse(&self) -> impl Iterator + 'a { + self.matches(|op| OperandConstraintKind::Reuse == op.constraint().into()) } -} -pub struct FixedLateOperands<'a>(Operands<'a>); - -impl<'a> FixedLateOperands<'a> { - pub fn new(operands: &'a [Operand]) -> Self { - Self(Operands::new(operands, SearchConstraint { - kind: None, - pos: Some(OperandPos::Late), - must_have_constraint: Some(OperandConstraintKind::FixedReg), - must_not_have_constraints: [None, None], - })) + pub fn non_reuse_early_def(&self) -> impl Iterator + 'a { + self.matches(|op| + OperandConstraintKind::Reuse != op.constraint().into() + && op.pos() == OperandPos::Early + && op.kind() == OperandKind::Def + ) } -} -impl<'a> Iterator for FixedLateOperands<'a> { - type Item = (usize, Operand); - - fn next(&mut self) -> Option { - self.0.next() + pub fn fixed_early(&self) -> impl Iterator + 'a { + self.matches(|op| + OperandConstraintKind::FixedReg == op.constraint().into() + && op.pos() == OperandPos::Early + ) } -} - -pub struct FixedEarlyOperands<'a>(Operands<'a>); -impl<'a> FixedEarlyOperands<'a> { - pub fn new(operands: &'a [Operand]) -> Self { - Self(Operands::new(operands, SearchConstraint { - kind: None, - pos: Some(OperandPos::Early), - must_have_constraint: Some(OperandConstraintKind::FixedReg), - must_not_have_constraints: 
[None, None], - })) + pub fn fixed_late(&self) -> impl Iterator + 'a { + self.matches(|op| + OperandConstraintKind::FixedReg == op.constraint().into() + && op.pos() == OperandPos::Late + ) } -} - -impl<'a> Iterator for FixedEarlyOperands<'a> { - type Item = (usize, Operand); - fn next(&mut self) -> Option { - self.0.next() + pub fn non_reuse_def(&self) -> impl Iterator + 'a { + self.matches(|op| + OperandConstraintKind::Reuse != op.constraint().into() + && op.kind() == OperandKind::Def + ) } -} -pub struct NonReuseDefOperands<'a>(Operands<'a>); - -impl<'a> NonReuseDefOperands<'a> { - pub fn new(operands: &'a [Operand]) -> Self { - Self(Operands::new(operands, SearchConstraint { - kind: Some(OperandKind::Def), - pos: None, - must_have_constraint: None, - must_not_have_constraints: [Some(OperandConstraintKind::Reuse), None], - })) + pub fn non_fixed_non_reuse_late_use(&self) -> impl Iterator + 'a { + self.matches(|op| + OperandConstraintKind::FixedReg != op.constraint().into() + && OperandConstraintKind::Reuse != op.constraint().into() + && op.pos() == OperandPos::Late + && op.kind() == OperandKind::Use + ) } -} -impl<'a> Iterator for NonReuseDefOperands<'a> { - type Item = (usize, Operand); - - fn next(&mut self) -> Option { - self.0.next() + pub fn non_fixed_non_reuse_late_def(&self) -> impl Iterator + 'a { + self.matches(|op| + OperandConstraintKind::FixedReg != op.constraint().into() + && OperandConstraintKind::Reuse != op.constraint().into() + && op.pos() == OperandPos::Late + && op.kind() == OperandKind::Def + ) } } @@ -526,118 +232,108 @@ mod tests { #[test] fn late() { - let late_operands: Vec = NonFixedNonReuseLateOperands::new(&OPERANDS) - .map(|(_, op)| op) + let late_operands: Vec<(usize, Operand)> = Operands::new(&OPERANDS).non_fixed_non_reuse_late() .collect(); assert_eq!(late_operands, vec![ - late_def_operand(1), - late_use_operand(6), + (1, late_def_operand(1)), + (6, late_use_operand(6)), ]); } #[test] fn late_def() { - let late_def_operands: Vec = 
NonReuseLateDefOperands::new(&OPERANDS) - .map(|(_, op)| op) + let late_def_operands: Vec<(usize, Operand)> = Operands::new(&OPERANDS).non_reuse_late_def() .collect(); assert_eq!(late_def_operands, vec![ - late_def_operand(1), - fixed_late_def_operand(10), + (1, late_def_operand(1)), + (10, fixed_late_def_operand(10)), ]); } #[test] fn early() { - let early_operands: Vec = NonFixedNonReuseEarlyOperands::new(&OPERANDS) - .map(|(_, op)| op) + let early_operands: Vec<(usize, Operand)> = Operands::new(&OPERANDS).non_fixed_non_reuse_early() .collect(); assert_eq!(early_operands, vec![ - early_use_operand(3), - early_def_operand(4), - early_def_operand(8), - early_use_operand(9), + (3, early_use_operand(3)), + (4, early_def_operand(4)), + (8, early_def_operand(8)), + (9, early_use_operand(9)), ]); } #[test] fn early_def() { - let early_def_operands: Vec = NonReuseEarlyDefOperands::new(&OPERANDS) - .map(|(_, op)| op) + let early_def_operands: Vec<(usize, Operand)> = Operands::new(&OPERANDS).non_reuse_early_def() .collect(); assert_eq!(early_def_operands, vec![ - early_def_operand(4), - early_def_operand(8), - fixed_early_def_operand(11), + (4, early_def_operand(4)), + (8, early_def_operand(8)), + (11, fixed_early_def_operand(11)), ]); } #[test] fn reuse() { - let reuse_operands: Vec = ReuseOperands::new(&OPERANDS) - .map(|(_, op)| op) + let reuse_operands: Vec<(usize, Operand)> = Operands::new(&OPERANDS).reuse() .collect(); assert_eq!(reuse_operands, vec![ - late_reuse_def_operand(0), - early_reuse_def_operand(2), - late_reuse_def_operand(5), - late_reuse_use_operand(7), + (0, late_reuse_def_operand(0)), + (2, early_reuse_def_operand(2)), + (5, late_reuse_def_operand(5)), + (7, late_reuse_use_operand(7)), ]); } #[test] fn fixed_late() { - let fixed_late_operands: Vec = FixedLateOperands::new(&OPERANDS) - .map(|(_, op)| op) + let fixed_late_operands: Vec<(usize, Operand)> = Operands::new(&OPERANDS).fixed_late() .collect(); assert_eq!(fixed_late_operands, vec![ - 
fixed_late_def_operand(10), - fixed_late_use_operand(12), + (10, fixed_late_def_operand(10)), + (12, fixed_late_use_operand(12)), ]); } #[test] fn fixed_early() { - let fixed_early_operands: Vec = FixedEarlyOperands::new(&OPERANDS) - .map(|(_, op)| op) + let fixed_early_operands: Vec<(usize, Operand)> = Operands::new(&OPERANDS).fixed_early() .collect(); assert_eq!(fixed_early_operands, vec![ - fixed_early_def_operand(11), - fixed_early_use_operand(13), + (11, fixed_early_def_operand(11)), + (13, fixed_early_use_operand(13)), ]); } #[test] fn def() { - let def_operands: Vec = NonReuseDefOperands::new(&OPERANDS) - .map(|(_, op)| op) + let def_operands: Vec<(usize, Operand)> = Operands::new(&OPERANDS).non_reuse_def() .collect(); assert_eq!(def_operands, vec![ - late_def_operand(1), - early_def_operand(4), - early_def_operand(8), - fixed_late_def_operand(10), - fixed_early_def_operand(11), + (1, late_def_operand(1)), + (4, early_def_operand(4)), + (8, early_def_operand(8)), + (10, fixed_late_def_operand(10)), + (11, fixed_early_def_operand(11)), ]); } #[test] fn non_fixed_non_reuse_late_def() { - let def_operands: Vec = NonFixedNonReuseLateDefOperands::new(&OPERANDS) - .map(|(_, op)| op) + let def_operands: Vec<(usize, Operand)> = Operands::new(&OPERANDS).non_fixed_non_reuse_late_def() .collect(); assert_eq!(def_operands, vec![ - late_def_operand(1), + (1, late_def_operand(1)), ]); } #[test] fn non_fixed_non_reuse_late_use() { - let late_operands: Vec = NonFixedNonReuseLateUseOperands::new(&OPERANDS) - .map(|(_, op)| op) + let late_operands: Vec<(usize, Operand)> = Operands::new(&OPERANDS).non_fixed_non_reuse_late_use() .collect(); assert_eq!(late_operands, vec![ - late_use_operand(6), + (6, late_use_operand(6)), ]); } } diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index bf00b943..6286c5f2 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1308,7 +1308,7 @@ impl<'a, F: Function> Env<'a, F> { if self.func.is_branch(inst) { 
self.process_branch(block, inst); } - let operands = self.func.inst_operands(inst); + let operands = Operands::new(self.func.inst_operands(inst)); let clobbers = self.func.inst_clobbers(inst); for preg in clobbers { // To avoid allocating clobbers, they are removed from the @@ -1324,38 +1324,38 @@ impl<'a, F: Function> Env<'a, F> { self.lrus[preg.class()].remove(preg.hw_enc()); } } - for (_, op) in ReuseOperands::new(operands) { + for (_, op) in operands.reuse() { let OperandConstraint::Reuse(reused_idx) = op.constraint() else { unreachable!() }; self.reused_inputs_in_curr_inst.push(reused_idx); } - for (op_idx, op) in FixedLateOperands::new(operands) { + for (op_idx, op) in operands.fixed_late() { self.process_operand_allocation(inst, op, op_idx); } - for (op_idx, op) in NonFixedNonReuseLateDefOperands::new(operands) { + for (op_idx, op) in operands.non_fixed_non_reuse_late_def() { self.process_operand_allocation(inst, op, op_idx); } - for (_, op) in NonReuseLateDefOperands::new(operands) { + for (_, op) in operands.non_reuse_late_def() { self.freealloc(op.vreg(), clobbers); } - for (op_idx, op) in FixedEarlyOperands::new(operands) { + for (op_idx, op) in operands.fixed_early() { self.process_operand_allocation(inst, op, op_idx); } - for (op_idx, op) in NonFixedNonReuseLateUseOperands::new(operands) { + for (op_idx, op) in operands.non_fixed_non_reuse_late_use() { self.process_operand_allocation(inst, op, op_idx); } - for (op_idx, op) in NonFixedNonReuseEarlyOperands::new(operands) { + for (op_idx, op) in operands.non_fixed_non_reuse_early() { self.process_operand_allocation(inst, op, op_idx); } - for (_, op) in NonReuseEarlyDefOperands::new(operands) { + for (_, op) in operands.non_reuse_early_def() { self.freealloc(op.vreg(), clobbers); } - for (op_idx, op) in ReuseOperands::new(operands) { + for (op_idx, op) in operands.reuse() { let OperandConstraint::Reuse(reused_idx) = op.constraint() else { unreachable!() }; - self.process_reuse_operand_allocation(inst, op, 
op_idx, operands[reused_idx], clobbers); + self.process_reuse_operand_allocation(inst, op, op_idx, operands.0[reused_idx], clobbers); } self.save_and_restore_clobbered_registers(inst); for preg in self.func.inst_clobbers(inst) { @@ -1611,10 +1611,9 @@ fn log_output<'a, F: Function>(env: &Env<'a, F>) { } } trace!("VReg spillslots: {:?}", v); - trace!("\nTemp spillslots: {:?}", env.temp_spillslots); + trace!("Temp spillslots: {:?}", env.temp_spillslots); trace!("Final edits: {:?}", env.edits); - trace!("safepoint_slots: {:?}", env.safepoint_slots); - trace!("\n\n\n\n\n\n\n"); + trace!("safepoint_slots: {:?}\n", env.safepoint_slots); } pub fn run( From 709941269523a7631f22f64e995b5e09e97a583a Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Wed, 7 Aug 2024 20:49:03 +0100 Subject: [PATCH 29/95] changed interface to specify which algorithm to run --- regalloc2-tool/src/main.rs | 23 ++++++++++++++++++++--- src/lib.rs | 20 +++++++++++++------- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/regalloc2-tool/src/main.rs b/regalloc2-tool/src/main.rs index 1763456f..c224834b 100644 --- a/regalloc2-tool/src/main.rs +++ b/regalloc2-tool/src/main.rs @@ -2,8 +2,7 @@ use std::path::PathBuf; use clap::Parser; use regalloc2::{ - checker::Checker, serialize::SerializableFunction, Block, Edit, Function, InstOrEdit, Output, - RegallocOptions, + checker::Checker, serialize::SerializableFunction, Algorithm, Block, Edit, Function, InstOrEdit, Output, RegallocOptions }; #[derive(Parser)] @@ -15,6 +14,24 @@ struct Args { /// Input file containing a bincode-encoded SerializedFunction. input: PathBuf, + + /// Which register allocation algorithm to use. 
+ algorithm: CliAlgorithm, +} + +#[derive(Clone, Copy, Debug, clap::ValueEnum)] +enum CliAlgorithm { + Ion, + Fastalloc, +} + +impl From for Algorithm { + fn from(cli_algo: CliAlgorithm) -> Algorithm { + match cli_algo { + CliAlgorithm::Ion => Algorithm::Ion, + CliAlgorithm::Fastalloc => Algorithm::Fastalloc, + } + } } fn main() { @@ -32,7 +49,7 @@ fn main() { let options = RegallocOptions { verbose_log: true, validate_ssa: true, - use_fastalloc: true, + algorithm: args.algorithm.into() }; let output = match regalloc2::run(&function, function.machine_env(), &options) { Ok(output) => output, diff --git a/src/lib.rs b/src/lib.rs index 3b0c4c72..248d5a83 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,7 +34,7 @@ macro_rules! trace_enabled { }; } -use core::hash::BuildHasherDefault; +use core::{default, hash::BuildHasherDefault}; use std::iter::FromIterator; use rustc_hash::FxHasher; type FxHashMap = hashbrown::HashMap>; @@ -1559,13 +1559,19 @@ pub fn run( env: &MachineEnv, options: &RegallocOptions, ) -> Result { - if options.use_fastalloc { - fastalloc::run(func, env, options.verbose_log, options.validate_ssa) - } else { - ion::run(func, env, options.verbose_log, options.validate_ssa) + match options.algorithm { + Algorithm::Ion => ion::run(func, env, options.verbose_log, options.validate_ssa), + Algorithm::Fastalloc => fastalloc::run(func, env, options.verbose_log, options.validate_ssa) } } +#[derive(Clone, Copy, Debug, Default)] +pub enum Algorithm { + #[default] + Ion, + Fastalloc, +} + /// Options for allocation. #[derive(Clone, Copy, Debug, Default)] pub struct RegallocOptions { @@ -1575,6 +1581,6 @@ pub struct RegallocOptions { /// Run the SSA validator before allocating registers. pub validate_ssa: bool, - /// Run the SSRA algorithm - pub use_fastalloc: bool, + /// The register allocation algorithm to be used. 
+ pub algorithm: Algorithm, } From 7b13bf89627f4c3ba77e852ea3b3855ffeb6706d Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Wed, 7 Aug 2024 20:50:46 +0100 Subject: [PATCH 30/95] changed guess of number of operands --- src/fastalloc/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 6286c5f2..25cfaa10 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -25,8 +25,8 @@ struct Allocs { } impl Allocs { - fn new(func: &F, env: &MachineEnv) -> Self { - let operand_no_guess = func.num_vregs() * 3; + fn new(func: &F) -> Self { + let operand_no_guess = func.num_insts() * 3; let mut allocs = Vec::with_capacity(operand_no_guess); let mut inst_alloc_offsets = Vec::with_capacity(operand_no_guess); for inst in 0..func.num_insts() { @@ -218,7 +218,7 @@ impl<'a, F: Function> Env<'a, F> { env.scratch_by_class[1], env.scratch_by_class[2], ] }, - allocs: Allocs::new(func, env), + allocs: Allocs::new(func), edits: VecDeque::new(), safepoint_slots: Vec::new(), num_spillslots: 0, From 1fdaef44f877ce52aefe9216346d0eead4e85cce Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Wed, 7 Aug 2024 21:24:51 +0100 Subject: [PATCH 31/95] using PRegSet union to free registers --- src/fastalloc/mod.rs | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 25cfaa10..69202b08 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -97,7 +97,7 @@ pub struct Env<'a, F: Function> { /// /// This is used to keep track of them so that they can be marked as free for reallocation /// after the instruction has completed processing. 
- free_after_curr_inst: PRegSet, + free_after_curr_inst: PartedByRegClass, /// The virtual registers of use operands that have been allocated in the current instruction /// and for which edits had to be inserted to save and restore them because their constraint /// doesn't allow the allocation they are expected to be in after the instruction. @@ -204,7 +204,7 @@ impl<'a, F: Function> Env<'a, F> { ] }, inst_pre_edits: VecDeque::new(), inst_post_edits: VecDeque::new(), - free_after_curr_inst: PRegSet::empty(), + free_after_curr_inst: PartedByRegClass { items: [PRegSet::empty(), PRegSet::empty(), PRegSet::empty()] }, vregs_allocd_in_curr_inst: HashSet::new(), use_vregs_saved_and_restored_in_curr_inst: HashSet::new(), freed_def_pregs: PartedByRegClass { items: [PRegSet::empty(), PRegSet::empty(), PRegSet::empty()] }, @@ -239,16 +239,17 @@ impl<'a, F: Function> Env<'a, F> { fn add_freed_regs_to_freelist(&mut self) { for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { for preg in self.freed_def_pregs[class] { - self.freepregs[class].add(preg); self.lrus[class].append(preg.hw_enc()); } + self.freepregs[class].union_from(self.freed_def_pregs[class]); self.freed_def_pregs[class] = PRegSet::empty(); + + for preg in self.free_after_curr_inst[class] { + self.lrus[preg.class()].append(preg.hw_enc()); + } + self.freepregs[class].union_from(self.free_after_curr_inst[class]); + self.free_after_curr_inst[class] = PRegSet::empty(); } - for preg in self.free_after_curr_inst { - self.freepregs[preg.class()].add(preg); - self.lrus[preg.class()].append(preg.hw_enc()); - } - self.free_after_curr_inst = PRegSet::empty(); } /// The scratch registers needed for processing the edits generated @@ -482,7 +483,7 @@ impl<'a, F: Function> Env<'a, F> { // By adding it to this list, instead of freed_def_pregs, the only way // a clobber can be newly allocated to a vreg in the instruction is to // use a fixed register constraint. 
- self.free_after_curr_inst.add(preg); + self.free_after_curr_inst[preg.class()].add(preg); // No need to remove the preg from the LRU because clobbers // have already been removed from the LRU. } else { @@ -602,7 +603,7 @@ impl<'a, F: Function> Env<'a, F> { // from `freed_def_pregs` here. self.freed_def_pregs[preg.class()].remove(preg); self.lrus[preg.class()].append(preg.hw_enc()); - } else if self.free_after_curr_inst.contains(preg) { + } else if self.free_after_curr_inst[preg.class()].contains(preg) { // If the new allocation was once a freed prev_alloc, remove it // from the free after current inst list. // For example: @@ -618,7 +619,7 @@ impl<'a, F: Function> Env<'a, F> { // To prevent reallocating a register while a live one is still in it, // this register has to be removed from the list. trace!("{:?} is now using preg {:?}. Removing it from the free after instruction list", op.vreg(), preg); - self.free_after_curr_inst.remove(preg); + self.free_after_curr_inst[preg.class()].remove(preg); if is_allocatable { self.lrus[preg.class()].append(preg.hw_enc()); } @@ -850,7 +851,7 @@ impl<'a, F: Function> Env<'a, F> { // and will be freed after the instruction has completed processing // if no vreg is still present in it. if !self.func.inst_clobbers(inst).contains(preg) { - self.free_after_curr_inst.add(preg); + self.free_after_curr_inst[preg.class()].add(preg); self.lrus[preg.class()].remove(preg.hw_enc()); } else { trace!("{:?} is a clobber, so not bothering with the state update", preg); @@ -1397,7 +1398,7 @@ impl<'a, F: Function> Env<'a, F> { // To avoid this scenario, the registers are added to the // `free_after_curr_inst` instead, to ensure that it isn't used as // a scratch register. - self.free_after_curr_inst.add(preg); + self.free_after_curr_inst[preg.class()].add(preg); } else { // Something is still in the clobber. // After this instruction, it's no longer a clobber. 
From 04c53dc2431c13af8f08d5bbbb0b6d50115132ce Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Wed, 7 Aug 2024 22:32:44 +0100 Subject: [PATCH 32/95] remove std imports; better checks for trace logging --- src/fastalloc/lru.rs | 15 ++++++--------- src/fastalloc/mod.rs | 25 ++++++++++++++----------- src/lib.rs | 3 +-- 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/fastalloc/lru.rs b/src/fastalloc/lru.rs index 5ff08b4c..cf1aeba6 100644 --- a/src/fastalloc/lru.rs +++ b/src/fastalloc/lru.rs @@ -1,11 +1,8 @@ use alloc::vec::Vec; use alloc::vec; use hashbrown::HashSet; -use core::{fmt, ops::IndexMut}; -use std::{ops::Index, print}; -use crate::{PReg, PRegSet, RegClass}; - -use std::{println, format}; +use core::{fmt, ops::{IndexMut, Index}}; +use crate::{PReg, RegClass}; /// A least-recently-used cache organized as a linked list based on a vector. pub struct Lru { @@ -188,7 +185,7 @@ impl Lru { impl fmt::Debug for Lru { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - use std::format; + use alloc::format; let data_str = if self.head == u8::MAX { format!("") } else { @@ -215,11 +212,11 @@ impl fmt::Debug for Lru { } #[derive(Debug)] -pub struct PartedByRegClass { +pub struct PartedByRegClass { pub items: [T; 3], } -impl Index for PartedByRegClass { +impl Index for PartedByRegClass { type Output = T; fn index(&self, index: RegClass) -> &Self::Output { @@ -227,7 +224,7 @@ impl Index for PartedByRegClass { } } -impl IndexMut for PartedByRegClass { +impl IndexMut for PartedByRegClass { fn index_mut(&mut self, index: RegClass) -> &mut Self::Output { &mut self.items[index as usize] } diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 69202b08..d9f9bc8e 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -8,9 +8,6 @@ use alloc::collections::VecDeque; use alloc::vec::Vec; use hashbrown::{HashSet, HashMap}; -use std::println; -use std::format; - mod lru; mod iter; use lru::*; @@ -1421,8 +1418,9 @@ impl<'a, F: Function> Env<'a, 
F> { self.reused_inputs_in_curr_inst.clear(); self.vregs_in_curr_inst.clear(); - #[cfg(feature = "trace-log")] - self.log_post_inst_processing_state(inst); + if trace_enabled!() { + self.log_post_inst_processing_state(inst); + } } /// At the beginning of every block, all virtual registers that are @@ -1500,11 +1498,13 @@ impl<'a, F: Function> Env<'a, F> { self.process_edits(self.get_scratch_regs_for_reloading()); self.add_freed_regs_to_freelist(); - #[cfg(feature = "trace-log")] - self.log_post_reload_at_begin_state(block); + if trace_enabled!() { + self.log_post_reload_at_begin_state(block); + } } fn log_post_reload_at_begin_state(&self, block: Block) { + use alloc::format; trace!(""); trace!("State after instruction reload_at_begin of {:?}", block); let mut map = HashMap::new(); @@ -1530,6 +1530,7 @@ impl<'a, F: Function> Env<'a, F> { } fn log_post_inst_processing_state(&self, inst: Inst) { + use alloc::format; trace!(""); trace!("State after instruction {:?}", inst); let mut map = HashMap::new(); @@ -1629,14 +1630,16 @@ pub fn run( validate_ssa(func, &cfginfo)?; } - #[cfg(feature = "trace-log")] - log_function(func); + if trace_enabled!() { + log_function(func); + } let mut env = Env::new(func, mach_env); env.run()?; - #[cfg(feature = "trace-log")] - log_output(&env); + if trace_enabled!() { + log_output(&env); + } Ok(Output { edits: env.edits.make_contiguous().to_vec(), diff --git a/src/lib.rs b/src/lib.rs index 248d5a83..56ac21e7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,8 +34,7 @@ macro_rules! 
trace_enabled { }; } -use core::{default, hash::BuildHasherDefault}; -use std::iter::FromIterator; +use core::{hash::BuildHasherDefault, iter::FromIterator}; use rustc_hash::FxHasher; type FxHashMap = hashbrown::HashMap>; type FxHashSet = hashbrown::HashSet>; From cd351d812773e886a6205bc256e00ff9128b5538 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 8 Aug 2024 09:24:05 +0100 Subject: [PATCH 33/95] implement display for PRegSet --- src/lib.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 56ac21e7..8e6765f0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -315,6 +315,16 @@ impl FromIterator for PRegSet { } } +impl core::fmt::Display for PRegSet { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "{{")?; + for preg in self.into_iter() { + write!(f, "{preg}, ")?; + } + write!(f, "}}") + } +} + /// A virtual register. Contains a virtual register number and a /// class. /// From f7ce37dc35f2a7b05c845c602d2d6d26e61c2b26 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 8 Aug 2024 13:24:55 +0100 Subject: [PATCH 34/95] implemented display for PartedByRegClass; rethought reuse operands (again); fixed bug with LRU management --- src/fastalloc/lru.rs | 12 ++ src/fastalloc/mod.rs | 280 ++++++++++++++++++++++++++++++++++++------- 2 files changed, 246 insertions(+), 46 deletions(-) diff --git a/src/fastalloc/lru.rs b/src/fastalloc/lru.rs index cf1aeba6..0c98cc04 100644 --- a/src/fastalloc/lru.rs +++ b/src/fastalloc/lru.rs @@ -244,3 +244,15 @@ impl Lrus { } } } + +use core::fmt::{Debug, Display}; + +impl Display for PartedByRegClass { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{{ int: {}, float: {}, vector: {} }}", + self.items[0], self.items[1], self.items[2] + ) + } +} diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index d9f9bc8e..05bad63b 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -148,6 +148,10 @@ pub struct Env<'a, F: Function> { 
reused_inputs_in_curr_inst: Vec, /// The vregs defined or used in the current instruction. vregs_in_curr_inst: HashSet, + /// The physical registers allocated to the operands in the current instruction. + /// Used during eviction to detect eviction of a register that is already in use in the + /// instruction being processed, implying that there aren't enough registers for allocation. + pregs_allocd_in_curr_inst: PRegSet, allocatable_regs: PRegSet, dedicated_scratch_regs: PartedByRegClass>, @@ -210,6 +214,7 @@ impl<'a, F: Function> Env<'a, F> { inst_needs_scratch_reg: PartedByRegClass { items: [false, false, false] }, reused_inputs_in_curr_inst: Vec::new(), vregs_in_curr_inst: HashSet::new(), + pregs_allocd_in_curr_inst: PRegSet::empty(), dedicated_scratch_regs: PartedByRegClass { items: [ env.scratch_by_class[0], env.scratch_by_class[1], @@ -448,13 +453,16 @@ impl<'a, F: Function> Env<'a, F> { trace!("Evicting a register in evict_any_reg for class {:?}", regclass); let preg = self.lrus[regclass].pop(); trace!("Selected register from lru: {:?}", preg); - // TODO: Check if the preg has already been allocated for this + // Check if the preg has already been allocated for this // instruction. If it has, then there are too many stuff to // allocate, making allocation impossible. // Remember that for this to be true, the fixed registers must have // be allocated already. Why? Because if some register p0 has been allocated // and some fixed constraint register is encountered that needs p0, then // allocation will fail regardless of whether or not there are other free registers + if self.pregs_allocd_in_curr_inst.contains(preg) { + panic!("No enough registers for allocation?"); + } self.evict_vreg_in_preg(inst, preg); preg } @@ -522,12 +530,12 @@ impl<'a, F: Function> Env<'a, F> { /// Allocates a physical register for the operand `op`. 
fn alloc_reg_for_operand(&mut self, inst: Inst, op: Operand, op_idx: usize) { - trace!("freepregs int: {:?}", self.freepregs[RegClass::Int]); - trace!("freepregs vector: {:?}", self.freepregs[RegClass::Vector]); - trace!("freepregs float: {:?}", self.freepregs[RegClass::Float]); - trace!("freed_def_pregs int: {:?}", self.freed_def_pregs[RegClass::Int]); - trace!("freed_def_pregs vector: {:?}", self.freed_def_pregs[RegClass::Vector]); - trace!("freed_def_pregs float: {:?}", self.freed_def_pregs[RegClass::Float]); + trace!("freepregs int: {}", self.freepregs[RegClass::Int]); + trace!("freepregs vector: {}", self.freepregs[RegClass::Vector]); + trace!("freepregs float: {}", self.freepregs[RegClass::Float]); + trace!("freed_def_pregs int: {}", self.freed_def_pregs[RegClass::Int]); + trace!("freed_def_pregs vector: {}", self.freed_def_pregs[RegClass::Vector]); + trace!("freed_def_pregs float: {}", self.freed_def_pregs[RegClass::Float]); trace!(""); let mut allocd = false; // The only way a freed def preg can be reused for an operand is if @@ -549,7 +557,7 @@ impl<'a, F: Function> Env<'a, F> { && !self.reused_inputs_in_curr_inst.contains(&op_idx) { if let Some(freed_def_preg) = remove_any_from_pregset(&mut self.freed_def_pregs[op.class()]) { - trace!("Reusing the freed def preg: {:?}", freed_def_preg); + trace!("Reusing the freed def preg: {}", freed_def_preg); self.lrus[freed_def_preg.class()].append_and_poke(freed_def_preg); self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(freed_def_preg); self.vreg_in_preg[freed_def_preg.index()] = op.vreg(); @@ -586,8 +594,14 @@ impl<'a, F: Function> Env<'a, F> { && !self.func.inst_clobbers(inst).contains(preg); if self.vreg_in_preg[preg.index()] != VReg::invalid() { // Something is already in that register. Evict it. - // TODO: Check if the evicted register is a register in the - // current instruction. If it is, then there's a problem. + // Check if the evicted register is a register in the + // current instruction. 
If it is, then there must be multiple + // fixed register constraints for the same `preg` in the same + // operand position (early or late), because the fixed registers + // are considered first. + if self.pregs_allocd_in_curr_inst.contains(preg) { + panic!("Allocation impossible?"); + } self.evict_vreg_in_preg(inst, preg); } else if self.freed_def_pregs[preg.class()].contains(preg) { // Consider the scenario: @@ -719,7 +733,6 @@ impl<'a, F: Function> Env<'a, F> { // the location v0 is expected to be in after inst 1. // This messes up the dataflow. // To avoid this, the moves are prepended. - //self.move_after_inst(inst, op.vreg(), prev_alloc); self.add_move_later( inst, self.vreg_allocs[op.vreg().vreg()], @@ -859,9 +872,17 @@ impl<'a, F: Function> Env<'a, F> { trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); } else { self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[op.vreg().vreg()]; + if let Some(preg) = self.allocs[(inst.index(), op_idx)].as_reg() { + if !self.func.inst_clobbers(inst).contains(preg) { + self.lrus[preg.class()].poke(preg); + } + } trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); } self.vregs_allocd_in_curr_inst.insert(op.vreg()); + if let Some(preg) = self.allocs[(inst.index(), op_idx)].as_reg() { + self.pregs_allocd_in_curr_inst.add(preg); + } } fn process_reuse_operand_allocation( @@ -870,6 +891,7 @@ impl<'a, F: Function> Env<'a, F> { op: Operand, op_idx: usize, reused_op: Operand, + reused_op_idx: usize, clobbers: PRegSet, ) { debug_assert!(matches!(op.constraint(), OperandConstraint::Reuse(_))); @@ -910,6 +932,12 @@ impl<'a, F: Function> Env<'a, F> { // // In case 1.1.1: // + // Again, depending on whether or not v1 is used multiple times in the same instruction: + // Case 1.1.1.1: v1 is not used multiple times + // Case 1.1.1.2: v1 is used multiple times + // + // Case 1.1.1.1: + // // 1. 
def v0 (reuse: 1), use v1, use v2 // 2. use v0, use v1 // @@ -946,7 +974,132 @@ impl<'a, F: Function> Env<'a, F> { // will be prepended to the edits after the instruction. // 3. The edit to restore v1 will be appended to the edits after the instruction. // - // In the case 1.1.2, l0 isn't used by any other operand in the instruction, so no eviction + // In case 1.1.1.2, v1 is used multiple times. For example: + // + // 1. def v0 (reuse: 1), use v1, use v1 + // 2. use v0, use v1 + // + // Depending on the constraints, there are two sub cases: + // + // Case 1.1.1.2.1: All the v1 uses have the same constraints. + // Case 1.1.1.2.2: Some of the v1 uses constraints differ. + // + // In case 1.1.1.2.1, no edits will have been inserted by the `process_operand_allocation` + // procedure because they all have the same constraint. It is equivalent to a single use. + // So, it can be handled just like case 1.1.1.1. + // + // In case 1.1.1.2.2, they have different constraints, so edits will have already been inserted + // to effect the flow from the current allocation to the previous ones. + // For example: + // + // 1. def v0 (reuse: 1), use v1 (fixed: p5), use v1 (fixed: p8), use v2 + // 2. use v0, use v1 + // + // In the above, v1 is used multiple times with different constraints. + // Now, there are two subcases: + // Case 1.1.1.2.2.1: l1 == p5 (or whatever allocation is used for the first v1 operand) + // Case 1.1.1.2.2.2: l1 != p5 + // + // In case 1.1.1.2.2.1: + // + // No edit will be inserted when the first v1 use is encountered. But when the second is + // encountered, an edit will be prepended by `process_operand_allocation` to the edits + // before inst 1 to move from p8 to p5, and p8 will be marked as v1's current allocation, + // that is, vreg_allocs[v1] == p8: + // + // move from p8 to p5 // Current alloc to prev alloc in the same instruction. + // 1. def v0 (reuse: 1), use v1 (fixed: p5), use v1 (fixed: p8), use v2 + // 2. 
use v0, use v1 + // + // Also, depending on whether or not p8 was already used by another operand, an eviction may + // occur and an edit will be inserted to move from a spillslot to p8 after the instruction: + // + // move from p8 to p5 // Current alloc to prev alloc in the same instruction. + // 1. def v0 (reuse: 1), use v1 (fixed: p5), use v1 (fixed: p8), use v2 + // /* move from stack_? to p8 (possible eviction) */ + // 2. use v0, use v1 + // + // In fact, because of the possibility of using multiple v1s and other fixed register + // constraints, there are a variable number of eviction edits after the instruction: + // + // move from p8 to p5 // Current alloc to prev alloc in the same instruction. + // 1. def v0 (reuse: 1), use v1 (fixed: p5), use v1 (fixed: p8), use v2 + // 0 or more edits, possibly writing to vreg_allocs[v1] + // 2. use v0, use v1 + // + // By the preconditions of this case, one of the use operands uses l0. + // Either it's one of the v1 uses or a non-v1 use, like the v2 use in the above example, + // but I don't think that matters in this case. + // Because of this, the edits inserted between insts 1 and 2 will contain an eviction + // edit to move from stack_v0 to l0 and vreg_allocs[v0] == stack_v0: + // + // move from p8 to p5 // Current alloc to prev alloc in the same instruction. + // 1. def v0 (reuse: 1), use v1 (fixed: p5), use v1 (fixed: p8), use v2 + // 1 or more edits, with one from stack_v0 to l0 + // 2. use v0, use v1 + // + // Now, this can be handled just like in case 1.1.1.1. An edit is prepended to the before-inst-1 + // edits to save v1 to the stack. An edit is prepended to the after-inst-1 edits to move + // v0 from its reused input to its current allocation, vreg_allocs[v0]. And one more edit + // to move from v1's spillslot to l1 is appended to the after-inst-1 edits. + // + // move from p8 to stack_v1 // Saving v1 + // move from p8 to p5 // Current alloc to prev alloc in the same instruction. + // 1. 
def v0 (reuse: 1), use v1 (fixed: p5), use v1 (fixed: p8), use v2 + // move from p5 to stack_v0 (== vreg_allocs[v0]) // Move into v0's current allocation. + // 0 or more edits, with one from stack_v0 to l0 + // move from stack_v1 to l1 // Restoring v1 + // 2. use v0, use v1 + // + // Case 1.1.1.2.2.2: + // + // Now, if l1 != p5, then `process_operand_allocation` will insert two edits: a save + // and restore for v1, with the save being appended and the restore being prepended: + // + // move from p5 to stack_v1 // Save inserted by `process_operand_allocation` + // 1. def v0 (reuse: 1), use v1 (fixed: p5), use v1 (fixed: p8), use v2 + // move from stack_v1 to l1 // Restore inserted by `process_operand_allocation` + // 2. use v0, use v1 + // + // And when the second allocation is encountered, an edit is also inserted to move from + // the new current allocation (p8) to the previous p5. + // + // move from p8 to p5 // Current alloc to prev alloc in the same instruction. + // move from p5 to stack_v1 // Save inserted by `process_operand_allocation`. + // 1. def v0 (reuse: 1), use v1 (fixed: p5), use v1 (fixed: p8), use v2 + // move from stack_v1 to l1 // Restore inserted by `process_operand_allocation`. + // 2. use v0, use v1 + // + // By the preconditions, one of the operand uses l0, so an eviction edit has already + // been inserted to move from stack_v0 to l0: + // + // move from p8 to p5 // Current alloc to prev alloc in the same instruction. + // move from p5 to stack_v1 // Save inserted by `process_operand_allocation`. + // 1. def v0 (reuse: 1), use v1 (fixed: p5), use v1 (fixed: p8), use v2 + // move from stack_v1 to l1 // Restore inserted by `process_operand_allocation`. + // 1 or more edits, with one from stack_v0 to l0 // + // 2. use v0, use v1 + // + // The restore inserted by the `process_operand_allocation` can come before or after any + // of the other edits, depending on the order of the operands. 
But that isn't a problem + // because the move is from its spillslot and every vreg has its own unique spillslot, + // so we can be sure that only v1 is in stack_v1. + // + // This can be handled like case 1.1.1.1, but the only edit inserted is the one prepended + // to the after-inst-1 edits to move from the reused input to vreg_allocs[v0]. + // This will work because v1 already has save-and-reload edits inserted by + // `process_operand_allocation`, so those edits don't need to be inserted anymore. + // And because of how `process_operand_allocation` and the eviction procedures insert edits, + // no matter how edits are inserted, the edit from stack_v1 to l1 will always + // be the last write to v1 before the next instruction. + // + // In the case 1.1.2, depending on whether v1 is used multiple times: + // Case 1.1.2.1: v1 is not used multiple times. + // Case 1.1.2.2: v1 is used multiple times. + // + // In case 1.1.2.1: + // + // l0 isn't used by any other operand in the instruction, so no eviction // occurs and vreg_allocs[v0] at this point == l0. // Here, the only edits that needs to be added is the move from the reused input allocation // into vreg_allocs[v0] (which is l0) and the save and restore of v1. @@ -963,7 +1116,17 @@ impl<'a, F: Function> Env<'a, F> { // move from stack_v1 to l1 // Restore v1. // 2. use v0, use v1 // - // So, the procedure for case 1.1.1 is the same for case 1.1.2. + // So, the procedure for case 1.1.1.1 is the same for case 1.1.2.1. + // + // In case 1.1.2.2, v1 is used multiple times. If they all have the same constraint, + // then it can be handled just like case 1.1.1.1. + // If they have different constraints, then the only difference between this case and + // case 1.1.1.2.2 is that vreg_allocs[v0] at this point is already == l0. 
+ // This can be handled just like case 1.1.1.2.2, with the save and restore of v1 and + // move from the reused input to vreg_allocs[p1] being inserted when + // `process_operand_allocation` has not inserted any save and restores of v1. + // And only the move from the reused input to vreg_allocs[p1] is added when `proces_operand_allocation` + // has already inserted a save and restore for v1. // // In case 1.2, the reused input doesn't live past the instruction. Only v0 // does. In this case, the only edit added is the one to move v0 from the reused @@ -984,7 +1147,14 @@ impl<'a, F: Function> Env<'a, F> { // Case 2.1: The reused input v1 lives past the instruction. // Case 2.2: The reused input v1 doesn't live past the instruction. // - // In case 2.1, the only edits that need to be added are the ones to save and restore v1. + // And their sub subcases: + // + // Case 2.1.1: The reused input is not used multiple times. + // Case 2.1.2: The reused input is used multiple times. + // We don't need to consider multiple uses in case 2.2 because v1 doesn't live past + // the instruction. + // + // In case 2.1.1, the only edits that need to be added are the ones to save and restore v1. // For example: // // 1. def v0 (reuse: 1), use v1, use v2 // Both v0 and v1 use p0 @@ -997,18 +1167,25 @@ impl<'a, F: Function> Env<'a, F> { // move from stack_v1 to p0 // Restore v1. // 2. use v1 // + // Case 2.1.2, like in case 1.1.1.2, edits to save and restore v1 only need to be added + // if `process_operand_allocation` didn't add them already. The edit to move from + // the reused input to vreg_allocs[v0] can be omitted, since v0 hasn't been allocated. + // // In case 2.2, no edits should be inserted at all since none of them live past the // instruction. // // From these cases, it can be concluded that edits to save and restore the reused input - // should be inserted whenever the reused input lives past the current instruction. 
+ // should be inserted whenever the reused input lives past the current instruction + // and `process_operand_allocation` hasn't inserted any save and restores of the + // reused input (a vreg will be present in `use_vregs_saved_and_restored_in_curr_inst` + // at this point if `process_operand_allocation` has inserted a save and reload for the vreg). // And the edit to move the reuse operand into its after-instruction-allocation should // only be inserted if it lives past the current instruction. - // And because of interactions with other edits relating to eviction, the edits, + // And because of interactions with other edits, the edits, // when inserted, should be in the following order: // // 1. The edit to save the reused input is preprended to the other edits before the instruction. - // 2. The edit to move v0 from its reused input into its current allocation (spillslot) + // 2. The edit to move v0 from its reused input into its current allocation // will be prepended to the edits after the instruction. // 3. The edit to restore v1 will be appended to the edits after the instruction. // @@ -1021,6 +1198,7 @@ impl<'a, F: Function> Env<'a, F> { panic!("Invalid input"); } let reused_input_lives_past_curr_inst = !self.vregs_first_seen_in_curr_inst.contains(&reused_op.vreg()); + let reused_op_alloc = self.allocs[(inst.index(), reused_op_idx)]; if self.vreg_allocs[op.vreg().vreg()].is_some() { let reused_op_vreg = reused_op.vreg(); // The only way that a vreg can be assigned a clobber is if a fixed register @@ -1029,7 +1207,7 @@ impl<'a, F: Function> Env<'a, F> { // to the vreg being defined. Since the vreg is live after this instruction, this // must be an error, because it will be impossible to restore the defined vreg // afterwards. 
- if let Some(preg) = self.vreg_allocs[reused_op_vreg.vreg()].as_reg() { + if let Some(preg) = reused_op_alloc.as_reg() { if clobbers.contains(preg) { panic!("Invalid input"); } @@ -1038,10 +1216,14 @@ impl<'a, F: Function> Env<'a, F> { if self.vreg_spillslots[reused_op_vreg.vreg()].is_invalid() { self.vreg_spillslots[reused_op_vreg.vreg()] = self.allocstack(&reused_op_vreg); } - let reused_op_spillslot = self.vreg_spillslots[reused_op.vreg().vreg()]; - - // Save the reused input only if it lives past the current instruction. - if reused_input_lives_past_curr_inst { + let reused_op_spillslot = self.vreg_spillslots[reused_op_vreg.vreg()]; + + // Save the reused input only if it lives past the current instruction + // and `process_operand_allocation` hasn't inserted save and reload edits + // for it. + if reused_input_lives_past_curr_inst + && !self.use_vregs_saved_and_restored_in_curr_inst.contains(&reused_op_vreg) + { self.add_move_later( inst, self.vreg_allocs[reused_op_vreg.vreg()], @@ -1056,19 +1238,26 @@ impl<'a, F: Function> Env<'a, F> { // expected to be in after the current instruction. self.add_move_later( inst, - self.vreg_allocs[reused_op_vreg.vreg()], + reused_op_alloc, + // It is possible for reused_op_alloc != vreg_allocs[reused_op] + // in the case where multiple operands use the reused vreg with different + // constraints. op_prev_alloc, op.class(), InstPosition::After, true, ); - // Restore the reused input only if it lives past the current instruction. - if reused_input_lives_past_curr_inst { + // Save the reused input only if it lives past the current instruction + // and `process_operand_allocation` hasn't inserted save and reload edits + // for it. 
+ if reused_input_lives_past_curr_inst + && !self.use_vregs_saved_and_restored_in_curr_inst.contains(&reused_op_vreg) + { self.add_move_later( inst, Allocation::stack(reused_op_spillslot), - self.vreg_allocs[reused_op_vreg.vreg()], + reused_op_alloc, op.class(), InstPosition::After, false, @@ -1081,11 +1270,13 @@ impl<'a, F: Function> Env<'a, F> { trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); } else { let reused_op_vreg = reused_op.vreg(); - if reused_input_lives_past_curr_inst { + if reused_input_lives_past_curr_inst + && !self.use_vregs_saved_and_restored_in_curr_inst.contains(&reused_op_vreg) + { if self.vreg_spillslots[reused_op_vreg.vreg()].is_invalid() { self.vreg_spillslots[reused_op_vreg.vreg()] = self.allocstack(&reused_op_vreg); } - let reused_op_spillslot = self.vreg_spillslots[reused_op.vreg().vreg()]; + let reused_op_spillslot = self.vreg_spillslots[reused_op_vreg.vreg()]; // Save the reused input to its spillslot before the instruction. 
self.add_move_later( inst, @@ -1099,7 +1290,7 @@ impl<'a, F: Function> Env<'a, F> { self.add_move_later( inst, Allocation::stack(reused_op_spillslot), - self.vreg_allocs[reused_op_vreg.vreg()], + reused_op_alloc, op.class(), InstPosition::After, false, @@ -1353,7 +1544,7 @@ impl<'a, F: Function> Env<'a, F> { let OperandConstraint::Reuse(reused_idx) = op.constraint() else { unreachable!() }; - self.process_reuse_operand_allocation(inst, op, op_idx, operands.0[reused_idx], clobbers); + self.process_reuse_operand_allocation(inst, op, op_idx, operands.0[reused_idx], reused_idx, clobbers); } self.save_and_restore_clobbered_registers(inst); for preg in self.func.inst_clobbers(inst) { @@ -1406,8 +1597,8 @@ impl<'a, F: Function> Env<'a, F> { } } trace!("After the allocation:"); - trace!("freed_def_pregs: {:?}", self.freed_def_pregs); - trace!("free after curr inst: {:?}", self.free_after_curr_inst); + trace!("freed_def_pregs: {}", self.freed_def_pregs); + trace!("free after curr inst: {}", self.free_after_curr_inst); trace!(""); let scratch_regs = self.get_scratch_regs(inst); self.process_edits(scratch_regs); @@ -1417,6 +1608,7 @@ impl<'a, F: Function> Env<'a, F> { self.vregs_allocd_in_curr_inst.clear(); self.reused_inputs_in_curr_inst.clear(); self.vregs_in_curr_inst.clear(); + self.pregs_allocd_in_curr_inst = PRegSet::empty(); if trace_enabled!() { self.log_post_inst_processing_state(inst); @@ -1524,9 +1716,9 @@ impl<'a, F: Function> Env<'a, F> { trace!("Int LRU: {:?}", self.lrus[RegClass::Int]); trace!("Float LRU: {:?}", self.lrus[RegClass::Float]); trace!("Vector LRU: {:?}", self.lrus[RegClass::Vector]); - trace!("Free int pregs: {:?}", self.freepregs[RegClass::Int]); - trace!("Free float pregs: {:?}", self.freepregs[RegClass::Float]); - trace!("Free vector pregs: {:?}", self.freepregs[RegClass::Vector]); + trace!("Free int pregs: {}", self.freepregs[RegClass::Int]); + trace!("Free float pregs: {}", self.freepregs[RegClass::Float]); + trace!("Free vector pregs: {}", 
self.freepregs[RegClass::Vector]); } fn log_post_inst_processing_state(&self, inst: Inst) { @@ -1550,9 +1742,9 @@ impl<'a, F: Function> Env<'a, F> { trace!("Int LRU: {:?}", self.lrus[RegClass::Int]); trace!("Float LRU: {:?}", self.lrus[RegClass::Float]); trace!("Vector LRU: {:?}", self.lrus[RegClass::Vector]); - trace!("Free int pregs: {:?}", self.freepregs[RegClass::Int]); - trace!("Free float pregs: {:?}", self.freepregs[RegClass::Float]); - trace!("Free vector pregs: {:?}", self.freepregs[RegClass::Vector]); + trace!("Free int pregs: {}", self.freepregs[RegClass::Int]); + trace!("Free float pregs: {}", self.freepregs[RegClass::Float]); + trace!("Free vector pregs: {}", self.freepregs[RegClass::Vector]); } fn alloc_block(&mut self, block: Block) { @@ -1584,18 +1776,13 @@ fn log_function(func: &F) { ); for inst in func.block_insns(block).iter() { let clobbers = func.inst_clobbers(inst); - let mut cls = Vec::new(); - for c in clobbers { - cls.push(c); - } - trace!("inst{:?}: {:?}. Clobbers: {:?}", inst.index(), func.inst_operands(inst), cls); + trace!("inst{:?}: {:?}. 
Clobbers: {}", inst.index(), func.inst_operands(inst), clobbers); if func.is_branch(inst) { trace!("Block args: "); for (succ_idx, _succ) in func.block_succs(block).iter().enumerate() { trace!(" {:?}", func.branch_blockparams(block, inst, succ_idx)); } } - trace!(""); } trace!(""); } @@ -1603,12 +1790,13 @@ fn log_function(func: &F) { fn log_output<'a, F: Function>(env: &Env<'a, F>) { trace!("Done!"); + use alloc::format; let mut v = Vec::new(); for i in 0..env.func.num_vregs() { if env.vreg_spillslots[i].is_valid() { v.push(( - VReg::new(i, RegClass::Int), - Allocation::stack(env.vreg_spillslots[i]) + format!("{}", VReg::new(i, RegClass::Int)), + format!("{}", Allocation::stack(env.vreg_spillslots[i])) )); } } From 093ba10c314478fac568a501a8cd7af7cd3be18b Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 8 Aug 2024 13:40:05 +0100 Subject: [PATCH 35/95] better error handling --- src/fastalloc/mod.rs | 110 +++++++++++++++++++++++-------------------- 1 file changed, 60 insertions(+), 50 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 05bad63b..15f87100 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -284,7 +284,7 @@ impl<'a, F: Function> Env<'a, F> { /// The scratch registers needed for processing edits generated while /// processing instructions. - fn get_scratch_regs(&mut self, inst: Inst) -> PartedByRegClass> { + fn get_scratch_regs(&mut self, inst: Inst) -> Result>, RegAllocError> { trace!("Getting scratch registers for instruction {:?}", inst); let mut scratch_regs = PartedByRegClass { items: [None, None, None] }; for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { @@ -300,7 +300,7 @@ impl<'a, F: Function> Env<'a, F> { preg } else { trace!("No free {:?} registers. Evicting a register", class); - self.evict_any_reg(inst, class) + self.evict_any_reg(inst, class)? 
}; scratch_regs[class] = Some(reg); } @@ -308,7 +308,7 @@ impl<'a, F: Function> Env<'a, F> { trace!("{:?} class does not need a scratch register", class); } } - scratch_regs + Ok(scratch_regs) } fn process_edits(&mut self, scratch_regs: PartedByRegClass>) { @@ -449,7 +449,7 @@ impl<'a, F: Function> Env<'a, F> { self.move_after_inst(inst, evicted_vreg, Allocation::reg(preg)); } - fn evict_any_reg(&mut self, inst: Inst, regclass: RegClass) -> PReg { + fn evict_any_reg(&mut self, inst: Inst, regclass: RegClass) -> Result { trace!("Evicting a register in evict_any_reg for class {:?}", regclass); let preg = self.lrus[regclass].pop(); trace!("Selected register from lru: {:?}", preg); @@ -461,10 +461,11 @@ impl<'a, F: Function> Env<'a, F> { // and some fixed constraint register is encountered that needs p0, then // allocation will fail regardless of whether or not there are other free registers if self.pregs_allocd_in_curr_inst.contains(preg) { - panic!("No enough registers for allocation?"); + // No enough registers for allocation? + return Err(RegAllocError::TooManyLiveRegs); } self.evict_vreg_in_preg(inst, preg); - preg + Ok(preg) } fn freealloc(&mut self, vreg: VReg, clobbers: PRegSet) { @@ -502,7 +503,7 @@ impl<'a, F: Function> Env<'a, F> { } } AllocationKind::Stack => (), - AllocationKind::None => panic!("Attempting to free an unallocated operand!") + AllocationKind::None => unreachable!("Attempting to free an unallocated operand!") } self.vreg_allocs[vreg.vreg()] = Allocation::none(); self.live_vregs.remove(&vreg); @@ -529,7 +530,7 @@ impl<'a, F: Function> Env<'a, F> { } /// Allocates a physical register for the operand `op`. 
- fn alloc_reg_for_operand(&mut self, inst: Inst, op: Operand, op_idx: usize) { + fn alloc_reg_for_operand(&mut self, inst: Inst, op: Operand, op_idx: usize) -> Result<(), RegAllocError> { trace!("freepregs int: {}", self.freepregs[RegClass::Int]); trace!("freepregs vector: {}", self.freepregs[RegClass::Vector]); trace!("freepregs float: {}", self.freepregs[RegClass::Float]); @@ -537,7 +538,6 @@ impl<'a, F: Function> Env<'a, F> { trace!("freed_def_pregs vector: {}", self.freed_def_pregs[RegClass::Vector]); trace!("freed_def_pregs float: {}", self.freed_def_pregs[RegClass::Float]); trace!(""); - let mut allocd = false; // The only way a freed def preg can be reused for an operand is if // the operand uses or defines a vreg in the early phase and the vreg doesn't // live past the instruction. If the vreg lives past the instruction, then the @@ -561,25 +561,24 @@ impl<'a, F: Function> Env<'a, F> { self.lrus[freed_def_preg.class()].append_and_poke(freed_def_preg); self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(freed_def_preg); self.vreg_in_preg[freed_def_preg.index()] = op.vreg(); - allocd = true; + return Ok(()); } } - if !allocd { - let preg = if self.freepregs[op.class()] == PRegSet::empty() { - trace!("Evicting a register"); - self.evict_any_reg(inst, op.class()) - } else { - trace!("Getting a register from freepregs"); - remove_any_from_pregset(&mut self.freepregs[op.class()]).unwrap() - }; - trace!("The allocated register for vreg {:?}: {:?}", preg, op.vreg()); - self.lrus[op.class()].poke(preg); - self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(preg); - self.vreg_in_preg[preg.index()] = op.vreg(); - } + let preg = if self.freepregs[op.class()] == PRegSet::empty() { + trace!("Evicting a register"); + self.evict_any_reg(inst, op.class())? 
+ } else { + trace!("Getting a register from freepregs"); + remove_any_from_pregset(&mut self.freepregs[op.class()]).unwrap() + }; + trace!("The allocated register for vreg {:?}: {:?}", preg, op.vreg()); + self.lrus[op.class()].poke(preg); + self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(preg); + self.vreg_in_preg[preg.index()] = op.vreg(); + Ok(()) } - fn alloc_fixed_reg_for_operand(&mut self, inst: Inst, op: Operand, preg: PReg) { + fn alloc_fixed_reg_for_operand(&mut self, inst: Inst, op: Operand, preg: PReg) -> Result<(), RegAllocError> { trace!("The fixed preg: {:?} for operand {:?}", preg, op); // It is an error for a fixed register clobber to be used for a defined vreg @@ -588,7 +587,8 @@ impl<'a, F: Function> Env<'a, F> { && (!self.vregs_first_seen_in_curr_inst.contains(&op.vreg()) || self.liveout_vregs.contains(&op.vreg())) { - panic!("Invalid input"); + // Invalid input. + return Err(RegAllocError::TooManyLiveRegs); } let is_allocatable = !self.is_stack(Allocation::reg(preg)) && !self.func.inst_clobbers(inst).contains(preg); @@ -600,7 +600,7 @@ impl<'a, F: Function> Env<'a, F> { // operand position (early or late), because the fixed registers // are considered first. if self.pregs_allocd_in_curr_inst.contains(preg) { - panic!("Allocation impossible?"); + return Err(RegAllocError::TooManyLiveRegs); } self.evict_vreg_in_preg(inst, preg); } else if self.freed_def_pregs[preg.class()].contains(preg) { @@ -646,18 +646,19 @@ impl<'a, F: Function> Env<'a, F> { self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(preg); self.vreg_in_preg[preg.index()] = op.vreg(); trace!("vreg {:?} is now in preg {:?}", op.vreg(), preg); + Ok(()) } /// Allocates for the operand `op` with index `op_idx` into the /// vector of instruction `inst`'s operands. /// Only non reuse-input operands. 
- fn alloc_operand(&mut self, inst: Inst, op: Operand, op_idx: usize) { + fn alloc_operand(&mut self, inst: Inst, op: Operand, op_idx: usize) -> Result<(), RegAllocError> { match op.constraint() { OperandConstraint::Any => { - self.alloc_reg_for_operand(inst, op, op_idx); + self.alloc_reg_for_operand(inst, op, op_idx)?; } OperandConstraint::Reg => { - self.alloc_reg_for_operand(inst, op, op_idx); + self.alloc_reg_for_operand(inst, op, op_idx)?; } OperandConstraint::Stack => { if self.vreg_spillslots[op.vreg().vreg()].is_invalid() { @@ -666,7 +667,7 @@ impl<'a, F: Function> Env<'a, F> { self.vreg_allocs[op.vreg().vreg()] = Allocation::stack(self.vreg_spillslots[op.vreg().vreg()]); } OperandConstraint::FixedReg(preg) => { - self.alloc_fixed_reg_for_operand(inst, op, preg); + self.alloc_fixed_reg_for_operand(inst, op, preg)?; } OperandConstraint::Reuse(_) => { // This is handled elsewhere @@ -674,15 +675,16 @@ impl<'a, F: Function> Env<'a, F> { } } self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[op.vreg().vreg()]; + Ok(()) } /// Only processes non reuse-input operands - fn process_operand_allocation(&mut self, inst: Inst, op: Operand, op_idx: usize) { + fn process_operand_allocation(&mut self, inst: Inst, op: Operand, op_idx: usize) -> Result<(), RegAllocError> { debug_assert!(!matches!(op.constraint(), OperandConstraint::Reuse(_))); if let Some(preg) = op.as_fixed_nonallocatable() { self.allocs[(inst.index(), op_idx)] = Allocation::reg(preg); trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); - return; + return Ok(()); } self.vregs_in_curr_inst.insert(op.vreg()); self.live_vregs.insert(op.vreg()); @@ -691,7 +693,7 @@ impl<'a, F: Function> Env<'a, F> { if prev_alloc.is_none() { self.vregs_first_seen_in_curr_inst.insert(op.vreg()); } - self.alloc_operand(inst, op, op_idx); + self.alloc_operand(inst, op, op_idx)?; // Need to insert a move to propagate flow from the current // allocation to the 
subsequent places where the value was // used (in `prev_alloc`, that is). @@ -883,6 +885,7 @@ impl<'a, F: Function> Env<'a, F> { if let Some(preg) = self.allocs[(inst.index(), op_idx)].as_reg() { self.pregs_allocd_in_curr_inst.add(preg); } + Ok(()) } fn process_reuse_operand_allocation( @@ -893,7 +896,7 @@ impl<'a, F: Function> Env<'a, F> { reused_op: Operand, reused_op_idx: usize, clobbers: PRegSet, - ) { + ) -> Result<(), RegAllocError> { debug_assert!(matches!(op.constraint(), OperandConstraint::Reuse(_))); self.vregs_in_curr_inst.insert(op.vreg()); // To handle reuse operands, the reused input's allocation is always used for @@ -1195,7 +1198,8 @@ impl<'a, F: Function> Env<'a, F> { || reused_op.pos() != OperandPos::Early || op.pos() != OperandPos::Late || reused_op.class() != op.class() { - panic!("Invalid input"); + // Invalid input. + return Err(RegAllocError::TooManyLiveRegs); } let reused_input_lives_past_curr_inst = !self.vregs_first_seen_in_curr_inst.contains(&reused_op.vreg()); let reused_op_alloc = self.allocs[(inst.index(), reused_op_idx)]; @@ -1209,7 +1213,8 @@ impl<'a, F: Function> Env<'a, F> { // afterwards. if let Some(preg) = reused_op_alloc.as_reg() { if clobbers.contains(preg) { - panic!("Invalid input"); + // Invalid input. 
+ return Err(RegAllocError::TooManyLiveRegs); } } let op_prev_alloc = self.vreg_allocs[op.vreg().vreg()]; @@ -1300,6 +1305,7 @@ impl<'a, F: Function> Env<'a, F> { self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[reused_op_vreg.vreg()]; trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); } + Ok(()) } fn alloc_slots_for_block_params(&mut self, succ: Block) { @@ -1492,7 +1498,7 @@ impl<'a, F: Function> Env<'a, F> { } } - fn alloc_inst(&mut self, block: Block, inst: Inst) { + fn alloc_inst(&mut self, block: Block, inst: Inst) -> Result<(), RegAllocError> { trace!("Allocating instruction {:?}", inst); if self.func.is_branch(inst) { self.process_branch(block, inst); @@ -1520,22 +1526,22 @@ impl<'a, F: Function> Env<'a, F> { self.reused_inputs_in_curr_inst.push(reused_idx); } for (op_idx, op) in operands.fixed_late() { - self.process_operand_allocation(inst, op, op_idx); + self.process_operand_allocation(inst, op, op_idx)?; } for (op_idx, op) in operands.non_fixed_non_reuse_late_def() { - self.process_operand_allocation(inst, op, op_idx); + self.process_operand_allocation(inst, op, op_idx)?; } for (_, op) in operands.non_reuse_late_def() { self.freealloc(op.vreg(), clobbers); } for (op_idx, op) in operands.fixed_early() { - self.process_operand_allocation(inst, op, op_idx); + self.process_operand_allocation(inst, op, op_idx)?; } for (op_idx, op) in operands.non_fixed_non_reuse_late_use() { - self.process_operand_allocation(inst, op, op_idx); + self.process_operand_allocation(inst, op, op_idx)?; } for (op_idx, op) in operands.non_fixed_non_reuse_early() { - self.process_operand_allocation(inst, op, op_idx); + self.process_operand_allocation(inst, op, op_idx)?; } for (_, op) in operands.non_reuse_early_def() { self.freealloc(op.vreg(), clobbers); @@ -1544,7 +1550,7 @@ impl<'a, F: Function> Env<'a, F> { let OperandConstraint::Reuse(reused_idx) = op.constraint() else { unreachable!() }; - 
self.process_reuse_operand_allocation(inst, op, op_idx, operands.0[reused_idx], reused_idx, clobbers); + self.process_reuse_operand_allocation(inst, op, op_idx, operands.0[reused_idx], reused_idx, clobbers)?; } self.save_and_restore_clobbered_registers(inst); for preg in self.func.inst_clobbers(inst) { @@ -1600,7 +1606,7 @@ impl<'a, F: Function> Env<'a, F> { trace!("freed_def_pregs: {}", self.freed_def_pregs); trace!("free after curr inst: {}", self.free_after_curr_inst); trace!(""); - let scratch_regs = self.get_scratch_regs(inst); + let scratch_regs = self.get_scratch_regs(inst)?; self.process_edits(scratch_regs); self.add_freed_regs_to_freelist(); self.use_vregs_saved_and_restored_in_curr_inst.clear(); @@ -1609,10 +1615,10 @@ impl<'a, F: Function> Env<'a, F> { self.reused_inputs_in_curr_inst.clear(); self.vregs_in_curr_inst.clear(); self.pregs_allocd_in_curr_inst = PRegSet::empty(); - if trace_enabled!() { self.log_post_inst_processing_state(inst); } + Ok(()) } /// At the beginning of every block, all virtual registers that are @@ -1747,22 +1753,26 @@ impl<'a, F: Function> Env<'a, F> { trace!("Free vector pregs: {}", self.freepregs[RegClass::Vector]); } - fn alloc_block(&mut self, block: Block) { + fn alloc_block(&mut self, block: Block) -> Result<(), RegAllocError> { trace!("{:?} start", block); for inst in self.func.block_insns(block).iter().rev() { - self.alloc_inst(block, inst); + self.alloc_inst(block, inst)?; } self.reload_at_begin(block); trace!("{:?} end\n", block); + Ok(()) } fn run(&mut self) -> Result<(), RegAllocError> { debug_assert_eq!(self.func.entry_block().index(), 0); for block in (0..self.func.num_blocks()).rev() { - self.alloc_block(Block::new(block)); + self.alloc_block(Block::new(block))?; + } + if !self.live_vregs.is_empty() { + Err(RegAllocError::EntryLivein) + } else { + Ok(()) } - - Ok(()) } } From 6ce9d569fb3b3f5c8fdd98fc33491ddf59a124d2 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 8 Aug 2024 14:45:10 +0100 Subject: [PATCH 36/95] 
fixed regression with LRU management --- src/fastalloc/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 15f87100..7536611b 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -875,7 +875,9 @@ impl<'a, F: Function> Env<'a, F> { } else { self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[op.vreg().vreg()]; if let Some(preg) = self.allocs[(inst.index(), op_idx)].as_reg() { - if !self.func.inst_clobbers(inst).contains(preg) { + if self.allocatable_regs.contains(preg) + && !self.func.inst_clobbers(inst).contains(preg) + { self.lrus[preg.class()].poke(preg); } } From 8680569cbccd779cad906e73b1999fea4b089a72 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 8 Aug 2024 15:55:09 +0100 Subject: [PATCH 37/95] removed unnecessary tracking of liveout vregs --- src/fastalloc/mod.rs | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 7536611b..1d5a8b7a 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -6,7 +6,7 @@ use crate::{Function, MachineEnv, ssa::validate_ssa, ProgPoint, Edit, Output}; use crate::{cfg::CFGInfo, RegAllocError, Allocation, ion::Stats}; use alloc::collections::VecDeque; use alloc::vec::Vec; -use hashbrown::{HashSet, HashMap}; +use hashbrown::HashSet; mod lru; mod iter; @@ -135,11 +135,6 @@ pub struct Env<'a, F: Function> { /// Used to keep track of which vregs have been allocated in the current instruction. /// This is used to determine which edits to insert when allocating a use operand. vregs_allocd_in_curr_inst: HashSet, - /// All the liveout vregs encountered during allocation. - /// When allocation is completed, this contains all the liveout vregs in - /// the function. - /// This is used to build the stackmap after allocation is complete. 
- liveout_vregs: HashSet, /// Used to determine if a scratch register is needed for an /// instruction's moves during the `process_edit` calls. inst_needs_scratch_reg: PartedByRegClass, @@ -210,7 +205,6 @@ impl<'a, F: Function> Env<'a, F> { use_vregs_saved_and_restored_in_curr_inst: HashSet::new(), freed_def_pregs: PartedByRegClass { items: [PRegSet::empty(), PRegSet::empty(), PRegSet::empty()] }, vregs_first_seen_in_curr_inst: HashSet::new(), - liveout_vregs: HashSet::new(), inst_needs_scratch_reg: PartedByRegClass { items: [false, false, false] }, reused_inputs_in_curr_inst: Vec::new(), vregs_in_curr_inst: HashSet::new(), @@ -583,13 +577,8 @@ impl<'a, F: Function> Env<'a, F> { // It is an error for a fixed register clobber to be used for a defined vreg // that outlives the instruction, because it will be impossible to restore it. - if self.func.inst_clobbers(inst).contains(preg) && op.kind() == OperandKind::Def - && (!self.vregs_first_seen_in_curr_inst.contains(&op.vreg()) - || self.liveout_vregs.contains(&op.vreg())) - { - // Invalid input. - return Err(RegAllocError::TooManyLiveRegs); - } + // But checking for that will be expensive? + let is_allocatable = !self.is_stack(Allocation::reg(preg)) && !self.func.inst_clobbers(inst).contains(preg); if self.vreg_in_preg[preg.index()] != VReg::invalid() { @@ -1654,13 +1643,7 @@ impl<'a, F: Function> Env<'a, F> { } else { trace!("{:?} is not a block param. It's a liveout vreg from some predecessor", vreg); trace!("Setting {:?}'s current allocation to its spillslot", vreg); - // It is a liveout vreg from a predecessor. self.vreg_allocs[vreg.vreg()] = slot; - trace!("Recording that {:?} is a liveout", vreg); - // Need to remember that this is a liveout vreg so that its - // spillslot, if it's a reftype, can be recorded in the stackmap - // later. - self.liveout_vregs.insert(vreg); if let Some(preg) = prev_alloc.as_reg() { trace!("{:?} was in {:?}. Removing it", preg, vreg); // Nothing is in that preg anymore. 
Return it to @@ -1704,6 +1687,7 @@ impl<'a, F: Function> Env<'a, F> { } fn log_post_reload_at_begin_state(&self, block: Block) { + use hashbrown::HashMap; use alloc::format; trace!(""); trace!("State after instruction reload_at_begin of {:?}", block); @@ -1730,6 +1714,7 @@ impl<'a, F: Function> Env<'a, F> { } fn log_post_inst_processing_state(&self, inst: Inst) { + use hashbrown::HashMap; use alloc::format; trace!(""); trace!("State after instruction {:?}", inst); From 956a041c04c8d0fc0ab0915969152337e9d43b79 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 8 Aug 2024 17:55:10 +0100 Subject: [PATCH 38/95] WIP --- src/fastalloc/bitset.rs | 73 +++++++++++++++++++++++++++++++++++++++++ src/fastalloc/mod.rs | 2 ++ 2 files changed, 75 insertions(+) create mode 100644 src/fastalloc/bitset.rs diff --git a/src/fastalloc/bitset.rs b/src/fastalloc/bitset.rs new file mode 100644 index 00000000..c77fe58e --- /dev/null +++ b/src/fastalloc/bitset.rs @@ -0,0 +1,73 @@ +use alloc::vec::Vec; +use alloc::vec; + +type Frame = u64; +const BITS_PER_FRAME: usize = core::mem::size_of::() * 8; + +pub struct BitSet { + bits: Vec +} + +impl BitSet { + + pub fn with_capacity(n: usize) -> Self { + let quot = n / BITS_PER_FRAME; + // The number of frames needed cannot be > the quotient; + let no_of_frames = quot + 1; + Self { + bits: vec![0; no_of_frames], + } + } + + pub fn compute_index(&self, el: usize) -> (usize, usize) { + (el / BITS_PER_FRAME, el % BITS_PER_FRAME) + } + + pub fn insert(&mut self, el: usize) { + let (frame_no, idx) = self.compute_index(el); + self.bits[frame_no] |= 1 << idx; + } + + pub fn remove(&mut self, el: usize) { + let (frame_no, idx) = self.compute_index(el); + self.bits[frame_no] &= !(1 << idx); + } + + pub fn contains(&self, el: usize) -> bool { + let (frame_no, idx) = self.compute_index(el); + self.bits[frame_no] & (1 << idx) != 0 + } + + pub fn clear(&mut self) { + for frame in self.bits.iter_mut() { + *frame = 0; + } + } + + pub fn is_empty(&mut self) { + + 
} +} + + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn operations() { + let mut set = BitSet::with_capacity(200); + set.insert(10); + set.insert(11); + set.insert(23); + set.insert(45); + assert!(set.contains(10)); + assert!(!set.contains(12)); + assert!(!set.contains(2000)); + assert!(set.contains(45)); + assert!(set.contains(23)); + assert!(set.contains(11)); + set.remove(10); + assert!(!set.contains(10)); + } +} diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 1d5a8b7a..8a68ecfe 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -8,10 +8,12 @@ use alloc::collections::VecDeque; use alloc::vec::Vec; use hashbrown::HashSet; +mod bitset; mod lru; mod iter; use lru::*; use iter::*; +use bitset::BitSet; #[derive(Debug)] struct Allocs { From da202879509614bc406295d5d418594d92a34165 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 8 Aug 2024 18:53:29 +0100 Subject: [PATCH 39/95] using matches instead of OperandConstraintKind in operand iterators --- src/fastalloc/iter.rs | 52 ++++++++++++++----------------------------- 1 file changed, 17 insertions(+), 35 deletions(-) diff --git a/src/fastalloc/iter.rs b/src/fastalloc/iter.rs index 8041afff..7ebfdee2 100644 --- a/src/fastalloc/iter.rs +++ b/src/fastalloc/iter.rs @@ -1,26 +1,5 @@ use crate::{Operand, OperandKind, OperandPos, OperandConstraint}; -#[derive(Clone, Copy, PartialEq)] -enum OperandConstraintKind { - Any, - Reg, - Stack, - FixedReg, - Reuse, -} - -impl From for OperandConstraintKind { - fn from(constraint: OperandConstraint) -> Self { - match constraint { - OperandConstraint::Any => Self::Any, - OperandConstraint::Reg => Self::Reg, - OperandConstraint::Stack => Self::Stack, - OperandConstraint::FixedReg(_) => Self::FixedReg, - OperandConstraint::Reuse(_) => Self::Reuse, - } - } -} - pub struct Operands<'a>(pub &'a [Operand]); impl<'a> Operands<'a> { @@ -37,15 +16,14 @@ impl<'a> Operands<'a> { pub fn non_fixed_non_reuse_late(&self) -> impl Iterator + 'a { 
self.matches(|op| - OperandConstraintKind::FixedReg != op.constraint().into() - && OperandConstraintKind::Reuse != op.constraint().into() + !matches!(op.constraint(), OperandConstraint::FixedReg(_) | OperandConstraint::Reuse(_)) && op.pos() == OperandPos::Late ) } pub fn non_reuse_late_def(&self) -> impl Iterator + 'a { self.matches(|op| - OperandConstraintKind::Reuse != op.constraint().into() + !matches!(op.constraint(), OperandConstraint::Reuse(_)) && op.pos() == OperandPos::Late && op.kind() == OperandKind::Def ) @@ -53,19 +31,19 @@ impl<'a> Operands<'a> { pub fn non_fixed_non_reuse_early(&self) -> impl Iterator + 'a { self.matches(|op| - OperandConstraintKind::FixedReg != op.constraint().into() - && OperandConstraintKind::Reuse != op.constraint().into() + !matches!(op.constraint(), OperandConstraint::FixedReg(_)) + && !matches!(op.constraint(), OperandConstraint::Reuse(_)) && op.pos() == OperandPos::Early ) } pub fn reuse(&self) -> impl Iterator + 'a { - self.matches(|op| OperandConstraintKind::Reuse == op.constraint().into()) + self.matches(|op| matches!(op.constraint(), OperandConstraint::Reuse(_))) } pub fn non_reuse_early_def(&self) -> impl Iterator + 'a { self.matches(|op| - OperandConstraintKind::Reuse != op.constraint().into() + !matches!(op.constraint(), OperandConstraint::Reuse(_)) && op.pos() == OperandPos::Early && op.kind() == OperandKind::Def ) @@ -73,29 +51,31 @@ impl<'a> Operands<'a> { pub fn fixed_early(&self) -> impl Iterator + 'a { self.matches(|op| - OperandConstraintKind::FixedReg == op.constraint().into() + matches!(op.constraint(), OperandConstraint::FixedReg(_)) && op.pos() == OperandPos::Early ) } pub fn fixed_late(&self) -> impl Iterator + 'a { self.matches(|op| - OperandConstraintKind::FixedReg == op.constraint().into() + matches!(op.constraint(), OperandConstraint::FixedReg(_)) && op.pos() == OperandPos::Late ) } pub fn non_reuse_def(&self) -> impl Iterator + 'a { self.matches(|op| - OperandConstraintKind::Reuse != 
op.constraint().into() + !matches!(op.constraint(), OperandConstraint::Reuse(_)) && op.kind() == OperandKind::Def ) } pub fn non_fixed_non_reuse_late_use(&self) -> impl Iterator + 'a { self.matches(|op| - OperandConstraintKind::FixedReg != op.constraint().into() - && OperandConstraintKind::Reuse != op.constraint().into() + !matches!(op.constraint(), + OperandConstraint::FixedReg(_) + | OperandConstraint::Reuse(_) + ) && op.pos() == OperandPos::Late && op.kind() == OperandKind::Use ) @@ -103,8 +83,10 @@ impl<'a> Operands<'a> { pub fn non_fixed_non_reuse_late_def(&self) -> impl Iterator + 'a { self.matches(|op| - OperandConstraintKind::FixedReg != op.constraint().into() - && OperandConstraintKind::Reuse != op.constraint().into() + !matches!( + op.constraint(), + OperandConstraint::FixedReg(_) | OperandConstraint::Reuse(_) + ) && op.pos() == OperandPos::Late && op.kind() == OperandKind::Def ) From b2916bf4f18a044d7d7a4468fb24ea6972b7d827 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 8 Aug 2024 19:06:13 +0100 Subject: [PATCH 40/95] updated outdated comments; changed debug_assertion checks in LRU; removed unnecessary Debug bound in PartedByRegClass --- src/fastalloc/lru.rs | 34 +++++++++++++++++++--------------- src/fastalloc/mod.rs | 14 +++++++++++++- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/src/fastalloc/lru.rs b/src/fastalloc/lru.rs index 0c98cc04..3ffb6ced 100644 --- a/src/fastalloc/lru.rs +++ b/src/fastalloc/lru.rs @@ -45,8 +45,7 @@ impl Lru { } } - /// Marks the physical register `i` as the most recently used - /// and sets `vreg` as the virtual register it contains. + /// Marks the physical register `preg` as the most recently used pub fn poke(&mut self, preg: PReg) { trace!("Before poking: {:?} LRU. 
head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); trace!("About to poke {:?} in {:?} LRU", preg, self.regclass); @@ -61,8 +60,9 @@ impl Lru { } self.head = hw_enc; trace!("Poked {:?} in {:?} LRU", preg, self.regclass); - #[cfg(debug_assertions)] - self.validate_lru(); + if cfg!(debug_assertions) { + self.validate_lru(); + } } /// Gets the least recently used physical register. @@ -74,8 +74,9 @@ impl Lru { } let oldest = self.data[self.head as usize].prev; trace!("Popped p{oldest} in {:?} LRU", self.regclass); - #[cfg(debug_assertions)] - self.validate_lru(); + if cfg!(debug_assertions) { + self.validate_lru(); + } PReg::new(oldest as usize, self.regclass) } @@ -100,11 +101,12 @@ impl Lru { } } trace!("Removed p{hw_enc} from {:?} LRU", self.regclass); - #[cfg(debug_assertions)] - self.validate_lru(); + if cfg!(debug_assertions) { + self.validate_lru(); + } } - /// Sets the node `i` to the last in the list. + /// Sets the physical register with hw_enc `hw_enc` to the last in the list. pub fn append(&mut self, hw_enc: usize) { trace!("Before appending: {:?} LRU. head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); trace!("Appending p{hw_enc} to the {:?} LRU", self.regclass); @@ -121,8 +123,9 @@ impl Lru { self.data[hw_enc].next = hw_enc as u8; } trace!("Appended p{hw_enc} to the {:?} LRU", self.regclass); - #[cfg(debug_assertions)] - self.validate_lru(); + if cfg!(debug_assertions) { + self.validate_lru(); + } } pub fn append_and_poke(&mut self, preg: PReg) { @@ -131,7 +134,7 @@ impl Lru { } /// Insert node `i` before node `j` in the list. - pub fn insert_before(&mut self, i: u8, j: u8) { + fn insert_before(&mut self, i: u8, j: u8) { trace!("Before inserting: {:?} LRU. 
head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); trace!("Inserting p{i} before {j} in {:?} LRU", self.regclass); let prev = self.data[j as usize].prev; @@ -142,8 +145,9 @@ impl Lru { prev, }; trace!("Done inserting p{i} before {j} in {:?} LRU", self.regclass); - #[cfg(debug_assertions)] - self.validate_lru(); + if cfg!(debug_assertions) { + self.validate_lru(); + } } pub fn is_empty(&self) -> bool { @@ -212,7 +216,7 @@ impl fmt::Debug for Lru { } #[derive(Debug)] -pub struct PartedByRegClass { +pub struct PartedByRegClass { pub items: [T; 3], } diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 1d5a8b7a..feae5781 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1419,7 +1419,19 @@ impl<'a, F: Function> Env<'a, F> { for (succ_idx, _) in self.func.block_succs(block).iter().enumerate() { // Move from branch args spillslots to temporaries. // - // Consider a scenario: block X branches to block Y and block Y branches to block X. + // Consider a scenario: + // + // block entry: + // goto Y(...) + // + // block Y(vp) + // goto X + // + // block X + // use vp + // goto Y(va) + // + // block X branches to block Y and block Y branches to block X. // Block Y has block param vp and block X uses virtual register va as the branch arg for vp. // Block X has an instruction that uses vp. 
// In the case where branch arg va is defined in a predecessor, there is a possibility From 73396f0f418909a22afe191d6cb9af0d99731368 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 8 Aug 2024 19:18:42 +0100 Subject: [PATCH 41/95] removed unnecessary loop and function call in process_branch --- src/fastalloc/mod.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index feae5781..bfa11f30 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1487,13 +1487,7 @@ impl<'a, F: Function> Env<'a, F> { trace!(" --- Placing branch arg {:?} in {:?}", vreg, temp); trace!("{:?} which is now in {:?} inserting move to {:?}", vreg, temp, param_alloc); self.add_move_later(inst, temp, param_alloc, vreg.class(), InstPosition::Before, false); - } - } - - reset_temp_idx(&mut next_temp_idx); - for (succ_idx, _) in self.func.block_succs(block).iter().enumerate() { - for vreg in self.func.branch_blockparams(block, inst, succ_idx).iter() { // All branch arguments should be in their spillslots at the end of the function. self.vreg_allocs[vreg.vreg()] = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); self.live_vregs.insert(*vreg); From 51b4b3231bf07499545fef9b48786e664e33299e Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 8 Aug 2024 22:41:04 +0100 Subject: [PATCH 42/95] rework reload_at_begin --- src/fastalloc/mod.rs | 80 ++++++++++++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 29 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index bfa11f30..db532a5e 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1629,10 +1629,46 @@ impl<'a, F: Function> Env<'a, F> { trace!("Live registers at the beginning of block {:?}: {:?}", block, self.live_vregs); trace!("Block params at block {:?} beginning: {:?}", block, self.func.block_params(block)); // We need to check for the registers that are still live. - // These registers are livein and they should be stack-allocated. 
+ // These registers are either livein or block params + // Liveins should be stack-allocated and block params should be freed. + for vreg in self.func.block_params(block).iter().cloned() { + trace!("Processing {:?}", vreg); + if self.vreg_allocs[vreg.vreg()] == Allocation::none() { + // If this block param was never used, its allocation will + // be none at this point. + continue; + } + if self.vreg_spillslots[vreg.vreg()].is_invalid() { + self.vreg_spillslots[vreg.vreg()] = self.allocstack(&vreg); + } + // The allocation where the vreg is expected to be before + // the first instruction. + let prev_alloc = self.vreg_allocs[vreg.vreg()]; + let slot = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); + trace!("{:?} is a block param. Freeing it", vreg); + // A block's block param is not live before the block. + // And `vreg_allocs[i]` of a virtual register i is none for + // dead vregs. + self.freealloc(vreg, PRegSet::empty()); + if slot == prev_alloc { + // No need to do any movements if the spillslot is where the vreg is expected to be. + trace!("No need to reload {:?} because it's already in its expected allocation", vreg); + continue; + } + trace!("Move reason: reload {:?} at begin - move from its spillslot", vreg); + self.add_move_later( + self.func.block_insns(block).first(), + slot, + prev_alloc, + vreg.class(), + InstPosition::Before, + true + ); + } let live_vregs = self.live_vregs.clone(); for vreg in live_vregs.iter().cloned() { trace!("Processing {:?}", vreg); + trace!("{:?} is not a block param. It's a liveout vreg from some predecessor", vreg); if self.vreg_spillslots[vreg.vreg()].is_invalid() { self.vreg_spillslots[vreg.vreg()] = self.allocstack(&vreg); } @@ -1640,29 +1676,20 @@ impl<'a, F: Function> Env<'a, F> { // the first instruction. let prev_alloc = self.vreg_allocs[vreg.vreg()]; let slot = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); - if self.func.block_params(block).contains(&vreg) { - trace!("{:?} is a block param. 
Freeing it", vreg); - // A block's block param is not live before the block. - // And `vreg_allocs[i]` of a virtual register i is none for - // dead vregs. - self.freealloc(vreg, PRegSet::empty()); - } else { - trace!("{:?} is not a block param. It's a liveout vreg from some predecessor", vreg); - trace!("Setting {:?}'s current allocation to its spillslot", vreg); - self.vreg_allocs[vreg.vreg()] = slot; - if let Some(preg) = prev_alloc.as_reg() { - trace!("{:?} was in {:?}. Removing it", preg, vreg); - // Nothing is in that preg anymore. Return it to - // the free preg list. - self.vreg_in_preg[preg.index()] = VReg::invalid(); - if !self.is_stack(prev_alloc) { - trace!("{:?} is not a fixed stack slot. Recording it in the freed def pregs list", prev_alloc); - // Using this instead of directly adding it to - // freepregs to prevent allocated registers from being - // used as scratch registers. - self.freed_def_pregs[preg.class()].add(preg); - self.lrus[preg.class()].remove(preg.hw_enc()); - } + trace!("Setting {:?}'s current allocation to its spillslot", vreg); + self.vreg_allocs[vreg.vreg()] = slot; + if let Some(preg) = prev_alloc.as_reg() { + trace!("{:?} was in {:?}. Removing it", preg, vreg); + // Nothing is in that preg anymore. Return it to + // the free preg list. + self.vreg_in_preg[preg.index()] = VReg::invalid(); + if !self.is_stack(prev_alloc) { + trace!("{:?} is not a fixed stack slot. Recording it in the freed def pregs list", prev_alloc); + // Using this instead of directly adding it to + // freepregs to prevent allocated registers from being + // used as scratch registers. 
+ self.freed_def_pregs[preg.class()].add(preg); + self.lrus[preg.class()].remove(preg.hw_enc()); } } if slot == prev_alloc { @@ -1680,13 +1707,8 @@ impl<'a, F: Function> Env<'a, F> { true ); } - for block_param_vreg in self.func.block_params(block) { - trace!("Removing block param {:?} from the live regs set", block_param_vreg); - self.live_vregs.remove(block_param_vreg); - } self.process_edits(self.get_scratch_regs_for_reloading()); self.add_freed_regs_to_freelist(); - if trace_enabled!() { self.log_post_reload_at_begin_state(block); } From 376b45cfc89b56802b2894d4c9b9fd8ecf40cf2d Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Sat, 10 Aug 2024 00:40:47 +0100 Subject: [PATCH 43/95] simplified handling of reuse operands --- src/fastalloc/iter.rs | 258 ++++-------------- src/fastalloc/mod.rs | 610 +++++++++--------------------------------- 2 files changed, 177 insertions(+), 691 deletions(-) diff --git a/src/fastalloc/iter.rs b/src/fastalloc/iter.rs index 7ebfdee2..91919d5d 100644 --- a/src/fastalloc/iter.rs +++ b/src/fastalloc/iter.rs @@ -49,6 +49,10 @@ impl<'a> Operands<'a> { ) } + pub fn fixed(&self) -> impl Iterator + 'a { + self.matches(|op| matches!(op.constraint(), OperandConstraint::FixedReg(_))) + } + pub fn fixed_early(&self) -> impl Iterator + 'a { self.matches(|op| matches!(op.constraint(), OperandConstraint::FixedReg(_)) @@ -70,6 +74,13 @@ impl<'a> Operands<'a> { ) } + pub fn non_fixed_def(&self) -> impl Iterator + 'a { + self.matches(|op| + !matches!(op.constraint(), OperandConstraint::FixedReg(_)) + && op.kind() == OperandKind::Def + ) + } + pub fn non_fixed_non_reuse_late_use(&self) -> impl Iterator + 'a { self.matches(|op| !matches!(op.constraint(), @@ -91,231 +102,68 @@ impl<'a> Operands<'a> { && op.kind() == OperandKind::Def ) } -} - -#[cfg(test)] -mod tests { - use alloc::vec::Vec; - use alloc::vec; - use crate::{PReg, RegClass}; - use super::*; - - // Using a new function because Operand::new isn't a const function - const fn operand(vreg_no: u32, 
constraint: OperandConstraint, kind: OperandKind, pos: OperandPos) -> Operand { - let constraint_field = match constraint { - OperandConstraint::Any => 0, - OperandConstraint::Reg => 1, - OperandConstraint::Stack => 2, - OperandConstraint::FixedReg(preg) => { - 0b1000000 | preg.hw_enc() as u32 - } - OperandConstraint::Reuse(which) => { - 0b0100000 | which as u32 - } - }; - let class_field = RegClass::Int as u8 as u32; - let pos_field = pos as u8 as u32; - let kind_field = kind as u8 as u32; - Operand { - bits: vreg_no - | (class_field << 21) - | (pos_field << 23) - | (kind_field << 24) - | (constraint_field << 25), - } - } - - const fn late_reuse_def_operand(vreg_no: u32) -> Operand { - operand(vreg_no, OperandConstraint::Reuse(0), OperandKind::Def, OperandPos::Late) - } - - const fn early_reuse_def_operand(vreg_no: u32) -> Operand { - operand(vreg_no, OperandConstraint::Reuse(0), OperandKind::Def, OperandPos::Early) - } - const fn early_reuse_use_operand(vreg_no: u32) -> Operand { - operand(vreg_no, OperandConstraint::Reuse(0), OperandKind::Use, OperandPos::Early) - } - - const fn late_reuse_use_operand(vreg_no: u32) -> Operand { - operand(vreg_no, OperandConstraint::Reuse(0), OperandKind::Use, OperandPos::Late) - } - - const fn late_def_operand(vreg_no: u32) -> Operand { - operand(vreg_no, OperandConstraint::Any, OperandKind::Def, OperandPos::Late) - } - - const fn late_use_operand(vreg_no: u32) -> Operand { - operand(vreg_no, OperandConstraint::Any, OperandKind::Use, OperandPos::Late) - } - - const fn early_use_operand(vreg_no: u32) -> Operand { - operand(vreg_no, OperandConstraint::Any, OperandKind::Use, OperandPos::Early) - } - - const fn early_def_operand(vreg_no: u32) -> Operand { - operand(vreg_no, OperandConstraint::Any, OperandKind::Def, OperandPos::Early) - } - - const fn fixed_late_def_operand(vreg_no: u32) -> Operand { - operand( - vreg_no, - OperandConstraint::FixedReg(PReg::new(1, RegClass::Int)), - OperandKind::Def, - OperandPos::Late, + pub fn 
non_fixed_late_use(&self) -> impl Iterator + 'a { + self.matches(|op| !matches!(op.constraint(), OperandConstraint::FixedReg(_)) + && op.pos() == OperandPos::Late + && op.kind() == OperandKind::Use ) } - const fn fixed_early_def_operand(vreg_no: u32) -> Operand { - operand( - vreg_no, - OperandConstraint::FixedReg(PReg::new(1, RegClass::Int)), - OperandKind::Def, - OperandPos::Early, + pub fn non_fixed_late_def(&self) -> impl Iterator + 'a { + self.matches(|op| !matches!(op.constraint(), OperandConstraint::FixedReg(_)) + && op.pos() == OperandPos::Late + && op.kind() == OperandKind::Def ) } - - const fn fixed_late_use_operand(vreg_no: u32) -> Operand { - operand( - vreg_no, - OperandConstraint::FixedReg(PReg::new(1, RegClass::Int)), - OperandKind::Use, - OperandPos::Late, + pub fn non_fixed_early_use(&self) -> impl Iterator + 'a { + self.matches(|op| !matches!(op.constraint(), OperandConstraint::FixedReg(_)) + && op.pos() == OperandPos::Early + && op.kind() == OperandKind::Use ) } - const fn fixed_early_use_operand(vreg_no: u32) -> Operand { - operand( - vreg_no, - OperandConstraint::FixedReg(PReg::new(1, RegClass::Int)), - OperandKind::Use, - OperandPos::Early, + pub fn non_fixed_early_def(&self) -> impl Iterator + 'a { + self.matches(|op| !matches!(op.constraint(), OperandConstraint::FixedReg(_)) + && op.pos() == OperandPos::Early + && op.kind() == OperandKind::Def ) } - static OPERANDS: [Operand; 14] = [ - late_reuse_def_operand(0), - late_def_operand(1), - early_reuse_def_operand(2), - early_use_operand(3), - early_def_operand(4), - late_reuse_def_operand(5), - late_use_operand(6), - late_reuse_use_operand(7), - early_def_operand(8), - early_use_operand(9), - - fixed_late_def_operand(10), - fixed_early_def_operand(11), - fixed_late_use_operand(12), - fixed_early_use_operand(13), - ]; - - #[test] - fn late() { - let late_operands: Vec<(usize, Operand)> = Operands::new(&OPERANDS).non_fixed_non_reuse_late() - .collect(); - assert_eq!(late_operands, vec![ - (1, 
late_def_operand(1)), - (6, late_use_operand(6)), - ]); - } - - #[test] - fn late_def() { - let late_def_operands: Vec<(usize, Operand)> = Operands::new(&OPERANDS).non_reuse_late_def() - .collect(); - assert_eq!(late_def_operands, vec![ - (1, late_def_operand(1)), - (10, fixed_late_def_operand(10)), - ]); - } - - #[test] - fn early() { - let early_operands: Vec<(usize, Operand)> = Operands::new(&OPERANDS).non_fixed_non_reuse_early() - .collect(); - assert_eq!(early_operands, vec![ - (3, early_use_operand(3)), - (4, early_def_operand(4)), - (8, early_def_operand(8)), - (9, early_use_operand(9)), - ]); - } - - #[test] - fn early_def() { - let early_def_operands: Vec<(usize, Operand)> = Operands::new(&OPERANDS).non_reuse_early_def() - .collect(); - assert_eq!(early_def_operands, vec![ - (4, early_def_operand(4)), - (8, early_def_operand(8)), - (11, fixed_early_def_operand(11)), - ]); - } - - #[test] - fn reuse() { - let reuse_operands: Vec<(usize, Operand)> = Operands::new(&OPERANDS).reuse() - .collect(); - assert_eq!(reuse_operands, vec![ - (0, late_reuse_def_operand(0)), - (2, early_reuse_def_operand(2)), - (5, late_reuse_def_operand(5)), - (7, late_reuse_use_operand(7)), - ]); + pub fn late_def(&self) -> impl Iterator + 'a { + self.matches(|op| + op.pos() == OperandPos::Late + && op.kind() == OperandKind::Def + ) } - #[test] - fn fixed_late() { - let fixed_late_operands: Vec<(usize, Operand)> = Operands::new(&OPERANDS).fixed_late() - .collect(); - assert_eq!(fixed_late_operands, vec![ - (10, fixed_late_def_operand(10)), - (12, fixed_late_use_operand(12)), - ]); + pub fn early_def(&self) -> impl Iterator + 'a { + self.matches(|op| + op.pos() == OperandPos::Early + && op.kind() == OperandKind::Def + ) } - - #[test] - fn fixed_early() { - let fixed_early_operands: Vec<(usize, Operand)> = Operands::new(&OPERANDS).fixed_early() - .collect(); - assert_eq!(fixed_early_operands, vec![ - (11, fixed_early_def_operand(11)), - (13, fixed_early_use_operand(13)), - ]); + + pub 
fn fixed_early_use(&self) -> impl Iterator + 'a { + self.matches(|op| matches!(op.constraint(), OperandConstraint::FixedReg(_)) + && op.pos() == OperandPos::Early + && op.kind() == OperandKind::Use + ) } - #[test] - fn def() { - let def_operands: Vec<(usize, Operand)> = Operands::new(&OPERANDS).non_reuse_def() - .collect(); - assert_eq!(def_operands, vec![ - (1, late_def_operand(1)), - (4, early_def_operand(4)), - (8, early_def_operand(8)), - (10, fixed_late_def_operand(10)), - (11, fixed_early_def_operand(11)), - ]); + pub fn fixed_late_def(&self) -> impl Iterator + 'a { + self.matches(|op| matches!(op.constraint(), OperandConstraint::FixedReg(_)) + && op.pos() == OperandPos::Late + && op.kind() == OperandKind::Def + ) } +} +impl<'a> core::ops::Index for Operands<'a> { + type Output = Operand; - #[test] - fn non_fixed_non_reuse_late_def() { - let def_operands: Vec<(usize, Operand)> = Operands::new(&OPERANDS).non_fixed_non_reuse_late_def() - .collect(); - assert_eq!(def_operands, vec![ - (1, late_def_operand(1)), - ]); - } - - #[test] - fn non_fixed_non_reuse_late_use() { - let late_operands: Vec<(usize, Operand)> = Operands::new(&OPERANDS).non_fixed_non_reuse_late_use() - .collect(); - assert_eq!(late_operands, vec![ - (6, late_use_operand(6)), - ]); + fn index(&self, index: usize) -> &Self::Output { + &self.0[index] } } diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index db532a5e..c3a7d1ba 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -6,7 +6,7 @@ use crate::{Function, MachineEnv, ssa::validate_ssa, ProgPoint, Edit, Output}; use crate::{cfg::CFGInfo, RegAllocError, Allocation, ion::Stats}; use alloc::collections::VecDeque; use alloc::vec::Vec; -use hashbrown::HashSet; +use hashbrown::{HashMap, HashSet}; mod lru; mod iter; @@ -22,21 +22,23 @@ struct Allocs { } impl Allocs { - fn new(func: &F) -> Self { + fn new(func: &F) -> (Self, u32) { let operand_no_guess = func.num_insts() * 3; let mut allocs = Vec::with_capacity(operand_no_guess); 
let mut inst_alloc_offsets = Vec::with_capacity(operand_no_guess); + let mut max_operand_len = 0; for inst in 0..func.num_insts() { let operands_len = func.inst_operands(Inst::new(inst)).len() as u32; + max_operand_len = max_operand_len.max(operands_len); inst_alloc_offsets.push(allocs.len() as u32); for _ in 0..operands_len { allocs.push(Allocation::none()); } } - Self { + (Self { allocs, inst_alloc_offsets, - } + }, max_operand_len) } } @@ -138,9 +140,10 @@ pub struct Env<'a, F: Function> { /// Used to determine if a scratch register is needed for an /// instruction's moves during the `process_edit` calls. inst_needs_scratch_reg: PartedByRegClass, - /// The operand indexes of the inputs reused for reuse operands. - /// This is used to avoid assigning a freed def preg to a reused input. - reused_inputs_in_curr_inst: Vec, + /// `reused_input_to_reuse_op[i]` is the operand index of the reuse operand + /// that uses the `i`th operand in the current instruction as its input. + /// This is used to + reused_input_to_reuse_op: Vec, /// The vregs defined or used in the current instruction. vregs_in_curr_inst: HashSet, /// The physical registers allocated to the operands in the current instruction. 
@@ -173,6 +176,7 @@ impl<'a, F: Function> Env<'a, F> { regs[2].extend(env.non_preferred_regs_by_class[RegClass::Vector as usize].iter().cloned()); use alloc::vec; trace!("{:?}", env); + let (allocs, max_operand_len) = Allocs::new(func); Self { func, allocatable_regs: PRegSet::from(env), @@ -206,7 +210,7 @@ impl<'a, F: Function> Env<'a, F> { freed_def_pregs: PartedByRegClass { items: [PRegSet::empty(), PRegSet::empty(), PRegSet::empty()] }, vregs_first_seen_in_curr_inst: HashSet::new(), inst_needs_scratch_reg: PartedByRegClass { items: [false, false, false] }, - reused_inputs_in_curr_inst: Vec::new(), + reused_input_to_reuse_op: vec![usize::MAX; max_operand_len as usize], vregs_in_curr_inst: HashSet::new(), pregs_allocd_in_curr_inst: PRegSet::empty(), dedicated_scratch_regs: PartedByRegClass { items: [ @@ -214,7 +218,7 @@ impl<'a, F: Function> Env<'a, F> { env.scratch_by_class[1], env.scratch_by_class[2], ] }, - allocs: Allocs::new(func), + allocs, edits: VecDeque::new(), safepoint_slots: Vec::new(), num_spillslots: 0, @@ -462,7 +466,7 @@ impl<'a, F: Function> Env<'a, F> { Ok(preg) } - fn freealloc(&mut self, vreg: VReg, clobbers: PRegSet) { + fn freealloc(&mut self, vreg: VReg, clobbers: PRegSet, is_fixed_def: bool) { trace!("Freeing vreg {:?}", vreg); let alloc = self.vreg_allocs[vreg.vreg()]; match alloc.kind() { @@ -471,7 +475,7 @@ impl<'a, F: Function> Env<'a, F> { self.vreg_in_preg[preg.index()] = VReg::invalid(); // If it's a fixed stack slot, then it's not allocatable. if !self.is_stack(alloc) { - if clobbers.contains(preg) { + if clobbers.contains(preg) || is_fixed_def { // For a defined vreg to be restored to the location it's expected to // be in after the instruction, it cannot be allocated to a clobber because that // will make the restoration impossible. @@ -484,8 +488,11 @@ impl<'a, F: Function> Env<'a, F> { // a clobber can be newly allocated to a vreg in the instruction is to // use a fixed register constraint. 
self.free_after_curr_inst[preg.class()].add(preg); - // No need to remove the preg from the LRU because clobbers - // have already been removed from the LRU. + if is_fixed_def { + self.lrus[vreg.class()].remove(preg.hw_enc()); + } + // No need to remove the preg from the LRU if it's a clobber + // because clobbers have already been removed from the LRU. } else { // Added to the freed def pregs list, not the free pregs // list to avoid a def's allocated register being used @@ -495,6 +502,7 @@ impl<'a, F: Function> Env<'a, F> { self.lrus[vreg.class()].remove(preg.hw_enc()); } } + self.pregs_allocd_in_curr_inst.remove(preg); } AllocationKind::Stack => (), AllocationKind::None => unreachable!("Attempting to free an unallocated operand!") @@ -502,6 +510,7 @@ impl<'a, F: Function> Env<'a, F> { self.vreg_allocs[vreg.vreg()] = Allocation::none(); self.live_vregs.remove(&vreg); trace!("{:?} curr alloc is now {:?}", vreg, self.vreg_allocs[vreg.vreg()]); + trace!("Pregs currently allocated: {}", self.pregs_allocd_in_curr_inst); } /// Allocates a spill slot on the stack for `vreg` @@ -524,7 +533,7 @@ impl<'a, F: Function> Env<'a, F> { } /// Allocates a physical register for the operand `op`. - fn alloc_reg_for_operand(&mut self, inst: Inst, op: Operand, op_idx: usize) -> Result<(), RegAllocError> { + fn alloc_reg_for_operand(&mut self, inst: Inst, op: Operand) -> Result<(), RegAllocError> { trace!("freepregs int: {}", self.freepregs[RegClass::Int]); trace!("freepregs vector: {}", self.freepregs[RegClass::Vector]); trace!("freepregs float: {}", self.freepregs[RegClass::Float]); @@ -536,19 +545,9 @@ impl<'a, F: Function> Env<'a, F> { // the operand uses or defines a vreg in the early phase and the vreg doesn't // live past the instruction. If the vreg lives past the instruction, then the // defined value will overwrite it. 
- if op.pos() == OperandPos::Early && self.vregs_first_seen_in_curr_inst.contains(&op.vreg()) - // A reused input should not have the same allocation as a non-reused def operand. - // For example: - // 1. dev v0 (fixed: p0), def v1 (reuse: 2), use v2 - // 2. use v0, use v1 - // In the above, p0 is assigned to v0 in inst 1. Say, after v0 is freed, - // p0 is re-assigned to v2. Then, when reuse operands are processed, p0 - // will also end up being v1's allocation in inst 1. - // The end result will be allocating two defined vregs, v0 and v1, to the - // same allocation p0. - // To avoid this, don't allow a reused input to have the same allocation as - // a freed def operand. - && !self.reused_inputs_in_curr_inst.contains(&op_idx) + if op.pos() == OperandPos::Early + && op.kind() == OperandKind::Use + && self.vregs_first_seen_in_curr_inst.contains(&op.vreg()) { if let Some(freed_def_preg) = remove_any_from_pregset(&mut self.freed_def_pregs[op.class()]) { trace!("Reusing the freed def preg: {}", freed_def_preg); @@ -569,6 +568,7 @@ impl<'a, F: Function> Env<'a, F> { self.lrus[op.class()].poke(preg); self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(preg); self.vreg_in_preg[preg.index()] = op.vreg(); + self.pregs_allocd_in_curr_inst.add(preg); Ok(()) } @@ -634,32 +634,37 @@ impl<'a, F: Function> Env<'a, F> { } self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(preg); self.vreg_in_preg[preg.index()] = op.vreg(); + self.pregs_allocd_in_curr_inst.add(preg); trace!("vreg {:?} is now in preg {:?}", op.vreg(), preg); Ok(()) } /// Allocates for the operand `op` with index `op_idx` into the /// vector of instruction `inst`'s operands. - /// Only non reuse-input operands. 
- fn alloc_operand(&mut self, inst: Inst, op: Operand, op_idx: usize) -> Result<(), RegAllocError> { + fn alloc_operand(&mut self, inst: Inst, op: Operand, op_idx: usize, fixed_spillslot: Option) -> Result<(), RegAllocError> { match op.constraint() { OperandConstraint::Any => { - self.alloc_reg_for_operand(inst, op, op_idx)?; + self.alloc_reg_for_operand(inst, op)?; } OperandConstraint::Reg => { - self.alloc_reg_for_operand(inst, op, op_idx)?; + self.alloc_reg_for_operand(inst, op)?; } OperandConstraint::Stack => { - if self.vreg_spillslots[op.vreg().vreg()].is_invalid() { - self.vreg_spillslots[op.vreg().vreg()] = self.allocstack(&op.vreg()); - } - self.vreg_allocs[op.vreg().vreg()] = Allocation::stack(self.vreg_spillslots[op.vreg().vreg()]); + let slot = if let Some(spillslot) = fixed_spillslot { + spillslot + } else { + if self.vreg_spillslots[op.vreg().vreg()].is_invalid() { + self.vreg_spillslots[op.vreg().vreg()] = self.allocstack(&op.vreg()); + } + self.vreg_spillslots[op.vreg().vreg()] + }; + self.vreg_allocs[op.vreg().vreg()] = Allocation::stack(slot); } OperandConstraint::FixedReg(preg) => { self.alloc_fixed_reg_for_operand(inst, op, preg)?; } OperandConstraint::Reuse(_) => { - // This is handled elsewhere + // This is handled elsewhere. unreachable!(); } } @@ -667,9 +672,11 @@ impl<'a, F: Function> Env<'a, F> { Ok(()) } - /// Only processes non reuse-input operands - fn process_operand_allocation(&mut self, inst: Inst, op: Operand, op_idx: usize) -> Result<(), RegAllocError> { - debug_assert!(!matches!(op.constraint(), OperandConstraint::Reuse(_))); + /// Allocate operand the `op_idx`th operand `op` in instruction `inst` within its constraint. + /// Since only fixed register constraints are allowed, `fixed_spillslot` is used when a + /// fixed stack allocation is needed, like when transferring a stack allocation from a + /// reuse operand allocation to the reused input. 
+ fn process_operand_allocation(&mut self, inst: Inst, op: Operand, op_idx: usize, fixed_spillslot: Option) -> Result<(), RegAllocError> { if let Some(preg) = op.as_fixed_nonallocatable() { self.allocs[(inst.index(), op_idx)] = Allocation::reg(preg); trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); @@ -682,7 +689,7 @@ impl<'a, F: Function> Env<'a, F> { if prev_alloc.is_none() { self.vregs_first_seen_in_curr_inst.insert(op.vreg()); } - self.alloc_operand(inst, op, op_idx)?; + self.alloc_operand(inst, op, op_idx, fixed_spillslot)?; // Need to insert a move to propagate flow from the current // allocation to the subsequent places where the value was // used (in `prev_alloc`, that is). @@ -873,429 +880,6 @@ impl<'a, F: Function> Env<'a, F> { trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); } self.vregs_allocd_in_curr_inst.insert(op.vreg()); - if let Some(preg) = self.allocs[(inst.index(), op_idx)].as_reg() { - self.pregs_allocd_in_curr_inst.add(preg); - } - Ok(()) - } - - fn process_reuse_operand_allocation( - &mut self, - inst: Inst, - op: Operand, - op_idx: usize, - reused_op: Operand, - reused_op_idx: usize, - clobbers: PRegSet, - ) -> Result<(), RegAllocError> { - debug_assert!(matches!(op.constraint(), OperandConstraint::Reuse(_))); - self.vregs_in_curr_inst.insert(op.vreg()); - // To handle reuse operands, the reused input's allocation is always used for - // both operands. This is to avoid having to think about how to handle fixed - // register constraints. For example: - // 1. def v0 (reuse: 1), use v1 (fixed: p0) - // p0 is simply used for both operands, regardless of where v0 is expected to be - // after the instruction. - // - // - // A check has to be made to see if the reuse operand has already been allocated, - // in a previous alloc_inst call. 
There are 2 cases that need to be considered here: - // - // Case 1: The reuse operand has already been allocated. - // Case 2: The reuse operand has no allocation. - // - // For case 1: - // An example: - // - // inst 1: def v0 (reuse: 1), use v1 - // inst 2: use v0 - // In the above example, v0 will have already been allocated by the time inst 1 - // is about to be processed. - // After this inst 1, v0 is expected to be in some location l0. - // Depending on whether v1 lives past this instruction, there are also two subcases: - // Case 1.1: The reused input, v1, lives past the instruction and is expected to be in some - // location l1 afterwards. - // Case 1.2: The reused input v1, doesn't live past the instruction. - // - // In case 1.1, both v0 and v1 live past the instruction, so edits have to be inserted to - // ensure that both end up in their expected after instruction locations. - // Again, depending on whether or not l0 is used by another operand in the current instruction, - // there are two subcases here: - // Case 1.1.1: l0 is used by another operand in the instruction. - // Case 1.1.2: l0 isn't use by another operand in the instruction. - // - // In case 1.1.1: - // - // Again, depending on whether or not v1 is used multiple times in the same instruction: - // Case 1.1.1.1: v1 is not used multiple times - // Case 1.1.1.2: v1 is used multiple times - // - // Case 1.1.1.1: - // - // 1. def v0 (reuse: 1), use v1, use v2 - // 2. use v0, use v1 - // - // At inst 2, v0 is expected to be at l0 and v1 is expected to be at l1. - // During the processing of inst 1, the v1 and v2 operands are processed before the reuse operand - // v0. If by some fixed register constraint either v1 or v2 get l0 as an allocation, then v0 - // will be evicted and an edit will be added after inst 1 to move from v0's spillslot to l0 - // by the eviction procedure. Then we'll have: - // - // 1. def v0 (reuse: 1), use v1, use v2 - // move from stack_v0 to l0 // Added during eviction. 
- // 2. use v0, use v1 - // - // And v0's current allocation, at the point of invoking this reuse operand allocation - // procedure will be stack_v0 (vreg_allocs[v0] == stack_v0). - // Suppose v1 is allocated to p0. - // For the flow to be correct, two things have to occur: - // 1. The reused input v1 has to be saved and restored, because the definition of v0 - // overwrites it in inst 1. - // 2. v0 has to be moved into its current allocation (stack_v0) before the eviction edit. - // - // The edits added by this procedure will be like so: - // - // move from p0 to stack_v1 // Saving v1. - // 1. def v0 (reuse: 1), use v1, use v2 // p0 is used for both v1 and v0. - // move from p0 to vreg_allocs[v0] (== stack_v0) // Move into v0's current allocation. - // move from stack_v0 to l0 // Added during eviction. - // move from stack_v1 to l1 // Restoring v1. - // 2. use v0, use v1 - // - // Hence, the edits have to be inserted in the following order: - // 1. The edit to save the reused input is preprended to the other edits before the instruction. - // 2. The edit to move v0 from its reused input into its current allocation (spillslot) - // will be prepended to the edits after the instruction. - // 3. The edit to restore v1 will be appended to the edits after the instruction. - // - // In case 1.1.1.2, v1 is used multiple times. For example: - // - // 1. def v0 (reuse: 1), use v1, use v1 - // 2. use v0, use v1 - // - // Depending on the constraints, there are two sub cases: - // - // Case 1.1.1.2.1: All the v1 uses have the same constraints. - // Case 1.1.1.2.2: Some of the v1 uses constraints differ. - // - // In case 1.1.1.2.1, no edits will have been inserted by the `process_operand_allocation` - // procedure because they all have the same constraint. It is equivalent to a single use. - // So, it can be handled just like case 1.1.1.1. 
- // - // In case 1.1.1.2.2, they have different constraints, so edits will have already been inserted - // to effect the flow from the current allocation to the previous ones. - // For example: - // - // 1. def v0 (reuse: 1), use v1 (fixed: p5), use v1 (fixed: p8), use v2 - // 2. use v0, use v1 - // - // In the above, v1 is used multiple times with different constraints. - // Now, there are two subcases: - // Case 1.1.1.2.2.1: l1 == p5 (or whatever allocation is used for the first v1 operand) - // Case 1.1.1.2.2.2: l1 != p5 - // - // In case 1.1.1.2.2.1: - // - // No edit will be inserted when the first v1 use is encountered. But when the second is - // encountered, an edit will be prepended by `process_operand_allocation` to the edits - // before inst 1 to move from p8 to p5, and p8 will be marked as v1's current allocation, - // that is, vreg_allocs[v1] == p8: - // - // move from p8 to p5 // Current alloc to prev alloc in the same instruction. - // 1. def v0 (reuse: 1), use v1 (fixed: p5), use v1 (fixed: p8), use v2 - // 2. use v0, use v1 - // - // Also, depending on whether or not p8 was already used by another operand, an eviction may - // occur and an edit will be inserted to move from a spillslot to p8 after the instruction: - // - // move from p8 to p5 // Current alloc to prev alloc in the same instruction. - // 1. def v0 (reuse: 1), use v1 (fixed: p5), use v1 (fixed: p8), use v2 - // /* move from stack_? to p8 (possible eviction) */ - // 2. use v0, use v1 - // - // In fact, because of the possibility of using multiple v1s and other fixed register - // constraints, there are a variable number of eviction edits after the instruction: - // - // move from p8 to p5 // Current alloc to prev alloc in the same instruction. - // 1. def v0 (reuse: 1), use v1 (fixed: p5), use v1 (fixed: p8), use v2 - // 0 or more edits, possibly writing to vreg_allocs[v1] - // 2. use v0, use v1 - // - // By the preconditions of this case, one of the use operands uses l0. 
- // Either it's one of the v1 uses or a non-v1 use, like the v2 use in the above example, - // but I don't think that matters in this case. - // Because of this, the edits inserted between insts 1 and 2 will contain an eviction - // edit to move from stack_v0 to l0 and vreg_allocs[v0] == stack_v0: - // - // move from p8 to p5 // Current alloc to prev alloc in the same instruction. - // 1. def v0 (reuse: 1), use v1 (fixed: p5), use v1 (fixed: p8), use v2 - // 1 or more edits, with one from stack_v0 to l0 - // 2. use v0, use v1 - // - // Now, this can be handled just like in case 1.1.1.1. An edit is prepended to the before-inst-1 - // edits to save v1 to the stack. An edit is prepended to the after-inst-1 edits to move - // v0 from its reused input to its current allocation, vreg_allocs[v0]. And one more edit - // to move from v1's spillslot to l1 is appended to the after-inst-1 edits. - // - // move from p8 to stack_v1 // Saving v1 - // move from p8 to p5 // Current alloc to prev alloc in the same instruction. - // 1. def v0 (reuse: 1), use v1 (fixed: p5), use v1 (fixed: p8), use v2 - // move from p5 to stack_v0 (== vreg_allocs[v0]) // Move into v0's current allocation. - // 0 or more edits, with one from stack_v0 to l0 - // move from stack_v1 to l1 // Restoring v1 - // 2. use v0, use v1 - // - // Case 1.1.1.2.2.2: - // - // Now, if l1 != p5, then `process_operand_allocation` will insert two edits: a save - // and restore for v1, with the save being appended and the restore being prepended: - // - // move from p5 to stack_v1 // Save inserted by `process_operand_allocation` - // 1. def v0 (reuse: 1), use v1 (fixed: p5), use v1 (fixed: p8), use v2 - // move from stack_v1 to l1 // Restore inserted by `process_operand_allocation` - // 2. use v0, use v1 - // - // And when the second allocation is encountered, an edit is also inserted to move from - // the new current allocation (p8) to the previous p5. 
- // - // move from p8 to p5 // Current alloc to prev alloc in the same instruction. - // move from p5 to stack_v1 // Save inserted by `process_operand_allocation`. - // 1. def v0 (reuse: 1), use v1 (fixed: p5), use v1 (fixed: p8), use v2 - // move from stack_v1 to l1 // Restore inserted by `process_operand_allocation`. - // 2. use v0, use v1 - // - // By the preconditions, one of the operand uses l0, so an eviction edit has already - // been inserted to move from stack_v0 to l0: - // - // move from p8 to p5 // Current alloc to prev alloc in the same instruction. - // move from p5 to stack_v1 // Save inserted by `process_operand_allocation`. - // 1. def v0 (reuse: 1), use v1 (fixed: p5), use v1 (fixed: p8), use v2 - // move from stack_v1 to l1 // Restore inserted by `process_operand_allocation`. - // 1 or more edits, with one from stack_v0 to l0 // - // 2. use v0, use v1 - // - // The restore inserted by the `process_operand_allocation` can come before or after any - // of the other edits, depending on the order of the operands. But that isn't a problem - // because the move is from its spillslot and every vreg has its own unique spillslot, - // so we can be sure that only v1 is in stack_v1. - // - // This can be handled like case 1.1.1.1, but the only edit inserted is the one prepended - // to the after-inst-1 edits to move from the reused input to vreg_allocs[v0]. - // This will work because v1 already has save-and-reload edits inserted by - // `process_operand_allocation`, so those edits don't need to be inserted anymore. - // And because of how `process_operand_allocation` and the eviction procedures insert edits, - // no matter how edits are inserted, the edit from stack_v1 to l1 will always - // be the last write to v1 before the next instruction. - // - // In the case 1.1.2, depending on whether v1 is used multiple times: - // Case 1.1.2.1: v1 is not used multiple times. - // Case 1.1.2.2: v1 is used multiple times. 
- // - // In case 1.1.2.1: - // - // l0 isn't used by any other operand in the instruction, so no eviction - // occurs and vreg_allocs[v0] at this point == l0. - // Here, the only edits that needs to be added is the move from the reused input allocation - // into vreg_allocs[v0] (which is l0) and the save and restore of v1. - // For example: - // - // 1. def v0 (reuse: 1), use v1, use v2 // Both v0 and v1 use p0 - // 2. use v0, use v1 - // - // Becomes: - // - // move from p0 to stack_v1 // Save v1. - // 1. def v0 (reuse: 1), use v1, use v2 - // move from p0 to vreg_allocs[v0] (== l0) // Move to v0's current allocation. - // move from stack_v1 to l1 // Restore v1. - // 2. use v0, use v1 - // - // So, the procedure for case 1.1.1.1 is the same for case 1.1.2.1. - // - // In case 1.1.2.2, v1 is used multiple times. If they all have the same constraint, - // then it can be handled just like case 1.1.1.1. - // If they have different constraints, then the only difference between this case and - // case 1.1.1.2.2 is that vreg_allocs[v0] at this point is already == l0. - // This can be handled just like case 1.1.1.2.2, with the save and restore of v1 and - // move from the reused input to vreg_allocs[p1] being inserted when - // `process_operand_allocation` has not inserted any save and restores of v1. - // And only the move from the reused input to vreg_allocs[p1] is added when `proces_operand_allocation` - // has already inserted a save and restore for v1. - // - // In case 1.2, the reused input doesn't live past the instruction. Only v0 - // does. In this case, the only edit added is the one to move v0 from the reused - // input allocation p0 to l0. The edits to save and restore v1 are omitted. - // For example: - // - // 1. def v0 (reuse: 1), use v1, use v2 // Both v0 and v1 use p0 - // 2. use v0 // Only v0 lives past inst 1 - // - // The only edit inserted is the one to move from p0 to vreg_allocs[v0] (l0): - // - // 1. 
def v0 (reuse: 1), use v1, use v2 // Both v0 and v1 use p0 - // move from p0 to vreg_allocs[v0] // Move to v0's current allocation - // 2. use v0, use v1 - // - // In case 2, v0 has no allocation, meaning it doesn't live past the instruction. - // There are two subcases here: - // Case 2.1: The reused input v1 lives past the instruction. - // Case 2.2: The reused input v1 doesn't live past the instruction. - // - // And their sub subcases: - // - // Case 2.1.1: The reused input is not used multiple times. - // Case 2.1.2: The reused input is used multiple times. - // We don't need to consider multiple uses in case 2.2 because v1 doesn't live past - // the instruction. - // - // In case 2.1.1, the only edits that need to be added are the ones to save and restore v1. - // For example: - // - // 1. def v0 (reuse: 1), use v1, use v2 // Both v0 and v1 use p0 - // 2. use v1 - // - // Becomes: - // - // move from p0 to stack_v1 // Save v1. - // 1. def v0 (reuse: 1), use v1, use v2 // Both v0 and v1 use p0. - // move from stack_v1 to p0 // Restore v1. - // 2. use v1 - // - // Case 2.1.2, like in case 1.1.1.2, edits to save and restore v1 only need to be added - // if `process_operand_allocation` didn't add them already. The edit to move from - // the reused input to vreg_allocs[v0] can be omitted, since v0 hasn't been allocated. - // - // In case 2.2, no edits should be inserted at all since none of them live past the - // instruction. - // - // From these cases, it can be concluded that edits to save and restore the reused input - // should be inserted whenever the reused input lives past the current instruction - // and `process_operand_allocation` hasn't inserted any save and restores of the - // reused input (a vreg will be present in `use_vregs_saved_and_restored_in_curr_inst` - // at this point if `process_operand_allocation` has inserted a save and reload for the vreg). 
- // And the edit to move the reuse operand into its after-instruction-allocation should - // only be inserted if it lives past the current instruction. - // And because of interactions with other edits, the edits, - // when inserted, should be in the following order: - // - // 1. The edit to save the reused input is preprended to the other edits before the instruction. - // 2. The edit to move v0 from its reused input into its current allocation - // will be prepended to the edits after the instruction. - // 3. The edit to restore v1 will be appended to the edits after the instruction. - // - trace!("Move Reason: Reuse constraints"); - - if reused_op.kind() != OperandKind::Use || op.kind() != OperandKind::Def - || reused_op.pos() != OperandPos::Early || op.pos() != OperandPos::Late - || reused_op.class() != op.class() - { - // Invalid input. - return Err(RegAllocError::TooManyLiveRegs); - } - let reused_input_lives_past_curr_inst = !self.vregs_first_seen_in_curr_inst.contains(&reused_op.vreg()); - let reused_op_alloc = self.allocs[(inst.index(), reused_op_idx)]; - if self.vreg_allocs[op.vreg().vreg()].is_some() { - let reused_op_vreg = reused_op.vreg(); - // The only way that a vreg can be assigned a clobber is if a fixed register - // constraint demands it. - // Reusing an input assigned to a clobber will result in a clobber being assigned - // to the vreg being defined. Since the vreg is live after this instruction, this - // must be an error, because it will be impossible to restore the defined vreg - // afterwards. - if let Some(preg) = reused_op_alloc.as_reg() { - if clobbers.contains(preg) { - // Invalid input. 
- return Err(RegAllocError::TooManyLiveRegs); - } - } - let op_prev_alloc = self.vreg_allocs[op.vreg().vreg()]; - if self.vreg_spillslots[reused_op_vreg.vreg()].is_invalid() { - self.vreg_spillslots[reused_op_vreg.vreg()] = self.allocstack(&reused_op_vreg); - } - let reused_op_spillslot = self.vreg_spillslots[reused_op_vreg.vreg()]; - - // Save the reused input only if it lives past the current instruction - // and `process_operand_allocation` hasn't inserted save and reload edits - // for it. - if reused_input_lives_past_curr_inst - && !self.use_vregs_saved_and_restored_in_curr_inst.contains(&reused_op_vreg) - { - self.add_move_later( - inst, - self.vreg_allocs[reused_op_vreg.vreg()], - Allocation::stack(reused_op_spillslot), - op.class(), - InstPosition::Before, - true, - ); - } - - // Move the reuse operand from the reused input's allocation into the location it's - // expected to be in after the current instruction. - self.add_move_later( - inst, - reused_op_alloc, - // It is possible for reused_op_alloc != vreg_allocs[reused_op] - // in the case where multiple operands use the reused vreg with different - // constraints. - op_prev_alloc, - op.class(), - InstPosition::After, - true, - ); - - // Save the reused input only if it lives past the current instruction - // and `process_operand_allocation` hasn't inserted save and reload edits - // for it. 
- if reused_input_lives_past_curr_inst - && !self.use_vregs_saved_and_restored_in_curr_inst.contains(&reused_op_vreg) - { - self.add_move_later( - inst, - Allocation::stack(reused_op_spillslot), - reused_op_alloc, - op.class(), - InstPosition::After, - false, - ); - self.use_vregs_saved_and_restored_in_curr_inst.insert(reused_op.vreg()); - } - - self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[reused_op_vreg.vreg()]; - self.freealloc(op.vreg(), clobbers); - trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); - } else { - let reused_op_vreg = reused_op.vreg(); - if reused_input_lives_past_curr_inst - && !self.use_vregs_saved_and_restored_in_curr_inst.contains(&reused_op_vreg) - { - if self.vreg_spillslots[reused_op_vreg.vreg()].is_invalid() { - self.vreg_spillslots[reused_op_vreg.vreg()] = self.allocstack(&reused_op_vreg); - } - let reused_op_spillslot = self.vreg_spillslots[reused_op_vreg.vreg()]; - // Save the reused input to its spillslot before the instruction. - self.add_move_later( - inst, - self.vreg_allocs[reused_op_vreg.vreg()], - Allocation::stack(reused_op_spillslot), - op.class(), - InstPosition::Before, - true, - ); - // Restore the reused input. 
- self.add_move_later( - inst, - Allocation::stack(reused_op_spillslot), - reused_op_alloc, - op.class(), - InstPosition::After, - false, - ); - self.use_vregs_saved_and_restored_in_curr_inst.insert(reused_op.vreg()); - } - self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[reused_op_vreg.vreg()]; - trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); - } Ok(()) } @@ -1516,38 +1100,90 @@ impl<'a, F: Function> Env<'a, F> { self.lrus[preg.class()].remove(preg.hw_enc()); } } - for (_, op) in operands.reuse() { + for (op_idx, op) in operands.reuse() { let OperandConstraint::Reuse(reused_idx) = op.constraint() else { unreachable!() }; - self.reused_inputs_in_curr_inst.push(reused_idx); + self.reused_input_to_reuse_op[reused_idx] = op_idx; } - for (op_idx, op) in operands.fixed_late() { - self.process_operand_allocation(inst, op, op_idx)?; + for (op_idx, op) in operands.fixed() { + let reuse_op_idx = self.reused_input_to_reuse_op[op_idx]; + if reuse_op_idx != usize::MAX { + let reuse_op = operands[reuse_op_idx]; + let new_reuse_op = Operand::new(reuse_op.vreg(), op.constraint(), reuse_op.kind(), reuse_op.pos()); + self.process_operand_allocation(inst, new_reuse_op, reuse_op_idx, None)?; + } + // It's possible for a fixed early use to have the same fixed constraint + // as a fixed late def. Because of this, handle the fixed early use without + // explicit reuse operand constraints later. + else if op.pos() != OperandPos::Early || op.kind() != OperandKind::Use { + self.process_operand_allocation(inst, op, op_idx, None)?; + } } - for (op_idx, op) in operands.non_fixed_non_reuse_late_def() { - self.process_operand_allocation(inst, op, op_idx)?; + for (_, op) in operands.fixed_late_def() { + // It is possible for a fixed early use to + // use a register allocated to a fixed late def. 
+ // This deallocates fixed late defs, making it possible + // for those early fixed uses to be allocated successfully, + // without making the fixed registers available for reuse by other + // operands in the instruction. + self.freealloc(op.vreg(), clobbers, true); } - for (_, op) in operands.non_reuse_late_def() { - self.freealloc(op.vreg(), clobbers); + for (op_idx, op) in operands.fixed_early_use() { + // The reuse operands inputs already have their allocations with + // the reuse operands. Those allocations will be moved over to the + // reused input records when the reuse operands are deallocated. + if self.reused_input_to_reuse_op[op_idx] == usize::MAX { + self.process_operand_allocation(inst, op, op_idx, None)?; + } else { + trace!("Not allocating {} now because it's a reused input", op); + } } - for (op_idx, op) in operands.fixed_early() { - self.process_operand_allocation(inst, op, op_idx)?; + for (op_idx, op) in operands.non_fixed_def() { + if let OperandConstraint::Reuse(reused_idx) = op.constraint() { + let reused_op = operands[reused_idx]; + if matches!(reused_op.constraint(), OperandConstraint::FixedReg(_)) { + // The reuse operands that reuse early fixed uses have already been + // allocated. 
+ continue; + } + let new_reuse_op = Operand::new(op.vreg(), reused_op.constraint(), op.kind(), op.pos()); + self.process_operand_allocation(inst, new_reuse_op, op_idx, None)?; + } else { + self.process_operand_allocation(inst, op, op_idx, None)?; + } } - for (op_idx, op) in operands.non_fixed_non_reuse_late_use() { - self.process_operand_allocation(inst, op, op_idx)?; + for (op_idx, op) in operands.non_fixed_late_use() { + self.process_operand_allocation(inst, op, op_idx, None)?; } - for (op_idx, op) in operands.non_fixed_non_reuse_early() { - self.process_operand_allocation(inst, op, op_idx)?; + for (op_idx, op) in operands.non_fixed_late_def() { + if let OperandConstraint::Reuse(reused_idx) = op.constraint() { + let alloc = self.allocs[(inst.index(), op_idx)]; + self.freealloc(op.vreg(), clobbers, false); + // Transfer the allocation for the reuse operand to + // the reused input. + let reused_op = operands[reused_idx]; + let new_reused_op: Operand; + let mut fixed_stack_alloc = None; + if let Some(preg) = alloc.as_reg() { + new_reused_op = Operand::new(reused_op.vreg(), OperandConstraint::FixedReg(preg), reused_op.kind(), reused_op.pos()); + } else { + fixed_stack_alloc = alloc.as_stack(); + new_reused_op = Operand::new(reused_op.vreg(), OperandConstraint::Stack, reused_op.kind(), reused_op.pos()); + } + self.process_operand_allocation(inst, new_reused_op, reused_idx, fixed_stack_alloc)?; + } else { + self.freealloc(op.vreg(), clobbers, false); + } } - for (_, op) in operands.non_reuse_early_def() { - self.freealloc(op.vreg(), clobbers); + for (op_idx, op) in operands.non_fixed_early_use() { + // Reused inputs already have their allocations. 
+ if self.reused_input_to_reuse_op[op_idx] == usize::MAX { + self.process_operand_allocation(inst, op, op_idx, None)?; + } } - for (op_idx, op) in operands.reuse() { - let OperandConstraint::Reuse(reused_idx) = op.constraint() else { - unreachable!() - }; - self.process_reuse_operand_allocation(inst, op, op_idx, operands.0[reused_idx], reused_idx, clobbers)?; + for (_, op) in operands.early_def() { + self.freealloc(op.vreg(), clobbers, false); } self.save_and_restore_clobbered_registers(inst); for preg in self.func.inst_clobbers(inst) { @@ -1609,7 +1245,9 @@ impl<'a, F: Function> Env<'a, F> { self.use_vregs_saved_and_restored_in_curr_inst.clear(); self.vregs_first_seen_in_curr_inst.clear(); self.vregs_allocd_in_curr_inst.clear(); - self.reused_inputs_in_curr_inst.clear(); + for entry in self.reused_input_to_reuse_op.iter_mut() { + *entry = usize::MAX; + } self.vregs_in_curr_inst.clear(); self.pregs_allocd_in_curr_inst = PRegSet::empty(); if trace_enabled!() { @@ -1649,7 +1287,7 @@ impl<'a, F: Function> Env<'a, F> { // A block's block param is not live before the block. // And `vreg_allocs[i]` of a virtual register i is none for // dead vregs. - self.freealloc(vreg, PRegSet::empty()); + self.freealloc(vreg, PRegSet::empty(), false); if slot == prev_alloc { // No need to do any movements if the spillslot is where the vreg is expected to be. 
trace!("No need to reload {:?} because it's already in its expected allocation", vreg); From 832eeb41f4a2c7328ad3d65bcc5ed4c1d4293af7 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Sat, 10 Aug 2024 12:23:57 +0100 Subject: [PATCH 44/95] now using vreg sets as bitsets --- src/fastalloc/bitset.rs | 77 ++++++++++++++++++++++++++++++++++++++--- src/fastalloc/mod.rs | 38 ++++++++++---------- 2 files changed, 91 insertions(+), 24 deletions(-) diff --git a/src/fastalloc/bitset.rs b/src/fastalloc/bitset.rs index c77fe58e..2e1dd70a 100644 --- a/src/fastalloc/bitset.rs +++ b/src/fastalloc/bitset.rs @@ -44,11 +44,56 @@ impl BitSet { } } - pub fn is_empty(&mut self) { - + pub fn is_empty(&mut self) -> bool { + self.bits.iter() + .all(|frame| *frame == 0) + } + + pub fn iter(&self) -> BitSetIter { + BitSetIter { + next_frame_idx: 0, + curr_frame: 0, + bits: &self.bits + } + } +} + +pub struct BitSetIter<'a> { + next_frame_idx: usize, + curr_frame: Frame, + bits: &'a [Frame] +} + +impl<'a> Iterator for BitSetIter<'a> { + type Item = usize; + + fn next(&mut self) -> Option { + loop { + while self.curr_frame == 0 { + if self.next_frame_idx >= self.bits.len() { + return None; + } + self.curr_frame = self.bits[self.next_frame_idx]; + self.next_frame_idx += 1; + } + let skip = self.curr_frame.trailing_zeros(); + self.curr_frame &= !(1 << skip); + return Some((self.next_frame_idx - 1) * BITS_PER_FRAME + skip as usize) + } } } +use core::fmt; + +impl fmt::Debug for BitSet { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{{ ")?; + for el in self.iter() { + write!(f, "{el} ")?; + } + write!(f, "}}") + } +} #[cfg(test)] mod tests { @@ -59,15 +104,37 @@ mod tests { let mut set = BitSet::with_capacity(200); set.insert(10); set.insert(11); + set.insert(199); set.insert(23); set.insert(45); + let els = [10, 11, 23, 45, 199]; + use std::println; + println!("{:b}", set.bits[0]); + for (actual_el, expected_el) in set.iter().zip(els.iter()) { + assert_eq!(actual_el, 
*expected_el as usize); + } assert!(set.contains(10)); assert!(!set.contains(12)); - assert!(!set.contains(2000)); + assert!(!set.contains(197)); assert!(set.contains(45)); assert!(set.contains(23)); assert!(set.contains(11)); - set.remove(10); - assert!(!set.contains(10)); + set.remove(23); + assert!(!set.contains(23)); + set.insert(73); + let els = [10, 11, 45, 73, 199]; + for (actual_el, expected_el) in set.iter().zip(els.iter()) { + assert_eq!(actual_el, *expected_el as usize); + } + } + + #[test] + fn empty() { + let mut set = BitSet::with_capacity(2000); + assert!(set.is_empty()); + set.insert(100); + assert!(!set.is_empty()); + set.remove(100); + assert!(set.is_empty()); } } diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 08f1c180..9d43c857 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -106,7 +106,7 @@ pub struct Env<'a, F: Function> { /// This needs to be kept track of to generate the correct moves in the case where a /// single virtual register is used multiple times in a single instruction with /// different constraints. - use_vregs_saved_and_restored_in_curr_inst: HashSet, + use_vregs_saved_and_restored_in_curr_inst: BitSet, /// Physical registers that were used for late def operands and now free to be /// reused for early operands in the current instruction. /// @@ -135,10 +135,10 @@ pub struct Env<'a, F: Function> { /// It's also used to determine if the an early operand can reuse a freed def operand's /// allocation. And it's also used to determine the edits to be inserted when /// allocating a use operand. - vregs_first_seen_in_curr_inst: HashSet, + vregs_first_seen_in_curr_inst: BitSet, /// Used to keep track of which vregs have been allocated in the current instruction. /// This is used to determine which edits to insert when allocating a use operand. 
- vregs_allocd_in_curr_inst: HashSet, + vregs_allocd_in_curr_inst: BitSet, /// Used to determine if a scratch register is needed for an /// instruction's moves during the `process_edit` calls. inst_needs_scratch_reg: PartedByRegClass, @@ -147,7 +147,7 @@ pub struct Env<'a, F: Function> { /// This is used to reused_input_to_reuse_op: Vec, /// The vregs defined or used in the current instruction. - vregs_in_curr_inst: HashSet, + vregs_in_curr_inst: BitSet, /// The physical registers allocated to the operands in the current instruction. /// Used during eviction to detect eviction of a register that is already in use in the /// instruction being processed, implying that there aren't enough registers for allocation. @@ -207,13 +207,13 @@ impl<'a, F: Function> Env<'a, F> { inst_pre_edits: VecDeque::new(), inst_post_edits: VecDeque::new(), free_after_curr_inst: PartedByRegClass { items: [PRegSet::empty(), PRegSet::empty(), PRegSet::empty()] }, - vregs_allocd_in_curr_inst: HashSet::new(), - use_vregs_saved_and_restored_in_curr_inst: HashSet::new(), + vregs_allocd_in_curr_inst: BitSet::with_capacity(func.num_vregs()), + use_vregs_saved_and_restored_in_curr_inst: BitSet::with_capacity(func.num_vregs()), freed_def_pregs: PartedByRegClass { items: [PRegSet::empty(), PRegSet::empty(), PRegSet::empty()] }, - vregs_first_seen_in_curr_inst: HashSet::new(), + vregs_first_seen_in_curr_inst: BitSet::with_capacity(func.num_vregs()), inst_needs_scratch_reg: PartedByRegClass { items: [false, false, false] }, reused_input_to_reuse_op: vec![usize::MAX; max_operand_len as usize], - vregs_in_curr_inst: HashSet::new(), + vregs_in_curr_inst: BitSet::with_capacity(func.num_vregs()), pregs_allocd_in_curr_inst: PRegSet::empty(), dedicated_scratch_regs: PartedByRegClass { items: [ env.scratch_by_class[0], @@ -549,7 +549,7 @@ impl<'a, F: Function> Env<'a, F> { // defined value will overwrite it. 
if op.pos() == OperandPos::Early && op.kind() == OperandKind::Use - && self.vregs_first_seen_in_curr_inst.contains(&op.vreg()) + && self.vregs_first_seen_in_curr_inst.contains(op.vreg().vreg()) { if let Some(freed_def_preg) = remove_any_from_pregset(&mut self.freed_def_pregs[op.class()]) { trace!("Reusing the freed def preg: {}", freed_def_preg); @@ -684,12 +684,12 @@ impl<'a, F: Function> Env<'a, F> { trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); return Ok(()); } - self.vregs_in_curr_inst.insert(op.vreg()); + self.vregs_in_curr_inst.insert(op.vreg().vreg()); self.live_vregs.insert(op.vreg()); if !self.allocd_within_constraint(inst, op) { let prev_alloc = self.vreg_allocs[op.vreg().vreg()]; if prev_alloc.is_none() { - self.vregs_first_seen_in_curr_inst.insert(op.vreg()); + self.vregs_first_seen_in_curr_inst.insert(op.vreg().vreg()); } self.alloc_operand(inst, op, op_idx, fixed_spillslot)?; // Need to insert a move to propagate flow from the current @@ -812,11 +812,11 @@ impl<'a, F: Function> Env<'a, F> { // move from stack_v0 to p1 // 2. use v0 (fixed: p1) - if !self.use_vregs_saved_and_restored_in_curr_inst.contains(&op.vreg()) - && !self.vregs_allocd_in_curr_inst.contains(&op.vreg()) + if !self.use_vregs_saved_and_restored_in_curr_inst.contains(op.vreg().vreg()) + && !self.vregs_allocd_in_curr_inst.contains(op.vreg().vreg()) // Don't restore after the instruction if it doesn't live past // this instruction. 
- && !self.vregs_first_seen_in_curr_inst.contains(&op.vreg()) + && !self.vregs_first_seen_in_curr_inst.contains(op.vreg().vreg()) { if self.vreg_spillslots[op.vreg().vreg()].is_invalid() { self.vreg_spillslots[op.vreg().vreg()] = self.allocstack(&op.vreg()); @@ -838,7 +838,7 @@ impl<'a, F: Function> Env<'a, F> { InstPosition::After, true, ); - self.use_vregs_saved_and_restored_in_curr_inst.insert(op.vreg()); + self.use_vregs_saved_and_restored_in_curr_inst.insert(op.vreg().vreg()); } else { self.add_move_later( inst, @@ -881,7 +881,7 @@ impl<'a, F: Function> Env<'a, F> { } trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); } - self.vregs_allocd_in_curr_inst.insert(op.vreg()); + self.vregs_allocd_in_curr_inst.insert(op.vreg().vreg()); Ok(()) } @@ -934,10 +934,10 @@ impl<'a, F: Function> Env<'a, F> { let vreg = self.vreg_in_preg[clobbered_preg.index()]; if vreg != VReg::invalid() { - let vreg_isnt_mentioned_in_curr_inst = !self.vregs_in_curr_inst.contains(&vreg); - let vreg_lives_past_curr_inst = !self.vregs_first_seen_in_curr_inst.contains(&vreg); + let vreg_isnt_mentioned_in_curr_inst = !self.vregs_in_curr_inst.contains(vreg.vreg()); + let vreg_lives_past_curr_inst = !self.vregs_first_seen_in_curr_inst.contains(vreg.vreg()); if vreg_isnt_mentioned_in_curr_inst - || (!self.use_vregs_saved_and_restored_in_curr_inst.contains(&vreg) + || (!self.use_vregs_saved_and_restored_in_curr_inst.contains(vreg.vreg()) && vreg_lives_past_curr_inst) { trace!("Adding save and restore edits for {:?}", vreg); From af5ec3159799da63a9f06f2541c2a22f50d4a7d6 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Sat, 10 Aug 2024 12:32:51 +0100 Subject: [PATCH 45/95] vreg sets are now initialized with num of vregs capacity --- src/fastalloc/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index c3a7d1ba..524eb789 100644 --- a/src/fastalloc/mod.rs +++ 
b/src/fastalloc/mod.rs @@ -205,13 +205,13 @@ impl<'a, F: Function> Env<'a, F> { inst_pre_edits: VecDeque::new(), inst_post_edits: VecDeque::new(), free_after_curr_inst: PartedByRegClass { items: [PRegSet::empty(), PRegSet::empty(), PRegSet::empty()] }, - vregs_allocd_in_curr_inst: HashSet::new(), - use_vregs_saved_and_restored_in_curr_inst: HashSet::new(), + vregs_allocd_in_curr_inst: HashSet::with_capacity(func.num_vregs()), + use_vregs_saved_and_restored_in_curr_inst: HashSet::with_capacity(func.num_vregs()), freed_def_pregs: PartedByRegClass { items: [PRegSet::empty(), PRegSet::empty(), PRegSet::empty()] }, - vregs_first_seen_in_curr_inst: HashSet::new(), + vregs_first_seen_in_curr_inst: HashSet::with_capacity(func.num_vregs()), inst_needs_scratch_reg: PartedByRegClass { items: [false, false, false] }, reused_input_to_reuse_op: vec![usize::MAX; max_operand_len as usize], - vregs_in_curr_inst: HashSet::new(), + vregs_in_curr_inst: HashSet::with_capacity(func.num_vregs()), pregs_allocd_in_curr_inst: PRegSet::empty(), dedicated_scratch_regs: PartedByRegClass { items: [ env.scratch_by_class[0], From 2982c6d6c1ed681bd36957b6add7a664b7ad73a8 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Sat, 10 Aug 2024 21:50:56 +0100 Subject: [PATCH 46/95] removed live_vregs clone in reload_at_begin --- src/fastalloc/lru.rs | 2 +- src/fastalloc/mod.rs | 349 ++++++++++++++++++++++++------------------- 2 files changed, 197 insertions(+), 154 deletions(-) diff --git a/src/fastalloc/lru.rs b/src/fastalloc/lru.rs index 3ffb6ced..b2138b67 100644 --- a/src/fastalloc/lru.rs +++ b/src/fastalloc/lru.rs @@ -215,7 +215,7 @@ impl fmt::Debug for Lru { } } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct PartedByRegClass { pub items: [T; 3], } diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 9d43c857..2c5451fd 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -6,7 +6,7 @@ use crate::{Function, MachineEnv, ssa::validate_ssa, ProgPoint, Edit, Output}; use 
crate::{cfg::CFGInfo, RegAllocError, Allocation, ion::Stats}; use alloc::collections::VecDeque; use alloc::vec::Vec; -use hashbrown::{HashMap, HashSet}; +use hashbrown::HashSet; mod bitset; mod lru; @@ -68,6 +68,159 @@ fn remove_any_from_pregset(set: &mut PRegSet) -> Option { } } +#[derive(Debug)] +struct Stack<'a, F: Function> { + num_spillslots: u32, + func: &'a F +} + +impl<'a, F: Function> Stack<'a, F> { + fn new(func: &'a F) -> Self { + Self { + num_spillslots: 0, + func, + } + } + + /// Allocates a spill slot on the stack for `vreg` + fn allocstack(&mut self, vreg: &VReg) -> SpillSlot { + let size: u32 = self.func.spillslot_size(vreg.class()).try_into().unwrap(); + // Rest of this function was copied verbatim + // from `Env::allocate_spillslot` in src/ion/spill.rs. + let mut offset = self.num_spillslots; + // Align up to `size`. + debug_assert!(size.is_power_of_two()); + offset = (offset + size - 1) & !(size - 1); + let slot = if self.func.multi_spillslot_named_by_last_slot() { + offset + size - 1 + } else { + offset + }; + offset += size; + self.num_spillslots = offset; + SpillSlot::new(slot as usize) + } +} + +#[derive(Debug)] +struct Edits { + /// The edits to be inserted before the currently processed instruction. + inst_pre_edits: VecDeque<(ProgPoint, Edit, RegClass)>, + /// The edits to be inserted after the currently processed instruction. + inst_post_edits: VecDeque<(ProgPoint, Edit, RegClass)>, + /// The final output edits. + edits: VecDeque<(ProgPoint, Edit)>, + /// Used to determine if a scratch register is needed for an + /// instruction's moves during the `process_edit` calls. 
+ inst_needs_scratch_reg: PartedByRegClass, + fixed_stack_slots: PRegSet, +} + +impl Edits { + fn new(fixed_stack_slots: PRegSet) -> Self { + Self { + inst_pre_edits: VecDeque::new(), + inst_post_edits: VecDeque::new(), + edits: VecDeque::new(), + fixed_stack_slots, + inst_needs_scratch_reg: PartedByRegClass { items: [false, false, false] } + } + } +} + +impl Edits { + fn is_stack(&self, alloc: Allocation) -> bool { + if alloc.is_stack() { + return true; + } + if alloc.is_reg() { + return self.fixed_stack_slots.contains(alloc.as_reg().unwrap()); + } + false + } + + fn process_edits(&mut self, scratch_regs: PartedByRegClass>) { + for i in (0..self.inst_post_edits.len()).rev() { + let (point, edit, class) = self.inst_post_edits[i].clone(); + self.process_edit(point, edit, scratch_regs[class]); + } + for i in (0..self.inst_pre_edits.len()).rev() { + let (point, edit, class) = self.inst_pre_edits[i].clone(); + self.process_edit(point, edit, scratch_regs[class]); + } + for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { + self.inst_needs_scratch_reg[class] = false; + } + self.inst_post_edits.clear(); + self.inst_pre_edits.clear(); + } + + fn process_edit(&mut self, point: ProgPoint, edit: Edit, scratch_reg: Option) { + trace!("Processing edit: {:?}", edit); + let Edit::Move { from, to } = edit; + if self.is_stack(from) && self.is_stack(to) { + let scratch_reg = scratch_reg.unwrap(); + trace!("Edit is stack-to-stack, generating two moves with a scratch register {:?}", scratch_reg); + let scratch_alloc = Allocation::reg(scratch_reg); + trace!("Processed Edit: {:?}", (point, Edit::Move { + from: scratch_alloc, + to, + })); + self.edits.push_front((point, Edit::Move { + from: scratch_alloc, + to, + })); + trace!("Processed Edit: {:?}", (point, Edit::Move { + from, + to: scratch_alloc, + })); + self.edits.push_front((point, Edit::Move { + from, + to: scratch_alloc, + })); + } else { + trace!("Edit is not stack-to-stack. 
Adding it directly:"); + trace!("Processed Edit: {:?}", (point, Edit::Move { + from, + to, + })); + self.edits.push_front((point, Edit::Move { + from, + to, + })); + } + } + + fn add_move_later(&mut self, inst: Inst, from: Allocation, to: Allocation, class: RegClass, pos: InstPosition, prepend: bool) { + trace!("Recording edit to add later: {:?}", (ProgPoint::new(inst, pos), Edit::Move { + from, + to + }, class)); + if from == to { + trace!("Deciding not to record the edit, since the source and dest are the same"); + return; + } + if self.is_stack(from) && self.is_stack(to) { + self.inst_needs_scratch_reg[class] = true; + } + let target_edits = match pos { + InstPosition::After => &mut self.inst_post_edits, + InstPosition::Before => &mut self.inst_pre_edits + }; + if prepend { + target_edits.push_front((ProgPoint::new(inst, pos), Edit::Move { + from, + to, + }, class)); + } else { + target_edits.push_back((ProgPoint::new(inst, pos), Edit::Move { + from, + to, + }, class)); + } + } +} + #[derive(Debug)] pub struct Env<'a, F: Function> { func: &'a F, @@ -88,10 +241,6 @@ pub struct Env<'a, F: Function> { vreg_in_preg: Vec, /// For parallel moves from branch args to block param spillslots. temp_spillslots: PartedByRegClass>, - /// The edits to be inserted before the currently processed instruction. - inst_pre_edits: VecDeque<(ProgPoint, Edit, RegClass)>, - /// The edits to be inserted after the currently processed instruction. - inst_post_edits: VecDeque<(ProgPoint, Edit, RegClass)>, /// All the allocatables registers that were used for one thing or the other /// but need to be freed after the current instruction has completed processing, /// not immediately, like allocatable registers used as scratch registers. @@ -139,9 +288,6 @@ pub struct Env<'a, F: Function> { /// Used to keep track of which vregs have been allocated in the current instruction. /// This is used to determine which edits to insert when allocating a use operand. 
vregs_allocd_in_curr_inst: BitSet, - /// Used to determine if a scratch register is needed for an - /// instruction's moves during the `process_edit` calls. - inst_needs_scratch_reg: PartedByRegClass, /// `reused_input_to_reuse_op[i]` is the operand index of the reuse operand /// that uses the `i`th operand in the current instruction as its input. /// This is used to @@ -154,13 +300,13 @@ pub struct Env<'a, F: Function> { pregs_allocd_in_curr_inst: PRegSet, allocatable_regs: PRegSet, dedicated_scratch_regs: PartedByRegClass>, + stack: Stack<'a, F>, - fixed_stack_slots: Vec, + fixed_stack_slots: PRegSet, // Output. allocs: Allocs, - edits: VecDeque<(ProgPoint, Edit)>, - safepoint_slots: Vec<(ProgPoint, Allocation)>, + edits: Edits, num_spillslots: u32, stats: Stats, } @@ -179,6 +325,7 @@ impl<'a, F: Function> Env<'a, F> { use alloc::vec; trace!("{:?}", env); let (allocs, max_operand_len) = Allocs::new(func); + let fixed_stack_slots = PRegSet::from_iter(env.fixed_stack_slots.iter().cloned()); Self { func, allocatable_regs: PRegSet::from(env), @@ -198,20 +345,18 @@ impl<'a, F: Function> Env<'a, F> { ®s[2] ), vreg_in_preg: vec![VReg::invalid(); PReg::NUM_INDEX], - fixed_stack_slots: env.fixed_stack_slots.clone(), + stack: Stack::new(func), + fixed_stack_slots, temp_spillslots: PartedByRegClass { items: [ Vec::with_capacity(func.num_vregs()), Vec::with_capacity(func.num_vregs()), Vec::with_capacity(func.num_vregs()), ] }, - inst_pre_edits: VecDeque::new(), - inst_post_edits: VecDeque::new(), free_after_curr_inst: PartedByRegClass { items: [PRegSet::empty(), PRegSet::empty(), PRegSet::empty()] }, vregs_allocd_in_curr_inst: BitSet::with_capacity(func.num_vregs()), use_vregs_saved_and_restored_in_curr_inst: BitSet::with_capacity(func.num_vregs()), freed_def_pregs: PartedByRegClass { items: [PRegSet::empty(), PRegSet::empty(), PRegSet::empty()] }, vregs_first_seen_in_curr_inst: BitSet::with_capacity(func.num_vregs()), - inst_needs_scratch_reg: PartedByRegClass { items: 
[false, false, false] }, reused_input_to_reuse_op: vec![usize::MAX; max_operand_len as usize], vregs_in_curr_inst: BitSet::with_capacity(func.num_vregs()), pregs_allocd_in_curr_inst: PRegSet::empty(), @@ -221,8 +366,7 @@ impl<'a, F: Function> Env<'a, F> { env.scratch_by_class[2], ] }, allocs, - edits: VecDeque::new(), - safepoint_slots: Vec::new(), + edits: Edits::new(fixed_stack_slots), num_spillslots: 0, stats: Stats::default(), } @@ -233,7 +377,7 @@ impl<'a, F: Function> Env<'a, F> { return true; } if alloc.is_reg() { - return self.fixed_stack_slots.contains(&alloc.as_reg().unwrap()); + return self.fixed_stack_slots.contains(alloc.as_reg().unwrap()); } false } @@ -261,11 +405,11 @@ impl<'a, F: Function> Env<'a, F> { /// already been processed. The only edits being processed will be for the /// ones to move a liveout vreg or block param from its spillslot to its /// expected allocation. - fn get_scratch_regs_for_reloading(&self) -> PartedByRegClass> { + fn get_scratch_regs_for_reloading(&self, inst_needs_scratch_reg: PartedByRegClass) -> PartedByRegClass> { trace!("Getting scratch registers for reload_at_begin"); let mut scratch_regs = PartedByRegClass{ items: [None, None, None] }; for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { - if self.inst_needs_scratch_reg[class] { + if inst_needs_scratch_reg[class] { trace!("{:?} class needs a scratch register", class); if self.dedicated_scratch_regs[class].is_some() { trace!("Using the dedicated scratch register for class {:?}", class); @@ -284,11 +428,11 @@ impl<'a, F: Function> Env<'a, F> { /// The scratch registers needed for processing edits generated while /// processing instructions. 
- fn get_scratch_regs(&mut self, inst: Inst) -> Result>, RegAllocError> { + fn get_scratch_regs(&mut self, inst: Inst, inst_needs_scratch_reg: PartedByRegClass) -> Result>, RegAllocError> { trace!("Getting scratch registers for instruction {:?}", inst); let mut scratch_regs = PartedByRegClass { items: [None, None, None] }; for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { - if self.inst_needs_scratch_reg[class] { + if inst_needs_scratch_reg[class] { trace!("{:?} class needs a scratch register", class); if let Some(reg) = self.dedicated_scratch_regs[class] { trace!("Using the dedicated scratch register for class {:?}", class); @@ -310,94 +454,13 @@ impl<'a, F: Function> Env<'a, F> { } Ok(scratch_regs) } - - fn process_edits(&mut self, scratch_regs: PartedByRegClass>) { - for i in (0..self.inst_post_edits.len()).rev() { - let (point, edit, class) = self.inst_post_edits[i].clone(); - self.process_edit(point, edit, scratch_regs[class]); - } - for i in (0..self.inst_pre_edits.len()).rev() { - let (point, edit, class) = self.inst_pre_edits[i].clone(); - self.process_edit(point, edit, scratch_regs[class]); - } - for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { - self.inst_needs_scratch_reg[class] = false; - } - self.inst_post_edits.clear(); - self.inst_pre_edits.clear(); - } - - fn process_edit(&mut self, point: ProgPoint, edit: Edit, scratch_reg: Option) { - trace!("Processing edit: {:?}", edit); - let Edit::Move { from, to } = edit; - if self.is_stack(from) && self.is_stack(to) { - let scratch_reg = scratch_reg.unwrap(); - trace!("Edit is stack-to-stack, generating two moves with a scratch register {:?}", scratch_reg); - let scratch_alloc = Allocation::reg(scratch_reg); - trace!("Processed Edit: {:?}", (point, Edit::Move { - from: scratch_alloc, - to, - })); - self.edits.push_front((point, Edit::Move { - from: scratch_alloc, - to, - })); - trace!("Processed Edit: {:?}", (point, Edit::Move { - from, - to: scratch_alloc, - })); - 
self.edits.push_front((point, Edit::Move { - from, - to: scratch_alloc, - })); - } else { - trace!("Edit is not stack-to-stack. Adding it directly:"); - trace!("Processed Edit: {:?}", (point, Edit::Move { - from, - to, - })); - self.edits.push_front((point, Edit::Move { - from, - to, - })); - } - } - - fn add_move_later(&mut self, inst: Inst, from: Allocation, to: Allocation, class: RegClass, pos: InstPosition, prepend: bool) { - trace!("Recording edit to add later: {:?}", (ProgPoint::new(inst, pos), Edit::Move { - from, - to - }, class)); - if from == to { - trace!("Deciding not to record the edit, since the source and dest are the same"); - return; - } - if self.is_stack(from) && self.is_stack(to) { - self.inst_needs_scratch_reg[class] = true; - } - let target_edits = match pos { - InstPosition::After => &mut self.inst_post_edits, - InstPosition::Before => &mut self.inst_pre_edits - }; - if prepend { - target_edits.push_front((ProgPoint::new(inst, pos), Edit::Move { - from, - to, - }, class)); - } else { - target_edits.push_back((ProgPoint::new(inst, pos), Edit::Move { - from, - to, - }, class)); - } - } - + fn move_after_inst(&mut self, inst: Inst, vreg: VReg, to: Allocation) { - self.add_move_later(inst, self.vreg_allocs[vreg.vreg()], to, vreg.class(), InstPosition::After, false); + self.edits.add_move_later(inst, self.vreg_allocs[vreg.vreg()], to, vreg.class(), InstPosition::After, false); } fn move_before_inst(&mut self, inst: Inst, vreg: VReg, to: Allocation) { - self.add_move_later(inst, self.vreg_allocs[vreg.vreg()], to, vreg.class(), InstPosition::Before, false); + self.edits.add_move_later(inst, self.vreg_allocs[vreg.vreg()], to, vreg.class(), InstPosition::Before, false); } fn allocd_within_constraint(&self, inst: Inst, op: Operand) -> bool { @@ -441,7 +504,7 @@ impl<'a, F: Function> Env<'a, F> { trace!("The removed vreg: {:?}", evicted_vreg); debug_assert_ne!(evicted_vreg, VReg::invalid()); if self.vreg_spillslots[evicted_vreg.vreg()].is_invalid() { - 
self.vreg_spillslots[evicted_vreg.vreg()] = self.allocstack(&evicted_vreg); + self.vreg_spillslots[evicted_vreg.vreg()] = self.stack.allocstack(&evicted_vreg); } let slot = self.vreg_spillslots[evicted_vreg.vreg()]; self.vreg_allocs[evicted_vreg.vreg()] = Allocation::stack(slot); @@ -514,25 +577,6 @@ impl<'a, F: Function> Env<'a, F> { trace!("{:?} curr alloc is now {:?}", vreg, self.vreg_allocs[vreg.vreg()]); trace!("Pregs currently allocated: {}", self.pregs_allocd_in_curr_inst); } - - /// Allocates a spill slot on the stack for `vreg` - fn allocstack(&mut self, vreg: &VReg) -> SpillSlot { - let size: u32 = self.func.spillslot_size(vreg.class()).try_into().unwrap(); - // Rest of this function was copied verbatim - // from `Env::allocate_spillslot` in src/ion/spill.rs. - let mut offset = self.num_spillslots; - // Align up to `size`. - debug_assert!(size.is_power_of_two()); - offset = (offset + size - 1) & !(size - 1); - let slot = if self.func.multi_spillslot_named_by_last_slot() { - offset + size - 1 - } else { - offset - }; - offset += size; - self.num_spillslots = offset; - SpillSlot::new(slot as usize) - } /// Allocates a physical register for the operand `op`. fn alloc_reg_for_operand(&mut self, inst: Inst, op: Operand) -> Result<(), RegAllocError> { @@ -656,7 +700,7 @@ impl<'a, F: Function> Env<'a, F> { spillslot } else { if self.vreg_spillslots[op.vreg().vreg()].is_invalid() { - self.vreg_spillslots[op.vreg().vreg()] = self.allocstack(&op.vreg()); + self.vreg_spillslots[op.vreg().vreg()] = self.stack.allocstack(&op.vreg()); } self.vreg_spillslots[op.vreg().vreg()] }; @@ -733,7 +777,7 @@ impl<'a, F: Function> Env<'a, F> { // the location v0 is expected to be in after inst 1. // This messes up the dataflow. // To avoid this, the moves are prepended. 
- self.add_move_later( + self.edits.add_move_later( inst, self.vreg_allocs[op.vreg().vreg()], prev_alloc, @@ -819,10 +863,10 @@ impl<'a, F: Function> Env<'a, F> { && !self.vregs_first_seen_in_curr_inst.contains(op.vreg().vreg()) { if self.vreg_spillslots[op.vreg().vreg()].is_invalid() { - self.vreg_spillslots[op.vreg().vreg()] = self.allocstack(&op.vreg()); + self.vreg_spillslots[op.vreg().vreg()] = self.stack.allocstack(&op.vreg()); } let op_spillslot = Allocation::stack(self.vreg_spillslots[op.vreg().vreg()]); - self.add_move_later( + self.edits.add_move_later( inst, self.vreg_allocs[op.vreg().vreg()], op_spillslot, @@ -830,7 +874,7 @@ impl<'a, F: Function> Env<'a, F> { InstPosition::Before, false, ); - self.add_move_later( + self.edits.add_move_later( inst, op_spillslot, prev_alloc, @@ -840,7 +884,7 @@ impl<'a, F: Function> Env<'a, F> { ); self.use_vregs_saved_and_restored_in_curr_inst.insert(op.vreg().vreg()); } else { - self.add_move_later( + self.edits.add_move_later( inst, self.vreg_allocs[op.vreg().vreg()], prev_alloc, @@ -888,7 +932,7 @@ impl<'a, F: Function> Env<'a, F> { fn alloc_slots_for_block_params(&mut self, succ: Block) { for vreg in self.func.block_params(succ) { if self.vreg_spillslots[vreg.vreg()].is_invalid() { - self.vreg_spillslots[vreg.vreg()] = self.allocstack(vreg); + self.vreg_spillslots[vreg.vreg()] = self.stack.allocstack(vreg); trace!("Block param {:?} is in {:?}", vreg, Allocation::stack(self.vreg_spillslots[vreg.vreg()])); } } @@ -945,11 +989,11 @@ impl<'a, F: Function> Env<'a, F> { let slot = if self.vreg_spillslots[vreg.vreg()].is_valid() { self.vreg_spillslots[vreg.vreg()] } else { - self.vreg_spillslots[vreg.vreg()] = self.allocstack(&vreg); + self.vreg_spillslots[vreg.vreg()] = self.stack.allocstack(&vreg); self.vreg_spillslots[vreg.vreg()] }; let slot_alloc = Allocation::stack(slot); - self.add_move_later( + self.edits.add_move_later( inst, preg_alloc, slot_alloc, @@ -957,7 +1001,7 @@ impl<'a, F: Function> Env<'a, F> { 
InstPosition::Before, true ); - self.add_move_later( + self.edits.add_move_later( inst, slot_alloc, preg_alloc, @@ -1041,11 +1085,11 @@ impl<'a, F: Function> Env<'a, F> { // be in another vreg's spillslot at the block beginning. for vreg in self.func.branch_blockparams(block, inst, succ_idx).iter() { if self.vreg_spillslots[vreg.vreg()].is_invalid() { - self.vreg_spillslots[vreg.vreg()] = self.allocstack(vreg); + self.vreg_spillslots[vreg.vreg()] = self.stack.allocstack(vreg); trace!("Block arg {:?} is going to be in {:?}", vreg, Allocation::stack(self.vreg_spillslots[vreg.vreg()])); } if self.temp_spillslots[vreg.class()].len() == next_temp_idx[vreg.class()] { - let newslot = self.allocstack(vreg); + let newslot = self.stack.allocstack(vreg); self.temp_spillslots[vreg.class()].push(newslot); } let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; @@ -1053,7 +1097,7 @@ impl<'a, F: Function> Env<'a, F> { next_temp_idx[vreg.class()] += 1; let vreg_spill = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); trace!("{:?} which is going to be in {:?} inserting move to {:?}", vreg, vreg_spill, temp); - self.add_move_later(inst, vreg_spill, temp, vreg.class(), InstPosition::Before, false); + self.edits.add_move_later(inst, vreg_spill, temp, vreg.class(), InstPosition::Before, false); } } @@ -1072,7 +1116,7 @@ impl<'a, F: Function> Env<'a, F> { next_temp_idx[vreg.class()] += 1; trace!(" --- Placing branch arg {:?} in {:?}", vreg, temp); trace!("{:?} which is now in {:?} inserting move to {:?}", vreg, temp, param_alloc); - self.add_move_later(inst, temp, param_alloc, vreg.class(), InstPosition::Before, false); + self.edits.add_move_later(inst, temp, param_alloc, vreg.class(), InstPosition::Before, false); // All branch arguments should be in their spillslots at the end of the function. 
self.vreg_allocs[vreg.vreg()] = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); @@ -1241,8 +1285,8 @@ impl<'a, F: Function> Env<'a, F> { trace!("freed_def_pregs: {}", self.freed_def_pregs); trace!("free after curr inst: {}", self.free_after_curr_inst); trace!(""); - let scratch_regs = self.get_scratch_regs(inst)?; - self.process_edits(scratch_regs); + let scratch_regs = self.get_scratch_regs(inst, self.edits.inst_needs_scratch_reg.clone())?; + self.edits.process_edits(scratch_regs); self.add_freed_regs_to_freelist(); self.use_vregs_saved_and_restored_in_curr_inst.clear(); self.vregs_first_seen_in_curr_inst.clear(); @@ -1279,7 +1323,7 @@ impl<'a, F: Function> Env<'a, F> { continue; } if self.vreg_spillslots[vreg.vreg()].is_invalid() { - self.vreg_spillslots[vreg.vreg()] = self.allocstack(&vreg); + self.vreg_spillslots[vreg.vreg()] = self.stack.allocstack(&vreg); } // The allocation where the vreg is expected to be before // the first instruction. @@ -1296,7 +1340,7 @@ impl<'a, F: Function> Env<'a, F> { continue; } trace!("Move reason: reload {:?} at begin - move from its spillslot", vreg); - self.add_move_later( + self.edits.add_move_later( self.func.block_insns(block).first(), slot, prev_alloc, @@ -1305,12 +1349,11 @@ impl<'a, F: Function> Env<'a, F> { true ); } - let live_vregs = self.live_vregs.clone(); - for vreg in live_vregs.iter().cloned() { + for vreg in self.live_vregs.iter().cloned() { trace!("Processing {:?}", vreg); trace!("{:?} is not a block param. It's a liveout vreg from some predecessor", vreg); if self.vreg_spillslots[vreg.vreg()].is_invalid() { - self.vreg_spillslots[vreg.vreg()] = self.allocstack(&vreg); + self.vreg_spillslots[vreg.vreg()] = self.stack.allocstack(&vreg); } // The allocation where the vreg is expected to be before // the first instruction. 
@@ -1338,7 +1381,7 @@ impl<'a, F: Function> Env<'a, F> { continue; } trace!("Move reason: reload {:?} at begin - move from its spillslot", vreg); - self.add_move_later( + self.edits.add_move_later( self.func.block_insns(block).first(), slot, prev_alloc, @@ -1347,7 +1390,8 @@ impl<'a, F: Function> Env<'a, F> { true ); } - self.process_edits(self.get_scratch_regs_for_reloading()); + let scratch_regs = self.get_scratch_regs_for_reloading(self.edits.inst_needs_scratch_reg.clone()); + self.edits.process_edits(scratch_regs); self.add_freed_regs_to_freelist(); if trace_enabled!() { self.log_post_reload_at_begin_state(block); @@ -1467,8 +1511,7 @@ fn log_output<'a, F: Function>(env: &Env<'a, F>) { } trace!("VReg spillslots: {:?}", v); trace!("Temp spillslots: {:?}", env.temp_spillslots); - trace!("Final edits: {:?}", env.edits); - trace!("safepoint_slots: {:?}\n", env.safepoint_slots); + trace!("Final edits: {:?}", env.edits.edits); } pub fn run( @@ -1495,13 +1538,13 @@ pub fn run( } Ok(Output { - edits: env.edits.make_contiguous().to_vec(), + edits: env.edits.edits.make_contiguous().to_vec(), allocs: env.allocs.allocs, inst_alloc_offsets: env.allocs.inst_alloc_offsets, num_spillslots: env.num_spillslots as usize, // TODO: Handle debug locations. 
debug_locations: Vec::new(), - safepoint_slots: env.safepoint_slots, + safepoint_slots: Vec::new(), stats: env.stats, }) } From 30967259228cd3c6dae1eb2b0566bec127bad138 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Sun, 11 Aug 2024 17:04:47 +0100 Subject: [PATCH 47/95] now using custom vregset for live_vregs --- src/fastalloc/bitset.rs | 3 - src/fastalloc/mod.rs | 11 ++- src/fastalloc/vregset.rs | 201 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 207 insertions(+), 8 deletions(-) create mode 100644 src/fastalloc/vregset.rs diff --git a/src/fastalloc/bitset.rs b/src/fastalloc/bitset.rs index 2e1dd70a..45ffe406 100644 --- a/src/fastalloc/bitset.rs +++ b/src/fastalloc/bitset.rs @@ -12,7 +12,6 @@ impl BitSet { pub fn with_capacity(n: usize) -> Self { let quot = n / BITS_PER_FRAME; - // The number of frames needed cannot be > the quotient; let no_of_frames = quot + 1; Self { bits: vec![0; no_of_frames], @@ -108,8 +107,6 @@ mod tests { set.insert(23); set.insert(45); let els = [10, 11, 23, 45, 199]; - use std::println; - println!("{:b}", set.bits[0]); for (actual_el, expected_el) in set.iter().zip(els.iter()) { assert_eq!(actual_el, *expected_el as usize); } diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 2c5451fd..92b3ed35 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -6,14 +6,15 @@ use crate::{Function, MachineEnv, ssa::validate_ssa, ProgPoint, Edit, Output}; use crate::{cfg::CFGInfo, RegAllocError, Allocation, ion::Stats}; use alloc::collections::VecDeque; use alloc::vec::Vec; -use hashbrown::HashSet; +mod vregset; mod bitset; mod lru; mod iter; use lru::*; use iter::*; use bitset::BitSet; +use vregset::VRegSet; #[derive(Debug)] struct Allocs { @@ -231,7 +232,7 @@ pub struct Env<'a, F: Function> { /// `vreg_spillslots[i]` is the spillslot for virtual register `i`. vreg_spillslots: Vec, /// The virtual registers that are currently live. 
- live_vregs: HashSet, + live_vregs: VRegSet, /// Allocatable free physical registers for classes Int, Float, and Vector, respectively. freepregs: PartedByRegClass, /// Least-recently-used caches for register classes Int, Float, and Vector, respectively. @@ -331,7 +332,7 @@ impl<'a, F: Function> Env<'a, F> { allocatable_regs: PRegSet::from(env), vreg_allocs: vec![Allocation::none(); func.num_vregs()], vreg_spillslots: vec![SpillSlot::invalid(); func.num_vregs()], - live_vregs: HashSet::with_capacity(func.num_vregs()), + live_vregs: VRegSet::with_capacity(func.num_vregs()), freepregs: PartedByRegClass { items: [ PRegSet::from_iter(regs[0].iter().cloned()), @@ -573,7 +574,7 @@ impl<'a, F: Function> Env<'a, F> { AllocationKind::None => unreachable!("Attempting to free an unallocated operand!") } self.vreg_allocs[vreg.vreg()] = Allocation::none(); - self.live_vregs.remove(&vreg); + self.live_vregs.remove(vreg.vreg()); trace!("{:?} curr alloc is now {:?}", vreg, self.vreg_allocs[vreg.vreg()]); trace!("Pregs currently allocated: {}", self.pregs_allocd_in_curr_inst); } @@ -1349,7 +1350,7 @@ impl<'a, F: Function> Env<'a, F> { true ); } - for vreg in self.live_vregs.iter().cloned() { + for vreg in self.live_vregs.iter() { trace!("Processing {:?}", vreg); trace!("{:?} is not a block param. 
It's a liveout vreg from some predecessor", vreg); if self.vreg_spillslots[vreg.vreg()].is_invalid() { diff --git a/src/fastalloc/vregset.rs b/src/fastalloc/vregset.rs new file mode 100644 index 00000000..0eaa2d79 --- /dev/null +++ b/src/fastalloc/vregset.rs @@ -0,0 +1,201 @@ +use alloc::vec::Vec; +use alloc::vec; +use core::fmt; +use std::convert::{TryFrom, TryInto}; +use crate::{RegClass, VReg}; + +struct RegClassNum; + +impl RegClassNum { + const INVALID: u8 = 0b00; + const MAX: u8 = 0b11; + // 0b11 + const INT: u8 = Self::MAX - RegClass::Int as u8; + // 0b10 + const FLOAT: u8 = Self::MAX - RegClass::Float as u8; + // 0b01 + const VECTOR: u8 = Self::MAX - RegClass::Vector as u8; +} + +impl TryFrom for RegClass { + type Error = (); + fn try_from(value: u64) -> Result { + if value == RegClassNum::INT as u64 { + Ok(RegClass::Int) + } else if value == RegClassNum::FLOAT as u64 { + Ok(RegClass::Float) + } else if value == RegClassNum::VECTOR as u64 { + Ok(RegClass::Vector) + } else if value == RegClassNum::INVALID as u64 { + Err(()) + } else { + unreachable!() + } + } +} + +impl From for Frame { + fn from(value: RegClass) -> Self { + (match value { + RegClass::Int => RegClassNum::INT, + RegClass::Float => RegClassNum::FLOAT, + RegClass::Vector => RegClassNum::VECTOR + }) as Frame + } +} + +type Frame = u64; +const BITS_PER_FRAME: usize = core::mem::size_of::() * 8; +const VREGS_PER_FRAME: usize = BITS_PER_FRAME / 2; +const EMPTY_FRAME: Frame = RegClassNum::INVALID as Frame; + +pub struct VRegSet { + bits: Vec +} + +impl VRegSet { + + pub fn with_capacity(n: usize) -> Self { + let no_of_bits_needed = 2 * n; + let quot = no_of_bits_needed / BITS_PER_FRAME; + let no_of_frames = quot + 1; + Self { + bits: vec![RegClassNum::INVALID as Frame; no_of_frames], + } + } + + fn compute_index(&self, el: usize) -> (usize, usize) { + (el / BITS_PER_FRAME, el % BITS_PER_FRAME) + } + + pub fn insert(&mut self, vreg: VReg) { + let (frame_no, idx) = self.compute_index(vreg.vreg() * 2); 
+ let reg_class_num: Frame = vreg.class().into(); + self.bits[frame_no] |= reg_class_num << idx; + } + + pub fn remove(&mut self, vreg_num: usize) { + let (frame_no, idx) = self.compute_index(vreg_num * 2); + self.bits[frame_no] &= !(0b11 << idx); + } + + pub fn contains(&self, vreg_num: usize) -> bool { + let (frame_no, idx) = self.compute_index(vreg_num * 2); + self.bits[frame_no] & (0b11 << idx) != RegClassNum::INVALID as Frame + } + + pub fn clear(&mut self) { + for frame in self.bits.iter_mut() { + *frame = RegClassNum::INVALID as Frame; + } + } + + pub fn is_empty(&mut self) -> bool { + self.bits.iter() + .all(|frame| *frame == EMPTY_FRAME) + } + + pub fn iter(&self) -> BitSetIter { + BitSetIter { + next_frame_idx: 0, + curr_frame: EMPTY_FRAME, + bits: &self.bits + } + } +} + +pub struct BitSetIter<'a> { + next_frame_idx: usize, + curr_frame: Frame, + bits: &'a [Frame] +} + +impl<'a> Iterator for BitSetIter<'a> { + type Item = VReg; + + fn next(&mut self) -> Option { + loop { + while self.curr_frame == EMPTY_FRAME { + if self.next_frame_idx >= self.bits.len() { + return None; + } + self.curr_frame = self.bits[self.next_frame_idx]; + self.next_frame_idx += 1; + } + let mut skip = self.curr_frame.trailing_zeros(); + if skip % 2 != 0 { + skip -= 1; + } + let vreg_num = (self.next_frame_idx - 1) * VREGS_PER_FRAME + (skip / 2) as usize; + let class = (self.curr_frame >> skip) & 0b11; + self.curr_frame &= !(0b11 << skip); + return Some(VReg::new(vreg_num, class.try_into().unwrap())); + } + } +} + + +impl fmt::Debug for VRegSet { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{{ ")?; + for el in self.iter() { + write!(f, "{el} ")?; + } + write!(f, "}}") + } +} + +#[cfg(test)] +mod tests { + use super::*; + use RegClass::*; + const VREG: fn(usize, RegClass) -> VReg = VReg::new; + + #[test] + fn operations() { + let mut set = VRegSet::with_capacity(3090); + set.insert(VREG(10, Int)); + set.insert(VREG(2000, Int)); + set.insert(VREG(11, 
Vector)); + set.insert(VREG(199, Float)); + set.insert(VREG(23, Int)); + let els = [ + VREG(10, Int), + VREG(11, Vector), + VREG(23, Int), + VREG(199, Float), + VREG(2000, Int) + ]; + for (actual_el, expected_el) in set.iter().zip(els.iter()) { + assert_eq!(actual_el, *expected_el); + } + assert!(set.contains(10)); + assert!(!set.contains(12)); + assert!(!set.contains(197)); + assert!(set.contains(23)); + assert!(set.contains(11)); + set.remove(23); + assert!(!set.contains(23)); + set.insert(VREG(73, Vector)); + let els = [ + VREG(10, Int), + VREG(11, Vector), + VREG(73, Vector), + VREG(199, Float), + VREG(2000, Int), + ]; + for (actual_el, expected_el) in set.iter().zip(els.iter()) { + assert_eq!(actual_el, *expected_el); + } + } + + #[test] + fn empty() { + let mut set = VRegSet::with_capacity(2000); + assert!(set.is_empty()); + set.insert(VREG(100, Int)); + assert!(!set.is_empty()); + set.remove(100); + assert!(set.is_empty()); + } +} From 4afc7f6361d17ca7f1f0845aca3fd0c23d18a873 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Sun, 11 Aug 2024 19:51:15 +0100 Subject: [PATCH 48/95] removed check for livein vregs in entry block --- src/fastalloc/mod.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 92b3ed35..d23f746c 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1468,11 +1468,9 @@ impl<'a, F: Function> Env<'a, F> { for block in (0..self.func.num_blocks()).rev() { self.alloc_block(Block::new(block))?; } - if !self.live_vregs.is_empty() { - Err(RegAllocError::EntryLivein) - } else { - Ok(()) - } + // Ought to check if there are livein registers + // then throw an error, but will that be expensive? 
+ Ok(()) } } From 47eed234427172935ca59fcd60f1cff88cbd671d Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Mon, 12 Aug 2024 19:18:37 +0100 Subject: [PATCH 49/95] num spillslots is now included in the output --- src/fastalloc/mod.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index d23f746c..a6265b0e 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -308,7 +308,6 @@ pub struct Env<'a, F: Function> { // Output. allocs: Allocs, edits: Edits, - num_spillslots: u32, stats: Stats, } @@ -368,7 +367,6 @@ impl<'a, F: Function> Env<'a, F> { ] }, allocs, edits: Edits::new(fixed_stack_slots), - num_spillslots: 0, stats: Stats::default(), } } @@ -1540,7 +1538,7 @@ pub fn run( edits: env.edits.edits.make_contiguous().to_vec(), allocs: env.allocs.allocs, inst_alloc_offsets: env.allocs.inst_alloc_offsets, - num_spillslots: env.num_spillslots as usize, + num_spillslots: env.stack.num_spillslots as usize, // TODO: Handle debug locations. debug_locations: Vec::new(), safepoint_slots: Vec::new(), From 58130602436c33555aeaeae96c11ab67031a6c83 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Mon, 12 Aug 2024 23:16:47 +0100 Subject: [PATCH 50/95] fixed clobber handling bug --- src/fastalloc/mod.rs | 84 ++++++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 38 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index a6265b0e..17699db2 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -249,14 +249,6 @@ pub struct Env<'a, F: Function> { /// This is used to keep track of them so that they can be marked as free for reallocation /// after the instruction has completed processing. 
free_after_curr_inst: PartedByRegClass, - /// The virtual registers of use operands that have been allocated in the current instruction - /// and for which edits had to be inserted to save and restore them because their constraint - /// doesn't allow the allocation they are expected to be in after the instruction. - /// - /// This needs to be kept track of to generate the correct moves in the case where a - /// single virtual register is used multiple times in a single instruction with - /// different constraints. - use_vregs_saved_and_restored_in_curr_inst: BitSet, /// Physical registers that were used for late def operands and now free to be /// reused for early operands in the current instruction. /// @@ -354,7 +346,6 @@ impl<'a, F: Function> Env<'a, F> { ] }, free_after_curr_inst: PartedByRegClass { items: [PRegSet::empty(), PRegSet::empty(), PRegSet::empty()] }, vregs_allocd_in_curr_inst: BitSet::with_capacity(func.num_vregs()), - use_vregs_saved_and_restored_in_curr_inst: BitSet::with_capacity(func.num_vregs()), freed_def_pregs: PartedByRegClass { items: [PRegSet::empty(), PRegSet::empty(), PRegSet::empty()] }, vregs_first_seen_in_curr_inst: BitSet::with_capacity(func.num_vregs()), reused_input_to_reuse_op: vec![usize::MAX; max_operand_len as usize], @@ -853,10 +844,9 @@ impl<'a, F: Function> Env<'a, F> { // move from stack0 to stack_v0 // 1. use v0 (fixed: stack0), use v0 (fixed: p0) // move from stack_v0 to p1 - // 2. use v0 (fixed: p1) + // 2. use v0 (fixed: p1) - if !self.use_vregs_saved_and_restored_in_curr_inst.contains(op.vreg().vreg()) - && !self.vregs_allocd_in_curr_inst.contains(op.vreg().vreg()) + if !self.vregs_allocd_in_curr_inst.contains(op.vreg().vreg()) // Don't restore after the instruction if it doesn't live past // this instruction. 
&& !self.vregs_first_seen_in_curr_inst.contains(op.vreg().vreg()) @@ -881,7 +871,6 @@ impl<'a, F: Function> Env<'a, F> { InstPosition::After, true, ); - self.use_vregs_saved_and_restored_in_curr_inst.insert(op.vreg().vreg()); } else { self.edits.add_move_later( inst, @@ -916,9 +905,48 @@ impl<'a, F: Function> Env<'a, F> { } else { self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[op.vreg().vreg()]; if let Some(preg) = self.allocs[(inst.index(), op_idx)].as_reg() { - if self.allocatable_regs.contains(preg) - && !self.func.inst_clobbers(inst).contains(preg) - { + if self.func.inst_clobbers(inst).contains(preg) { + // It is possible for the first use of a vreg in an instruction + // to be some clobber p0 and the expected location of that vreg + // after the instruction is also p0: + // + // 1. use v0 (fixed: p0), use v0 (fixed: p1). clobbers: [p0] + // 2. use v0 (fixed: p0) + // + // When the second use of v0 is encountered in inst 1, a save and restore is + // not inserted because it's not the first use of v0 in the instruction. Instead, + // a single edit to move from p1 to p0 is inserted before the instruction: + // + // move from p1 to p0 + // 1. use v0 (fixed: p0), use v0 (fixed: p1). clobbers: [p0] + // 2. use v0 (fixed: p0) + // + // To avoid this scenario, a save and restore is added here. 
+ if !self.vregs_allocd_in_curr_inst.contains(op.vreg().vreg()) + && !self.vregs_first_seen_in_curr_inst.contains(op.vreg().vreg()) + { + if self.vreg_spillslots[op.vreg().vreg()].is_invalid() { + self.vreg_spillslots[op.vreg().vreg()] = self.stack.allocstack(&op.vreg()); + } + let op_spillslot = Allocation::stack(self.vreg_spillslots[op.vreg().vreg()]); + self.edits.add_move_later( + inst, + self.vreg_allocs[op.vreg().vreg()], + op_spillslot, + op.class(), + InstPosition::Before, + false, + ); + self.edits.add_move_later( + inst, + op_spillslot, + self.vreg_allocs[op.vreg().vreg()], + op.class(), + InstPosition::After, + true, + ); + } + } else if self.allocatable_regs.contains(preg) { self.lrus[preg.class()].poke(preg); } } @@ -956,32 +984,13 @@ impl<'a, F: Function> Env<'a, F> { // // // It is also possible for a clobbered register to be allocated to an operand - // in an instruction. In this case, edits only need to be inserted if the - // following conditions are met: - // - // 1. All the operands assigned the clobber are all uses of the same vreg - // with the same constraint (no defs should be assigned the clobber). - // 2. No other operand in the instruction uses that vreg with a different constraint. - // 3. The used vreg lives past the instruction. - // 4. The expected allocation of the vreg after the instruction is the clobber. - // - // Because of the way operand allocation works, edits to save and restore a vreg - // will have already been inserted during operand allocation if any of the following - // conditions are met: - // 1. The expected allocation afterwards is not a clobber. - // 2. There are multiple operands using the vreg with different constraints. - // 3. A def operand has the same clobber allocation assigned to it and - // the vreg lives past the instruction. - // Therefore, the presence of the vreg in `use_vregs_saved_and_restored` - // implies that it violates one of the conditions for the edits to be inserted. + // in an instruction. 
No edits need to be inserted here because + // `process_operand_allocation` has already done all the insertions. let vreg = self.vreg_in_preg[clobbered_preg.index()]; if vreg != VReg::invalid() { let vreg_isnt_mentioned_in_curr_inst = !self.vregs_in_curr_inst.contains(vreg.vreg()); - let vreg_lives_past_curr_inst = !self.vregs_first_seen_in_curr_inst.contains(vreg.vreg()); if vreg_isnt_mentioned_in_curr_inst - || (!self.use_vregs_saved_and_restored_in_curr_inst.contains(vreg.vreg()) - && vreg_lives_past_curr_inst) { trace!("Adding save and restore edits for {:?}", vreg); let preg_alloc = Allocation::reg(clobbered_preg); @@ -1287,7 +1296,6 @@ impl<'a, F: Function> Env<'a, F> { let scratch_regs = self.get_scratch_regs(inst, self.edits.inst_needs_scratch_reg.clone())?; self.edits.process_edits(scratch_regs); self.add_freed_regs_to_freelist(); - self.use_vregs_saved_and_restored_in_curr_inst.clear(); self.vregs_first_seen_in_curr_inst.clear(); self.vregs_allocd_in_curr_inst.clear(); for entry in self.reused_input_to_reuse_op.iter_mut() { From feb106762c7db5ee877d59036cefad88e7e2ebf6 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Tue, 13 Aug 2024 11:56:36 +0100 Subject: [PATCH 51/95] now checks for and panics on safepoint instructions, removed std dep --- src/fastalloc/mod.rs | 3 +++ src/fastalloc/vregset.rs | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 17699db2..60e19f89 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1135,6 +1135,9 @@ impl<'a, F: Function> Env<'a, F> { fn alloc_inst(&mut self, block: Block, inst: Inst) -> Result<(), RegAllocError> { trace!("Allocating instruction {:?}", inst); + if self.func.requires_refs_on_stack(inst) { + panic!("Safepoint instructions aren't supported"); + } if self.func.is_branch(inst) { self.process_branch(block, inst); } diff --git a/src/fastalloc/vregset.rs b/src/fastalloc/vregset.rs index 0eaa2d79..8f261eb0 100644 --- 
a/src/fastalloc/vregset.rs +++ b/src/fastalloc/vregset.rs @@ -1,7 +1,7 @@ use alloc::vec::Vec; use alloc::vec; use core::fmt; -use std::convert::{TryFrom, TryInto}; +use core::convert::{TryFrom, TryInto}; use crate::{RegClass, VReg}; struct RegClassNum; From 2ad7f3d6ac4292b3d211ea8b578efde010a93d5b Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Tue, 13 Aug 2024 13:57:13 +0100 Subject: [PATCH 52/95] changed safepoint check --- src/fastalloc/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 60e19f89..12e347fd 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1135,7 +1135,7 @@ impl<'a, F: Function> Env<'a, F> { fn alloc_inst(&mut self, block: Block, inst: Inst) -> Result<(), RegAllocError> { trace!("Allocating instruction {:?}", inst); - if self.func.requires_refs_on_stack(inst) { + if self.func.requires_refs_on_stack(inst) && !self.func.reftype_vregs().is_empty() { panic!("Safepoint instructions aren't supported"); } if self.func.is_branch(inst) { From c2f9fabc7da39f3e2e41994428d589d53d89f980 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Tue, 13 Aug 2024 22:53:15 +0100 Subject: [PATCH 53/95] fixed out-of-order edits problem that arose when using a vreg as a branch arg and a use operand in the same branch instruction --- src/fastalloc/mod.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 12e347fd..34dbbfb0 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1105,6 +1105,10 @@ impl<'a, F: Function> Env<'a, F> { next_temp_idx[vreg.class()] += 1; let vreg_spill = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); trace!("{:?} which is going to be in {:?} inserting move to {:?}", vreg, vreg_spill, temp); + // Assuming that vregs defined in the current branch instruction can't be + // used as branch args for successors, else inserting the moves before, instead + // of after will be wrong. 
But the edits are inserted before because the fuzzer + // doesn't recognize moves inserted after branch instructions. self.edits.add_move_later(inst, vreg_spill, temp, vreg.class(), InstPosition::Before, false); } } @@ -1120,15 +1124,24 @@ impl<'a, F: Function> Env<'a, F> { let param_alloc = Allocation::stack(self.vreg_spillslots[succ_param_vreg.vreg()]); let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; let temp = Allocation::stack(temp_slot); - self.vreg_allocs[vreg.vreg()] = temp; next_temp_idx[vreg.class()] += 1; trace!(" --- Placing branch arg {:?} in {:?}", vreg, temp); trace!("{:?} which is now in {:?} inserting move to {:?}", vreg, temp, param_alloc); self.edits.add_move_later(inst, temp, param_alloc, vreg.class(), InstPosition::Before, false); // All branch arguments should be in their spillslots at the end of the function. + // + // The invariants posed by `vregs_first_seen_in_curr_inst` and + // `vregs_allocd_in_curr_inst` must be maintained in order to + // insert edits in the correct order when vregs used as branch args + // are also used as operands. 
+ if self.vreg_allocs[vreg.vreg()].is_none() { + self.vregs_first_seen_in_curr_inst.insert(vreg.vreg()); + } self.vreg_allocs[vreg.vreg()] = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); self.live_vregs.insert(*vreg); + self.vregs_allocd_in_curr_inst.insert(vreg.vreg()); + self.vregs_in_curr_inst.insert(vreg.vreg()); } } } From 014ee3d81d3aa4e79a2cc97c3d52b49d41d6c0ca Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 15 Aug 2024 11:33:19 +0100 Subject: [PATCH 54/95] formatting --- regalloc2-tool/src/main.rs | 5 +- src/fastalloc/bitset.rs | 14 +- src/fastalloc/iter.rs | 159 ++++----- src/fastalloc/lru.rs | 111 +++++-- src/fastalloc/mod.rs | 639 +++++++++++++++++++++++++------------ src/fastalloc/vregset.rs | 29 +- src/lib.rs | 6 +- 7 files changed, 633 insertions(+), 330 deletions(-) diff --git a/regalloc2-tool/src/main.rs b/regalloc2-tool/src/main.rs index c224834b..e396ddab 100644 --- a/regalloc2-tool/src/main.rs +++ b/regalloc2-tool/src/main.rs @@ -2,7 +2,8 @@ use std::path::PathBuf; use clap::Parser; use regalloc2::{ - checker::Checker, serialize::SerializableFunction, Algorithm, Block, Edit, Function, InstOrEdit, Output, RegallocOptions + checker::Checker, serialize::SerializableFunction, Algorithm, Block, Edit, Function, + InstOrEdit, Output, RegallocOptions, }; #[derive(Parser)] @@ -49,7 +50,7 @@ fn main() { let options = RegallocOptions { verbose_log: true, validate_ssa: true, - algorithm: args.algorithm.into() + algorithm: args.algorithm.into(), }; let output = match regalloc2::run(&function, function.machine_env(), &options) { Ok(output) => output, diff --git a/src/fastalloc/bitset.rs b/src/fastalloc/bitset.rs index 45ffe406..99d5b34e 100644 --- a/src/fastalloc/bitset.rs +++ b/src/fastalloc/bitset.rs @@ -1,15 +1,14 @@ -use alloc::vec::Vec; use alloc::vec; +use alloc::vec::Vec; type Frame = u64; const BITS_PER_FRAME: usize = core::mem::size_of::() * 8; pub struct BitSet { - bits: Vec + bits: Vec, } impl BitSet { - pub fn with_capacity(n: usize) -> 
Self { let quot = n / BITS_PER_FRAME; let no_of_frames = quot + 1; @@ -44,15 +43,14 @@ impl BitSet { } pub fn is_empty(&mut self) -> bool { - self.bits.iter() - .all(|frame| *frame == 0) + self.bits.iter().all(|frame| *frame == 0) } pub fn iter(&self) -> BitSetIter { BitSetIter { next_frame_idx: 0, curr_frame: 0, - bits: &self.bits + bits: &self.bits, } } } @@ -60,7 +58,7 @@ impl BitSet { pub struct BitSetIter<'a> { next_frame_idx: usize, curr_frame: Frame, - bits: &'a [Frame] + bits: &'a [Frame], } impl<'a> Iterator for BitSetIter<'a> { @@ -77,7 +75,7 @@ impl<'a> Iterator for BitSetIter<'a> { } let skip = self.curr_frame.trailing_zeros(); self.curr_frame &= !(1 << skip); - return Some((self.next_frame_idx - 1) * BITS_PER_FRAME + skip as usize) + return Some((self.next_frame_idx - 1) * BITS_PER_FRAME + skip as usize); } } } diff --git a/src/fastalloc/iter.rs b/src/fastalloc/iter.rs index 91919d5d..b296c6af 100644 --- a/src/fastalloc/iter.rs +++ b/src/fastalloc/iter.rs @@ -1,4 +1,4 @@ -use crate::{Operand, OperandKind, OperandPos, OperandConstraint}; +use crate::{Operand, OperandConstraint, OperandKind, OperandPos}; pub struct Operands<'a>(pub &'a [Operand]); @@ -7,34 +7,40 @@ impl<'a> Operands<'a> { Self(operands) } - pub fn matches bool + 'a>(&self, predicate: F) -> impl Iterator + 'a { - self.0.iter() + pub fn matches bool + 'a>( + &self, + predicate: F, + ) -> impl Iterator + 'a { + self.0 + .iter() .cloned() .enumerate() .filter(move |(_, op)| predicate(*op)) } pub fn non_fixed_non_reuse_late(&self) -> impl Iterator + 'a { - self.matches(|op| - !matches!(op.constraint(), OperandConstraint::FixedReg(_) | OperandConstraint::Reuse(_)) - && op.pos() == OperandPos::Late - ) + self.matches(|op| { + !matches!( + op.constraint(), + OperandConstraint::FixedReg(_) | OperandConstraint::Reuse(_) + ) && op.pos() == OperandPos::Late + }) } pub fn non_reuse_late_def(&self) -> impl Iterator + 'a { - self.matches(|op| + self.matches(|op| { !matches!(op.constraint(), 
OperandConstraint::Reuse(_)) - && op.pos() == OperandPos::Late - && op.kind() == OperandKind::Def - ) + && op.pos() == OperandPos::Late + && op.kind() == OperandKind::Def + }) } pub fn non_fixed_non_reuse_early(&self) -> impl Iterator + 'a { - self.matches(|op| + self.matches(|op| { !matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && !matches!(op.constraint(), OperandConstraint::Reuse(_)) - && op.pos() == OperandPos::Early - ) + && !matches!(op.constraint(), OperandConstraint::Reuse(_)) + && op.pos() == OperandPos::Early + }) } pub fn reuse(&self) -> impl Iterator + 'a { @@ -42,11 +48,11 @@ impl<'a> Operands<'a> { } pub fn non_reuse_early_def(&self) -> impl Iterator + 'a { - self.matches(|op| + self.matches(|op| { !matches!(op.constraint(), OperandConstraint::Reuse(_)) - && op.pos() == OperandPos::Early - && op.kind() == OperandKind::Def - ) + && op.pos() == OperandPos::Early + && op.kind() == OperandKind::Def + }) } pub fn fixed(&self) -> impl Iterator + 'a { @@ -54,109 +60,106 @@ impl<'a> Operands<'a> { } pub fn fixed_early(&self) -> impl Iterator + 'a { - self.matches(|op| + self.matches(|op| { matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && op.pos() == OperandPos::Early - ) + && op.pos() == OperandPos::Early + }) } pub fn fixed_late(&self) -> impl Iterator + 'a { - self.matches(|op| + self.matches(|op| { matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && op.pos() == OperandPos::Late - ) + && op.pos() == OperandPos::Late + }) } pub fn non_reuse_def(&self) -> impl Iterator + 'a { - self.matches(|op| - !matches!(op.constraint(), OperandConstraint::Reuse(_)) - && op.kind() == OperandKind::Def - ) + self.matches(|op| { + !matches!(op.constraint(), OperandConstraint::Reuse(_)) && op.kind() == OperandKind::Def + }) } pub fn non_fixed_def(&self) -> impl Iterator + 'a { - self.matches(|op| + self.matches(|op| { !matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && op.kind() == OperandKind::Def - ) + && op.kind() == 
OperandKind::Def + }) } pub fn non_fixed_non_reuse_late_use(&self) -> impl Iterator + 'a { - self.matches(|op| - !matches!(op.constraint(), - OperandConstraint::FixedReg(_) - | OperandConstraint::Reuse(_) - ) - && op.pos() == OperandPos::Late - && op.kind() == OperandKind::Use - ) + self.matches(|op| { + !matches!( + op.constraint(), + OperandConstraint::FixedReg(_) | OperandConstraint::Reuse(_) + ) && op.pos() == OperandPos::Late + && op.kind() == OperandKind::Use + }) } pub fn non_fixed_non_reuse_late_def(&self) -> impl Iterator + 'a { - self.matches(|op| + self.matches(|op| { !matches!( op.constraint(), OperandConstraint::FixedReg(_) | OperandConstraint::Reuse(_) - ) - && op.pos() == OperandPos::Late - && op.kind() == OperandKind::Def - ) + ) && op.pos() == OperandPos::Late + && op.kind() == OperandKind::Def + }) } pub fn non_fixed_late_use(&self) -> impl Iterator + 'a { - self.matches(|op| !matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && op.pos() == OperandPos::Late - && op.kind() == OperandKind::Use - ) + self.matches(|op| { + !matches!(op.constraint(), OperandConstraint::FixedReg(_)) + && op.pos() == OperandPos::Late + && op.kind() == OperandKind::Use + }) } pub fn non_fixed_late_def(&self) -> impl Iterator + 'a { - self.matches(|op| !matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && op.pos() == OperandPos::Late - && op.kind() == OperandKind::Def - ) + self.matches(|op| { + !matches!(op.constraint(), OperandConstraint::FixedReg(_)) + && op.pos() == OperandPos::Late + && op.kind() == OperandKind::Def + }) } pub fn non_fixed_early_use(&self) -> impl Iterator + 'a { - self.matches(|op| !matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && op.pos() == OperandPos::Early - && op.kind() == OperandKind::Use - ) + self.matches(|op| { + !matches!(op.constraint(), OperandConstraint::FixedReg(_)) + && op.pos() == OperandPos::Early + && op.kind() == OperandKind::Use + }) } pub fn non_fixed_early_def(&self) -> impl Iterator + 'a { - 
self.matches(|op| !matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && op.pos() == OperandPos::Early - && op.kind() == OperandKind::Def - ) + self.matches(|op| { + !matches!(op.constraint(), OperandConstraint::FixedReg(_)) + && op.pos() == OperandPos::Early + && op.kind() == OperandKind::Def + }) } pub fn late_def(&self) -> impl Iterator + 'a { - self.matches(|op| - op.pos() == OperandPos::Late - && op.kind() == OperandKind::Def - ) + self.matches(|op| op.pos() == OperandPos::Late && op.kind() == OperandKind::Def) } pub fn early_def(&self) -> impl Iterator + 'a { - self.matches(|op| - op.pos() == OperandPos::Early - && op.kind() == OperandKind::Def - ) + self.matches(|op| op.pos() == OperandPos::Early && op.kind() == OperandKind::Def) } - + pub fn fixed_early_use(&self) -> impl Iterator + 'a { - self.matches(|op| matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && op.pos() == OperandPos::Early - && op.kind() == OperandKind::Use - ) + self.matches(|op| { + matches!(op.constraint(), OperandConstraint::FixedReg(_)) + && op.pos() == OperandPos::Early + && op.kind() == OperandKind::Use + }) } pub fn fixed_late_def(&self) -> impl Iterator + 'a { - self.matches(|op| matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && op.pos() == OperandPos::Late - && op.kind() == OperandKind::Def - ) + self.matches(|op| { + matches!(op.constraint(), OperandConstraint::FixedReg(_)) + && op.pos() == OperandPos::Late + && op.kind() == OperandKind::Def + }) } } diff --git a/src/fastalloc/lru.rs b/src/fastalloc/lru.rs index b2138b67..cc8ae818 100644 --- a/src/fastalloc/lru.rs +++ b/src/fastalloc/lru.rs @@ -1,8 +1,11 @@ -use alloc::vec::Vec; +use crate::{PReg, RegClass}; use alloc::vec; +use alloc::vec::Vec; +use core::{ + fmt, + ops::{Index, IndexMut}, +}; use hashbrown::HashSet; -use core::{fmt, ops::{IndexMut, Index}}; -use crate::{PReg, RegClass}; /// A least-recently-used cache organized as a linked list based on a vector. 
pub struct Lru { @@ -27,19 +30,29 @@ pub struct LruNode { impl Lru { pub fn new(regclass: RegClass, regs: &[PReg]) -> Self { - let mut data = vec![LruNode { prev: u8::MAX, next: u8::MAX }; PReg::MAX + 1]; + let mut data = vec![ + LruNode { + prev: u8::MAX, + next: u8::MAX + }; + PReg::MAX + 1 + ]; let no_of_regs = regs.len(); for i in 0..no_of_regs { let (reg, prev_reg, next_reg) = ( regs[i], regs[i.checked_sub(1).unwrap_or(no_of_regs - 1)], - regs[if i >= no_of_regs - 1 { 0 } else { i + 1 }] + regs[if i >= no_of_regs - 1 { 0 } else { i + 1 }], ); data[reg.hw_enc()].prev = prev_reg.hw_enc() as u8; data[reg.hw_enc()].next = next_reg.hw_enc() as u8; } Self { - head: if regs.is_empty() { u8::MAX } else { regs[0].hw_enc() as u8 }, + head: if regs.is_empty() { + u8::MAX + } else { + regs[0].hw_enc() as u8 + }, data, regclass, } @@ -47,7 +60,12 @@ impl Lru { /// Marks the physical register `preg` as the most recently used pub fn poke(&mut self, preg: PReg) { - trace!("Before poking: {:?} LRU. head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); + trace!( + "Before poking: {:?} LRU. head: {:?}, Actual data: {:?}", + self.regclass, + self.head, + self.data + ); trace!("About to poke {:?} in {:?} LRU", preg, self.regclass); let prev_newest = self.head; let hw_enc = preg.hw_enc() as u8; @@ -67,7 +85,12 @@ impl Lru { /// Gets the least recently used physical register. pub fn pop(&mut self) -> PReg { - trace!("Before popping: {:?} LRU. head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); + trace!( + "Before popping: {:?} LRU. head: {:?}, Actual data: {:?}", + self.regclass, + self.head, + self.data + ); trace!("Popping {:?} LRU", self.regclass); if self.is_empty() { panic!("LRU is empty"); @@ -82,11 +105,16 @@ impl Lru { /// Splices out a node from the list. pub fn remove(&mut self, hw_enc: usize) { - trace!("Before removing: {:?} LRU. head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); + trace!( + "Before removing: {:?} LRU. 
head: {:?}, Actual data: {:?}", + self.regclass, + self.head, + self.data + ); trace!("Removing p{hw_enc} from {:?} LRU", self.regclass); let (iprev, inext) = ( - self.data[hw_enc].prev as usize, - self.data[hw_enc].next as usize + self.data[hw_enc].prev as usize, + self.data[hw_enc].next as usize, ); self.data[iprev].next = self.data[hw_enc].next; self.data[inext].prev = self.data[hw_enc].prev; @@ -108,7 +136,12 @@ impl Lru { /// Sets the physical register with hw_enc `hw_enc` to the last in the list. pub fn append(&mut self, hw_enc: usize) { - trace!("Before appending: {:?} LRU. head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); + trace!( + "Before appending: {:?} LRU. head: {:?}, Actual data: {:?}", + self.regclass, + self.head, + self.data + ); trace!("Appending p{hw_enc} to the {:?} LRU", self.regclass); if self.head != u8::MAX { let head = self.head as usize; @@ -135,15 +168,17 @@ impl Lru { /// Insert node `i` before node `j` in the list. fn insert_before(&mut self, i: u8, j: u8) { - trace!("Before inserting: {:?} LRU. head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); + trace!( + "Before inserting: {:?} LRU. head: {:?}, Actual data: {:?}", + self.regclass, + self.head, + self.data + ); trace!("Inserting p{i} before {j} in {:?} LRU", self.regclass); let prev = self.data[j as usize].prev; self.data[prev as usize].next = i; self.data[j as usize].prev = i; - self.data[i as usize] = LruNode { - next: j, - prev, - }; + self.data[i as usize] = LruNode { next: j, prev }; trace!("Done inserting p{i} before {j} in {:?} LRU", self.regclass); if cfg!(debug_assertions) { self.validate_lru(); @@ -156,14 +191,22 @@ impl Lru { // Using this to debug. fn validate_lru(&self) { - trace!("{:?} LRU. head: {:?}, Actual data: {:?}", self.regclass, self.head, self.data); + trace!( + "{:?} LRU. 
head: {:?}, Actual data: {:?}", + self.regclass, + self.head, + self.data + ); if self.head != u8::MAX { let mut node = self.data[self.head as usize].next; let mut seen = HashSet::new(); while node != self.head { if seen.contains(&node) { - panic!("Cycle detected in {:?} LRU.\n - head: {:?}, actual data: {:?}", self.regclass, self.head, self.data); + panic!( + "Cycle detected in {:?} LRU.\n + head: {:?}, actual data: {:?}", + self.regclass, self.head, self.data + ); } seen.insert(node); node = self.data[node as usize].next; @@ -174,13 +217,26 @@ impl Lru { continue; } if self.data[i].prev == u8::MAX || self.data[i].next == u8::MAX { - panic!("Invalid LRU. p{} next or previous is an invalid value, but not both", i); + panic!( + "Invalid LRU. p{} next or previous is an invalid value, but not both", + i + ); } if self.data[self.data[i].prev as usize].next != i as u8 { - panic!("Invalid LRU. p{i} prev is p{:?}, but p{:?} next is {:?}", self.data[i].prev, self.data[i].prev, self.data[self.data[i].prev as usize].next); + panic!( + "Invalid LRU. p{i} prev is p{:?}, but p{:?} next is {:?}", + self.data[i].prev, + self.data[i].prev, + self.data[self.data[i].prev as usize].next + ); } if self.data[self.data[i].next as usize].prev != i as u8 { - panic!("Invalid LRU. p{i} next is p{:?}, but p{:?} prev is p{:?}", self.data[i].next, self.data[i].next, self.data[self.data[i].next as usize].prev); + panic!( + "Invalid LRU. 
p{i} next is p{:?}, but p{:?} prev is p{:?}", + self.data[i].next, + self.data[i].next, + self.data[self.data[i].next as usize].prev + ); } } } @@ -198,8 +254,11 @@ impl fmt::Debug for Lru { let mut seen = HashSet::new(); while node != self.head { if seen.contains(&node) { - panic!("The {:?} LRU is messed up: - head: {:?}, {:?} -> p{node}, actual data: {:?}", self.regclass, self.head, data_str, self.data); + panic!( + "The {:?} LRU is messed up: + head: {:?}, {:?} -> p{node}, actual data: {:?}", + self.regclass, self.head, data_str, self.data + ); } seen.insert(node); data_str += &format!(" -> p{}", node); @@ -244,7 +303,7 @@ impl Lrus { Lru::new(RegClass::Int, int_regs), Lru::new(RegClass::Float, float_regs), Lru::new(RegClass::Vector, vec_regs), - ] + ], } } } diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 34dbbfb0..4cde3836 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1,19 +1,22 @@ +use crate::{cfg::CFGInfo, ion::Stats, Allocation, RegAllocError}; +use crate::{ssa::validate_ssa, Edit, Function, MachineEnv, Output, ProgPoint}; +use crate::{ + AllocationKind, Block, Inst, InstPosition, Operand, OperandConstraint, OperandKind, OperandPos, + PReg, PRegSet, RegClass, SpillSlot, VReg, +}; +use alloc::collections::VecDeque; +use alloc::vec::Vec; use core::convert::TryInto; use core::iter::FromIterator; use core::ops::{Index, IndexMut}; -use crate::{AllocationKind, Block, Inst, InstPosition, Operand, OperandConstraint, OperandKind, OperandPos, PReg, PRegSet, RegClass, SpillSlot, VReg}; -use crate::{Function, MachineEnv, ssa::validate_ssa, ProgPoint, Edit, Output}; -use crate::{cfg::CFGInfo, RegAllocError, Allocation, ion::Stats}; -use alloc::collections::VecDeque; -use alloc::vec::Vec; -mod vregset; mod bitset; -mod lru; mod iter; -use lru::*; -use iter::*; +mod lru; +mod vregset; use bitset::BitSet; +use iter::*; +use lru::*; use vregset::VRegSet; #[derive(Debug)] @@ -38,10 +41,13 @@ impl Allocs { allocs.push(Allocation::none()); } } 
- (Self { - allocs, - inst_alloc_offsets, - }, max_operand_len) + ( + Self { + allocs, + inst_alloc_offsets, + }, + max_operand_len, + ) } } @@ -72,7 +78,7 @@ fn remove_any_from_pregset(set: &mut PRegSet) -> Option { #[derive(Debug)] struct Stack<'a, F: Function> { num_spillslots: u32, - func: &'a F + func: &'a F, } impl<'a, F: Function> Stack<'a, F> { @@ -86,7 +92,7 @@ impl<'a, F: Function> Stack<'a, F> { /// Allocates a spill slot on the stack for `vreg` fn allocstack(&mut self, vreg: &VReg) -> SpillSlot { let size: u32 = self.func.spillslot_size(vreg.class()).try_into().unwrap(); - // Rest of this function was copied verbatim + // Rest of this function was copied verbatim // from `Env::allocate_spillslot` in src/ion/spill.rs. let mut offset = self.num_spillslots; // Align up to `size`. @@ -124,7 +130,9 @@ impl Edits { inst_post_edits: VecDeque::new(), edits: VecDeque::new(), fixed_stack_slots, - inst_needs_scratch_reg: PartedByRegClass { items: [false, false, false] } + inst_needs_scratch_reg: PartedByRegClass { + items: [false, false, false], + }, } } } @@ -161,42 +169,65 @@ impl Edits { let Edit::Move { from, to } = edit; if self.is_stack(from) && self.is_stack(to) { let scratch_reg = scratch_reg.unwrap(); - trace!("Edit is stack-to-stack, generating two moves with a scratch register {:?}", scratch_reg); + trace!( + "Edit is stack-to-stack, generating two moves with a scratch register {}", + scratch_reg + ); let scratch_alloc = Allocation::reg(scratch_reg); - trace!("Processed Edit: {:?}", (point, Edit::Move { - from: scratch_alloc, - to, - })); - self.edits.push_front((point, Edit::Move { - from: scratch_alloc, - to, - })); - trace!("Processed Edit: {:?}", (point, Edit::Move { - from, - to: scratch_alloc, - })); - self.edits.push_front((point, Edit::Move { - from, - to: scratch_alloc, - })); + trace!( + "Processed Edit: {:?}", + ( + point, + Edit::Move { + from: scratch_alloc, + to, + } + ) + ); + self.edits.push_front(( + point, + Edit::Move { + from: 
scratch_alloc, + to, + }, + )); + trace!( + "Processed Edit: {:?}", + ( + point, + Edit::Move { + from, + to: scratch_alloc, + } + ) + ); + self.edits.push_front(( + point, + Edit::Move { + from, + to: scratch_alloc, + }, + )); } else { trace!("Edit is not stack-to-stack. Adding it directly:"); - trace!("Processed Edit: {:?}", (point, Edit::Move { - from, - to, - })); - self.edits.push_front((point, Edit::Move { - from, - to, - })); + trace!("Processed Edit: {:?}", (point, Edit::Move { from, to })); + self.edits.push_front((point, Edit::Move { from, to })); } } - fn add_move_later(&mut self, inst: Inst, from: Allocation, to: Allocation, class: RegClass, pos: InstPosition, prepend: bool) { - trace!("Recording edit to add later: {:?}", (ProgPoint::new(inst, pos), Edit::Move { - from, - to - }, class)); + fn add_move_later( + &mut self, + inst: Inst, + from: Allocation, + to: Allocation, + class: RegClass, + pos: InstPosition, + prepend: bool, + ) { + trace!( + "Recording edit to add later: {:?}", + (ProgPoint::new(inst, pos), Edit::Move { from, to }, class) + ); if from == to { trace!("Deciding not to record the edit, since the source and dest are the same"); return; @@ -206,18 +237,12 @@ impl Edits { } let target_edits = match pos { InstPosition::After => &mut self.inst_post_edits, - InstPosition::Before => &mut self.inst_pre_edits + InstPosition::Before => &mut self.inst_pre_edits, }; if prepend { - target_edits.push_front((ProgPoint::new(inst, pos), Edit::Move { - from, - to, - }, class)); + target_edits.push_front((ProgPoint::new(inst, pos), Edit::Move { from, to }, class)); } else { - target_edits.push_back((ProgPoint::new(inst, pos), Edit::Move { - from, - to, - }, class)); + target_edits.push_back((ProgPoint::new(inst, pos), Edit::Move { from, to }, class)); } } } @@ -245,7 +270,7 @@ pub struct Env<'a, F: Function> { /// All the allocatables registers that were used for one thing or the other /// but need to be freed after the current instruction has completed 
processing, /// not immediately, like allocatable registers used as scratch registers. - /// + /// /// This is used to keep track of them so that they can be marked as free for reallocation /// after the instruction has completed processing. free_after_curr_inst: PartedByRegClass, @@ -255,7 +280,7 @@ pub struct Env<'a, F: Function> { /// After late defs have been allocated, rather than returning their registers to /// the free register list, it is added here to avoid the registers being used as /// scratch registers. - /// + /// /// For example, consider the following: /// def v0, use v1 /// If the processing of v1 requires a stack-to-stack move, then a scratch register is @@ -263,7 +288,7 @@ pub struct Env<'a, F: Function> { /// def v0, use v1 /// move from stack0 to p0 /// move from p0 to stack1 - /// + /// /// Since scratch registers may be drawn from the free register list and v0 will be allocated and /// deallocated before v1, then it's possible for the scratch register p0 to be v0's allocation, /// which is incorrect because p0 will end up holding whatever is in stack0, not v0. @@ -283,7 +308,6 @@ pub struct Env<'a, F: Function> { vregs_allocd_in_curr_inst: BitSet, /// `reused_input_to_reuse_op[i]` is the operand index of the reuse operand /// that uses the `i`th operand in the current instruction as its input. - /// This is used to reused_input_to_reuse_op: Vec, /// The vregs defined or used in the current instruction. 
vregs_in_curr_inst: BitSet, @@ -305,15 +329,26 @@ pub struct Env<'a, F: Function> { impl<'a, F: Function> Env<'a, F> { fn new(func: &'a F, env: &'a MachineEnv) -> Self { - trace!("multispillslots_named_by_last_slot: {:?}", func.multi_spillslot_named_by_last_slot()); let mut regs = [ env.preferred_regs_by_class[RegClass::Int as usize].clone(), env.preferred_regs_by_class[RegClass::Float as usize].clone(), env.preferred_regs_by_class[RegClass::Vector as usize].clone(), ]; - regs[0].extend(env.non_preferred_regs_by_class[RegClass::Int as usize].iter().cloned()); - regs[1].extend(env.non_preferred_regs_by_class[RegClass::Float as usize].iter().cloned()); - regs[2].extend(env.non_preferred_regs_by_class[RegClass::Vector as usize].iter().cloned()); + regs[0].extend( + env.non_preferred_regs_by_class[RegClass::Int as usize] + .iter() + .cloned(), + ); + regs[1].extend( + env.non_preferred_regs_by_class[RegClass::Float as usize] + .iter() + .cloned(), + ); + regs[2].extend( + env.non_preferred_regs_by_class[RegClass::Vector as usize] + .iter() + .cloned(), + ); use alloc::vec; trace!("{:?}", env); let (allocs, max_operand_len) = Allocs::new(func); @@ -329,33 +364,37 @@ impl<'a, F: Function> Env<'a, F> { PRegSet::from_iter(regs[0].iter().cloned()), PRegSet::from_iter(regs[1].iter().cloned()), PRegSet::from_iter(regs[2].iter().cloned()), - ] + ], }, - lrus: Lrus::new( - ®s[0], - ®s[1], - ®s[2] - ), + lrus: Lrus::new(®s[0], ®s[1], ®s[2]), vreg_in_preg: vec![VReg::invalid(); PReg::NUM_INDEX], stack: Stack::new(func), fixed_stack_slots, - temp_spillslots: PartedByRegClass { items: [ - Vec::with_capacity(func.num_vregs()), - Vec::with_capacity(func.num_vregs()), - Vec::with_capacity(func.num_vregs()), - ] }, - free_after_curr_inst: PartedByRegClass { items: [PRegSet::empty(), PRegSet::empty(), PRegSet::empty()] }, + temp_spillslots: PartedByRegClass { + items: [ + Vec::with_capacity(func.num_vregs()), + Vec::with_capacity(func.num_vregs()), + 
Vec::with_capacity(func.num_vregs()), + ], + }, + free_after_curr_inst: PartedByRegClass { + items: [PRegSet::empty(), PRegSet::empty(), PRegSet::empty()], + }, vregs_allocd_in_curr_inst: BitSet::with_capacity(func.num_vregs()), - freed_def_pregs: PartedByRegClass { items: [PRegSet::empty(), PRegSet::empty(), PRegSet::empty()] }, + freed_def_pregs: PartedByRegClass { + items: [PRegSet::empty(), PRegSet::empty(), PRegSet::empty()], + }, vregs_first_seen_in_curr_inst: BitSet::with_capacity(func.num_vregs()), reused_input_to_reuse_op: vec![usize::MAX; max_operand_len as usize], vregs_in_curr_inst: BitSet::with_capacity(func.num_vregs()), pregs_allocd_in_curr_inst: PRegSet::empty(), - dedicated_scratch_regs: PartedByRegClass { items: [ - env.scratch_by_class[0], - env.scratch_by_class[1], - env.scratch_by_class[2], - ] }, + dedicated_scratch_regs: PartedByRegClass { + items: [ + env.scratch_by_class[0], + env.scratch_by_class[1], + env.scratch_by_class[2], + ], + }, allocs, edits: Edits::new(fixed_stack_slots), stats: Stats::default(), @@ -379,7 +418,7 @@ impl<'a, F: Function> Env<'a, F> { } self.freepregs[class].union_from(self.freed_def_pregs[class]); self.freed_def_pregs[class] = PRegSet::empty(); - + for preg in self.free_after_curr_inst[class] { self.lrus[preg.class()].append(preg.hw_enc()); } @@ -395,9 +434,14 @@ impl<'a, F: Function> Env<'a, F> { /// already been processed. The only edits being processed will be for the /// ones to move a liveout vreg or block param from its spillslot to its /// expected allocation. 
- fn get_scratch_regs_for_reloading(&self, inst_needs_scratch_reg: PartedByRegClass) -> PartedByRegClass> { + fn get_scratch_regs_for_reloading( + &self, + inst_needs_scratch_reg: PartedByRegClass, + ) -> PartedByRegClass> { trace!("Getting scratch registers for reload_at_begin"); - let mut scratch_regs = PartedByRegClass{ items: [None, None, None] }; + let mut scratch_regs = PartedByRegClass { + items: [None, None, None], + }; for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { if inst_needs_scratch_reg[class] { trace!("{:?} class needs a scratch register", class); @@ -406,10 +450,12 @@ impl<'a, F: Function> Env<'a, F> { scratch_regs[class] = self.dedicated_scratch_regs[class]; } else { trace!("No dedicated scratch register for class {:?}. Using the last free register", class); - scratch_regs[class] = Some(self.freepregs[class] - .into_iter() - .next() - .expect("Allocation impossible?")); + scratch_regs[class] = Some( + self.freepregs[class] + .into_iter() + .next() + .expect("Allocation impossible?"), + ); } } } @@ -418,9 +464,15 @@ impl<'a, F: Function> Env<'a, F> { /// The scratch registers needed for processing edits generated while /// processing instructions. - fn get_scratch_regs(&mut self, inst: Inst, inst_needs_scratch_reg: PartedByRegClass) -> Result>, RegAllocError> { + fn get_scratch_regs( + &mut self, + inst: Inst, + inst_needs_scratch_reg: PartedByRegClass, + ) -> Result>, RegAllocError> { trace!("Getting scratch registers for instruction {:?}", inst); - let mut scratch_regs = PartedByRegClass { items: [None, None, None] }; + let mut scratch_regs = PartedByRegClass { + items: [None, None, None], + }; for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { if inst_needs_scratch_reg[class] { trace!("{:?} class needs a scratch register", class); @@ -436,6 +488,7 @@ impl<'a, F: Function> Env<'a, F> { trace!("No free {:?} registers. Evicting a register", class); self.evict_any_reg(inst, class)? 
}; + trace!("The scratch register: {reg}"); scratch_regs[class] = Some(reg); } } else { @@ -444,13 +497,27 @@ impl<'a, F: Function> Env<'a, F> { } Ok(scratch_regs) } - + fn move_after_inst(&mut self, inst: Inst, vreg: VReg, to: Allocation) { - self.edits.add_move_later(inst, self.vreg_allocs[vreg.vreg()], to, vreg.class(), InstPosition::After, false); + self.edits.add_move_later( + inst, + self.vreg_allocs[vreg.vreg()], + to, + vreg.class(), + InstPosition::After, + false, + ); } fn move_before_inst(&mut self, inst: Inst, vreg: VReg, to: Allocation) { - self.edits.add_move_later(inst, self.vreg_allocs[vreg.vreg()], to, vreg.class(), InstPosition::Before, false); + self.edits.add_move_later( + inst, + self.vreg_allocs[vreg.vreg()], + to, + vreg.class(), + InstPosition::Before, + false, + ); } fn allocd_within_constraint(&self, inst: Inst, op: Operand) -> bool { @@ -471,17 +538,17 @@ impl<'a, F: Function> Env<'a, F> { // assigning the clobber to that def, and if it lives past // the current instruction, then restoration will be impossible. alloc.is_some() && !alloc_is_clobber - }, + } OperandConstraint::Reg => { - let alloc_is_reg = alloc.is_reg() && alloc.as_reg().unwrap().class() == op.class() + let alloc_is_reg = alloc.is_reg() + && alloc.as_reg().unwrap().class() == op.class() && !self.is_stack(alloc); alloc_is_reg && !alloc_is_clobber - }, + } OperandConstraint::Stack => self.is_stack(alloc), // It is possible for an operand to have a fixed register constraint to // a clobber. 
- OperandConstraint::FixedReg(preg) => alloc.is_reg() && - alloc.as_reg().unwrap() == preg, + OperandConstraint::FixedReg(preg) => alloc.is_reg() && alloc.as_reg().unwrap() == preg, OperandConstraint::Reuse(_) => { unreachable!() } @@ -489,9 +556,9 @@ impl<'a, F: Function> Env<'a, F> { } fn evict_vreg_in_preg(&mut self, inst: Inst, preg: PReg) { - trace!("Removing the vreg in preg {:?} for eviction", preg); + trace!("Removing the vreg in preg {} for eviction", preg); let evicted_vreg = self.vreg_in_preg[preg.index()]; - trace!("The removed vreg: {:?}", evicted_vreg); + trace!("The removed vreg: {}", evicted_vreg); debug_assert_ne!(evicted_vreg, VReg::invalid()); if self.vreg_spillslots[evicted_vreg.vreg()].is_invalid() { self.vreg_spillslots[evicted_vreg.vreg()] = self.stack.allocstack(&evicted_vreg); @@ -503,9 +570,12 @@ impl<'a, F: Function> Env<'a, F> { } fn evict_any_reg(&mut self, inst: Inst, regclass: RegClass) -> Result { - trace!("Evicting a register in evict_any_reg for class {:?}", regclass); + trace!( + "Evicting a register in evict_any_reg for class {:?}", + regclass + ); let preg = self.lrus[regclass].pop(); - trace!("Selected register from lru: {:?}", preg); + trace!("Selected register from lru: {}", preg); // Check if the preg has already been allocated for this // instruction. If it has, then there are too many stuff to // allocate, making allocation impossible. 
@@ -522,7 +592,7 @@ impl<'a, F: Function> Env<'a, F> { } fn freealloc(&mut self, vreg: VReg, clobbers: PRegSet, is_fixed_def: bool) { - trace!("Freeing vreg {:?}", vreg); + trace!("Freeing vreg {}", vreg); let alloc = self.vreg_allocs[vreg.vreg()]; match alloc.kind() { AllocationKind::Reg => { @@ -560,12 +630,19 @@ impl<'a, F: Function> Env<'a, F> { self.pregs_allocd_in_curr_inst.remove(preg); } AllocationKind::Stack => (), - AllocationKind::None => unreachable!("Attempting to free an unallocated operand!") + AllocationKind::None => unreachable!("Attempting to free an unallocated operand!"), } self.vreg_allocs[vreg.vreg()] = Allocation::none(); self.live_vregs.remove(vreg.vreg()); - trace!("{:?} curr alloc is now {:?}", vreg, self.vreg_allocs[vreg.vreg()]); - trace!("Pregs currently allocated: {}", self.pregs_allocd_in_curr_inst); + trace!( + "{} curr alloc is now {}", + vreg, + self.vreg_allocs[vreg.vreg()] + ); + trace!( + "Pregs currently allocated: {}", + self.pregs_allocd_in_curr_inst + ); } /// Allocates a physical register for the operand `op`. @@ -573,19 +650,32 @@ impl<'a, F: Function> Env<'a, F> { trace!("freepregs int: {}", self.freepregs[RegClass::Int]); trace!("freepregs vector: {}", self.freepregs[RegClass::Vector]); trace!("freepregs float: {}", self.freepregs[RegClass::Float]); - trace!("freed_def_pregs int: {}", self.freed_def_pregs[RegClass::Int]); - trace!("freed_def_pregs vector: {}", self.freed_def_pregs[RegClass::Vector]); - trace!("freed_def_pregs float: {}", self.freed_def_pregs[RegClass::Float]); + trace!( + "freed_def_pregs int: {}", + self.freed_def_pregs[RegClass::Int] + ); + trace!( + "freed_def_pregs vector: {}", + self.freed_def_pregs[RegClass::Vector] + ); + trace!( + "freed_def_pregs float: {}", + self.freed_def_pregs[RegClass::Float] + ); trace!(""); // The only way a freed def preg can be reused for an operand is if // the operand uses or defines a vreg in the early phase and the vreg doesn't // live past the instruction. 
If the vreg lives past the instruction, then the // defined value will overwrite it. - if op.pos() == OperandPos::Early + if op.pos() == OperandPos::Early && op.kind() == OperandKind::Use - && self.vregs_first_seen_in_curr_inst.contains(op.vreg().vreg()) + && self + .vregs_first_seen_in_curr_inst + .contains(op.vreg().vreg()) { - if let Some(freed_def_preg) = remove_any_from_pregset(&mut self.freed_def_pregs[op.class()]) { + if let Some(freed_def_preg) = + remove_any_from_pregset(&mut self.freed_def_pregs[op.class()]) + { trace!("Reusing the freed def preg: {}", freed_def_preg); self.lrus[freed_def_preg.class()].append_and_poke(freed_def_preg); self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(freed_def_preg); @@ -600,7 +690,7 @@ impl<'a, F: Function> Env<'a, F> { trace!("Getting a register from freepregs"); remove_any_from_pregset(&mut self.freepregs[op.class()]).unwrap() }; - trace!("The allocated register for vreg {:?}: {:?}", preg, op.vreg()); + trace!("The allocated register for vreg {}: {}", op.vreg(), preg); self.lrus[op.class()].poke(preg); self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(preg); self.vreg_in_preg[preg.index()] = op.vreg(); @@ -608,15 +698,20 @@ impl<'a, F: Function> Env<'a, F> { Ok(()) } - fn alloc_fixed_reg_for_operand(&mut self, inst: Inst, op: Operand, preg: PReg) -> Result<(), RegAllocError> { - trace!("The fixed preg: {:?} for operand {:?}", preg, op); + fn alloc_fixed_reg_for_operand( + &mut self, + inst: Inst, + op: Operand, + preg: PReg, + ) -> Result<(), RegAllocError> { + trace!("The fixed preg: {} for operand {}", preg, op); // It is an error for a fixed register clobber to be used for a defined vreg // that outlives the instruction, because it will be impossible to restore it. // But checking for that will be expensive? 
- let is_allocatable = !self.is_stack(Allocation::reg(preg)) - && !self.func.inst_clobbers(inst).contains(preg); + let is_allocatable = + !self.is_stack(Allocation::reg(preg)) && !self.func.inst_clobbers(inst).contains(preg); if self.vreg_in_preg[preg.index()] != VReg::invalid() { // Something is already in that register. Evict it. // Check if the evicted register is a register in the @@ -633,10 +728,15 @@ impl<'a, F: Function> Env<'a, F> { // def v0 (fixed: p0), use v1 (fixed: p0) // In the above, p0 has already been used for v0, and since it's a // def operand, the register has been freed and kept in `freed_def_pregs`, - // so it can be added back to the free pregs list after the instruction + // so it can be added back to the free pregs list after the instruction // has finished processing. // To avoid the preg being added back to the free list, it must be removed // from `freed_def_pregs` here. + trace!( + "{} is now using preg {}. Removing it from the freed def pregs list", + op.vreg(), + preg + ); self.freed_def_pregs[preg.class()].remove(preg); self.lrus[preg.class()].append(preg.hw_enc()); } else if self.free_after_curr_inst[preg.class()].contains(preg) { @@ -654,7 +754,11 @@ impl<'a, F: Function> Env<'a, F> { // But then, it's reallocated for the second operand. // To prevent reallocating a register while a live one is still in it, // this register has to be removed from the list. - trace!("{:?} is now using preg {:?}. Removing it from the free after instruction list", op.vreg(), preg); + trace!( + "{} is now using preg {}. 
Removing it from the free after instruction list", + op.vreg(), + preg + ); self.free_after_curr_inst[preg.class()].remove(preg); if is_allocatable { self.lrus[preg.class()].append(preg.hw_enc()); @@ -671,13 +775,19 @@ impl<'a, F: Function> Env<'a, F> { self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(preg); self.vreg_in_preg[preg.index()] = op.vreg(); self.pregs_allocd_in_curr_inst.add(preg); - trace!("vreg {:?} is now in preg {:?}", op.vreg(), preg); + trace!("vreg {} is now in preg {}", op.vreg(), preg); Ok(()) } /// Allocates for the operand `op` with index `op_idx` into the /// vector of instruction `inst`'s operands. - fn alloc_operand(&mut self, inst: Inst, op: Operand, op_idx: usize, fixed_spillslot: Option) -> Result<(), RegAllocError> { + fn alloc_operand( + &mut self, + inst: Inst, + op: Operand, + op_idx: usize, + fixed_spillslot: Option, + ) -> Result<(), RegAllocError> { match op.constraint() { OperandConstraint::Any => { self.alloc_reg_for_operand(inst, op)?; @@ -712,10 +822,21 @@ impl<'a, F: Function> Env<'a, F> { /// Since only fixed register constraints are allowed, `fixed_spillslot` is used when a /// fixed stack allocation is needed, like when transferring a stack allocation from a /// reuse operand allocation to the reused input. 
- fn process_operand_allocation(&mut self, inst: Inst, op: Operand, op_idx: usize, fixed_spillslot: Option) -> Result<(), RegAllocError> { + fn process_operand_allocation( + &mut self, + inst: Inst, + op: Operand, + op_idx: usize, + fixed_spillslot: Option, + ) -> Result<(), RegAllocError> { if let Some(preg) = op.as_fixed_nonallocatable() { self.allocs[(inst.index(), op_idx)] = Allocation::reg(preg); - trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); + trace!( + "Allocation for instruction {:?} and operand {}: {}", + inst, + op, + self.allocs[(inst.index(), op_idx)] + ); return Ok(()); } self.vregs_in_curr_inst.insert(op.vreg().vreg()); @@ -739,7 +860,7 @@ impl<'a, F: Function> Env<'a, F> { // has to be done after. // // The move also has to be prepended. Consider the scenario: - // + // // 1. def v0 (any reg), use v1 (fixed: p0) // 2. use v0 (fixed: p0) // @@ -751,7 +872,7 @@ impl<'a, F: Function> Env<'a, F> { // 1. def v0 (any reg), use v1 (fixed: p0) // move from stack_v0 to p0 // 2. use v0 (fixed: p0) - // + // // When it's time to process v0, it has to be moved again: this time // because it needs to be in a register, not on the stack. // Edits are inserted to flow v0 from its spillslot to the newly allocated @@ -773,7 +894,7 @@ impl<'a, F: Function> Env<'a, F> { prev_alloc, op.class(), InstPosition::After, - true + true, ); } else { // This was handled by a simple move from the operand to its previous @@ -820,7 +941,7 @@ impl<'a, F: Function> Env<'a, F> { // move from stack_v0 to p1 // 2. use v0 (fixed: p1) // - // Assuming that after instruction 1 is processed, v0's + // Assuming that after instruction 1 is processed, v0's // location is p0, then stack0 will always overwrite it, // and v0 is not in stack0 (it's in p0, now). // To avoid this scenario, these moves are only inserted @@ -844,17 +965,19 @@ impl<'a, F: Function> Env<'a, F> { // move from stack0 to stack_v0 // 1. 
use v0 (fixed: stack0), use v0 (fixed: p0) // move from stack_v0 to p1 - // 2. use v0 (fixed: p1) - + // 2. use v0 (fixed: p1) + if !self.vregs_allocd_in_curr_inst.contains(op.vreg().vreg()) // Don't restore after the instruction if it doesn't live past // this instruction. && !self.vregs_first_seen_in_curr_inst.contains(op.vreg().vreg()) { if self.vreg_spillslots[op.vreg().vreg()].is_invalid() { - self.vreg_spillslots[op.vreg().vreg()] = self.stack.allocstack(&op.vreg()); + self.vreg_spillslots[op.vreg().vreg()] = + self.stack.allocstack(&op.vreg()); } - let op_spillslot = Allocation::stack(self.vreg_spillslots[op.vreg().vreg()]); + let op_spillslot = + Allocation::stack(self.vreg_spillslots[op.vreg().vreg()]); self.edits.add_move_later( inst, self.vreg_allocs[op.vreg().vreg()], @@ -888,7 +1011,11 @@ impl<'a, F: Function> Env<'a, F> { self.vreg_in_preg[preg.index()] = VReg::invalid(); // If it's a fixed stack slot, then it's not allocatable. if !self.is_stack(prev_alloc) { - trace!("{:?} is no longer using preg {:?}, so freeing it after instruction", op.vreg(), preg); + trace!( + "{} is no longer using preg {}, so freeing it after instruction", + op.vreg(), + preg + ); // A clobber will have already been removed from the LRU // and will be freed after the instruction has completed processing // if no vreg is still present in it. 
@@ -896,12 +1023,20 @@ impl<'a, F: Function> Env<'a, F> { self.free_after_curr_inst[preg.class()].add(preg); self.lrus[preg.class()].remove(preg.hw_enc()); } else { - trace!("{:?} is a clobber, so not bothering with the state update", preg); + trace!( + "{} is a clobber, so not bothering with the state update", + preg + ); } } } } - trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); + trace!( + "Allocation for instruction {:?} and operand {}: {}", + inst, + op, + self.allocs[(inst.index(), op_idx)] + ); } else { self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[op.vreg().vreg()]; if let Some(preg) = self.allocs[(inst.index(), op_idx)].as_reg() { @@ -923,12 +1058,16 @@ impl<'a, F: Function> Env<'a, F> { // // To avoid this scenario, a save and restore is added here. if !self.vregs_allocd_in_curr_inst.contains(op.vreg().vreg()) - && !self.vregs_first_seen_in_curr_inst.contains(op.vreg().vreg()) + && !self + .vregs_first_seen_in_curr_inst + .contains(op.vreg().vreg()) { if self.vreg_spillslots[op.vreg().vreg()].is_invalid() { - self.vreg_spillslots[op.vreg().vreg()] = self.stack.allocstack(&op.vreg()); + self.vreg_spillslots[op.vreg().vreg()] = + self.stack.allocstack(&op.vreg()); } - let op_spillslot = Allocation::stack(self.vreg_spillslots[op.vreg().vreg()]); + let op_spillslot = + Allocation::stack(self.vreg_spillslots[op.vreg().vreg()]); self.edits.add_move_later( inst, self.vreg_allocs[op.vreg().vreg()], @@ -950,7 +1089,12 @@ impl<'a, F: Function> Env<'a, F> { self.lrus[preg.class()].poke(preg); } } - trace!("Allocation for instruction {:?} and operand {:?}: {:?}", inst, op, self.allocs[(inst.index(), op_idx)]); + trace!( + "Allocation for instruction {:?} and operand {}: {}", + inst, + op, + self.allocs[(inst.index(), op_idx)] + ); } self.vregs_allocd_in_curr_inst.insert(op.vreg().vreg()); Ok(()) @@ -960,7 +1104,11 @@ impl<'a, F: Function> Env<'a, F> { for vreg in self.func.block_params(succ) { 
if self.vreg_spillslots[vreg.vreg()].is_invalid() { self.vreg_spillslots[vreg.vreg()] = self.stack.allocstack(vreg); - trace!("Block param {:?} is in {:?}", vreg, Allocation::stack(self.vreg_spillslots[vreg.vreg()])); + trace!( + "Block param {} is in {}", + vreg, + Allocation::stack(self.vreg_spillslots[vreg.vreg()]) + ); } } } @@ -984,15 +1132,15 @@ impl<'a, F: Function> Env<'a, F> { // // // It is also possible for a clobbered register to be allocated to an operand - // in an instruction. No edits need to be inserted here because + // in an instruction. No edits need to be inserted here because // `process_operand_allocation` has already done all the insertions. let vreg = self.vreg_in_preg[clobbered_preg.index()]; if vreg != VReg::invalid() { - let vreg_isnt_mentioned_in_curr_inst = !self.vregs_in_curr_inst.contains(vreg.vreg()); - if vreg_isnt_mentioned_in_curr_inst - { - trace!("Adding save and restore edits for {:?}", vreg); + let vreg_isnt_mentioned_in_curr_inst = + !self.vregs_in_curr_inst.contains(vreg.vreg()); + if vreg_isnt_mentioned_in_curr_inst { + trace!("Adding save and restore edits for {}", vreg); let preg_alloc = Allocation::reg(clobbered_preg); let slot = if self.vreg_spillslots[vreg.vreg()].is_valid() { self.vreg_spillslots[vreg.vreg()] @@ -1007,7 +1155,7 @@ impl<'a, F: Function> Env<'a, F> { slot_alloc, vreg.class(), InstPosition::Before, - true + true, ); self.edits.add_move_later( inst, @@ -1026,16 +1174,15 @@ impl<'a, F: Function> Env<'a, F> { /// If instruction `inst` is a branch in `block`, /// this function places branch arguments in the spillslots /// expected by the destination blocks. - /// + /// /// The process used to do this is as follows: - /// + /// /// 1. Move all branch arguments into corresponding temporary spillslots. /// 2. Move values from the temporary spillslots to corresponding block param spillslots. 
- /// + /// /// These temporaries are used because the moves have to be parallel in the case where /// a block parameter of the successor block is a branch argument. fn process_branch(&mut self, block: Block, inst: Inst) { - // Used to know which temporary spillslot should be used next. let mut next_temp_idx = PartedByRegClass { items: [0, 0, 0] }; @@ -1057,8 +1204,8 @@ impl<'a, F: Function> Env<'a, F> { for (succ_idx, _) in self.func.block_succs(block).iter().enumerate() { // Move from branch args spillslots to temporaries. // - // Consider a scenario: - // + // Consider a scenario: + // // block entry: // goto Y(...) // @@ -1079,7 +1226,7 @@ impl<'a, F: Function> Env<'a, F> { // in vp's spillslot), then during reload, it will still be allocated to vp's spillslot. // This will mean that at the beginning of the block, both va and vp will be expected to be // in vp's spillslot. An edit will be inserted to move from va's spillslot to vp's. - // And depending on the constraints of vp's use, an edit may or may not be inserted to move + // And depending on the constraints of vp's use, an edit may or may not be inserted to move // from vp's spillslot to somewhere else. // Either way, the correctness of the dataflow will depend on the order of edits. // If vp is required in be on the stack, then no edit will be inserted for it (it's already on @@ -1089,12 +1236,16 @@ impl<'a, F: Function> Env<'a, F> { // is clearly wrong. // // To avoid this scenario, branch args are placed into their own spillslots here - // so that if they aren't moved at all throughout the block, they will not be expected to + // so that if they aren't moved at all throughout the block, they will not be expected to // be in another vreg's spillslot at the block beginning. 
for vreg in self.func.branch_blockparams(block, inst, succ_idx).iter() { if self.vreg_spillslots[vreg.vreg()].is_invalid() { self.vreg_spillslots[vreg.vreg()] = self.stack.allocstack(vreg); - trace!("Block arg {:?} is going to be in {:?}", vreg, Allocation::stack(self.vreg_spillslots[vreg.vreg()])); + trace!( + "Block arg {} is going to be in {}", + vreg, + Allocation::stack(self.vreg_spillslots[vreg.vreg()]) + ); } if self.temp_spillslots[vreg.class()].len() == next_temp_idx[vreg.class()] { let newslot = self.stack.allocstack(vreg); @@ -1104,30 +1255,59 @@ impl<'a, F: Function> Env<'a, F> { let temp = Allocation::stack(temp_slot); next_temp_idx[vreg.class()] += 1; let vreg_spill = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); - trace!("{:?} which is going to be in {:?} inserting move to {:?}", vreg, vreg_spill, temp); + trace!( + "{} which is going to be in {} inserting move to {}", + vreg, + vreg_spill, + temp + ); // Assuming that vregs defined in the current branch instruction can't be // used as branch args for successors, else inserting the moves before, instead // of after will be wrong. But the edits are inserted before because the fuzzer // doesn't recognize moves inserted after branch instructions. - self.edits.add_move_later(inst, vreg_spill, temp, vreg.class(), InstPosition::Before, false); + self.edits.add_move_later( + inst, + vreg_spill, + temp, + vreg.class(), + InstPosition::Before, + false, + ); } } - + reset_temp_idx(&mut next_temp_idx); for (succ_idx, succ) in self.func.block_succs(block).iter().enumerate() { let succ_params = self.func.block_params(*succ); // Move from temporaries to block param spillslots. 
- for (pos, vreg) in self.func.branch_blockparams(block, inst, succ_idx).iter().enumerate() { + for (pos, vreg) in self + .func + .branch_blockparams(block, inst, succ_idx) + .iter() + .enumerate() + { let succ_param_vreg = succ_params[pos]; let param_alloc = Allocation::stack(self.vreg_spillslots[succ_param_vreg.vreg()]); let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; let temp = Allocation::stack(temp_slot); next_temp_idx[vreg.class()] += 1; - trace!(" --- Placing branch arg {:?} in {:?}", vreg, temp); - trace!("{:?} which is now in {:?} inserting move to {:?}", vreg, temp, param_alloc); - self.edits.add_move_later(inst, temp, param_alloc, vreg.class(), InstPosition::Before, false); + trace!(" --- Placing branch arg {} in {}", vreg, temp); + trace!( + "{} which is now in {} inserting move to {}", + vreg, + temp, + param_alloc + ); + self.edits.add_move_later( + inst, + temp, + param_alloc, + vreg.class(), + InstPosition::Before, + false, + ); // All branch arguments should be in their spillslots at the end of the function. // @@ -1138,7 +1318,8 @@ impl<'a, F: Function> Env<'a, F> { if self.vreg_allocs[vreg.vreg()].is_none() { self.vregs_first_seen_in_curr_inst.insert(vreg.vreg()); } - self.vreg_allocs[vreg.vreg()] = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); + self.vreg_allocs[vreg.vreg()] = + Allocation::stack(self.vreg_spillslots[vreg.vreg()]); self.live_vregs.insert(*vreg); self.vregs_allocd_in_curr_inst.insert(vreg.vreg()); self.vregs_in_curr_inst.insert(vreg.vreg()); @@ -1165,7 +1346,7 @@ impl<'a, F: Function> Env<'a, F> { // or a reused input constraint of an operand with a fixed register // constraint to use a clobber. 
if self.allocatable_regs.contains(preg) { - trace!("Removing {:?} from the freelist because it's a clobber", preg); + trace!("Removing {} from the freelist because it's a clobber", preg); self.freepregs[preg.class()].remove(preg); self.lrus[preg.class()].remove(preg.hw_enc()); } @@ -1180,9 +1361,14 @@ impl<'a, F: Function> Env<'a, F> { let reuse_op_idx = self.reused_input_to_reuse_op[op_idx]; if reuse_op_idx != usize::MAX { let reuse_op = operands[reuse_op_idx]; - let new_reuse_op = Operand::new(reuse_op.vreg(), op.constraint(), reuse_op.kind(), reuse_op.pos()); + let new_reuse_op = Operand::new( + reuse_op.vreg(), + op.constraint(), + reuse_op.kind(), + reuse_op.pos(), + ); self.process_operand_allocation(inst, new_reuse_op, reuse_op_idx, None)?; - } + } // It's possible for a fixed early use to have the same fixed constraint // as a fixed late def. Because of this, handle the fixed early use without // explicit reuse operand constraints later. @@ -1200,7 +1386,7 @@ impl<'a, F: Function> Env<'a, F> { self.freealloc(op.vreg(), clobbers, true); } for (op_idx, op) in operands.fixed_early_use() { - // The reuse operands inputs already have their allocations with + // The reuse operands inputs already have their allocations with // the reuse operands. Those allocations will be moved over to the // reused input records when the reuse operands are deallocated. if self.reused_input_to_reuse_op[op_idx] == usize::MAX { @@ -1217,7 +1403,8 @@ impl<'a, F: Function> Env<'a, F> { // allocated. 
continue; } - let new_reuse_op = Operand::new(op.vreg(), reused_op.constraint(), op.kind(), op.pos()); + let new_reuse_op = + Operand::new(op.vreg(), reused_op.constraint(), op.kind(), op.pos()); self.process_operand_allocation(inst, new_reuse_op, op_idx, None)?; } else { self.process_operand_allocation(inst, op, op_idx, None)?; @@ -1236,12 +1423,27 @@ impl<'a, F: Function> Env<'a, F> { let new_reused_op: Operand; let mut fixed_stack_alloc = None; if let Some(preg) = alloc.as_reg() { - new_reused_op = Operand::new(reused_op.vreg(), OperandConstraint::FixedReg(preg), reused_op.kind(), reused_op.pos()); + new_reused_op = Operand::new( + reused_op.vreg(), + OperandConstraint::FixedReg(preg), + reused_op.kind(), + reused_op.pos(), + ); } else { fixed_stack_alloc = alloc.as_stack(); - new_reused_op = Operand::new(reused_op.vreg(), OperandConstraint::Stack, reused_op.kind(), reused_op.pos()); + new_reused_op = Operand::new( + reused_op.vreg(), + OperandConstraint::Stack, + reused_op.kind(), + reused_op.pos(), + ); } - self.process_operand_allocation(inst, new_reused_op, reused_idx, fixed_stack_alloc)?; + self.process_operand_allocation( + inst, + new_reused_op, + reused_idx, + fixed_stack_alloc, + )?; } else { self.freealloc(op.vreg(), clobbers, false); } @@ -1262,7 +1464,7 @@ impl<'a, F: Function> Env<'a, F> { // In the case where the clobbered register is allocated to // something, don't add the register to the freelist, cause // it isn't free. - trace!("Adding clobbered {:?} to free after inst list", preg); + trace!("Adding clobbered {} to free after inst list", preg); // Consider a scenario: // // 1. use v0 (fixed: p1). Clobbers: [p0] @@ -1300,7 +1502,10 @@ impl<'a, F: Function> Env<'a, F> { // Something is still in the clobber. // After this instruction, it's no longer a clobber. // Add it back to the LRU. - trace!("Something is still in the clobber {:?}. Adding it back to the LRU directly.", preg); + trace!( + "Something is still in the clobber {}. 
Adding it back to the LRU directly.", + preg + ); self.lrus[preg.class()].append_and_poke(preg); } } @@ -1309,7 +1514,8 @@ impl<'a, F: Function> Env<'a, F> { trace!("freed_def_pregs: {}", self.freed_def_pregs); trace!("free after curr inst: {}", self.free_after_curr_inst); trace!(""); - let scratch_regs = self.get_scratch_regs(inst, self.edits.inst_needs_scratch_reg.clone())?; + let scratch_regs = + self.get_scratch_regs(inst, self.edits.inst_needs_scratch_reg.clone())?; self.edits.process_edits(scratch_regs); self.add_freed_regs_to_freelist(); self.vregs_first_seen_in_curr_inst.clear(); @@ -1330,16 +1536,27 @@ impl<'a, F: Function> Env<'a, F> { /// This function sets the current allocations of livein registers /// to their spillslots and inserts the edits to flow livein values to /// the allocations where they are expected to be before the first - /// instruction. + /// instruction. fn reload_at_begin(&mut self, block: Block) { - trace!("Reloading live registers at the beginning of block {:?}", block); - trace!("Live registers at the beginning of block {:?}: {:?}", block, self.live_vregs); - trace!("Block params at block {:?} beginning: {:?}", block, self.func.block_params(block)); + trace!( + "Reloading live registers at the beginning of block {:?}", + block + ); + trace!( + "Live registers at the beginning of block {:?}: {:?}", + block, + self.live_vregs + ); + trace!( + "Block params at block {:?} beginning: {:?}", + block, + self.func.block_params(block) + ); // We need to check for the registers that are still live. // These registers are either livein or block params // Liveins should be stack-allocated and block params should be freed. for vreg in self.func.block_params(block).iter().cloned() { - trace!("Processing {:?}", vreg); + trace!("Processing {}", vreg); if self.vreg_allocs[vreg.vreg()] == Allocation::none() { // If this block param was never used, its allocation will // be none at this point. 
@@ -1352,29 +1569,38 @@ impl<'a, F: Function> Env<'a, F> { // the first instruction. let prev_alloc = self.vreg_allocs[vreg.vreg()]; let slot = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); - trace!("{:?} is a block param. Freeing it", vreg); + trace!("{} is a block param. Freeing it", vreg); // A block's block param is not live before the block. // And `vreg_allocs[i]` of a virtual register i is none for // dead vregs. self.freealloc(vreg, PRegSet::empty(), false); if slot == prev_alloc { // No need to do any movements if the spillslot is where the vreg is expected to be. - trace!("No need to reload {:?} because it's already in its expected allocation", vreg); + trace!( + "No need to reload {} because it's already in its expected allocation", + vreg + ); continue; } - trace!("Move reason: reload {:?} at begin - move from its spillslot", vreg); + trace!( + "Move reason: reload {} at begin - move from its spillslot", + vreg + ); self.edits.add_move_later( self.func.block_insns(block).first(), slot, prev_alloc, vreg.class(), InstPosition::Before, - true + true, ); } for vreg in self.live_vregs.iter() { - trace!("Processing {:?}", vreg); - trace!("{:?} is not a block param. It's a liveout vreg from some predecessor", vreg); + trace!("Processing {}", vreg); + trace!( + "{} is not a block param. It's a liveout vreg from some predecessor", + vreg + ); if self.vreg_spillslots[vreg.vreg()].is_invalid() { self.vreg_spillslots[vreg.vreg()] = self.stack.allocstack(&vreg); } @@ -1382,15 +1608,18 @@ impl<'a, F: Function> Env<'a, F> { // the first instruction. let prev_alloc = self.vreg_allocs[vreg.vreg()]; let slot = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); - trace!("Setting {:?}'s current allocation to its spillslot", vreg); + trace!("Setting {}'s current allocation to its spillslot", vreg); self.vreg_allocs[vreg.vreg()] = slot; if let Some(preg) = prev_alloc.as_reg() { - trace!("{:?} was in {:?}. Removing it", preg, vreg); + trace!("{} was in {}. 
Removing it", preg, vreg); // Nothing is in that preg anymore. Return it to // the free preg list. self.vreg_in_preg[preg.index()] = VReg::invalid(); if !self.is_stack(prev_alloc) { - trace!("{:?} is not a fixed stack slot. Recording it in the freed def pregs list", prev_alloc); + trace!( + "{} is not a fixed stack slot. Recording it in the freed def pregs list", + prev_alloc + ); // Using this instead of directly adding it to // freepregs to prevent allocated registers from being // used as scratch registers. @@ -1400,20 +1629,27 @@ impl<'a, F: Function> Env<'a, F> { } if slot == prev_alloc { // No need to do any movements if the spillslot is where the vreg is expected to be. - trace!("No need to reload {:?} because it's already in its expected allocation", vreg); + trace!( + "No need to reload {} because it's already in its expected allocation", + vreg + ); continue; } - trace!("Move reason: reload {:?} at begin - move from its spillslot", vreg); + trace!( + "Move reason: reload {} at begin - move from its spillslot", + vreg + ); self.edits.add_move_later( self.func.block_insns(block).first(), slot, prev_alloc, vreg.class(), InstPosition::Before, - true + true, ); } - let scratch_regs = self.get_scratch_regs_for_reloading(self.edits.inst_needs_scratch_reg.clone()); + let scratch_regs = + self.get_scratch_regs_for_reloading(self.edits.inst_needs_scratch_reg.clone()); self.edits.process_edits(scratch_regs); self.add_freed_regs_to_freelist(); if trace_enabled!() { @@ -1422,8 +1658,8 @@ impl<'a, F: Function> Env<'a, F> { } fn log_post_reload_at_begin_state(&self, block: Block) { - use hashbrown::HashMap; use alloc::format; + use hashbrown::HashMap; trace!(""); trace!("State after instruction reload_at_begin of {:?}", block); let mut map = HashMap::new(); @@ -1449,8 +1685,8 @@ impl<'a, F: Function> Env<'a, F> { } fn log_post_inst_processing_state(&self, inst: Inst) { - use hashbrown::HashMap; use alloc::format; + use hashbrown::HashMap; trace!(""); trace!("State after 
instruction {:?}", inst); let mut map = HashMap::new(); @@ -1500,13 +1736,21 @@ fn log_function(func: &F) { trace!("Processing a new function"); for block in 0..func.num_blocks() { let block = Block::new(block); - trace!("Block {:?}. preds: {:?}. succs: {:?}, params: {:?}", - block, func.block_preds(block), func.block_succs(block), + trace!( + "Block {:?}. preds: {:?}. succs: {:?}, params: {:?}", + block, + func.block_preds(block), + func.block_succs(block), func.block_params(block) ); for inst in func.block_insns(block).iter() { let clobbers = func.inst_clobbers(inst); - trace!("inst{:?}: {:?}. Clobbers: {}", inst.index(), func.inst_operands(inst), clobbers); + trace!( + "inst{:?}: {:?}. Clobbers: {}", + inst.index(), + func.inst_operands(inst), + clobbers + ); if func.is_branch(inst) { trace!("Block args: "); for (succ_idx, _succ) in func.block_succs(block).iter().enumerate() { @@ -1526,7 +1770,7 @@ fn log_output<'a, F: Function>(env: &Env<'a, F>) { if env.vreg_spillslots[i].is_valid() { v.push(( format!("{}", VReg::new(i, RegClass::Int)), - format!("{}", Allocation::stack(env.vreg_spillslots[i])) + format!("{}", Allocation::stack(env.vreg_spillslots[i])), )); } } @@ -1541,7 +1785,6 @@ pub fn run( enable_annotations: bool, enable_ssa_checker: bool, ) -> Result { - if enable_ssa_checker { let cfginfo = CFGInfo::new(func)?; validate_ssa(func, &cfginfo)?; diff --git a/src/fastalloc/vregset.rs b/src/fastalloc/vregset.rs index 8f261eb0..2a8b983c 100644 --- a/src/fastalloc/vregset.rs +++ b/src/fastalloc/vregset.rs @@ -1,8 +1,8 @@ -use alloc::vec::Vec; +use crate::{RegClass, VReg}; use alloc::vec; -use core::fmt; +use alloc::vec::Vec; use core::convert::{TryFrom, TryInto}; -use crate::{RegClass, VReg}; +use core::fmt; struct RegClassNum; @@ -39,7 +39,7 @@ impl From for Frame { (match value { RegClass::Int => RegClassNum::INT, RegClass::Float => RegClassNum::FLOAT, - RegClass::Vector => RegClassNum::VECTOR + RegClass::Vector => RegClassNum::VECTOR, }) as Frame } } @@ 
-50,11 +50,10 @@ const VREGS_PER_FRAME: usize = BITS_PER_FRAME / 2; const EMPTY_FRAME: Frame = RegClassNum::INVALID as Frame; pub struct VRegSet { - bits: Vec + bits: Vec, } impl VRegSet { - pub fn with_capacity(n: usize) -> Self { let no_of_bits_needed = 2 * n; let quot = no_of_bits_needed / BITS_PER_FRAME; @@ -91,15 +90,14 @@ impl VRegSet { } pub fn is_empty(&mut self) -> bool { - self.bits.iter() - .all(|frame| *frame == EMPTY_FRAME) + self.bits.iter().all(|frame| *frame == EMPTY_FRAME) } pub fn iter(&self) -> BitSetIter { BitSetIter { next_frame_idx: 0, curr_frame: EMPTY_FRAME, - bits: &self.bits + bits: &self.bits, } } } @@ -107,7 +105,7 @@ impl VRegSet { pub struct BitSetIter<'a> { next_frame_idx: usize, curr_frame: Frame, - bits: &'a [Frame] + bits: &'a [Frame], } impl<'a> Iterator for BitSetIter<'a> { @@ -134,7 +132,6 @@ impl<'a> Iterator for BitSetIter<'a> { } } - impl fmt::Debug for VRegSet { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{{ ")?; @@ -160,11 +157,11 @@ mod tests { set.insert(VREG(199, Float)); set.insert(VREG(23, Int)); let els = [ - VREG(10, Int), - VREG(11, Vector), - VREG(23, Int), - VREG(199, Float), - VREG(2000, Int) + VREG(10, Int), + VREG(11, Vector), + VREG(23, Int), + VREG(199, Float), + VREG(2000, Int), ]; for (actual_el, expected_el) in set.iter().zip(els.iter()) { assert_eq!(actual_el, *expected_el); diff --git a/src/lib.rs b/src/lib.rs index 8e6765f0..6cefb657 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -41,12 +41,12 @@ type FxHashSet = hashbrown::HashSet>; pub(crate) mod cfg; pub(crate) mod domtree; +pub(crate) mod fastalloc; pub mod indexset; pub(crate) mod ion; pub(crate) mod moves; pub(crate) mod postorder; pub mod ssa; -pub(crate) mod fastalloc; #[macro_use] mod index; @@ -1570,7 +1570,9 @@ pub fn run( ) -> Result { match options.algorithm { Algorithm::Ion => ion::run(func, env, options.verbose_log, options.validate_ssa), - Algorithm::Fastalloc => fastalloc::run(func, env, options.verbose_log, 
options.validate_ssa) + Algorithm::Fastalloc => { + fastalloc::run(func, env, options.verbose_log, options.validate_ssa) + } } } From 647fe5c816986b350ee5dfac40883ddb508c6443 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 15 Aug 2024 11:45:40 +0100 Subject: [PATCH 55/95] removed unnecessary field --- src/fastalloc/mod.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 4cde3836..923c4a72 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -309,8 +309,6 @@ pub struct Env<'a, F: Function> { /// `reused_input_to_reuse_op[i]` is the operand index of the reuse operand /// that uses the `i`th operand in the current instruction as its input. reused_input_to_reuse_op: Vec, - /// The vregs defined or used in the current instruction. - vregs_in_curr_inst: BitSet, /// The physical registers allocated to the operands in the current instruction. /// Used during eviction to detect eviction of a register that is already in use in the /// instruction being processed, implying that there aren't enough registers for allocation. 
@@ -386,7 +384,6 @@ impl<'a, F: Function> Env<'a, F> { }, vregs_first_seen_in_curr_inst: BitSet::with_capacity(func.num_vregs()), reused_input_to_reuse_op: vec![usize::MAX; max_operand_len as usize], - vregs_in_curr_inst: BitSet::with_capacity(func.num_vregs()), pregs_allocd_in_curr_inst: PRegSet::empty(), dedicated_scratch_regs: PartedByRegClass { items: [ @@ -839,7 +836,6 @@ impl<'a, F: Function> Env<'a, F> { ); return Ok(()); } - self.vregs_in_curr_inst.insert(op.vreg().vreg()); self.live_vregs.insert(op.vreg()); if !self.allocd_within_constraint(inst, op) { let prev_alloc = self.vreg_allocs[op.vreg().vreg()]; @@ -1138,7 +1134,7 @@ impl<'a, F: Function> Env<'a, F> { let vreg = self.vreg_in_preg[clobbered_preg.index()]; if vreg != VReg::invalid() { let vreg_isnt_mentioned_in_curr_inst = - !self.vregs_in_curr_inst.contains(vreg.vreg()); + !self.vregs_allocd_in_curr_inst.contains(vreg.vreg()); if vreg_isnt_mentioned_in_curr_inst { trace!("Adding save and restore edits for {}", vreg); let preg_alloc = Allocation::reg(clobbered_preg); @@ -1322,7 +1318,6 @@ impl<'a, F: Function> Env<'a, F> { Allocation::stack(self.vreg_spillslots[vreg.vreg()]); self.live_vregs.insert(*vreg); self.vregs_allocd_in_curr_inst.insert(vreg.vreg()); - self.vregs_in_curr_inst.insert(vreg.vreg()); } } } @@ -1523,7 +1518,6 @@ impl<'a, F: Function> Env<'a, F> { for entry in self.reused_input_to_reuse_op.iter_mut() { *entry = usize::MAX; } - self.vregs_in_curr_inst.clear(); self.pregs_allocd_in_curr_inst = PRegSet::empty(); if trace_enabled!() { self.log_post_inst_processing_state(inst); From 08cce9c1c24a396d5dfd94a7e1d07341757fa09f Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Sat, 17 Aug 2024 12:01:34 +0100 Subject: [PATCH 56/95] live vregs now represented by an indexset --- src/fastalloc/mod.rs | 15 ++++++++------- src/lib.rs | 11 +++++++++++ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 923c4a72..42b9367a 100644 --- 
a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1,3 +1,4 @@ +use crate::indexset::IndexSet; use crate::{cfg::CFGInfo, ion::Stats, Allocation, RegAllocError}; use crate::{ssa::validate_ssa, Edit, Function, MachineEnv, Output, ProgPoint}; use crate::{ @@ -17,7 +18,6 @@ mod vregset; use bitset::BitSet; use iter::*; use lru::*; -use vregset::VRegSet; #[derive(Debug)] struct Allocs { @@ -257,7 +257,7 @@ pub struct Env<'a, F: Function> { /// `vreg_spillslots[i]` is the spillslot for virtual register `i`. vreg_spillslots: Vec, /// The virtual registers that are currently live. - live_vregs: VRegSet, + live_vregs: IndexSet, /// Allocatable free physical registers for classes Int, Float, and Vector, respectively. freepregs: PartedByRegClass, /// Least-recently-used caches for register classes Int, Float, and Vector, respectively. @@ -356,7 +356,7 @@ impl<'a, F: Function> Env<'a, F> { allocatable_regs: PRegSet::from(env), vreg_allocs: vec![Allocation::none(); func.num_vregs()], vreg_spillslots: vec![SpillSlot::invalid(); func.num_vregs()], - live_vregs: VRegSet::with_capacity(func.num_vregs()), + live_vregs: IndexSet::new(), freepregs: PartedByRegClass { items: [ PRegSet::from_iter(regs[0].iter().cloned()), @@ -630,7 +630,7 @@ impl<'a, F: Function> Env<'a, F> { AllocationKind::None => unreachable!("Attempting to free an unallocated operand!"), } self.vreg_allocs[vreg.vreg()] = Allocation::none(); - self.live_vregs.remove(vreg.vreg()); + self.live_vregs.set(vreg.bits(), false); trace!( "{} curr alloc is now {}", vreg, @@ -836,7 +836,7 @@ impl<'a, F: Function> Env<'a, F> { ); return Ok(()); } - self.live_vregs.insert(op.vreg()); + self.live_vregs.set(op.vreg().bits(), true); if !self.allocd_within_constraint(inst, op) { let prev_alloc = self.vreg_allocs[op.vreg().vreg()]; if prev_alloc.is_none() { @@ -1316,7 +1316,7 @@ impl<'a, F: Function> Env<'a, F> { } self.vreg_allocs[vreg.vreg()] = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); - 
self.live_vregs.insert(*vreg); + self.live_vregs.set(vreg.bits(), true); self.vregs_allocd_in_curr_inst.insert(vreg.vreg()); } } @@ -1589,7 +1589,8 @@ impl<'a, F: Function> Env<'a, F> { true, ); } - for vreg in self.live_vregs.iter() { + for vreg_bits in self.live_vregs.iter() { + let vreg = VReg::from(vreg_bits as u32); trace!("Processing {}", vreg); trace!( "{} is not a block param. It's a liveout vreg from some predecessor", diff --git a/src/lib.rs b/src/lib.rs index 6cefb657..de145ba5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -372,6 +372,17 @@ impl VReg { pub const fn invalid() -> Self { VReg::new(Self::MAX, RegClass::Int) } + + #[inline(always)] + pub const fn bits(self) -> usize { + self.bits as usize + } +} + +impl From for VReg { + fn from(value: u32) -> Self { + Self { bits: value } + } } impl core::fmt::Debug for VReg { From c93a4a15dad50643b8a8e447b729424d2af9d4e7 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Sat, 17 Aug 2024 15:51:16 +0100 Subject: [PATCH 57/95] now representing live vregs with a linked list --- src/fastalloc/mod.rs | 16 ++-- src/fastalloc/vregset.rs | 180 +++++++++++++++------------------------ 2 files changed, 78 insertions(+), 118 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 42b9367a..730f4cee 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -18,6 +18,7 @@ mod vregset; use bitset::BitSet; use iter::*; use lru::*; +use vregset::VRegSet; #[derive(Debug)] struct Allocs { @@ -257,7 +258,7 @@ pub struct Env<'a, F: Function> { /// `vreg_spillslots[i]` is the spillslot for virtual register `i`. vreg_spillslots: Vec, /// The virtual registers that are currently live. - live_vregs: IndexSet, + live_vregs: VRegSet, /// Allocatable free physical registers for classes Int, Float, and Vector, respectively. freepregs: PartedByRegClass, /// Least-recently-used caches for register classes Int, Float, and Vector, respectively. 
@@ -356,7 +357,7 @@ impl<'a, F: Function> Env<'a, F> { allocatable_regs: PRegSet::from(env), vreg_allocs: vec![Allocation::none(); func.num_vregs()], vreg_spillslots: vec![SpillSlot::invalid(); func.num_vregs()], - live_vregs: IndexSet::new(), + live_vregs: VRegSet::with_capacity(func.num_vregs()), freepregs: PartedByRegClass { items: [ PRegSet::from_iter(regs[0].iter().cloned()), @@ -630,7 +631,7 @@ impl<'a, F: Function> Env<'a, F> { AllocationKind::None => unreachable!("Attempting to free an unallocated operand!"), } self.vreg_allocs[vreg.vreg()] = Allocation::none(); - self.live_vregs.set(vreg.bits(), false); + self.live_vregs.remove(vreg.vreg()); trace!( "{} curr alloc is now {}", vreg, @@ -836,11 +837,11 @@ impl<'a, F: Function> Env<'a, F> { ); return Ok(()); } - self.live_vregs.set(op.vreg().bits(), true); if !self.allocd_within_constraint(inst, op) { let prev_alloc = self.vreg_allocs[op.vreg().vreg()]; if prev_alloc.is_none() { self.vregs_first_seen_in_curr_inst.insert(op.vreg().vreg()); + self.live_vregs.insert(op.vreg()); } self.alloc_operand(inst, op, op_idx, fixed_spillslot)?; // Need to insert a move to propagate flow from the current @@ -1313,11 +1314,11 @@ impl<'a, F: Function> Env<'a, F> { // are also used as operands. if self.vreg_allocs[vreg.vreg()].is_none() { self.vregs_first_seen_in_curr_inst.insert(vreg.vreg()); + self.live_vregs.insert(*vreg); + self.vregs_allocd_in_curr_inst.insert(vreg.vreg()); } self.vreg_allocs[vreg.vreg()] = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); - self.live_vregs.set(vreg.bits(), true); - self.vregs_allocd_in_curr_inst.insert(vreg.vreg()); } } } @@ -1589,8 +1590,7 @@ impl<'a, F: Function> Env<'a, F> { true, ); } - for vreg_bits in self.live_vregs.iter() { - let vreg = VReg::from(vreg_bits as u32); + for vreg in self.live_vregs.iter() { trace!("Processing {}", vreg); trace!( "{} is not a block param. 
It's a liveout vreg from some predecessor", diff --git a/src/fastalloc/vregset.rs b/src/fastalloc/vregset.rs index 2a8b983c..1889286b 100644 --- a/src/fastalloc/vregset.rs +++ b/src/fastalloc/vregset.rs @@ -1,133 +1,99 @@ -use crate::{RegClass, VReg}; -use alloc::vec; -use alloc::vec::Vec; -use core::convert::{TryFrom, TryInto}; use core::fmt; -struct RegClassNum; - -impl RegClassNum { - const INVALID: u8 = 0b00; - const MAX: u8 = 0b11; - // 0b11 - const INT: u8 = Self::MAX - RegClass::Int as u8; - // 0b10 - const FLOAT: u8 = Self::MAX - RegClass::Float as u8; - // 0b01 - const VECTOR: u8 = Self::MAX - RegClass::Vector as u8; -} - -impl TryFrom for RegClass { - type Error = (); - fn try_from(value: u64) -> Result { - if value == RegClassNum::INT as u64 { - Ok(RegClass::Int) - } else if value == RegClassNum::FLOAT as u64 { - Ok(RegClass::Float) - } else if value == RegClassNum::VECTOR as u64 { - Ok(RegClass::Vector) - } else if value == RegClassNum::INVALID as u64 { - Err(()) - } else { - unreachable!() - } - } -} +use alloc::vec; +use alloc::vec::Vec; +use crate::{RegClass, VReg}; -impl From for Frame { - fn from(value: RegClass) -> Self { - (match value { - RegClass::Int => RegClassNum::INT, - RegClass::Float => RegClassNum::FLOAT, - RegClass::Vector => RegClassNum::VECTOR, - }) as Frame - } +#[derive(Clone)] +struct VRegNode { + next: u32, + prev: u32, + class: RegClass, } -type Frame = u64; -const BITS_PER_FRAME: usize = core::mem::size_of::() * 8; -const VREGS_PER_FRAME: usize = BITS_PER_FRAME / 2; -const EMPTY_FRAME: Frame = RegClassNum::INVALID as Frame; - +// Using a non-circular doubly linked list here for fast insertion, +// removal and iteration. 
pub struct VRegSet { - bits: Vec, + items: Vec, + head: u32, } impl VRegSet { - pub fn with_capacity(n: usize) -> Self { - let no_of_bits_needed = 2 * n; - let quot = no_of_bits_needed / BITS_PER_FRAME; - let no_of_frames = quot + 1; + pub fn with_capacity(num_vregs: usize) -> Self { Self { - bits: vec![RegClassNum::INVALID as Frame; no_of_frames], + items: vec![VRegNode { prev: u32::MAX, next: u32::MAX, class: RegClass::Int }; num_vregs], + head: u32::MAX, } } - fn compute_index(&self, el: usize) -> (usize, usize) { - (el / BITS_PER_FRAME, el % BITS_PER_FRAME) - } - pub fn insert(&mut self, vreg: VReg) { - let (frame_no, idx) = self.compute_index(vreg.vreg() * 2); - let reg_class_num: Frame = vreg.class().into(); - self.bits[frame_no] |= reg_class_num << idx; + // Intentionally assuming that the set doesn't already + // contain `vreg`. + if self.head == u32::MAX { + self.items[vreg.vreg()] = VRegNode { + next: u32::MAX, + prev: u32::MAX, + class: vreg.class(), + }; + self.head = vreg.vreg() as u32; + } else { + let old_head_next = self.items[self.head as usize].next; + if old_head_next != u32::MAX { + self.items[old_head_next as usize].prev = vreg.vreg() as u32; + } + self.items[self.head as usize].next = vreg.vreg() as u32; + self.items[vreg.vreg()] = VRegNode { + next: old_head_next, + prev: self.head, + class: vreg.class(), + }; + } } pub fn remove(&mut self, vreg_num: usize) { - let (frame_no, idx) = self.compute_index(vreg_num * 2); - self.bits[frame_no] &= !(0b11 << idx); - } - - pub fn contains(&self, vreg_num: usize) -> bool { - let (frame_no, idx) = self.compute_index(vreg_num * 2); - self.bits[frame_no] & (0b11 << idx) != RegClassNum::INVALID as Frame - } - - pub fn clear(&mut self) { - for frame in self.bits.iter_mut() { - *frame = RegClassNum::INVALID as Frame; + let prev = self.items[vreg_num].prev; + let next = self.items[vreg_num].next; + if prev != u32::MAX { + self.items[prev as usize].next = next; + } + if next != u32::MAX { + self.items[next as 
usize].prev = prev; + } + if vreg_num as u32 == self.head { + self.head = next; } } - pub fn is_empty(&mut self) -> bool { - self.bits.iter().all(|frame| *frame == EMPTY_FRAME) + pub fn is_empty(&self) -> bool { + self.head == u32::MAX } - pub fn iter(&self) -> BitSetIter { - BitSetIter { - next_frame_idx: 0, - curr_frame: EMPTY_FRAME, - bits: &self.bits, + pub fn iter(&self) -> VRegSetIter { + VRegSetIter { + curr_item: self.head, + head: self.head, + items: &self.items, } } } -pub struct BitSetIter<'a> { - next_frame_idx: usize, - curr_frame: Frame, - bits: &'a [Frame], +pub struct VRegSetIter<'a> { + curr_item: u32, + head: u32, + items: &'a [VRegNode], } -impl<'a> Iterator for BitSetIter<'a> { +impl<'a> Iterator for VRegSetIter<'a> { type Item = VReg; - fn next(&mut self) -> Option { - loop { - while self.curr_frame == EMPTY_FRAME { - if self.next_frame_idx >= self.bits.len() { - return None; - } - self.curr_frame = self.bits[self.next_frame_idx]; - self.next_frame_idx += 1; - } - let mut skip = self.curr_frame.trailing_zeros(); - if skip % 2 != 0 { - skip -= 1; - } - let vreg_num = (self.next_frame_idx - 1) * VREGS_PER_FRAME + (skip / 2) as usize; - let class = (self.curr_frame >> skip) & 0b11; - self.curr_frame &= !(0b11 << skip); - return Some(VReg::new(vreg_num, class.try_into().unwrap())); + fn next(&mut self) -> Option { + if self.curr_item != u32::MAX { + let item = self.items[self.curr_item as usize].clone(); + let vreg = VReg::new(self.curr_item as usize, item.class); + self.curr_item = item.next; + Some(vreg) + } else { + None } } } @@ -135,8 +101,8 @@ impl<'a> Iterator for BitSetIter<'a> { impl fmt::Debug for VRegSet { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{{ ")?; - for el in self.iter() { - write!(f, "{el} ")?; + for vreg in self.iter() { + write!(f, "{vreg} ")?; } write!(f, "}}") } @@ -158,27 +124,21 @@ mod tests { set.insert(VREG(23, Int)); let els = [ VREG(10, Int), - VREG(11, Vector), VREG(23, Int), VREG(199, 
Float), + VREG(11, Vector), VREG(2000, Int), ]; for (actual_el, expected_el) in set.iter().zip(els.iter()) { assert_eq!(actual_el, *expected_el); } - assert!(set.contains(10)); - assert!(!set.contains(12)); - assert!(!set.contains(197)); - assert!(set.contains(23)); - assert!(set.contains(11)); set.remove(23); - assert!(!set.contains(23)); set.insert(VREG(73, Vector)); let els = [ VREG(10, Int), - VREG(11, Vector), VREG(73, Vector), VREG(199, Float), + VREG(11, Vector), VREG(2000, Int), ]; for (actual_el, expected_el) in set.iter().zip(els.iter()) { From 4a3973c4daa22b0312d49ac8d525db50d6b2dc06 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Sat, 17 Aug 2024 16:54:19 +0100 Subject: [PATCH 58/95] now preallocating edits queues --- src/fastalloc/mod.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 730f4cee..bde2f63f 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1,4 +1,3 @@ -use crate::indexset::IndexSet; use crate::{cfg::CFGInfo, ion::Stats, Allocation, RegAllocError}; use crate::{ssa::validate_ssa, Edit, Function, MachineEnv, Output, ProgPoint}; use crate::{ @@ -125,11 +124,17 @@ struct Edits { } impl Edits { - fn new(fixed_stack_slots: PRegSet) -> Self { + fn new(fixed_stack_slots: PRegSet, max_operand_len: u32, num_insts: usize) -> Self { + // Some operands generate edits and some don't. + // The operands that generate edits add no more than two. + // Some edits are added due to clobbers, not operands. + // Anyways, I think this may be a reasonable guess. 
+ let inst_edits_len_guess = max_operand_len as usize * 2; + let total_edits_len_guess = inst_edits_len_guess * num_insts; Self { - inst_pre_edits: VecDeque::new(), - inst_post_edits: VecDeque::new(), - edits: VecDeque::new(), + inst_pre_edits: VecDeque::with_capacity(inst_edits_len_guess), + inst_post_edits: VecDeque::with_capacity(inst_edits_len_guess), + edits: VecDeque::with_capacity(total_edits_len_guess), fixed_stack_slots, inst_needs_scratch_reg: PartedByRegClass { items: [false, false, false], @@ -394,7 +399,7 @@ impl<'a, F: Function> Env<'a, F> { ], }, allocs, - edits: Edits::new(fixed_stack_slots), + edits: Edits::new(fixed_stack_slots, max_operand_len, func.num_insts()), stats: Stats::default(), } } From 0e9b929e067c4056333093a7dcc150b2c9039e65 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 22 Aug 2024 17:20:28 +0100 Subject: [PATCH 59/95] added intersect_from, invert and is_empty functions to PRegSet --- src/lib.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index de145ba5..17f6d2b0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -251,6 +251,24 @@ impl PRegSet { self.bits[i] |= other.bits[i]; } } + + pub fn intersect_from(&mut self, other: PRegSet) { + for i in 0..self.bits.len() { + self.bits[i] &= other.bits[i]; + } + } + + pub fn invert(&self) -> PRegSet { + let mut set = self.bits; + for i in 0..self.bits.len() { + set[i] = !self.bits[i]; + } + PRegSet{ bits: set } + } + + pub fn is_empty(&self, regclass: RegClass) -> bool { + self.bits[regclass as usize] == 0 + } } impl IntoIterator for PRegSet { From 5aa10ebd52ea001ef9f3de8f11c9f25c1256343b Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 22 Aug 2024 17:48:04 +0100 Subject: [PATCH 60/95] algorithm simplifications --- src/fastalloc/iter.rs | 8 + src/fastalloc/lru.rs | 76 ++-- src/fastalloc/mod.rs | 807 ++++++++++++++++++------------------------ 3 files changed, 393 insertions(+), 498 deletions(-) diff --git a/src/fastalloc/iter.rs 
b/src/fastalloc/iter.rs index b296c6af..cb91d8d6 100644 --- a/src/fastalloc/iter.rs +++ b/src/fastalloc/iter.rs @@ -18,6 +18,14 @@ impl<'a> Operands<'a> { .filter(move |(_, op)| predicate(*op)) } + pub fn def_ops(&self) -> impl Iterator + 'a { + self.matches(|op| op.kind() == OperandKind::Def) + } + + pub fn use_ops(&self) -> impl Iterator + 'a { + self.matches(|op| op.kind() == OperandKind::Use) + } + pub fn non_fixed_non_reuse_late(&self) -> impl Iterator + 'a { self.matches(|op| { !matches!( diff --git a/src/fastalloc/lru.rs b/src/fastalloc/lru.rs index cc8ae818..cfb33538 100644 --- a/src/fastalloc/lru.rs +++ b/src/fastalloc/lru.rs @@ -1,4 +1,4 @@ -use crate::{PReg, RegClass}; +use crate::{PReg, PRegSet, RegClass}; use alloc::vec; use alloc::vec::Vec; use core::{ @@ -7,6 +7,8 @@ use core::{ }; use hashbrown::HashSet; +const DUMMY_NODE_INDEX: usize = PReg::MAX + 1; + /// A least-recently-used cache organized as a linked list based on a vector. pub struct Lru { /// The list of node information. @@ -103,6 +105,46 @@ impl Lru { PReg::new(oldest as usize, self.regclass) } + /// Get the last PReg in the LRU from the set `from`. + pub fn last(&self, from: PRegSet) -> Option { + trace!("Getting the last preg from the LRU in set {from}"); + if self.is_empty() { + panic!("LRU is empty"); + } + let mut last = self.data[self.head as usize].prev; + let init_last = last; + loop { + let preg = PReg::new(last as usize, self.regclass); + if from.contains(preg) { + return Some(preg); + } + last = self.data[last as usize].prev; + if last == init_last { + return None; + } + } + } + + /// Get the last PReg from the LRU for which `f` returns true. 
+ pub fn last_satisfying bool>(&self, f: F) -> Option { + trace!("Getting the last preg from the LRU satisfying..."); + if self.is_empty() { + panic!("LRU is empty"); + } + let mut last = self.data[self.head as usize].prev; + let init_last = last; + loop { + let preg = PReg::new(last as usize, self.regclass); + if f(preg) { + return Some(preg); + } + last = self.data[last as usize].prev; + if last == init_last { + return None; + } + } + } + /// Splices out a node from the list. pub fn remove(&mut self, hw_enc: usize) { trace!( @@ -134,38 +176,6 @@ impl Lru { } } - /// Sets the physical register with hw_enc `hw_enc` to the last in the list. - pub fn append(&mut self, hw_enc: usize) { - trace!( - "Before appending: {:?} LRU. head: {:?}, Actual data: {:?}", - self.regclass, - self.head, - self.data - ); - trace!("Appending p{hw_enc} to the {:?} LRU", self.regclass); - if self.head != u8::MAX { - let head = self.head as usize; - let last_node = self.data[head].prev; - self.data[last_node as usize].next = hw_enc as u8; - self.data[head].prev = hw_enc as u8; - self.data[hw_enc].prev = last_node; - self.data[hw_enc].next = self.head; - } else { - self.head = hw_enc as u8; - self.data[hw_enc].prev = hw_enc as u8; - self.data[hw_enc].next = hw_enc as u8; - } - trace!("Appended p{hw_enc} to the {:?} LRU", self.regclass); - if cfg!(debug_assertions) { - self.validate_lru(); - } - } - - pub fn append_and_poke(&mut self, preg: PReg) { - self.append(preg.hw_enc()); - self.poke(preg); - } - /// Insert node `i` before node `j` in the list. fn insert_before(&mut self, i: u8, j: u8) { trace!( diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index bde2f63f..385770f3 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -121,6 +121,9 @@ struct Edits { /// instruction's moves during the `process_edit` calls. 
inst_needs_scratch_reg: PartedByRegClass, fixed_stack_slots: PRegSet, + /// All pregs used as the source or destination in an edit + /// for the current instruction. + pregs_mentioned_in_edit: PRegSet, } impl Edits { @@ -139,6 +142,7 @@ impl Edits { inst_needs_scratch_reg: PartedByRegClass { items: [false, false, false], }, + pregs_mentioned_in_edit: PRegSet::empty(), } } } @@ -168,6 +172,7 @@ impl Edits { } self.inst_post_edits.clear(); self.inst_pre_edits.clear(); + self.pregs_mentioned_in_edit = PRegSet::empty(); } fn process_edit(&mut self, point: ProgPoint, edit: Edit, scratch_reg: Option) { @@ -234,6 +239,12 @@ impl Edits { "Recording edit to add later: {:?}", (ProgPoint::new(inst, pos), Edit::Move { from, to }, class) ); + if let Some(preg) = from.as_reg() { + self.pregs_mentioned_in_edit.add(preg); + } + if let Some(preg) = to.as_reg() { + self.pregs_mentioned_in_edit.add(preg); + } if from == to { trace!("Deciding not to record the edit, since the source and dest are the same"); return; @@ -253,6 +264,36 @@ impl Edits { } } +#[derive(Debug, Clone)] +struct PartedByOperandPos { + items: [T; 2], +} + +impl Index for PartedByOperandPos { + type Output = T; + fn index(&self, index: OperandPos) -> &Self::Output { + &self.items[index as usize] + } +} + +impl IndexMut for PartedByOperandPos { + fn index_mut(&mut self, index: OperandPos) -> &mut Self::Output { + &mut self.items[index as usize] + } +} + +use core::fmt; + +impl fmt::Display for PartedByOperandPos { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{{ early: {}, late: {} }}", + self.items[0], self.items[1] + ) + } +} + #[derive(Debug)] pub struct Env<'a, F: Function> { func: &'a F, @@ -273,34 +314,6 @@ pub struct Env<'a, F: Function> { vreg_in_preg: Vec, /// For parallel moves from branch args to block param spillslots. 
temp_spillslots: PartedByRegClass>, - /// All the allocatables registers that were used for one thing or the other - /// but need to be freed after the current instruction has completed processing, - /// not immediately, like allocatable registers used as scratch registers. - /// - /// This is used to keep track of them so that they can be marked as free for reallocation - /// after the instruction has completed processing. - free_after_curr_inst: PartedByRegClass, - /// Physical registers that were used for late def operands and now free to be - /// reused for early operands in the current instruction. - /// - /// After late defs have been allocated, rather than returning their registers to - /// the free register list, it is added here to avoid the registers being used as - /// scratch registers. - /// - /// For example, consider the following: - /// def v0, use v1 - /// If the processing of v1 requires a stack-to-stack move, then a scratch register is - /// used and the instruction becomes: - /// def v0, use v1 - /// move from stack0 to p0 - /// move from p0 to stack1 - /// - /// Since scratch registers may be drawn from the free register list and v0 will be allocated and - /// deallocated before v1, then it's possible for the scratch register p0 to be v0's allocation, - /// which is incorrect because p0 will end up holding whatever is in stack0, not v0. - /// `freed_def_regs` avoids this by allowing the late def registers to be reused without making it - /// possible for this scratch register scenario to happen. - freed_def_pregs: PartedByRegClass, /// Used to keep track of which used vregs are seen for the first time /// in the instruction, that is, if the vregs live past the current instruction. /// This is used to determine whether or not reused operands @@ -315,10 +328,12 @@ pub struct Env<'a, F: Function> { /// `reused_input_to_reuse_op[i]` is the operand index of the reuse operand /// that uses the `i`th operand in the current instruction as its input. 
reused_input_to_reuse_op: Vec, - /// The physical registers allocated to the operands in the current instruction. - /// Used during eviction to detect eviction of a register that is already in use in the - /// instruction being processed, implying that there aren't enough registers for allocation. - pregs_allocd_in_curr_inst: PRegSet, + /// The set of registers that can be used for allocation in the + /// early and late phases of an instruction. + /// Allocatable registers that contain no vregs, registers that can be + /// evicted can be in the set, and fixed stack slots are in this set. + available_pregs: PartedByOperandPos, + init_available_pregs: PRegSet, allocatable_regs: PRegSet, dedicated_scratch_regs: PartedByRegClass>, stack: Stack<'a, F>, @@ -353,13 +368,21 @@ impl<'a, F: Function> Env<'a, F> { .iter() .cloned(), ); + let allocatable_regs = PRegSet::from(env); + let init_available_pregs = { + let mut regs = allocatable_regs; + for preg in env.fixed_stack_slots.iter() { + regs.add(*preg); + } + regs + }; use alloc::vec; trace!("{:?}", env); let (allocs, max_operand_len) = Allocs::new(func); let fixed_stack_slots = PRegSet::from_iter(env.fixed_stack_slots.iter().cloned()); Self { func, - allocatable_regs: PRegSet::from(env), + allocatable_regs, vreg_allocs: vec![Allocation::none(); func.num_vregs()], vreg_spillslots: vec![SpillSlot::invalid(); func.num_vregs()], live_vregs: VRegSet::with_capacity(func.num_vregs()), @@ -381,16 +404,9 @@ impl<'a, F: Function> Env<'a, F> { Vec::with_capacity(func.num_vregs()), ], }, - free_after_curr_inst: PartedByRegClass { - items: [PRegSet::empty(), PRegSet::empty(), PRegSet::empty()], - }, vregs_allocd_in_curr_inst: BitSet::with_capacity(func.num_vregs()), - freed_def_pregs: PartedByRegClass { - items: [PRegSet::empty(), PRegSet::empty(), PRegSet::empty()], - }, vregs_first_seen_in_curr_inst: BitSet::with_capacity(func.num_vregs()), reused_input_to_reuse_op: vec![usize::MAX; max_operand_len as usize], - 
pregs_allocd_in_curr_inst: PRegSet::empty(), dedicated_scratch_regs: PartedByRegClass { items: [ env.scratch_by_class[0], @@ -398,6 +414,13 @@ impl<'a, F: Function> Env<'a, F> { env.scratch_by_class[2], ], }, + init_available_pregs, + available_pregs: PartedByOperandPos { + items: [ + init_available_pregs, + init_available_pregs, + ] + }, allocs, edits: Edits::new(fixed_stack_slots, max_operand_len, func.num_insts()), stats: Stats::default(), @@ -414,20 +437,11 @@ impl<'a, F: Function> Env<'a, F> { false } - fn add_freed_regs_to_freelist(&mut self) { - for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { - for preg in self.freed_def_pregs[class] { - self.lrus[class].append(preg.hw_enc()); - } - self.freepregs[class].union_from(self.freed_def_pregs[class]); - self.freed_def_pregs[class] = PRegSet::empty(); - - for preg in self.free_after_curr_inst[class] { - self.lrus[preg.class()].append(preg.hw_enc()); - } - self.freepregs[class].union_from(self.free_after_curr_inst[class]); - self.free_after_curr_inst[class] = PRegSet::empty(); - } + fn reset_available_pregs(&mut self) { + trace!("Resetting the available pregs"); + self.available_pregs = PartedByOperandPos { + items: [self.init_available_pregs, self.init_available_pregs] + }; } /// The scratch registers needed for processing the edits generated @@ -440,7 +454,8 @@ impl<'a, F: Function> Env<'a, F> { fn get_scratch_regs_for_reloading( &self, inst_needs_scratch_reg: PartedByRegClass, - ) -> PartedByRegClass> { + avail_pregs: PRegSet, + ) -> Result>, RegAllocError> { trace!("Getting scratch registers for reload_at_begin"); let mut scratch_regs = PartedByRegClass { items: [None, None, None], @@ -453,16 +468,18 @@ impl<'a, F: Function> Env<'a, F> { scratch_regs[class] = self.dedicated_scratch_regs[class]; } else { trace!("No dedicated scratch register for class {:?}. 
Using the last free register", class); - scratch_regs[class] = Some( - self.freepregs[class] - .into_iter() - .next() - .expect("Allocation impossible?"), - ); + scratch_regs[class] = self.lrus[class].last_satisfying(|preg| { + avail_pregs.contains(preg) + && self.vreg_in_preg[preg.index()] == VReg::invalid() + }); + if scratch_regs[class].is_none() { + trace!("Unable to find a scratch register for class {class:?}"); + return Err(RegAllocError::TooManyLiveRegs); + } } } } - scratch_regs + Ok(scratch_regs) } /// The scratch registers needed for processing edits generated while @@ -471,6 +488,7 @@ impl<'a, F: Function> Env<'a, F> { &mut self, inst: Inst, inst_needs_scratch_reg: PartedByRegClass, + avail_pregs: PRegSet, ) -> Result>, RegAllocError> { trace!("Getting scratch registers for instruction {:?}", inst); let mut scratch_regs = PartedByRegClass { @@ -484,15 +502,51 @@ impl<'a, F: Function> Env<'a, F> { scratch_regs[class] = Some(reg); } else { trace!("class {:?} has no dedicated scratch register", class); - let reg = if let Some(preg) = self.freepregs[class].into_iter().next() { - trace!("Using the last free {:?} register for scratch", class); - preg + if let Some(preg) = self.lrus[class].last_satisfying(|preg| { + avail_pregs.contains(preg) + // Consider a scenario: + // + // 1. use v0 (fixed: stack0), use v1 (fixed: p1) + // 2. use v1 (fixed: p2), use v0 (fixed: stack1) + // + // In the above, during the processing of inst 1, v0 is already + // in stack1 and v1 is already in p2. + // An edit will be inserted after the instruction to move + // from stack0 to stack1. Afterwards, when the v1 operand + // is being processed, `vreg_in_preg[p2]` will be set to `VReg::invalid`. + // The end result is: + // + // move from p1 to stack_v1 // Save v1 (inserted by `process_operand_allocation`) + // 1. 
use v0 (fixed: stack0), use v1 (fixed: p1) + // move from stack_v1 to p2 // Restore v1 (inserted by `process_operand_allocation`) + // move from stack0 to scratch // scratch could be p2 + // move from scratch to stack1 + // 2. use v1 (fixed: p2), use v0 (fixed: stack1) + // + // p2 could be used as a scratch register because it will be + // marked available in the `avail_regs` since it's not allocated + // to any operand in inst 1. + // If p2 is used as the scratch register, then v0 will + // be used in the place of v1 in inst 2, which is incorrect. + // To avoid this scenario, all pregs either used as the source + // or destination in the instruction are avoided. + && !self.edits.pregs_mentioned_in_edit.contains(preg) + }) { + if self.vreg_in_preg[preg.index()] != VReg::invalid() { + // A register used as scratch may be used for instructions + // added before or after the instruction. + // + // This will work because `preg` hasn't been allocated in the current + // instruction (`avail_pregs` only contains such registers). + self.evict_vreg_in_preg(inst, preg); + self.vreg_in_preg[preg.index()] = VReg::invalid(); + } + trace!("The scratch register: {preg}"); + scratch_regs[class] = Some(preg); } else { - trace!("No free {:?} registers. Evicting a register", class); - self.evict_any_reg(inst, class)? 
- }; - trace!("The scratch register: {reg}"); - scratch_regs[class] = Some(reg); + trace!("Unable to find a scratch register for class {class:?}"); + return Err(RegAllocError::TooManyLiveRegs); + } } } else { trace!("{:?} class does not need a scratch register", class); @@ -501,29 +555,7 @@ impl<'a, F: Function> Env<'a, F> { Ok(scratch_regs) } - fn move_after_inst(&mut self, inst: Inst, vreg: VReg, to: Allocation) { - self.edits.add_move_later( - inst, - self.vreg_allocs[vreg.vreg()], - to, - vreg.class(), - InstPosition::After, - false, - ); - } - - fn move_before_inst(&mut self, inst: Inst, vreg: VReg, to: Allocation) { - self.edits.add_move_later( - inst, - self.vreg_allocs[vreg.vreg()], - to, - vreg.class(), - InstPosition::Before, - false, - ); - } - - fn allocd_within_constraint(&self, inst: Inst, op: Operand) -> bool { + fn allocd_within_constraint(&self, inst: Inst, op: Operand, fixed_spillslot: Option) -> bool { let alloc = self.vreg_allocs[op.vreg().vreg()]; let alloc_is_clobber = if let Some(preg) = alloc.as_reg() { self.func.inst_clobbers(inst).contains(preg) @@ -540,15 +572,56 @@ impl<'a, F: Function> Env<'a, F> { // by a def operand with a reuse constraint will end up // assigning the clobber to that def, and if it lives past // the current instruction, then restoration will be impossible. - alloc.is_some() && !alloc_is_clobber + if alloc_is_clobber { + return false; + } + if let Some(preg) = alloc.as_reg() { + if !self.available_pregs[op.pos()].contains(preg) { + // If a register isn't in the available pregs list, then + // there are two cases: either it's reserved for a + // fixed register constraint or a vreg allocated in the instruction + // is already assigned to it. 
+ // + // In the case where the vreg is used multiple times: + // use v0 (reg), use v0 (reg), use v0 (reg) + // During processing of the first operand, the allocated + // register is removed from the available regs list, but the + // vreg is added to the `vregs_allocd_in_curr_inst` set. + // This check is necessary to ensure + // that multiple pregs aren't assigned in these situations when + // they aren't needed. + self.vreg_in_preg[preg.index()] == op.vreg() + && self.vregs_allocd_in_curr_inst.contains(op.vreg().vreg()) + } else { + true + } + } else { + !alloc.is_none() + } } OperandConstraint::Reg => { - let alloc_is_reg = alloc.is_reg() - && alloc.as_reg().unwrap().class() == op.class() - && !self.is_stack(alloc); - alloc_is_reg && !alloc_is_clobber + if alloc_is_clobber { + return false; + } + if self.is_stack(alloc) { + return false; + } + if let Some(preg) = alloc.as_reg() { + if !self.available_pregs[op.pos()].contains(preg) { + self.vreg_in_preg[preg.index()] == op.vreg() + && self.vregs_allocd_in_curr_inst.contains(op.vreg().vreg()) + } else { + true + } + } else { + false + } } - OperandConstraint::Stack => self.is_stack(alloc), + OperandConstraint::Stack => if let Some(slot) = fixed_spillslot { + alloc == Allocation::stack(slot) + } else { + self.is_stack(alloc) + }, // It is possible for an operand to have a fixed register constraint to // a clobber. 
OperandConstraint::FixedReg(preg) => alloc.is_reg() && alloc.as_reg().unwrap() == preg, @@ -569,68 +642,23 @@ impl<'a, F: Function> Env<'a, F> { let slot = self.vreg_spillslots[evicted_vreg.vreg()]; self.vreg_allocs[evicted_vreg.vreg()] = Allocation::stack(slot); trace!("Move reason: eviction"); - self.move_after_inst(inst, evicted_vreg, Allocation::reg(preg)); - } - - fn evict_any_reg(&mut self, inst: Inst, regclass: RegClass) -> Result { - trace!( - "Evicting a register in evict_any_reg for class {:?}", - regclass + self.edits.add_move_later( + inst, + self.vreg_allocs[evicted_vreg.vreg()], + Allocation::reg(preg), + evicted_vreg.class(), + InstPosition::After, + false, ); - let preg = self.lrus[regclass].pop(); - trace!("Selected register from lru: {}", preg); - // Check if the preg has already been allocated for this - // instruction. If it has, then there are too many stuff to - // allocate, making allocation impossible. - // Remember that for this to be true, the fixed registers must have - // be allocated already. Why? Because if some register p0 has been allocated - // and some fixed constraint register is encountered that needs p0, then - // allocation will fail regardless of whether or not there are other free registers - if self.pregs_allocd_in_curr_inst.contains(preg) { - // No enough registers for allocation? - return Err(RegAllocError::TooManyLiveRegs); - } - self.evict_vreg_in_preg(inst, preg); - Ok(preg) } - fn freealloc(&mut self, vreg: VReg, clobbers: PRegSet, is_fixed_def: bool) { + fn freealloc(&mut self, vreg: VReg) { trace!("Freeing vreg {}", vreg); let alloc = self.vreg_allocs[vreg.vreg()]; match alloc.kind() { AllocationKind::Reg => { let preg = alloc.as_reg().unwrap(); self.vreg_in_preg[preg.index()] = VReg::invalid(); - // If it's a fixed stack slot, then it's not allocatable. 
- if !self.is_stack(alloc) { - if clobbers.contains(preg) || is_fixed_def { - // For a defined vreg to be restored to the location it's expected to - // be in after the instruction, it cannot be allocated to a clobber because that - // will make the restoration impossible. - // In the case where a reuse operand reuses an input allocated to a clobber, - // the defined vreg will be allocated to a clobber - // and if the vreg lives past the instruction, restoration will be impossible. - // To avoid this, simply make it impossible for a clobber to be allocated to - // a vreg with "any" or "any reg" constraints. - // By adding it to this list, instead of freed_def_pregs, the only way - // a clobber can be newly allocated to a vreg in the instruction is to - // use a fixed register constraint. - self.free_after_curr_inst[preg.class()].add(preg); - if is_fixed_def { - self.lrus[vreg.class()].remove(preg.hw_enc()); - } - // No need to remove the preg from the LRU if it's a clobber - // because clobbers have already been removed from the LRU. - } else { - // Added to the freed def pregs list, not the free pregs - // list to avoid a def's allocated register being used - // as a scratch register. - self.freed_def_pregs[vreg.class()].add(preg); - // Don't allow this register to be evicted. - self.lrus[vreg.class()].remove(preg.hw_enc()); - } - } - self.pregs_allocd_in_curr_inst.remove(preg); } AllocationKind::Stack => (), AllocationKind::None => unreachable!("Attempting to free an unallocated operand!"), @@ -642,62 +670,40 @@ impl<'a, F: Function> Env<'a, F> { vreg, self.vreg_allocs[vreg.vreg()] ); - trace!( - "Pregs currently allocated: {}", - self.pregs_allocd_in_curr_inst - ); } /// Allocates a physical register for the operand `op`. 
fn alloc_reg_for_operand(&mut self, inst: Inst, op: Operand) -> Result<(), RegAllocError> { - trace!("freepregs int: {}", self.freepregs[RegClass::Int]); - trace!("freepregs vector: {}", self.freepregs[RegClass::Vector]); - trace!("freepregs float: {}", self.freepregs[RegClass::Float]); - trace!( - "freed_def_pregs int: {}", - self.freed_def_pregs[RegClass::Int] - ); - trace!( - "freed_def_pregs vector: {}", - self.freed_def_pregs[RegClass::Vector] - ); - trace!( - "freed_def_pregs float: {}", - self.freed_def_pregs[RegClass::Float] - ); + trace!("available regs: {}", self.available_pregs); + trace!("Int LRU: {:?}", self.lrus[RegClass::Int]); + trace!("Float LRU: {:?}", self.lrus[RegClass::Float]); + trace!("Vector LRU: {:?}", self.lrus[RegClass::Vector]); trace!(""); - // The only way a freed def preg can be reused for an operand is if - // the operand uses or defines a vreg in the early phase and the vreg doesn't - // live past the instruction. If the vreg lives past the instruction, then the - // defined value will overwrite it. - if op.pos() == OperandPos::Early - && op.kind() == OperandKind::Use - && self - .vregs_first_seen_in_curr_inst - .contains(op.vreg().vreg()) - { - if let Some(freed_def_preg) = - remove_any_from_pregset(&mut self.freed_def_pregs[op.class()]) - { - trace!("Reusing the freed def preg: {}", freed_def_preg); - self.lrus[freed_def_preg.class()].append_and_poke(freed_def_preg); - self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(freed_def_preg); - self.vreg_in_preg[freed_def_preg.index()] = op.vreg(); - return Ok(()); - } + if self.available_pregs[op.pos()].is_empty(op.class()) { + trace!("No registers available in class {:?}", op.class()); + return Err(RegAllocError::TooManyLiveRegs); } - let preg = if self.freepregs[op.class()] == PRegSet::empty() { - trace!("Evicting a register"); - self.evict_any_reg(inst, op.class())? 
- } else { - trace!("Getting a register from freepregs"); - remove_any_from_pregset(&mut self.freepregs[op.class()]).unwrap() + let Some(preg) = self.lrus[op.class()].last(self.available_pregs[op.pos()]) else { + trace!("Failed to find an available {:?} register in the LRU for operand {op}", op.class()); + return Err(RegAllocError::TooManyLiveRegs); }; + if self.vreg_in_preg[preg.index()] != VReg::invalid() { + self.evict_vreg_in_preg(inst, preg); + } trace!("The allocated register for vreg {}: {}", op.vreg(), preg); self.lrus[op.class()].poke(preg); self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(preg); self.vreg_in_preg[preg.index()] = op.vreg(); - self.pregs_allocd_in_curr_inst.add(preg); + self.available_pregs[op.pos()].remove(preg); + match (op.pos(), op.kind()) { + (OperandPos::Late, OperandKind::Use) => { + self.available_pregs[OperandPos::Early].remove(preg); + } + (OperandPos::Early, OperandKind::Def) => { + self.available_pregs[OperandPos::Late].remove(preg); + } + _ => () + }; Ok(()) } @@ -706,80 +712,23 @@ impl<'a, F: Function> Env<'a, F> { inst: Inst, op: Operand, preg: PReg, - ) -> Result<(), RegAllocError> { + ) { trace!("The fixed preg: {} for operand {}", preg, op); // It is an error for a fixed register clobber to be used for a defined vreg // that outlives the instruction, because it will be impossible to restore it. // But checking for that will be expensive? - let is_allocatable = - !self.is_stack(Allocation::reg(preg)) && !self.func.inst_clobbers(inst).contains(preg); if self.vreg_in_preg[preg.index()] != VReg::invalid() { // Something is already in that register. Evict it. - // Check if the evicted register is a register in the - // current instruction. If it is, then there must be multiple - // fixed register constraints for the same `preg` in the same - // operand position (early or late), because the fixed registers - // are considered first. 
- if self.pregs_allocd_in_curr_inst.contains(preg) { - return Err(RegAllocError::TooManyLiveRegs); - } self.evict_vreg_in_preg(inst, preg); - } else if self.freed_def_pregs[preg.class()].contains(preg) { - // Consider the scenario: - // def v0 (fixed: p0), use v1 (fixed: p0) - // In the above, p0 has already been used for v0, and since it's a - // def operand, the register has been freed and kept in `freed_def_pregs`, - // so it can be added back to the free pregs list after the instruction - // has finished processing. - // To avoid the preg being added back to the free list, it must be removed - // from `freed_def_pregs` here. - trace!( - "{} is now using preg {}. Removing it from the freed def pregs list", - op.vreg(), - preg - ); - self.freed_def_pregs[preg.class()].remove(preg); - self.lrus[preg.class()].append(preg.hw_enc()); - } else if self.free_after_curr_inst[preg.class()].contains(preg) { - // If the new allocation was once a freed prev_alloc, remove it - // from the free after current inst list. - // For example: - // - // 1. use v0 (fixed: p0), use v0 (fixed: p1) - // 2. use v0 (fixed: p1) - // - // In the processing of the above, v0 is allocated to p1 at inst 2. - // During the processing of inst 1, v0's allocation is changed to p0 - // and p1 is put on the free after current inst list to make it - // available for later allocation. - // But then, it's reallocated for the second operand. - // To prevent reallocating a register while a live one is still in it, - // this register has to be removed from the list. - trace!( - "{} is now using preg {}. Removing it from the free after instruction list", - op.vreg(), - preg - ); - self.free_after_curr_inst[preg.class()].remove(preg); - if is_allocatable { - self.lrus[preg.class()].append(preg.hw_enc()); - } - } else { - // Find the register in the list of free registers (if it's there). 
- // If it's not there, then it must be be a fixed stack slot or - // a clobber, since clobbers are removed from the free preg list before allocation begins. - self.freepregs[op.class()].remove(preg); } - if is_allocatable { + if self.allocatable_regs.contains(preg) { self.lrus[op.class()].poke(preg); } self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(preg); self.vreg_in_preg[preg.index()] = op.vreg(); - self.pregs_allocd_in_curr_inst.add(preg); trace!("vreg {} is now in preg {}", op.vreg(), preg); - Ok(()) } /// Allocates for the operand `op` with index `op_idx` into the @@ -810,7 +759,7 @@ impl<'a, F: Function> Env<'a, F> { self.vreg_allocs[op.vreg().vreg()] = Allocation::stack(slot); } OperandConstraint::FixedReg(preg) => { - self.alloc_fixed_reg_for_operand(inst, op, preg)?; + self.alloc_fixed_reg_for_operand(inst, op, preg); } OperandConstraint::Reuse(_) => { // This is handled elsewhere. @@ -842,7 +791,8 @@ impl<'a, F: Function> Env<'a, F> { ); return Ok(()); } - if !self.allocd_within_constraint(inst, op) { + if !self.allocd_within_constraint(inst, op, fixed_spillslot) { + trace!("{op} isn't allocated within constraints."); let prev_alloc = self.vreg_allocs[op.vreg().vreg()]; if prev_alloc.is_none() { self.vregs_first_seen_in_curr_inst.insert(op.vreg().vreg()); @@ -1011,26 +961,6 @@ impl<'a, F: Function> Env<'a, F> { // Free the previous allocation so that it can be reused. let preg = prev_alloc.as_reg().unwrap(); self.vreg_in_preg[preg.index()] = VReg::invalid(); - // If it's a fixed stack slot, then it's not allocatable. - if !self.is_stack(prev_alloc) { - trace!( - "{} is no longer using preg {}, so freeing it after instruction", - op.vreg(), - preg - ); - // A clobber will have already been removed from the LRU - // and will be freed after the instruction has completed processing - // if no vreg is still present in it. 
- if !self.func.inst_clobbers(inst).contains(preg) { - self.free_after_curr_inst[preg.class()].add(preg); - self.lrus[preg.class()].remove(preg.hw_enc()); - } else { - trace!( - "{} is a clobber, so not bothering with the state update", - preg - ); - } - } } } trace!( @@ -1040,6 +970,7 @@ impl<'a, F: Function> Env<'a, F> { self.allocs[(inst.index(), op_idx)] ); } else { + trace!("{op} is already allocated within constraints"); self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[op.vreg().vreg()]; if let Some(preg) = self.allocs[(inst.index(), op_idx)].as_reg() { if self.func.inst_clobbers(inst).contains(preg) { @@ -1087,9 +1018,20 @@ impl<'a, F: Function> Env<'a, F> { true, ); } - } else if self.allocatable_regs.contains(preg) { + } + if self.allocatable_regs.contains(preg) { self.lrus[preg.class()].poke(preg); } + self.available_pregs[op.pos()].remove(preg); + match (op.pos(), op.kind()) { + (OperandPos::Late, OperandKind::Use) => { + self.available_pregs[OperandPos::Early].remove(preg); + } + (OperandPos::Early, OperandKind::Def) => { + self.available_pregs[OperandPos::Late].remove(preg); + } + _ => () + }; } trace!( "Allocation for instruction {:?} and operand {}: {}", @@ -1098,6 +1040,8 @@ impl<'a, F: Function> Env<'a, F> { self.allocs[(inst.index(), op_idx)] ); } + trace!("Late available regs: {}", self.available_pregs[OperandPos::Late]); + trace!("Early available regs: {}", self.available_pregs[OperandPos::Early]); self.vregs_allocd_in_curr_inst.insert(op.vreg().vreg()); Ok(()) } @@ -1115,6 +1059,24 @@ impl<'a, F: Function> Env<'a, F> { } } + fn remove_clobbers_from_available_pregs(&mut self, clobbers: PRegSet) { + trace!("Removing clobbers {clobbers} from available reg sets"); + // Don't let defs get allocated to clobbers. + // Consider a scenario: + // + // 1. (early|late) def v0 (reg). Clobbers: [p0] + // 2. 
use v0 (fixed: p0) + // + // If p0 isn't removed from the both available reg sets, then + // p0 could get allocated to v0 in inst 1, making it impossible + // to restore it after the instruction. + // To avoid this scenario, clobbers should be removed from both late + // and early reg sets. + let all_but_clobbers = clobbers.invert(); + self.available_pregs[OperandPos::Late].intersect_from(all_but_clobbers); + self.available_pregs[OperandPos::Early].intersect_from(all_but_clobbers); + } + fn save_and_restore_clobbered_registers(&mut self, inst: Inst) { trace!("Adding save and restore edits for vregs in clobbered registers"); for clobbered_preg in self.func.inst_clobbers(inst) { @@ -1338,193 +1300,109 @@ impl<'a, F: Function> Env<'a, F> { } let operands = Operands::new(self.func.inst_operands(inst)); let clobbers = self.func.inst_clobbers(inst); - for preg in clobbers { - // To avoid allocating clobbers, they are removed from the - // free register list. To also avoid a clobber being evicted, - // it's also removed from the LRU. - // The only way a clobber can be marked as the allocation of - // an operand is through a fixed register constraint to the clobber - // or a reused input constraint of an operand with a fixed register - // constraint to use a clobber. 
- if self.allocatable_regs.contains(preg) { - trace!("Removing {} from the freelist because it's a clobber", preg); - self.freepregs[preg.class()].remove(preg); - self.lrus[preg.class()].remove(preg.hw_enc()); - } - } + for (op_idx, op) in operands.reuse() { + trace!("Initializing reused_input_to_reuse_op"); let OperandConstraint::Reuse(reused_idx) = op.constraint() else { unreachable!() }; self.reused_input_to_reuse_op[reused_idx] = op_idx; } for (op_idx, op) in operands.fixed() { - let reuse_op_idx = self.reused_input_to_reuse_op[op_idx]; - if reuse_op_idx != usize::MAX { - let reuse_op = operands[reuse_op_idx]; - let new_reuse_op = Operand::new( - reuse_op.vreg(), - op.constraint(), - reuse_op.kind(), - reuse_op.pos(), - ); - self.process_operand_allocation(inst, new_reuse_op, reuse_op_idx, None)?; - } - // It's possible for a fixed early use to have the same fixed constraint - // as a fixed late def. Because of this, handle the fixed early use without - // explicit reuse operand constraints later. - else if op.pos() != OperandPos::Early || op.kind() != OperandKind::Use { - self.process_operand_allocation(inst, op, op_idx, None)?; - } - } - for (_, op) in operands.fixed_late_def() { - // It is possible for a fixed early use to - // use a register allocated to a fixed late def. - // This deallocates fixed late defs, making it possible - // for those early fixed uses to be allocated successfully, - // without making the fixed registers available for reuse by other - // operands in the instruction. - self.freealloc(op.vreg(), clobbers, true); - } - for (op_idx, op) in operands.fixed_early_use() { - // The reuse operands inputs already have their allocations with - // the reuse operands. Those allocations will be moved over to the - // reused input records when the reuse operands are deallocated. 
- if self.reused_input_to_reuse_op[op_idx] == usize::MAX { - self.process_operand_allocation(inst, op, op_idx, None)?; - } else { - trace!("Not allocating {} now because it's a reused input", op); + let OperandConstraint::FixedReg(preg) = op.constraint() else { + unreachable!(); + }; + let early_avail_pregs = self.available_pregs[OperandPos::Early]; + let late_avail_pregs = self.available_pregs[OperandPos::Late]; + match (op.pos(), op.kind()) { + (OperandPos::Early, OperandKind::Use) => { + if op.as_fixed_nonallocatable().is_none() && !early_avail_pregs.contains(preg) { + return Err(RegAllocError::TooManyLiveRegs); + } + self.available_pregs[OperandPos::Early].remove(preg); + if self.reused_input_to_reuse_op[op_idx] != usize::MAX { + if op.as_fixed_nonallocatable().is_none() && !late_avail_pregs.contains(preg) { + return Err(RegAllocError::TooManyLiveRegs); + } + self.available_pregs[OperandPos::Late].remove(preg); + } + } + (OperandPos::Late, OperandKind::Def) => { + if op.as_fixed_nonallocatable().is_none() && !late_avail_pregs.contains(preg) { + return Err(RegAllocError::TooManyLiveRegs); + } + self.available_pregs[OperandPos::Late].remove(preg); + } + _ => { + if op.as_fixed_nonallocatable().is_none() + && (!early_avail_pregs.contains(preg) || !late_avail_pregs.contains(preg)) + { + return Err(RegAllocError::TooManyLiveRegs); + } + self.available_pregs[OperandPos::Early].remove(preg); + self.available_pregs[OperandPos::Late].remove(preg); + } } } - for (op_idx, op) in operands.non_fixed_def() { + self.remove_clobbers_from_available_pregs(clobbers); + for (op_idx, op) in operands.def_ops() { + trace!("Allocating def operands {op}"); if let OperandConstraint::Reuse(reused_idx) = op.constraint() { let reused_op = operands[reused_idx]; - if matches!(reused_op.constraint(), OperandConstraint::FixedReg(_)) { - // The reuse operands that reuse early fixed uses have already been - // allocated. 
- continue; - } - let new_reuse_op = - Operand::new(op.vreg(), reused_op.constraint(), op.kind(), op.pos()); + let new_reuse_op = Operand::new( + op.vreg(), + reused_op.constraint(), + op.kind(), + op.pos(), + ); + trace!("allocating reuse op {op} as {new_reuse_op}"); self.process_operand_allocation(inst, new_reuse_op, op_idx, None)?; + if let Some(preg) = self.allocs[(inst.index(), op_idx)].as_reg() { + // The reused input is going to be processed as a fixed register for this + // preg. + self.available_pregs[OperandPos::Early].remove(preg); + } } else { self.process_operand_allocation(inst, op, op_idx, None)?; } + self.freealloc(op.vreg()); } - for (op_idx, op) in operands.non_fixed_late_use() { - self.process_operand_allocation(inst, op, op_idx, None)?; - } - for (op_idx, op) in operands.non_fixed_late_def() { - if let OperandConstraint::Reuse(reused_idx) = op.constraint() { - let alloc = self.allocs[(inst.index(), op_idx)]; - self.freealloc(op.vreg(), clobbers, false); - // Transfer the allocation for the reuse operand to - // the reused input. 
- let reused_op = operands[reused_idx]; - let new_reused_op: Operand; - let mut fixed_stack_alloc = None; - if let Some(preg) = alloc.as_reg() { - new_reused_op = Operand::new( - reused_op.vreg(), - OperandConstraint::FixedReg(preg), - reused_op.kind(), - reused_op.pos(), - ); + for (op_idx, op) in operands.use_ops() { + trace!("Allocating use operand {op}"); + if self.reused_input_to_reuse_op[op_idx] != usize::MAX { + let reuse_op_idx = self.reused_input_to_reuse_op[op_idx]; + let reuse_op_alloc = self.allocs[(inst.index(), reuse_op_idx)]; + let new_reused_input_constraint; + let mut fixed_slot = None; + if let Some(preg) = reuse_op_alloc.as_reg() { + new_reused_input_constraint = OperandConstraint::FixedReg(preg); } else { - fixed_stack_alloc = alloc.as_stack(); - new_reused_op = Operand::new( - reused_op.vreg(), - OperandConstraint::Stack, - reused_op.kind(), - reused_op.pos(), - ); + new_reused_input_constraint = OperandConstraint::Stack; + fixed_slot = Some(reuse_op_alloc.as_stack().unwrap()); } - self.process_operand_allocation( - inst, - new_reused_op, - reused_idx, - fixed_stack_alloc, - )?; + let new_reused_input = Operand::new( + op.vreg(), + new_reused_input_constraint, + op.kind(), + op.pos(), + ); + trace!("Allocating reused input {op} as {new_reused_input}, (fixed spillslot: {fixed_slot:?})"); + self.process_operand_allocation(inst, new_reused_input, op_idx, fixed_slot)?; } else { - self.freealloc(op.vreg(), clobbers, false); - } - } - for (op_idx, op) in operands.non_fixed_early_use() { - // Reused inputs already have their allocations. 
- if self.reused_input_to_reuse_op[op_idx] == usize::MAX { self.process_operand_allocation(inst, op, op_idx, None)?; } } - for (_, op) in operands.early_def() { - self.freealloc(op.vreg(), clobbers, false); - } self.save_and_restore_clobbered_registers(inst); - for preg in self.func.inst_clobbers(inst) { - if self.allocatable_regs.contains(preg) { - if self.vreg_in_preg[preg.index()] == VReg::invalid() { - // In the case where the clobbered register is allocated to - // something, don't add the register to the freelist, cause - // it isn't free. - trace!("Adding clobbered {} to free after inst list", preg); - // Consider a scenario: - // - // 1. use v0 (fixed: p1). Clobbers: [p0] - // 2. use v0 (fixed: p0) - // - // In the above, v0 is first allocated to p0 at inst 2. - // At inst 1, v0's allocation is changed to p1 and edits are inserted - // to save and restore v0: - // - // move from p1 to stack_v0 - // 1. use v0 (fixed: p1). Clobbers: [p0] - // move from stack_v0 to p0 - // 2. use v0 (fixed: p0) - // - // Suppose some other edits need to be inserted before/after inst 1 - // and scratch registers are needed. - // If the clobber p0 is added back to the free list directly, - // p0 may end up be being used as a scratch register and get overwritten - // before inst 2 is reached. This could happen if inst 1 is a safepoint and - // edits to save and restore reftypes are prepended before the inst - // and after resulting in the following scenario: - // - // --- p0 is overwritten --- - // move from p1 to stack_v0 - // 1. use v0 (fixed: p1). Clobbers: [p0] - // move from stack_v0 to p0 - // --- p0 is overwritten --- - // 2. use v0 (fixed: p0) - // - // To avoid this scenario, the registers are added to the - // `free_after_curr_inst` instead, to ensure that it isn't used as - // a scratch register. - self.free_after_curr_inst[preg.class()].add(preg); - } else { - // Something is still in the clobber. - // After this instruction, it's no longer a clobber. 
- // Add it back to the LRU. - trace!( - "Something is still in the clobber {}. Adding it back to the LRU directly.", - preg - ); - self.lrus[preg.class()].append_and_poke(preg); - } - } - } - trace!("After the allocation:"); - trace!("freed_def_pregs: {}", self.freed_def_pregs); - trace!("free after curr inst: {}", self.free_after_curr_inst); - trace!(""); + let mut avail_for_scratch = self.available_pregs[OperandPos::Early]; + avail_for_scratch.intersect_from(self.available_pregs[OperandPos::Late]); let scratch_regs = - self.get_scratch_regs(inst, self.edits.inst_needs_scratch_reg.clone())?; + self.get_scratch_regs(inst, self.edits.inst_needs_scratch_reg.clone(), avail_for_scratch)?; self.edits.process_edits(scratch_regs); - self.add_freed_regs_to_freelist(); self.vregs_first_seen_in_curr_inst.clear(); self.vregs_allocd_in_curr_inst.clear(); for entry in self.reused_input_to_reuse_op.iter_mut() { *entry = usize::MAX; } - self.pregs_allocd_in_curr_inst = PRegSet::empty(); if trace_enabled!() { self.log_post_inst_processing_state(inst); } @@ -1537,7 +1415,7 @@ impl<'a, F: Function> Env<'a, F> { /// to their spillslots and inserts the edits to flow livein values to /// the allocations where they are expected to be before the first /// instruction. - fn reload_at_begin(&mut self, block: Block) { + fn reload_at_begin(&mut self, block: Block) -> Result<(), RegAllocError> { trace!( "Reloading live registers at the beginning of block {:?}", block @@ -1552,6 +1430,8 @@ impl<'a, F: Function> Env<'a, F> { block, self.func.block_params(block) ); + trace!("Available pregs: {}", self.available_pregs[OperandPos::Early]); + let mut available_regs_for_scratch = self.available_pregs[OperandPos::Early]; // We need to check for the registers that are still live. // These registers are either livein or block params // Liveins should be stack-allocated and block params should be freed. 
@@ -1573,8 +1453,10 @@ impl<'a, F: Function> Env<'a, F> { // A block's block param is not live before the block. // And `vreg_allocs[i]` of a virtual register i is none for // dead vregs. - self.freealloc(vreg, PRegSet::empty(), false); - if slot == prev_alloc { + self.freealloc(vreg); + if let Some(preg) = prev_alloc.as_reg() { + available_regs_for_scratch.remove(preg); + } else if slot == prev_alloc { // No need to do any movements if the spillslot is where the vreg is expected to be. trace!( "No need to reload {} because it's already in its expected allocation", @@ -1612,20 +1494,9 @@ impl<'a, F: Function> Env<'a, F> { self.vreg_allocs[vreg.vreg()] = slot; if let Some(preg) = prev_alloc.as_reg() { trace!("{} was in {}. Removing it", preg, vreg); - // Nothing is in that preg anymore. Return it to - // the free preg list. + // Nothing is in that preg anymore. self.vreg_in_preg[preg.index()] = VReg::invalid(); - if !self.is_stack(prev_alloc) { - trace!( - "{} is not a fixed stack slot. Recording it in the freed def pregs list", - prev_alloc - ); - // Using this instead of directly adding it to - // freepregs to prevent allocated registers from being - // used as scratch registers. - self.freed_def_pregs[preg.class()].add(preg); - self.lrus[preg.class()].remove(preg.hw_enc()); - } + available_regs_for_scratch.remove(preg); } if slot == prev_alloc { // No need to do any movements if the spillslot is where the vreg is expected to be. 
@@ -1649,12 +1520,12 @@ impl<'a, F: Function> Env<'a, F> { ); } let scratch_regs = - self.get_scratch_regs_for_reloading(self.edits.inst_needs_scratch_reg.clone()); + self.get_scratch_regs_for_reloading(self.edits.inst_needs_scratch_reg.clone(), available_regs_for_scratch)?; self.edits.process_edits(scratch_regs); - self.add_freed_regs_to_freelist(); if trace_enabled!() { self.log_post_reload_at_begin_state(block); } + Ok(()) } fn log_post_reload_at_begin_state(&self, block: Block) { @@ -1679,9 +1550,6 @@ impl<'a, F: Function> Env<'a, F> { trace!("Int LRU: {:?}", self.lrus[RegClass::Int]); trace!("Float LRU: {:?}", self.lrus[RegClass::Float]); trace!("Vector LRU: {:?}", self.lrus[RegClass::Vector]); - trace!("Free int pregs: {}", self.freepregs[RegClass::Int]); - trace!("Free float pregs: {}", self.freepregs[RegClass::Float]); - trace!("Free vector pregs: {}", self.freepregs[RegClass::Vector]); } fn log_post_inst_processing_state(&self, inst: Inst) { @@ -1696,27 +1564,29 @@ impl<'a, F: Function> Env<'a, F> { } } trace!("vreg_allocs: {:?}", map); - let mut map = HashMap::new(); + let mut v = Vec::new(); for i in 0..self.vreg_in_preg.len() { if self.vreg_in_preg[i] != VReg::invalid() { - map.insert(PReg::from_index(i), self.vreg_in_preg[i]); + v.push(format!("{}: {}, ", PReg::from_index(i), self.vreg_in_preg[i])); } } - trace!("vreg_in_preg: {:?}", map); + trace!("vreg_in_preg: {:?}", v); trace!("Int LRU: {:?}", self.lrus[RegClass::Int]); trace!("Float LRU: {:?}", self.lrus[RegClass::Float]); trace!("Vector LRU: {:?}", self.lrus[RegClass::Vector]); - trace!("Free int pregs: {}", self.freepregs[RegClass::Int]); - trace!("Free float pregs: {}", self.freepregs[RegClass::Float]); - trace!("Free vector pregs: {}", self.freepregs[RegClass::Vector]); + trace!(""); } fn alloc_block(&mut self, block: Block) -> Result<(), RegAllocError> { trace!("{:?} start", block); for inst in self.func.block_insns(block).iter().rev() { + // Reset has to be before `alloc_inst` not after 
because + // available pregs is needed after processing the first + // instruction in the block during `reload_at_begin`. + self.reset_available_pregs(); self.alloc_inst(block, inst)?; } - self.reload_at_begin(block); + self.reload_at_begin(block)?; trace!("{:?} end\n", block); Ok(()) } @@ -1724,6 +1594,7 @@ impl<'a, F: Function> Env<'a, F> { fn run(&mut self) -> Result<(), RegAllocError> { debug_assert_eq!(self.func.entry_block().index(), 0); for block in (0..self.func.num_blocks()).rev() { + self.reset_available_pregs(); self.alloc_block(Block::new(block))?; } // Ought to check if there are livein registers @@ -1774,8 +1645,14 @@ fn log_output<'a, F: Function>(env: &Env<'a, F>) { )); } } + let mut temp_slots = Vec::new(); + for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { + for slot in env.temp_spillslots[class].iter() { + temp_slots.push(format!("{slot}")); + } + } trace!("VReg spillslots: {:?}", v); - trace!("Temp spillslots: {:?}", env.temp_spillslots); + trace!("Temp spillslots: {:?}", temp_slots); trace!("Final edits: {:?}", env.edits.edits); } From 52afa1b1dfb0be3eec2f39721fa14666ca310c1a Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Fri, 23 Aug 2024 20:47:36 +0100 Subject: [PATCH 61/95] fixed bug in process_branch state updates --- src/fastalloc/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 385770f3..7d3fab21 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1282,10 +1282,10 @@ impl<'a, F: Function> Env<'a, F> { if self.vreg_allocs[vreg.vreg()].is_none() { self.vregs_first_seen_in_curr_inst.insert(vreg.vreg()); self.live_vregs.insert(*vreg); - self.vregs_allocd_in_curr_inst.insert(vreg.vreg()); } self.vreg_allocs[vreg.vreg()] = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); + self.vregs_allocd_in_curr_inst.insert(vreg.vreg()); } } } From 5dcafe4522a928e6d6c695e92891628105623d38 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Sun, 25 Aug 2024 
14:38:59 +0100 Subject: [PATCH 62/95] simplified handling edits --- src/fastalloc/mod.rs | 338 ++++++++++++++----------------------------- 1 file changed, 108 insertions(+), 230 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 7d3fab21..25889bbd 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -555,6 +555,46 @@ impl<'a, F: Function> Env<'a, F> { Ok(scratch_regs) } + fn reserve_reg_for_fixed_operand(&mut self, inst: Inst, op: Operand, op_idx: usize, preg: PReg) -> Result<(), RegAllocError> { + let early_avail_pregs = self.available_pregs[OperandPos::Early]; + let late_avail_pregs = self.available_pregs[OperandPos::Late]; + match (op.pos(), op.kind()) { + (OperandPos::Early, OperandKind::Use) => { + if op.as_fixed_nonallocatable().is_none() && !early_avail_pregs.contains(preg) { + return Err(RegAllocError::TooManyLiveRegs); + } + self.available_pregs[OperandPos::Early].remove(preg); + if self.reused_input_to_reuse_op[op_idx] != usize::MAX { + if op.as_fixed_nonallocatable().is_none() && !late_avail_pregs.contains(preg) { + return Err(RegAllocError::TooManyLiveRegs); + } + self.available_pregs[OperandPos::Late].remove(preg); + } + } + (OperandPos::Late, OperandKind::Def) => { + if op.as_fixed_nonallocatable().is_none() && !late_avail_pregs.contains(preg) { + return Err(RegAllocError::TooManyLiveRegs); + } + self.available_pregs[OperandPos::Late].remove(preg); + } + _ => { + if op.as_fixed_nonallocatable().is_none() + && (!early_avail_pregs.contains(preg) || !late_avail_pregs.contains(preg)) + { + return Err(RegAllocError::TooManyLiveRegs); + } + self.available_pregs[OperandPos::Early].remove(preg); + self.available_pregs[OperandPos::Late].remove(preg); + } + } + if self.vreg_in_preg[preg.index()] != VReg::invalid() { + // Something is already in that register. Evict it. 
+ self.evict_vreg_in_preg(inst, preg); + self.vreg_in_preg[preg.index()] = VReg::invalid(); + } + Ok(()) + } + fn allocd_within_constraint(&self, inst: Inst, op: Operand, fixed_spillslot: Option) -> bool { let alloc = self.vreg_allocs[op.vreg().vreg()]; let alloc_is_clobber = if let Some(preg) = alloc.as_reg() { @@ -582,16 +622,13 @@ impl<'a, F: Function> Env<'a, F> { // fixed register constraint or a vreg allocated in the instruction // is already assigned to it. // - // In the case where the vreg is used multiple times: - // use v0 (reg), use v0 (reg), use v0 (reg) - // During processing of the first operand, the allocated - // register is removed from the available regs list, but the - // vreg is added to the `vregs_allocd_in_curr_inst` set. - // This check is necessary to ensure - // that multiple pregs aren't assigned in these situations when - // they aren't needed. + // For example: + // 1. use v0, use v0, use v0 + // + // Say p0 is assigned to v0 during the processing of the first operand. + // When the second v0 operand is being processed, v0 will still be in + // v0, so it is still allocated within constraints. self.vreg_in_preg[preg.index()] == op.vreg() - && self.vregs_allocd_in_curr_inst.contains(op.vreg().vreg()) } else { true } @@ -609,7 +646,6 @@ impl<'a, F: Function> Env<'a, F> { if let Some(preg) = alloc.as_reg() { if !self.available_pregs[op.pos()].contains(preg) { self.vreg_in_preg[preg.index()] == op.vreg() - && self.vregs_allocd_in_curr_inst.contains(op.vreg().vreg()) } else { true } @@ -673,7 +709,7 @@ impl<'a, F: Function> Env<'a, F> { } /// Allocates a physical register for the operand `op`. 
- fn alloc_reg_for_operand(&mut self, inst: Inst, op: Operand) -> Result<(), RegAllocError> { + fn alloc_reg_for_operand(&mut self, inst: Inst, op: Operand) -> Result { trace!("available regs: {}", self.available_pregs); trace!("Int LRU: {:?}", self.lrus[RegClass::Int]); trace!("Float LRU: {:?}", self.lrus[RegClass::Float]); @@ -692,8 +728,6 @@ impl<'a, F: Function> Env<'a, F> { } trace!("The allocated register for vreg {}: {}", op.vreg(), preg); self.lrus[op.class()].poke(preg); - self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(preg); - self.vreg_in_preg[preg.index()] = op.vreg(); self.available_pregs[op.pos()].remove(preg); match (op.pos(), op.kind()) { (OperandPos::Late, OperandKind::Use) => { @@ -704,31 +738,7 @@ impl<'a, F: Function> Env<'a, F> { } _ => () }; - Ok(()) - } - - fn alloc_fixed_reg_for_operand( - &mut self, - inst: Inst, - op: Operand, - preg: PReg, - ) { - trace!("The fixed preg: {} for operand {}", preg, op); - - // It is an error for a fixed register clobber to be used for a defined vreg - // that outlives the instruction, because it will be impossible to restore it. - // But checking for that will be expensive? - - if self.vreg_in_preg[preg.index()] != VReg::invalid() { - // Something is already in that register. Evict it. - self.evict_vreg_in_preg(inst, preg); - } - if self.allocatable_regs.contains(preg) { - self.lrus[op.class()].poke(preg); - } - self.vreg_allocs[op.vreg().vreg()] = Allocation::reg(preg); - self.vreg_in_preg[preg.index()] = op.vreg(); - trace!("vreg {} is now in preg {}", op.vreg(), preg); + Ok(Allocation::reg(preg)) } /// Allocates for the operand `op` with index `op_idx` into the @@ -739,13 +749,13 @@ impl<'a, F: Function> Env<'a, F> { op: Operand, op_idx: usize, fixed_spillslot: Option, - ) -> Result<(), RegAllocError> { - match op.constraint() { + ) -> Result { + let new_alloc = match op.constraint() { OperandConstraint::Any => { - self.alloc_reg_for_operand(inst, op)?; + self.alloc_reg_for_operand(inst, op)? 
} OperandConstraint::Reg => { - self.alloc_reg_for_operand(inst, op)?; + self.alloc_reg_for_operand(inst, op)? } OperandConstraint::Stack => { let slot = if let Some(spillslot) = fixed_spillslot { @@ -756,18 +766,28 @@ impl<'a, F: Function> Env<'a, F> { } self.vreg_spillslots[op.vreg().vreg()] }; - self.vreg_allocs[op.vreg().vreg()] = Allocation::stack(slot); + Allocation::stack(slot) } OperandConstraint::FixedReg(preg) => { - self.alloc_fixed_reg_for_operand(inst, op, preg); + trace!("The fixed preg: {} for operand {}", preg, op); + + // It is an error for a fixed register clobber to be used for a defined vreg + // that outlives the instruction, because it will be impossible to restore it. + // But checking for that will be expensive? + + if self.allocatable_regs.contains(preg) { + self.lrus[op.class()].poke(preg); + } + trace!("vreg {} is now in preg {}", op.vreg(), preg); + Allocation::reg(preg) } OperandConstraint::Reuse(_) => { // This is handled elsewhere. unreachable!(); } - } - self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[op.vreg().vreg()]; - Ok(()) + }; + self.allocs[(inst.index(), op_idx)] = new_alloc; + Ok(new_alloc) } /// Allocate operand the `op_idx`th operand `op` in instruction `inst` within its constraint. @@ -793,16 +813,16 @@ impl<'a, F: Function> Env<'a, F> { } if !self.allocd_within_constraint(inst, op, fixed_spillslot) { trace!("{op} isn't allocated within constraints."); - let prev_alloc = self.vreg_allocs[op.vreg().vreg()]; - if prev_alloc.is_none() { + let curr_alloc = self.vreg_allocs[op.vreg().vreg()]; + if curr_alloc.is_none() { self.vregs_first_seen_in_curr_inst.insert(op.vreg().vreg()); self.live_vregs.insert(op.vreg()); } - self.alloc_operand(inst, op, op_idx, fixed_spillslot)?; + let new_alloc = self.alloc_operand(inst, op, op_idx, fixed_spillslot)?; // Need to insert a move to propagate flow from the current // allocation to the subsequent places where the value was // used (in `prev_alloc`, that is). 
- if prev_alloc.is_some() { + if curr_alloc.is_some() { trace!("Move reason: Prev allocation doesn't meet constraints"); if op.kind() == OperandKind::Def { // In the case where `op` is a def, @@ -842,12 +862,14 @@ impl<'a, F: Function> Env<'a, F> { // To avoid this, the moves are prepended. self.edits.add_move_later( inst, - self.vreg_allocs[op.vreg().vreg()], - prev_alloc, + new_alloc, + curr_alloc, op.class(), InstPosition::After, true, ); + // No need to set vreg_in_preg because it will be set during + // `freealloc` if needed. } else { // This was handled by a simple move from the operand to its previous // allocation before the instruction, but this is incorrect. @@ -919,49 +941,24 @@ impl<'a, F: Function> Env<'a, F> { // move from stack_v0 to p1 // 2. use v0 (fixed: p1) - if !self.vregs_allocd_in_curr_inst.contains(op.vreg().vreg()) - // Don't restore after the instruction if it doesn't live past - // this instruction. - && !self.vregs_first_seen_in_curr_inst.contains(op.vreg().vreg()) - { - if self.vreg_spillslots[op.vreg().vreg()].is_invalid() { - self.vreg_spillslots[op.vreg().vreg()] = - self.stack.allocstack(&op.vreg()); - } - let op_spillslot = - Allocation::stack(self.vreg_spillslots[op.vreg().vreg()]); - self.edits.add_move_later( - inst, - self.vreg_allocs[op.vreg().vreg()], - op_spillslot, - op.class(), - InstPosition::Before, - false, - ); - self.edits.add_move_later( - inst, - op_spillslot, - prev_alloc, - op.class(), - InstPosition::After, - true, - ); - } else { - self.edits.add_move_later( - inst, - self.vreg_allocs[op.vreg().vreg()], - prev_alloc, - op.class(), - InstPosition::Before, - true, - ); - } + self.edits.add_move_later( + inst, + curr_alloc, + new_alloc, + op.class(), + InstPosition::Before, + false, + ); } - if prev_alloc.is_reg() { - // Free the previous allocation so that it can be reused. - let preg = prev_alloc.as_reg().unwrap(); + if let Some(preg) = new_alloc.as_reg() { + // Don't change the allocation. 
self.vreg_in_preg[preg.index()] = VReg::invalid(); } + } else { + self.vreg_allocs[op.vreg().vreg()] = new_alloc; + if let Some(preg) = new_alloc.as_reg() { + self.vreg_in_preg[preg.index()] = op.vreg(); + } } trace!( "Allocation for instruction {:?} and operand {}: {}", @@ -973,52 +970,6 @@ impl<'a, F: Function> Env<'a, F> { trace!("{op} is already allocated within constraints"); self.allocs[(inst.index(), op_idx)] = self.vreg_allocs[op.vreg().vreg()]; if let Some(preg) = self.allocs[(inst.index(), op_idx)].as_reg() { - if self.func.inst_clobbers(inst).contains(preg) { - // It is possible for the first use of a vreg in an instruction - // to be some clobber p0 and the expected location of that vreg - // after the instruction is also p0: - // - // 1. use v0 (fixed: p0), use v0 (fixed: p1). clobbers: [p0] - // 2. use v0 (fixed: p0) - // - // When the second use of v0 is encountered in inst 1, a save and restore is - // not inserted because it's not the first use of v0 in the instruction. Instead, - // a single edit to move from p1 to p0 is inserted before the instruction: - // - // move from p1 to p0 - // 1. use v0 (fixed: p0), use v0 (fixed: p1). clobbers: [p0] - // 2. use v0 (fixed: p0) - // - // To avoid this scenario, a save and restore is added here. 
- if !self.vregs_allocd_in_curr_inst.contains(op.vreg().vreg()) - && !self - .vregs_first_seen_in_curr_inst - .contains(op.vreg().vreg()) - { - if self.vreg_spillslots[op.vreg().vreg()].is_invalid() { - self.vreg_spillslots[op.vreg().vreg()] = - self.stack.allocstack(&op.vreg()); - } - let op_spillslot = - Allocation::stack(self.vreg_spillslots[op.vreg().vreg()]); - self.edits.add_move_later( - inst, - self.vreg_allocs[op.vreg().vreg()], - op_spillslot, - op.class(), - InstPosition::Before, - false, - ); - self.edits.add_move_later( - inst, - op_spillslot, - self.vreg_allocs[op.vreg().vreg()], - op.class(), - InstPosition::After, - true, - ); - } - } if self.allocatable_regs.contains(preg) { self.lrus[preg.class()].poke(preg); } @@ -1042,7 +993,7 @@ impl<'a, F: Function> Env<'a, F> { } trace!("Late available regs: {}", self.available_pregs[OperandPos::Late]); trace!("Early available regs: {}", self.available_pregs[OperandPos::Early]); - self.vregs_allocd_in_curr_inst.insert(op.vreg().vreg()); + //self.vregs_allocd_in_curr_inst.insert(op.vreg().vreg()); Ok(()) } @@ -1077,64 +1028,6 @@ impl<'a, F: Function> Env<'a, F> { self.available_pregs[OperandPos::Early].intersect_from(all_but_clobbers); } - fn save_and_restore_clobbered_registers(&mut self, inst: Inst) { - trace!("Adding save and restore edits for vregs in clobbered registers"); - for clobbered_preg in self.func.inst_clobbers(inst) { - // If the instruction clobbers a register holding a live vreg, - // insert edits to save the live reg and restore it - // after the instruction. - // For example: - // - // 1. def v2 - // 2. use v0, use v1 - clobbers p0 - // 3. use v2 (fixed: p0) - // - // In the above, v2 is assigned to p0 first. During the processing of inst 2, - // p0 is clobbered, so v2 is no longer in it and p0 no longer contains v2 at inst 2. - // p0 is allocated to the v2 def operand in inst 1. The flow ends up wrong because of - // the clobbering. 
- // - // - // It is also possible for a clobbered register to be allocated to an operand - // in an instruction. No edits need to be inserted here because - // `process_operand_allocation` has already done all the insertions. - - let vreg = self.vreg_in_preg[clobbered_preg.index()]; - if vreg != VReg::invalid() { - let vreg_isnt_mentioned_in_curr_inst = - !self.vregs_allocd_in_curr_inst.contains(vreg.vreg()); - if vreg_isnt_mentioned_in_curr_inst { - trace!("Adding save and restore edits for {}", vreg); - let preg_alloc = Allocation::reg(clobbered_preg); - let slot = if self.vreg_spillslots[vreg.vreg()].is_valid() { - self.vreg_spillslots[vreg.vreg()] - } else { - self.vreg_spillslots[vreg.vreg()] = self.stack.allocstack(&vreg); - self.vreg_spillslots[vreg.vreg()] - }; - let slot_alloc = Allocation::stack(slot); - self.edits.add_move_later( - inst, - preg_alloc, - slot_alloc, - vreg.class(), - InstPosition::Before, - true, - ); - self.edits.add_move_later( - inst, - slot_alloc, - preg_alloc, - vreg.class(), - InstPosition::After, - false, - ); - } - } - } - trace!("Done adding edits for clobbered registers"); - } - /// If instruction `inst` is a branch in `block`, /// this function places branch arguments in the spillslots /// expected by the destination blocks. 
@@ -1312,39 +1205,15 @@ impl<'a, F: Function> Env<'a, F> { let OperandConstraint::FixedReg(preg) = op.constraint() else { unreachable!(); }; - let early_avail_pregs = self.available_pregs[OperandPos::Early]; - let late_avail_pregs = self.available_pregs[OperandPos::Late]; - match (op.pos(), op.kind()) { - (OperandPos::Early, OperandKind::Use) => { - if op.as_fixed_nonallocatable().is_none() && !early_avail_pregs.contains(preg) { - return Err(RegAllocError::TooManyLiveRegs); - } - self.available_pregs[OperandPos::Early].remove(preg); - if self.reused_input_to_reuse_op[op_idx] != usize::MAX { - if op.as_fixed_nonallocatable().is_none() && !late_avail_pregs.contains(preg) { - return Err(RegAllocError::TooManyLiveRegs); - } - self.available_pregs[OperandPos::Late].remove(preg); - } - } - (OperandPos::Late, OperandKind::Def) => { - if op.as_fixed_nonallocatable().is_none() && !late_avail_pregs.contains(preg) { - return Err(RegAllocError::TooManyLiveRegs); - } - self.available_pregs[OperandPos::Late].remove(preg); - } - _ => { - if op.as_fixed_nonallocatable().is_none() - && (!early_avail_pregs.contains(preg) || !late_avail_pregs.contains(preg)) - { - return Err(RegAllocError::TooManyLiveRegs); - } - self.available_pregs[OperandPos::Early].remove(preg); - self.available_pregs[OperandPos::Late].remove(preg); - } - } + self.reserve_reg_for_fixed_operand(inst, op, op_idx, preg)?; } self.remove_clobbers_from_available_pregs(clobbers); + for preg in clobbers { + if self.vreg_in_preg[preg.index()] != VReg::invalid() { + self.evict_vreg_in_preg(inst, preg); + self.vreg_in_preg[preg.index()] = VReg::invalid(); + } + } for (op_idx, op) in operands.def_ops() { trace!("Allocating def operands {op}"); if let OperandConstraint::Reuse(reused_idx) = op.constraint() { @@ -1365,6 +1234,16 @@ impl<'a, F: Function> Env<'a, F> { } else { self.process_operand_allocation(inst, op, op_idx, None)?; } + if self.vreg_spillslots[op.vreg().vreg()].is_valid() { + self.edits.add_move_later( + inst, + 
self.vreg_allocs[op.vreg().vreg()], + Allocation::stack(self.vreg_spillslots[op.vreg().vreg()]), + op.class(), + InstPosition::After, + false + ); + } self.freealloc(op.vreg()); } for (op_idx, op) in operands.use_ops() { @@ -1392,7 +1271,6 @@ impl<'a, F: Function> Env<'a, F> { self.process_operand_allocation(inst, op, op_idx, None)?; } } - self.save_and_restore_clobbered_registers(inst); let mut avail_for_scratch = self.available_pregs[OperandPos::Early]; avail_for_scratch.intersect_from(self.available_pregs[OperandPos::Late]); let scratch_regs = From f697273e6f2049c6e2bb3a5b7ab7e2060dbdd666 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Sun, 25 Aug 2024 16:18:40 +0100 Subject: [PATCH 63/95] removed unnecessary fields --- src/fastalloc/mod.rs | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 25889bbd..8fe2ed7a 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -14,7 +14,6 @@ mod bitset; mod iter; mod lru; mod vregset; -use bitset::BitSet; use iter::*; use lru::*; use vregset::VRegSet; @@ -305,8 +304,6 @@ pub struct Env<'a, F: Function> { vreg_spillslots: Vec, /// The virtual registers that are currently live. live_vregs: VRegSet, - /// Allocatable free physical registers for classes Int, Float, and Vector, respectively. - freepregs: PartedByRegClass, /// Least-recently-used caches for register classes Int, Float, and Vector, respectively. lrus: Lrus, /// `vreg_in_preg[i]` is the virtual register currently in the physical register @@ -314,17 +311,6 @@ pub struct Env<'a, F: Function> { vreg_in_preg: Vec, /// For parallel moves from branch args to block param spillslots. temp_spillslots: PartedByRegClass>, - /// Used to keep track of which used vregs are seen for the first time - /// in the instruction, that is, if the vregs live past the current instruction. 
- /// This is used to determine whether or not reused operands - /// for reuse-input constraints should be restored after an instruction. - /// It's also used to determine if the an early operand can reuse a freed def operand's - /// allocation. And it's also used to determine the edits to be inserted when - /// allocating a use operand. - vregs_first_seen_in_curr_inst: BitSet, - /// Used to keep track of which vregs have been allocated in the current instruction. - /// This is used to determine which edits to insert when allocating a use operand. - vregs_allocd_in_curr_inst: BitSet, /// `reused_input_to_reuse_op[i]` is the operand index of the reuse operand /// that uses the `i`th operand in the current instruction as its input. reused_input_to_reuse_op: Vec, @@ -386,13 +372,6 @@ impl<'a, F: Function> Env<'a, F> { vreg_allocs: vec![Allocation::none(); func.num_vregs()], vreg_spillslots: vec![SpillSlot::invalid(); func.num_vregs()], live_vregs: VRegSet::with_capacity(func.num_vregs()), - freepregs: PartedByRegClass { - items: [ - PRegSet::from_iter(regs[0].iter().cloned()), - PRegSet::from_iter(regs[1].iter().cloned()), - PRegSet::from_iter(regs[2].iter().cloned()), - ], - }, lrus: Lrus::new(®s[0], ®s[1], ®s[2]), vreg_in_preg: vec![VReg::invalid(); PReg::NUM_INDEX], stack: Stack::new(func), @@ -404,8 +383,6 @@ impl<'a, F: Function> Env<'a, F> { Vec::with_capacity(func.num_vregs()), ], }, - vregs_allocd_in_curr_inst: BitSet::with_capacity(func.num_vregs()), - vregs_first_seen_in_curr_inst: BitSet::with_capacity(func.num_vregs()), reused_input_to_reuse_op: vec![usize::MAX; max_operand_len as usize], dedicated_scratch_regs: PartedByRegClass { items: [ @@ -815,7 +792,6 @@ impl<'a, F: Function> Env<'a, F> { trace!("{op} isn't allocated within constraints."); let curr_alloc = self.vreg_allocs[op.vreg().vreg()]; if curr_alloc.is_none() { - self.vregs_first_seen_in_curr_inst.insert(op.vreg().vreg()); self.live_vregs.insert(op.vreg()); } let new_alloc = 
self.alloc_operand(inst, op, op_idx, fixed_spillslot)?; @@ -993,7 +969,6 @@ impl<'a, F: Function> Env<'a, F> { } trace!("Late available regs: {}", self.available_pregs[OperandPos::Late]); trace!("Early available regs: {}", self.available_pregs[OperandPos::Early]); - //self.vregs_allocd_in_curr_inst.insert(op.vreg().vreg()); Ok(()) } @@ -1167,18 +1142,11 @@ impl<'a, F: Function> Env<'a, F> { ); // All branch arguments should be in their spillslots at the end of the function. - // - // The invariants posed by `vregs_first_seen_in_curr_inst` and - // `vregs_allocd_in_curr_inst` must be maintained in order to - // insert edits in the correct order when vregs used as branch args - // are also used as operands. if self.vreg_allocs[vreg.vreg()].is_none() { - self.vregs_first_seen_in_curr_inst.insert(vreg.vreg()); self.live_vregs.insert(*vreg); } self.vreg_allocs[vreg.vreg()] = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); - self.vregs_allocd_in_curr_inst.insert(vreg.vreg()); } } } @@ -1276,8 +1244,6 @@ impl<'a, F: Function> Env<'a, F> { let scratch_regs = self.get_scratch_regs(inst, self.edits.inst_needs_scratch_reg.clone(), avail_for_scratch)?; self.edits.process_edits(scratch_regs); - self.vregs_first_seen_in_curr_inst.clear(); - self.vregs_allocd_in_curr_inst.clear(); for entry in self.reused_input_to_reuse_op.iter_mut() { *entry = usize::MAX; } From 2e55a7682e84046716cbbf6b2f04b5d026494b4d Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Mon, 26 Aug 2024 12:25:24 +0100 Subject: [PATCH 64/95] fixed available regsets handling bug --- src/fastalloc/mod.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 8fe2ed7a..33f7c86f 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -713,6 +713,9 @@ impl<'a, F: Function> Env<'a, F> { (OperandPos::Early, OperandKind::Def) => { self.available_pregs[OperandPos::Late].remove(preg); } + (OperandPos::Late, OperandKind::Def) if 
matches!(op.constraint(), OperandConstraint::Reuse(_)) => { + self.available_pregs[OperandPos::Early].remove(preg); + } _ => () }; Ok(Allocation::reg(preg)) @@ -1182,6 +1185,38 @@ impl<'a, F: Function> Env<'a, F> { self.vreg_in_preg[preg.index()] = VReg::invalid(); } } + for (_, op) in operands.use_ops() { + if let Some(preg) = self.vreg_allocs[op.vreg().vreg()].as_reg() { + // The current allocation, vreg_allocs[op.vreg], doesn't change, + // so it should be removed from the available reg sets to avoid + // allocating it to some other operand in the instruction. + // + // For example: + // 1. def v0 (reuse: 1), use v1, use v2 + // 2. use v1 (fixed: p0) + // + // When inst 1 is about to be processed, vreg_allocs[v1] will be p0. + // Suppose p1 is allocated to v0: this will create a fixed constraint for + // v1 and p1 will also be allocated to it. + // When it's time to process the v2 operand, vreg_allocs[v1] will still be p0 + // because it doesn't change (except by an explicit fixed reg constraint which + // will not be a problem here) and it's possible for v2 to get p0 as an allocation, + // which is wrong. That will lead to the following scenario: + // + // move from p0 to p1 // Inserted due to reuse constraints + // // (vreg_allocs[v1] == p0) + // 1. def v0 (reuse: 1), use v1, use v2 // v0: p1, v1: p1, v2: p0 + // move from stack_v0 to p0 // Eviction here because v0 is still in p0 when + // // v2's processing picked p0 from available regs + // 2. use v1 (fixed: p0) + // + // To avoid this scenario, the register is removed from the available set. 
+ self.available_pregs[op.pos()].remove(preg); + if let (OperandPos::Late, OperandKind::Use) = (op.pos(), op.kind()) { + self.available_pregs[OperandPos::Early].remove(preg); + } + } + } for (op_idx, op) in operands.def_ops() { trace!("Allocating def operands {op}"); if let OperandConstraint::Reuse(reused_idx) = op.constraint() { From c93476e415bcb0d9a22d8c6059d0cad7350b7ccf Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 29 Aug 2024 21:16:44 +0100 Subject: [PATCH 65/95] simplified edit handling: edits are added directly to the edits vector and reversed afterwards --- src/fastalloc/iter.rs | 7 + src/fastalloc/mod.rs | 545 ++++++++++++++++-------------------------- 2 files changed, 209 insertions(+), 343 deletions(-) diff --git a/src/fastalloc/iter.rs b/src/fastalloc/iter.rs index cb91d8d6..6232bd3a 100644 --- a/src/fastalloc/iter.rs +++ b/src/fastalloc/iter.rs @@ -26,6 +26,13 @@ impl<'a> Operands<'a> { self.matches(|op| op.kind() == OperandKind::Use) } + pub fn non_fixed_use(&self) -> impl Iterator + 'a { + self.matches(|op| { + !matches!(op.constraint(), OperandConstraint::FixedReg(_)) + && op.kind() == OperandKind::Use + }) + } + pub fn non_fixed_non_reuse_late(&self) -> impl Iterator + 'a { self.matches(|op| { !matches!( diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 33f7c86f..a41d96a9 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -4,7 +4,6 @@ use crate::{ AllocationKind, Block, Inst, InstPosition, Operand, OperandConstraint, OperandKind, OperandPos, PReg, PRegSet, RegClass, SpillSlot, VReg, }; -use alloc::collections::VecDeque; use alloc::vec::Vec; use core::convert::TryInto; use core::iter::FromIterator; @@ -110,23 +109,17 @@ impl<'a, F: Function> Stack<'a, F> { #[derive(Debug)] struct Edits { - /// The edits to be inserted before the currently processed instruction. - inst_pre_edits: VecDeque<(ProgPoint, Edit, RegClass)>, - /// The edits to be inserted after the currently processed instruction. 
- inst_post_edits: VecDeque<(ProgPoint, Edit, RegClass)>, /// The final output edits. - edits: VecDeque<(ProgPoint, Edit)>, - /// Used to determine if a scratch register is needed for an - /// instruction's moves during the `process_edit` calls. - inst_needs_scratch_reg: PartedByRegClass, + edits: Vec<(ProgPoint, Edit)>, fixed_stack_slots: PRegSet, - /// All pregs used as the source or destination in an edit - /// for the current instruction. - pregs_mentioned_in_edit: PRegSet, + /// The scratch registers being used in the instruction being + /// currently processed. + scratch_regs: PartedByRegClass>, + dedicated_scratch_regs: PartedByRegClass>, } impl Edits { - fn new(fixed_stack_slots: PRegSet, max_operand_len: u32, num_insts: usize) -> Self { + fn new(fixed_stack_slots: PRegSet, max_operand_len: u32, num_insts: usize, dedicated_scratch_regs: PartedByRegClass>) -> Self { // Some operands generate edits and some don't. // The operands that generate edits add no more than two. // Some edits are added due to clobbers, not operands. 
@@ -134,14 +127,10 @@ impl Edits { let inst_edits_len_guess = max_operand_len as usize * 2; let total_edits_len_guess = inst_edits_len_guess * num_insts; Self { - inst_pre_edits: VecDeque::with_capacity(inst_edits_len_guess), - inst_post_edits: VecDeque::with_capacity(inst_edits_len_guess), - edits: VecDeque::with_capacity(total_edits_len_guess), + edits: Vec::with_capacity(total_edits_len_guess), fixed_stack_slots, - inst_needs_scratch_reg: PartedByRegClass { - items: [false, false, false], - }, - pregs_mentioned_in_edit: PRegSet::empty(), + scratch_regs: dedicated_scratch_regs.clone(), + dedicated_scratch_regs, } } } @@ -157,108 +146,34 @@ impl Edits { false } - fn process_edits(&mut self, scratch_regs: PartedByRegClass>) { - for i in (0..self.inst_post_edits.len()).rev() { - let (point, edit, class) = self.inst_post_edits[i].clone(); - self.process_edit(point, edit, scratch_regs[class]); - } - for i in (0..self.inst_pre_edits.len()).rev() { - let (point, edit, class) = self.inst_pre_edits[i].clone(); - self.process_edit(point, edit, scratch_regs[class]); - } - for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { - self.inst_needs_scratch_reg[class] = false; - } - self.inst_post_edits.clear(); - self.inst_pre_edits.clear(); - self.pregs_mentioned_in_edit = PRegSet::empty(); - } - - fn process_edit(&mut self, point: ProgPoint, edit: Edit, scratch_reg: Option) { - trace!("Processing edit: {:?}", edit); - let Edit::Move { from, to } = edit; - if self.is_stack(from) && self.is_stack(to) { - let scratch_reg = scratch_reg.unwrap(); - trace!( - "Edit is stack-to-stack, generating two moves with a scratch register {}", - scratch_reg - ); - let scratch_alloc = Allocation::reg(scratch_reg); - trace!( - "Processed Edit: {:?}", - ( - point, - Edit::Move { - from: scratch_alloc, - to, - } - ) - ); - self.edits.push_front(( - point, - Edit::Move { - from: scratch_alloc, - to, - }, - )); - trace!( - "Processed Edit: {:?}", - ( - point, - Edit::Move { - from, - to: 
scratch_alloc, - } - ) - ); - self.edits.push_front(( - point, - Edit::Move { - from, - to: scratch_alloc, - }, - )); - } else { - trace!("Edit is not stack-to-stack. Adding it directly:"); - trace!("Processed Edit: {:?}", (point, Edit::Move { from, to })); - self.edits.push_front((point, Edit::Move { from, to })); - } - } - - fn add_move_later( + fn add_move( &mut self, inst: Inst, from: Allocation, to: Allocation, class: RegClass, pos: InstPosition, - prepend: bool, ) { trace!( - "Recording edit to add later: {:?}", + "Recording edit: {:?}", (ProgPoint::new(inst, pos), Edit::Move { from, to }, class) ); - if let Some(preg) = from.as_reg() { - self.pregs_mentioned_in_edit.add(preg); - } - if let Some(preg) = to.as_reg() { - self.pregs_mentioned_in_edit.add(preg); - } - if from == to { - trace!("Deciding not to record the edit, since the source and dest are the same"); - return; - } if self.is_stack(from) && self.is_stack(to) { - self.inst_needs_scratch_reg[class] = true; - } - let target_edits = match pos { - InstPosition::After => &mut self.inst_post_edits, - InstPosition::Before => &mut self.inst_pre_edits, - }; - if prepend { - target_edits.push_front((ProgPoint::new(inst, pos), Edit::Move { from, to }, class)); + trace!("Edit is stack-to-stack. 
Generating two edits with a scratch register"); + let scratch_reg = self.scratch_regs[class].unwrap(); + let scratch_alloc = Allocation::reg(scratch_reg); + trace!("Move 1: {scratch_alloc:?} to {to:?}"); + self.edits.push(( + ProgPoint::new(inst, pos), + Edit::Move { from: scratch_alloc, to }, + )); + trace!("Move 2: {from:?} to {scratch_alloc:?}"); + self.edits.push(( + ProgPoint::new(inst, pos), + Edit::Move { from, to: scratch_alloc }, + )); } else { - target_edits.push_back((ProgPoint::new(inst, pos), Edit::Move { from, to }, class)); + self.edits.push((ProgPoint::new(inst, pos), Edit::Move { from, to })); } } } @@ -321,7 +236,6 @@ pub struct Env<'a, F: Function> { available_pregs: PartedByOperandPos, init_available_pregs: PRegSet, allocatable_regs: PRegSet, - dedicated_scratch_regs: PartedByRegClass>, stack: Stack<'a, F>, fixed_stack_slots: PRegSet, @@ -362,6 +276,13 @@ impl<'a, F: Function> Env<'a, F> { } regs }; + let dedicated_scratch_regs = PartedByRegClass { + items: [ + env.scratch_by_class[0], + env.scratch_by_class[1], + env.scratch_by_class[2], + ], + }; use alloc::vec; trace!("{:?}", env); let (allocs, max_operand_len) = Allocs::new(func); @@ -384,13 +305,6 @@ impl<'a, F: Function> Env<'a, F> { ], }, reused_input_to_reuse_op: vec![usize::MAX; max_operand_len as usize], - dedicated_scratch_regs: PartedByRegClass { - items: [ - env.scratch_by_class[0], - env.scratch_by_class[1], - env.scratch_by_class[2], - ], - }, init_available_pregs, available_pregs: PartedByOperandPos { items: [ @@ -399,7 +313,7 @@ impl<'a, F: Function> Env<'a, F> { ] }, allocs, - edits: Edits::new(fixed_stack_slots, max_operand_len, func.num_insts()), + edits: Edits::new(fixed_stack_slots, max_operand_len, func.num_insts(), dedicated_scratch_regs), stats: Stats::default(), } } @@ -414,125 +328,24 @@ impl<'a, F: Function> Env<'a, F> { false } - fn reset_available_pregs(&mut self) { + fn reset_available_pregs_and_scratch_regs(&mut self) { trace!("Resetting the available pregs"); 
self.available_pregs = PartedByOperandPos { items: [self.init_available_pregs, self.init_available_pregs] }; + self.edits.scratch_regs = self.edits.dedicated_scratch_regs.clone(); } - /// The scratch registers needed for processing the edits generated - /// during a `reload_at_begin` call. - /// - /// This function is only called when all instructions in a block have - /// already been processed. The only edits being processed will be for the - /// ones to move a liveout vreg or block param from its spillslot to its - /// expected allocation. - fn get_scratch_regs_for_reloading( - &self, - inst_needs_scratch_reg: PartedByRegClass, - avail_pregs: PRegSet, - ) -> Result>, RegAllocError> { - trace!("Getting scratch registers for reload_at_begin"); - let mut scratch_regs = PartedByRegClass { - items: [None, None, None], - }; - for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { - if inst_needs_scratch_reg[class] { - trace!("{:?} class needs a scratch register", class); - if self.dedicated_scratch_regs[class].is_some() { - trace!("Using the dedicated scratch register for class {:?}", class); - scratch_regs[class] = self.dedicated_scratch_regs[class]; - } else { - trace!("No dedicated scratch register for class {:?}. Using the last free register", class); - scratch_regs[class] = self.lrus[class].last_satisfying(|preg| { - avail_pregs.contains(preg) - && self.vreg_in_preg[preg.index()] == VReg::invalid() - }); - if scratch_regs[class].is_none() { - trace!("Unable to find a scratch register for class {class:?}"); - return Err(RegAllocError::TooManyLiveRegs); - } - } - } - } - Ok(scratch_regs) - } - - /// The scratch registers needed for processing edits generated while - /// processing instructions. 
- fn get_scratch_regs( - &mut self, - inst: Inst, - inst_needs_scratch_reg: PartedByRegClass, - avail_pregs: PRegSet, - ) -> Result>, RegAllocError> { - trace!("Getting scratch registers for instruction {:?}", inst); - let mut scratch_regs = PartedByRegClass { - items: [None, None, None], - }; - for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { - if inst_needs_scratch_reg[class] { - trace!("{:?} class needs a scratch register", class); - if let Some(reg) = self.dedicated_scratch_regs[class] { - trace!("Using the dedicated scratch register for class {:?}", class); - scratch_regs[class] = Some(reg); - } else { - trace!("class {:?} has no dedicated scratch register", class); - if let Some(preg) = self.lrus[class].last_satisfying(|preg| { - avail_pregs.contains(preg) - // Consider a scenario: - // - // 1. use v0 (fixed: stack0), use v1 (fixed: p1) - // 2. use v1 (fixed: p2), use v0 (fixed: stack1) - // - // In the above, during the processing of inst 1, v0 is already - // in stack1 and v1 is already in p2. - // An edit will be inserted after the instruction to move - // from stack0 to stack1. Afterwards, when the v1 operand - // is being processed, `vreg_in_preg[p2]` will be set to `VReg::invalid`. - // The end result is: - // - // move from p1 to stack_v1 // Save v1 (inserted by `process_operand_allocation`) - // 1. use v0 (fixed: stack0), use v1 (fixed: p1) - // move from stack_v1 to p2 // Restore v1 (inserted by `process_operand_allocation`) - // move from stack0 to scratch // scratch could be p2 - // move from scratch to stack1 - // 2. use v1 (fixed: p2), use v0 (fixed: stack1) - // - // p2 could be used as a scratch register because it will be - // marked available in the `avail_regs` since it's not allocated - // to any operand in inst 1. - // If p2 is used as the scratch register, then v0 will - // be used in the place of v1 in inst 2, which is incorrect. 
- // To avoid this scenario, all pregs either used as the source - // or destination in the instruction are avoided. - && !self.edits.pregs_mentioned_in_edit.contains(preg) - }) { - if self.vreg_in_preg[preg.index()] != VReg::invalid() { - // A register used as scratch may be used for instructions - // added before or after the instruction. - // - // This will work because `preg` hasn't been allocated in the current - // instruction (`avail_pregs` only contains such registers). - self.evict_vreg_in_preg(inst, preg); - self.vreg_in_preg[preg.index()] = VReg::invalid(); - } - trace!("The scratch register: {preg}"); - scratch_regs[class] = Some(preg); - } else { - trace!("Unable to find a scratch register for class {class:?}"); - return Err(RegAllocError::TooManyLiveRegs); - } - } - } else { - trace!("{:?} class does not need a scratch register", class); - } - } - Ok(scratch_regs) + fn get_scratch_reg(&self, class: RegClass) -> Result { + let mut avail_regs = self.available_pregs[OperandPos::Early]; + avail_regs.intersect_from(self.available_pregs[OperandPos::Late]); + self.lrus[class] + .last(avail_regs) + .ok_or(RegAllocError::TooManyLiveRegs) } - fn reserve_reg_for_fixed_operand(&mut self, inst: Inst, op: Operand, op_idx: usize, preg: PReg) -> Result<(), RegAllocError> { + fn reserve_reg_for_fixed_operand(&mut self, op: Operand, op_idx: usize, preg: PReg) -> Result<(), RegAllocError> { + trace!("Reserving register {preg} for fixed operand {op}"); let early_avail_pregs = self.available_pregs[OperandPos::Early]; let late_avail_pregs = self.available_pregs[OperandPos::Late]; match (op.pos(), op.kind()) { @@ -564,11 +377,6 @@ impl<'a, F: Function> Env<'a, F> { self.available_pregs[OperandPos::Late].remove(preg); } } - if self.vreg_in_preg[preg.index()] != VReg::invalid() { - // Something is already in that register. Evict it. 
- self.evict_vreg_in_preg(inst, preg); - self.vreg_in_preg[preg.index()] = VReg::invalid(); - } Ok(()) } @@ -605,6 +413,7 @@ impl<'a, F: Function> Env<'a, F> { // Say p0 is assigned to v0 during the processing of the first operand. // When the second v0 operand is being processed, v0 will still be in // v0, so it is still allocated within constraints. + trace!("The vreg in {preg}: {}", self.vreg_in_preg[preg.index()]); self.vreg_in_preg[preg.index()] == op.vreg() } else { true @@ -622,6 +431,7 @@ impl<'a, F: Function> Env<'a, F> { } if let Some(preg) = alloc.as_reg() { if !self.available_pregs[op.pos()].contains(preg) { + trace!("The vreg in {preg}: {}", self.vreg_in_preg[preg.index()]); self.vreg_in_preg[preg.index()] == op.vreg() } else { true @@ -655,13 +465,12 @@ impl<'a, F: Function> Env<'a, F> { let slot = self.vreg_spillslots[evicted_vreg.vreg()]; self.vreg_allocs[evicted_vreg.vreg()] = Allocation::stack(slot); trace!("Move reason: eviction"); - self.edits.add_move_later( + self.edits.add_move( inst, self.vreg_allocs[evicted_vreg.vreg()], Allocation::reg(preg), evicted_vreg.class(), InstPosition::After, - false, ); } @@ -751,14 +560,6 @@ impl<'a, F: Function> Env<'a, F> { OperandConstraint::FixedReg(preg) => { trace!("The fixed preg: {} for operand {}", preg, op); - // It is an error for a fixed register clobber to be used for a defined vreg - // that outlives the instruction, because it will be impossible to restore it. - // But checking for that will be expensive? - - if self.allocatable_regs.contains(preg) { - self.lrus[op.class()].poke(preg); - } - trace!("vreg {} is now in preg {}", op.vreg(), preg); Allocation::reg(preg) } OperandConstraint::Reuse(_) => { @@ -803,7 +604,14 @@ impl<'a, F: Function> Env<'a, F> { // used (in `prev_alloc`, that is). 
if curr_alloc.is_some() { trace!("Move reason: Prev allocation doesn't meet constraints"); + if self.is_stack(new_alloc) && self.is_stack(curr_alloc) && self.edits.scratch_regs[op.class()].is_none() { + let reg = self.get_scratch_reg(op.class())?; + self.edits.scratch_regs[op.class()] = Some(reg); + self.available_pregs[OperandPos::Early].remove(reg); + self.available_pregs[OperandPos::Late].remove(reg); + } if op.kind() == OperandKind::Def { + trace!("Adding edit from {new_alloc:?} to {curr_alloc:?} after inst {inst:?} for {op}"); // In the case where `op` is a def, // the allocation of `op` will not be holding the value // of `op` before the instruction. Since it's a def, @@ -839,13 +647,12 @@ impl<'a, F: Function> Env<'a, F> { // the location v0 is expected to be in after inst 1. // This messes up the dataflow. // To avoid this, the moves are prepended. - self.edits.add_move_later( + self.edits.add_move( inst, new_alloc, curr_alloc, op.class(), InstPosition::After, - true, ); // No need to set vreg_in_preg because it will be set during // `freealloc` if needed. @@ -919,21 +726,15 @@ impl<'a, F: Function> Env<'a, F> { // 1. use v0 (fixed: stack0), use v0 (fixed: p0) // move from stack_v0 to p1 // 2. use v0 (fixed: p1) - - self.edits.add_move_later( - inst, - curr_alloc, - new_alloc, - op.class(), - InstPosition::Before, - false, - ); + + // Edits for use operands are added later. } if let Some(preg) = new_alloc.as_reg() { // Don't change the allocation. 
self.vreg_in_preg[preg.index()] = VReg::invalid(); } } else { + trace!("Setting vreg_allocs[{op}] to {new_alloc:?}"); self.vreg_allocs[op.vreg().vreg()] = new_alloc; if let Some(preg) = new_alloc.as_reg() { self.vreg_in_preg[preg.index()] = op.vreg(); @@ -975,19 +776,6 @@ impl<'a, F: Function> Env<'a, F> { Ok(()) } - fn alloc_slots_for_block_params(&mut self, succ: Block) { - for vreg in self.func.block_params(succ) { - if self.vreg_spillslots[vreg.vreg()].is_invalid() { - self.vreg_spillslots[vreg.vreg()] = self.stack.allocstack(vreg); - trace!( - "Block param {} is in {}", - vreg, - Allocation::stack(self.vreg_spillslots[vreg.vreg()]) - ); - } - } - } - fn remove_clobbers_from_available_pregs(&mut self, clobbers: PRegSet) { trace!("Removing clobbers {clobbers} from available reg sets"); // Don't let defs get allocated to clobbers. @@ -1017,7 +805,7 @@ impl<'a, F: Function> Env<'a, F> { /// /// These temporaries are used because the moves have to be parallel in the case where /// a block parameter of the successor block is a branch argument. - fn process_branch(&mut self, block: Block, inst: Inst) { + fn process_branch(&mut self, block: Block, inst: Inst) -> Result<(), RegAllocError> { // Used to know which temporary spillslot should be used next. let mut next_temp_idx = PartedByRegClass { items: [0, 0, 0] }; @@ -1032,10 +820,53 @@ impl<'a, F: Function> Env<'a, F> { // This is necessary to prevent overwriting the branch arg's value before // placing it in the corresponding branch param spillslot. - for succ in self.func.block_succs(block).iter() { - self.alloc_slots_for_block_params(*succ); + trace!("Adding temp to block params spillslots for branch args"); + for (succ_idx, succ) in self.func.block_succs(block).iter().enumerate() { + let succ_params = self.func.block_params(*succ); + + // Move from temporaries to block param spillslots. 
+ for (pos, vreg) in self + .func + .branch_blockparams(block, inst, succ_idx) + .iter() + .enumerate() + { + if self.temp_spillslots[vreg.class()].len() == next_temp_idx[vreg.class()] { + let newslot = self.stack.allocstack(vreg); + self.temp_spillslots[vreg.class()].push(newslot); + } + let succ_param_vreg = succ_params[pos]; + if self.vreg_spillslots[succ_param_vreg.vreg()].is_invalid() { + self.vreg_spillslots[succ_param_vreg.vreg()] = self.stack.allocstack(&succ_param_vreg); + trace!( + "Block param {} is in {}", + vreg, + Allocation::stack(self.vreg_spillslots[vreg.vreg()]) + ); + } + let param_alloc = Allocation::stack(self.vreg_spillslots[succ_param_vreg.vreg()]); + let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; + let temp = Allocation::stack(temp_slot); + next_temp_idx[vreg.class()] += 1; + trace!(" Branch arg {vreg} from {temp} to {param_alloc}"); + if self.edits.scratch_regs[vreg.class()].is_none() { + let reg = self.get_scratch_reg(vreg.class())?; + // No need to remove the scratch register from the available reg sets + // because branches are processed last. + self.edits.scratch_regs[vreg.class()] = Some(reg); + } + self.edits.add_move( + inst, + temp, + param_alloc, + vreg.class(), + InstPosition::Before, + ); + } } + reset_temp_idx(&mut next_temp_idx); + for (succ_idx, _) in self.func.block_succs(block).iter().enumerate() { // Move from branch args spillslots to temporaries. 
// @@ -1082,10 +913,6 @@ impl<'a, F: Function> Env<'a, F> { Allocation::stack(self.vreg_spillslots[vreg.vreg()]) ); } - if self.temp_spillslots[vreg.class()].len() == next_temp_idx[vreg.class()] { - let newslot = self.stack.allocstack(vreg); - self.temp_spillslots[vreg.class()].push(newslot); - } let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; let temp = Allocation::stack(temp_slot); next_temp_idx[vreg.class()] += 1; @@ -1100,50 +927,14 @@ impl<'a, F: Function> Env<'a, F> { // used as branch args for successors, else inserting the moves before, instead // of after will be wrong. But the edits are inserted before because the fuzzer // doesn't recognize moves inserted after branch instructions. - self.edits.add_move_later( + + self.edits.add_move( inst, vreg_spill, temp, vreg.class(), InstPosition::Before, - false, - ); - } - } - - reset_temp_idx(&mut next_temp_idx); - - for (succ_idx, succ) in self.func.block_succs(block).iter().enumerate() { - let succ_params = self.func.block_params(*succ); - - // Move from temporaries to block param spillslots. - for (pos, vreg) in self - .func - .branch_blockparams(block, inst, succ_idx) - .iter() - .enumerate() - { - let succ_param_vreg = succ_params[pos]; - let param_alloc = Allocation::stack(self.vreg_spillslots[succ_param_vreg.vreg()]); - let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; - let temp = Allocation::stack(temp_slot); - next_temp_idx[vreg.class()] += 1; - trace!(" --- Placing branch arg {} in {}", vreg, temp); - trace!( - "{} which is now in {} inserting move to {}", - vreg, - temp, - param_alloc ); - self.edits.add_move_later( - inst, - temp, - param_alloc, - vreg.class(), - InstPosition::Before, - false, - ); - // All branch arguments should be in their spillslots at the end of the function. 
if self.vreg_allocs[vreg.vreg()].is_none() { self.live_vregs.insert(*vreg); @@ -1152,6 +943,8 @@ impl<'a, F: Function> Env<'a, F> { Allocation::stack(self.vreg_spillslots[vreg.vreg()]); } } + + Ok(()) } fn alloc_inst(&mut self, block: Block, inst: Inst) -> Result<(), RegAllocError> { @@ -1159,14 +952,11 @@ impl<'a, F: Function> Env<'a, F> { if self.func.requires_refs_on_stack(inst) && !self.func.reftype_vregs().is_empty() { panic!("Safepoint instructions aren't supported"); } - if self.func.is_branch(inst) { - self.process_branch(block, inst); - } let operands = Operands::new(self.func.inst_operands(inst)); let clobbers = self.func.inst_clobbers(inst); for (op_idx, op) in operands.reuse() { - trace!("Initializing reused_input_to_reuse_op"); + trace!("Initializing reused_input_to_reuse_op for {op}"); let OperandConstraint::Reuse(reused_idx) = op.constraint() else { unreachable!() }; @@ -1176,17 +966,49 @@ impl<'a, F: Function> Env<'a, F> { let OperandConstraint::FixedReg(preg) = op.constraint() else { unreachable!(); }; - self.reserve_reg_for_fixed_operand(inst, op, op_idx, preg)?; + self.reserve_reg_for_fixed_operand(op, op_idx, preg)?; + if self.allocatable_regs.contains(preg) { + self.lrus[preg.class()].poke(preg); + } + } + for (_, op) in operands.fixed() { + let OperandConstraint::FixedReg(preg) = op.constraint() else { + unreachable!(); + }; + // Eviction has to be done separately to avoid using a fixed register + // as a scratch register. 
+ if self.vreg_in_preg[preg.index()] != VReg::invalid() && self.vreg_in_preg[preg.index()] != op.vreg() { + trace!("Evicting {} from fixed register {preg}", self.vreg_in_preg[preg.index()]); + if self.fixed_stack_slots.contains(preg) && self.edits.scratch_regs[preg.class()].is_none() { + let reg = self.get_scratch_reg(preg.class())?; + self.edits.scratch_regs[preg.class()] = Some(reg); + self.available_pregs[OperandPos::Early].remove(reg); + self.available_pregs[OperandPos::Late].remove(reg); + } + self.evict_vreg_in_preg(inst, preg); + self.vreg_in_preg[preg.index()] = VReg::invalid(); + } } self.remove_clobbers_from_available_pregs(clobbers); for preg in clobbers { if self.vreg_in_preg[preg.index()] != VReg::invalid() { + trace!("Evicting {} from clobber {preg}", self.vreg_in_preg[preg.index()]); + if self.fixed_stack_slots.contains(preg) && self.edits.scratch_regs[preg.class()].is_none() { + let reg = self.get_scratch_reg(preg.class())?; + self.edits.scratch_regs[preg.class()] = Some(reg); + self.available_pregs[OperandPos::Early].remove(reg); + self.available_pregs[OperandPos::Late].remove(reg); + } self.evict_vreg_in_preg(inst, preg); self.vreg_in_preg[preg.index()] = VReg::invalid(); } } - for (_, op) in operands.use_ops() { + for (_, op) in operands.non_fixed_use() { + if op.as_fixed_nonallocatable().is_some() { + continue; + } if let Some(preg) = self.vreg_allocs[op.vreg().vreg()].as_reg() { + trace!("Removing {op}'s current reg allocation {preg} from reg sets"); // The current allocation, vreg_allocs[op.vreg], doesn't change, // so it should be removed from the available reg sets to avoid // allocating it to some other operand in the instruction. 
@@ -1238,14 +1060,28 @@ impl<'a, F: Function> Env<'a, F> { self.process_operand_allocation(inst, op, op_idx, None)?; } if self.vreg_spillslots[op.vreg().vreg()].is_valid() { - self.edits.add_move_later( - inst, - self.vreg_allocs[op.vreg().vreg()], - Allocation::stack(self.vreg_spillslots[op.vreg().vreg()]), - op.class(), - InstPosition::After, - false - ); + let curr_alloc = self.vreg_allocs[op.vreg().vreg()]; + let vreg_slot = self.vreg_spillslots[op.vreg().vreg()]; + let (is_stack_to_stack, src_and_dest_are_same) = if let Some(curr_alloc) = curr_alloc.as_stack() { + (true, curr_alloc == vreg_slot) + } else { + (self.is_stack(curr_alloc), false) + }; + if !src_and_dest_are_same { + if is_stack_to_stack && self.edits.scratch_regs[op.class()].is_none() { + let reg = self.get_scratch_reg(op.class())?; + self.edits.scratch_regs[op.class()] = Some(reg); + self.available_pregs[OperandPos::Early].remove(reg); + self.available_pregs[OperandPos::Late].remove(reg); + }; + self.edits.add_move( + inst, + self.vreg_allocs[op.vreg().vreg()], + Allocation::stack(self.vreg_spillslots[op.vreg().vreg()]), + op.class(), + InstPosition::After, + ); + } } self.freealloc(op.vreg()); } @@ -1274,11 +1110,26 @@ impl<'a, F: Function> Env<'a, F> { self.process_operand_allocation(inst, op, op_idx, None)?; } } - let mut avail_for_scratch = self.available_pregs[OperandPos::Early]; - avail_for_scratch.intersect_from(self.available_pregs[OperandPos::Late]); - let scratch_regs = - self.get_scratch_regs(inst, self.edits.inst_needs_scratch_reg.clone(), avail_for_scratch)?; - self.edits.process_edits(scratch_regs); + for (op_idx, op) in operands.use_ops() { + if op.as_fixed_nonallocatable().is_some() { + continue; + } + if self.vreg_allocs[op.vreg().vreg()] != self.allocs[(inst.index(), op_idx)] { + let curr_alloc = self.vreg_allocs[op.vreg().vreg()]; + let new_alloc = self.allocs[(inst.index(), op_idx)]; + trace!("Adding edit from {curr_alloc:?} to {new_alloc:?} before inst {inst:?} for {op}"); + 
self.edits.add_move( + inst, + curr_alloc, + new_alloc, + op.class(), + InstPosition::Before, + ); + } + } + if self.func.is_branch(inst) { + self.process_branch(block, inst)?; + } for entry in self.reused_input_to_reuse_op.iter_mut() { *entry = usize::MAX; } @@ -1347,13 +1198,16 @@ impl<'a, F: Function> Env<'a, F> { "Move reason: reload {} at begin - move from its spillslot", vreg ); - self.edits.add_move_later( + if self.is_stack(prev_alloc) && self.edits.scratch_regs[vreg.class()].is_none() { + let reg = self.get_scratch_reg(vreg.class())?; + self.edits.scratch_regs[vreg.class()] = Some(reg); + } + self.edits.add_move( self.func.block_insns(block).first(), slot, prev_alloc, vreg.class(), InstPosition::Before, - true, ); } for vreg in self.live_vregs.iter() { @@ -1389,18 +1243,22 @@ impl<'a, F: Function> Env<'a, F> { "Move reason: reload {} at begin - move from its spillslot", vreg ); - self.edits.add_move_later( + if self.is_stack(prev_alloc) && self.edits.scratch_regs[vreg.class()].is_none() { + let mut avail_regs = self.available_pregs[OperandPos::Early]; + avail_regs.intersect_from(self.available_pregs[OperandPos::Late]); + let reg = self.lrus[vreg.class()] + .last(avail_regs) + .ok_or(RegAllocError::TooManyLiveRegs)?; + self.edits.scratch_regs[vreg.class()] = Some(reg); + } + self.edits.add_move( self.func.block_insns(block).first(), slot, prev_alloc, vreg.class(), InstPosition::Before, - true, ); } - let scratch_regs = - self.get_scratch_regs_for_reloading(self.edits.inst_needs_scratch_reg.clone(), available_regs_for_scratch)?; - self.edits.process_edits(scratch_regs); if trace_enabled!() { self.log_post_reload_at_begin_state(block); } @@ -1462,7 +1320,7 @@ impl<'a, F: Function> Env<'a, F> { // Reset has to be before `alloc_inst` not after because // available pregs is needed after processing the first // instruction in the block during `reload_at_begin`. 
- self.reset_available_pregs(); + self.reset_available_pregs_and_scratch_regs(); self.alloc_inst(block, inst)?; } self.reload_at_begin(block)?; @@ -1473,9 +1331,10 @@ impl<'a, F: Function> Env<'a, F> { fn run(&mut self) -> Result<(), RegAllocError> { debug_assert_eq!(self.func.entry_block().index(), 0); for block in (0..self.func.num_blocks()).rev() { - self.reset_available_pregs(); + self.reset_available_pregs_and_scratch_regs(); self.alloc_block(Block::new(block))?; } + self.edits.edits.reverse(); // Ought to check if there are livein registers // then throw an error, but will that be expensive? Ok(()) @@ -1558,7 +1417,7 @@ pub fn run( } Ok(Output { - edits: env.edits.edits.make_contiguous().to_vec(), + edits: env.edits.edits, allocs: env.allocs.allocs, inst_alloc_offsets: env.allocs.inst_alloc_offsets, num_spillslots: env.stack.num_spillslots as usize, From 6952cd7022107507cc9e78f104b1d1a35ca3c4e9 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 29 Aug 2024 21:58:44 +0100 Subject: [PATCH 66/95] fixed bug with state update in process_branch --- src/fastalloc/mod.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index a41d96a9..863cb03c 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -938,9 +938,17 @@ impl<'a, F: Function> Env<'a, F> { // All branch arguments should be in their spillslots at the end of the function. 
if self.vreg_allocs[vreg.vreg()].is_none() { self.live_vregs.insert(*vreg); + self.vreg_allocs[vreg.vreg()] = + Allocation::stack(self.vreg_spillslots[vreg.vreg()]); + } else if self.vreg_allocs[vreg.vreg()] != vreg_spill { + self.edits.add_move( + inst, + self.vreg_allocs[vreg.vreg()], + vreg_spill, + vreg.class(), + InstPosition::Before, + ); } - self.vreg_allocs[vreg.vreg()] = - Allocation::stack(self.vreg_spillslots[vreg.vreg()]); } } From 8cce4d54b59fea1a878adb47e26bb0f9019111f5 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Mon, 2 Sep 2024 02:06:01 +0100 Subject: [PATCH 67/95] now tracks debug info --- src/fastalloc/mod.rs | 66 ++++++++++++++++++++++++++++++++++---------- src/lib.rs | 5 ++++ 2 files changed, 57 insertions(+), 14 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 863cb03c..bf3d2d0a 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1,8 +1,7 @@ use crate::{cfg::CFGInfo, ion::Stats, Allocation, RegAllocError}; use crate::{ssa::validate_ssa, Edit, Function, MachineEnv, Output, ProgPoint}; use crate::{ - AllocationKind, Block, Inst, InstPosition, Operand, OperandConstraint, OperandKind, OperandPos, - PReg, PRegSet, RegClass, SpillSlot, VReg, + AllocationKind, Block, Inst, InstPosition, Operand, OperandConstraint, OperandKind, OperandPos, PReg, PRegSet, RegClass, SpillSlot, VReg }; use alloc::vec::Vec; use core::convert::TryInto; @@ -17,6 +16,9 @@ use iter::*; use lru::*; use vregset::VRegSet; +#[cfg(test)] +mod tests; + #[derive(Debug)] struct Allocs { allocs: Vec, @@ -237,6 +239,7 @@ pub struct Env<'a, F: Function> { init_available_pregs: PRegSet, allocatable_regs: PRegSet, stack: Stack<'a, F>, + vreg_to_live_inst_range: Vec<(ProgPoint, ProgPoint, Allocation)>, fixed_stack_slots: PRegSet, @@ -244,6 +247,7 @@ pub struct Env<'a, F: Function> { allocs: Allocs, edits: Edits, stats: Stats, + debug_locations: Vec<(u32, ProgPoint, ProgPoint, Allocation)>, } impl<'a, F: Function> Env<'a, F> { @@ -297,6 +301,7 @@ 
impl<'a, F: Function> Env<'a, F> { vreg_in_preg: vec![VReg::invalid(); PReg::NUM_INDEX], stack: Stack::new(func), fixed_stack_slots, + vreg_to_live_inst_range: vec![(ProgPoint::invalid(), ProgPoint::invalid(), Allocation::none()); func.num_vregs()], temp_spillslots: PartedByRegClass { items: [ Vec::with_capacity(func.num_vregs()), @@ -315,6 +320,7 @@ impl<'a, F: Function> Env<'a, F> { allocs, edits: Edits::new(fixed_stack_slots, max_operand_len, func.num_insts(), dedicated_scratch_regs), stats: Stats::default(), + debug_locations: Vec::with_capacity(func.debug_value_labels().len()), } } @@ -595,14 +601,33 @@ impl<'a, F: Function> Env<'a, F> { if !self.allocd_within_constraint(inst, op, fixed_spillslot) { trace!("{op} isn't allocated within constraints."); let curr_alloc = self.vreg_allocs[op.vreg().vreg()]; + let new_alloc = self.alloc_operand(inst, op, op_idx, fixed_spillslot)?; if curr_alloc.is_none() { self.live_vregs.insert(op.vreg()); + self.vreg_to_live_inst_range[op.vreg().vreg()].1 = match (op.pos(), op.kind()) { + (OperandPos::Late, OperandKind::Use) + | (_, OperandKind::Def) => { + // Live range ends just before the early phase of the + // next instruction. + ProgPoint::before(Inst::new(inst.index() + 1)) + } + (OperandPos::Early, OperandKind::Use) => { + // Live range ends just before the late phase of the current instruction. + ProgPoint::after(inst) + } + }; + self.vreg_to_live_inst_range[op.vreg().vreg()].2 = new_alloc; + + trace!("Setting vreg_allocs[{op}] to {new_alloc:?}"); + self.vreg_allocs[op.vreg().vreg()] = new_alloc; + if let Some(preg) = new_alloc.as_reg() { + self.vreg_in_preg[preg.index()] = op.vreg(); + } } - let new_alloc = self.alloc_operand(inst, op, op_idx, fixed_spillslot)?; // Need to insert a move to propagate flow from the current // allocation to the subsequent places where the value was // used (in `prev_alloc`, that is). 
- if curr_alloc.is_some() { + else if curr_alloc.is_some() { trace!("Move reason: Prev allocation doesn't meet constraints"); if self.is_stack(new_alloc) && self.is_stack(curr_alloc) && self.edits.scratch_regs[op.class()].is_none() { let reg = self.get_scratch_reg(op.class())?; @@ -733,12 +758,6 @@ impl<'a, F: Function> Env<'a, F> { // Don't change the allocation. self.vreg_in_preg[preg.index()] = VReg::invalid(); } - } else { - trace!("Setting vreg_allocs[{op}] to {new_alloc:?}"); - self.vreg_allocs[op.vreg().vreg()] = new_alloc; - if let Some(preg) = new_alloc.as_reg() { - self.vreg_in_preg[preg.index()] = op.vreg(); - } } trace!( "Allocation for instruction {:?} and operand {}: {}", @@ -938,8 +957,10 @@ impl<'a, F: Function> Env<'a, F> { // All branch arguments should be in their spillslots at the end of the function. if self.vreg_allocs[vreg.vreg()].is_none() { self.live_vregs.insert(*vreg); + let slot = self.vreg_spillslots[vreg.vreg()]; self.vreg_allocs[vreg.vreg()] = - Allocation::stack(self.vreg_spillslots[vreg.vreg()]); + Allocation::stack(slot); + self.vreg_to_live_inst_range[vreg.vreg()].1 = ProgPoint::before(inst); } else if self.vreg_allocs[vreg.vreg()] != vreg_spill { self.edits.add_move( inst, @@ -1067,7 +1088,9 @@ impl<'a, F: Function> Env<'a, F> { } else { self.process_operand_allocation(inst, op, op_idx, None)?; } - if self.vreg_spillslots[op.vreg().vreg()].is_valid() { + let slot = self.vreg_spillslots[op.vreg().vreg()]; + if slot.is_valid() { + self.vreg_to_live_inst_range[op.vreg().vreg()].2 = Allocation::stack(slot); let curr_alloc = self.vreg_allocs[op.vreg().vreg()]; let vreg_slot = self.vreg_spillslots[op.vreg().vreg()]; let (is_stack_to_stack, src_and_dest_are_same) = if let Some(curr_alloc) = curr_alloc.as_stack() { @@ -1091,6 +1114,7 @@ impl<'a, F: Function> Env<'a, F> { ); } } + self.vreg_to_live_inst_range[op.vreg().vreg()].0 = ProgPoint::after(inst); self.freealloc(op.vreg()); } for (op_idx, op) in operands.use_ops() { @@ -1170,6 
+1194,7 @@ impl<'a, F: Function> Env<'a, F> { ); trace!("Available pregs: {}", self.available_pregs[OperandPos::Early]); let mut available_regs_for_scratch = self.available_pregs[OperandPos::Early]; + let first_inst = self.func.block_insns(block).first(); // We need to check for the registers that are still live. // These registers are either livein or block params // Liveins should be stack-allocated and block params should be freed. @@ -1187,6 +1212,8 @@ impl<'a, F: Function> Env<'a, F> { // the first instruction. let prev_alloc = self.vreg_allocs[vreg.vreg()]; let slot = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); + self.vreg_to_live_inst_range[vreg.vreg()].2 = slot; + self.vreg_to_live_inst_range[vreg.vreg()].0 = ProgPoint::before(first_inst); trace!("{} is a block param. Freeing it", vreg); // A block's block param is not live before the block. // And `vreg_allocs[i]` of a virtual register i is none for @@ -1336,6 +1363,17 @@ impl<'a, F: Function> Env<'a, F> { Ok(()) } + fn build_debug_info(&mut self) { + trace!("Building debug location info"); + for &(vreg, start, end, label) in self.func.debug_value_labels() { + let (point_start, point_end, alloc) = self.vreg_to_live_inst_range[vreg.vreg()]; + if point_start.inst() <= start && end <= point_end.inst().next() { + self.debug_locations.push((label, point_start, point_end, alloc)); + } + } + self.debug_locations.sort_by_key(|loc| loc.0); + } + fn run(&mut self) -> Result<(), RegAllocError> { debug_assert_eq!(self.func.entry_block().index(), 0); for block in (0..self.func.num_blocks()).rev() { @@ -1343,6 +1381,7 @@ impl<'a, F: Function> Env<'a, F> { self.alloc_block(Block::new(block))?; } self.edits.edits.reverse(); + self.build_debug_info(); // Ought to check if there are livein registers // then throw an error, but will that be expensive? 
Ok(()) @@ -1429,8 +1468,7 @@ pub fn run( allocs: env.allocs.allocs, inst_alloc_offsets: env.allocs.inst_alloc_offsets, num_spillslots: env.stack.num_spillslots as usize, - // TODO: Handle debug locations. - debug_locations: Vec::new(), + debug_locations: env.debug_locations, safepoint_slots: Vec::new(), stats: env.stats, }) diff --git a/src/lib.rs b/src/lib.rs index 17f6d2b0..b6918e28 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1375,6 +1375,11 @@ impl ProgPoint { pub fn from_index(index: u32) -> Self { Self { bits: index } } + + #[inline(always)] + pub fn invalid() -> Self { + Self::before(Inst::new(usize::MAX)) + } } /// An instruction to insert into the program to perform some data movement. From a50361c174b691abac8694204531bbaf40a834e8 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Mon, 2 Sep 2024 12:09:13 +0100 Subject: [PATCH 68/95] added verbose log --- src/fastalloc/mod.rs | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index bf3d2d0a..99d2232d 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -66,15 +66,6 @@ impl IndexMut<(usize, usize)> for Allocs { } } -fn remove_any_from_pregset(set: &mut PRegSet) -> Option { - if let Some(preg) = set.into_iter().next() { - set.remove(preg); - Some(preg) - } else { - None - } -} - #[derive(Debug)] struct Stack<'a, F: Function> { num_spillslots: u32, @@ -252,6 +243,7 @@ pub struct Env<'a, F: Function> { impl<'a, F: Function> Env<'a, F> { fn new(func: &'a F, env: &'a MachineEnv) -> Self { + use alloc::vec; let mut regs = [ env.preferred_regs_by_class[RegClass::Int as usize].clone(), env.preferred_regs_by_class[RegClass::Float as usize].clone(), @@ -287,7 +279,6 @@ impl<'a, F: Function> Env<'a, F> { env.scratch_by_class[2], ], }; - use alloc::vec; trace!("{:?}", env); let (allocs, max_operand_len) = Allocs::new(func); let fixed_stack_slots = PRegSet::from_iter(env.fixed_stack_slots.iter().cloned()); @@ -1364,7 +1355,7 @@ 
impl<'a, F: Function> Env<'a, F> { } fn build_debug_info(&mut self) { - trace!("Building debug location info"); + trace!("Building debug location info"); for &(vreg, start, end, label) in self.func.debug_value_labels() { let (point_start, point_end, alloc) = self.vreg_to_live_inst_range[vreg.vreg()]; if point_start.inst() <= start && end <= point_end.inst().next() { @@ -1444,7 +1435,7 @@ fn log_output<'a, F: Function>(env: &Env<'a, F>) { pub fn run( func: &F, mach_env: &MachineEnv, - enable_annotations: bool, + verbose_log: bool, enable_ssa_checker: bool, ) -> Result { if enable_ssa_checker { @@ -1452,14 +1443,14 @@ pub fn run( validate_ssa(func, &cfginfo)?; } - if trace_enabled!() { + if trace_enabled!() || verbose_log { log_function(func); } let mut env = Env::new(func, mach_env); env.run()?; - if trace_enabled!() { + if trace_enabled!() || verbose_log { log_output(&env); } From ac75d213652102117b4c0b9d24bcef984b547b40 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Mon, 2 Sep 2024 12:19:34 +0100 Subject: [PATCH 69/95] removed unnecessary comments and code, renamed tests to better names --- src/fastalloc/bitset.rs | 135 ------------------- src/fastalloc/mod.rs | 115 +---------------- src/fastalloc/tests.rs | 278 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 281 insertions(+), 247 deletions(-) delete mode 100644 src/fastalloc/bitset.rs create mode 100644 src/fastalloc/tests.rs diff --git a/src/fastalloc/bitset.rs b/src/fastalloc/bitset.rs deleted file mode 100644 index 99d5b34e..00000000 --- a/src/fastalloc/bitset.rs +++ /dev/null @@ -1,135 +0,0 @@ -use alloc::vec; -use alloc::vec::Vec; - -type Frame = u64; -const BITS_PER_FRAME: usize = core::mem::size_of::() * 8; - -pub struct BitSet { - bits: Vec, -} - -impl BitSet { - pub fn with_capacity(n: usize) -> Self { - let quot = n / BITS_PER_FRAME; - let no_of_frames = quot + 1; - Self { - bits: vec![0; no_of_frames], - } - } - - pub fn compute_index(&self, el: usize) -> (usize, usize) { - (el / 
BITS_PER_FRAME, el % BITS_PER_FRAME) - } - - pub fn insert(&mut self, el: usize) { - let (frame_no, idx) = self.compute_index(el); - self.bits[frame_no] |= 1 << idx; - } - - pub fn remove(&mut self, el: usize) { - let (frame_no, idx) = self.compute_index(el); - self.bits[frame_no] &= !(1 << idx); - } - - pub fn contains(&self, el: usize) -> bool { - let (frame_no, idx) = self.compute_index(el); - self.bits[frame_no] & (1 << idx) != 0 - } - - pub fn clear(&mut self) { - for frame in self.bits.iter_mut() { - *frame = 0; - } - } - - pub fn is_empty(&mut self) -> bool { - self.bits.iter().all(|frame| *frame == 0) - } - - pub fn iter(&self) -> BitSetIter { - BitSetIter { - next_frame_idx: 0, - curr_frame: 0, - bits: &self.bits, - } - } -} - -pub struct BitSetIter<'a> { - next_frame_idx: usize, - curr_frame: Frame, - bits: &'a [Frame], -} - -impl<'a> Iterator for BitSetIter<'a> { - type Item = usize; - - fn next(&mut self) -> Option { - loop { - while self.curr_frame == 0 { - if self.next_frame_idx >= self.bits.len() { - return None; - } - self.curr_frame = self.bits[self.next_frame_idx]; - self.next_frame_idx += 1; - } - let skip = self.curr_frame.trailing_zeros(); - self.curr_frame &= !(1 << skip); - return Some((self.next_frame_idx - 1) * BITS_PER_FRAME + skip as usize); - } - } -} - -use core::fmt; - -impl fmt::Debug for BitSet { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{{ ")?; - for el in self.iter() { - write!(f, "{el} ")?; - } - write!(f, "}}") - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn operations() { - let mut set = BitSet::with_capacity(200); - set.insert(10); - set.insert(11); - set.insert(199); - set.insert(23); - set.insert(45); - let els = [10, 11, 23, 45, 199]; - for (actual_el, expected_el) in set.iter().zip(els.iter()) { - assert_eq!(actual_el, *expected_el as usize); - } - assert!(set.contains(10)); - assert!(!set.contains(12)); - assert!(!set.contains(197)); - assert!(set.contains(45)); - 
assert!(set.contains(23)); - assert!(set.contains(11)); - set.remove(23); - assert!(!set.contains(23)); - set.insert(73); - let els = [10, 11, 45, 73, 199]; - for (actual_el, expected_el) in set.iter().zip(els.iter()) { - assert_eq!(actual_el, *expected_el as usize); - } - } - - #[test] - fn empty() { - let mut set = BitSet::with_capacity(2000); - assert!(set.is_empty()); - set.insert(100); - assert!(!set.is_empty()); - set.remove(100); - assert!(set.is_empty()); - } -} diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 99d2232d..4a18c1f9 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -8,7 +8,6 @@ use core::convert::TryInto; use core::iter::FromIterator; use core::ops::{Index, IndexMut}; -mod bitset; mod iter; mod lru; mod vregset; @@ -628,41 +627,6 @@ impl<'a, F: Function> Env<'a, F> { } if op.kind() == OperandKind::Def { trace!("Adding edit from {new_alloc:?} to {curr_alloc:?} after inst {inst:?} for {op}"); - // In the case where `op` is a def, - // the allocation of `op` will not be holding the value - // of `op` before the instruction. Since it's a def, - // it will only hold the value after. So, the move - // has to be done after. - // - // The move also has to be prepended. Consider the scenario: - // - // 1. def v0 (any reg), use v1 (fixed: p0) - // 2. use v0 (fixed: p0) - // - // During the processing of the first instruction, v0 is already in - // p0. Since v1 has a fixed register constraint, it's processed - // first and evicts v0 from p0. Edits are inserted to flow v0 from - // its spillslot to p0 after the instruction: - // - // 1. def v0 (any reg), use v1 (fixed: p0) - // move from stack_v0 to p0 - // 2. use v0 (fixed: p0) - // - // When it's time to process v0, it has to be moved again: this time - // because it needs to be in a register, not on the stack. - // Edits are inserted to flow v0 from its spillslot to the newly allocated - // register, say p1. - // - // 1. 
def v0 (any reg), use v1 (fixed: p0) - // move from stack_v0 to p0 - // move from p1 to stack_v0 - // 2. use v0 (fixed: p0) - // - // The problem here is that the edits are out of order. p1, the - // allocation used for v0 in inst 1., is never moved into p0, - // the location v0 is expected to be in after inst 1. - // This messes up the dataflow. - // To avoid this, the moves are prepended. self.edits.add_move( inst, new_alloc, @@ -672,79 +636,10 @@ impl<'a, F: Function> Env<'a, F> { ); // No need to set vreg_in_preg because it will be set during // `freealloc` if needed. - } else { - // This was handled by a simple move from the operand to its previous - // allocation before the instruction, but this is incorrect. - // Consider the scenario: - // 1. use v0 (fixed: p0), use v1 (fixed: p1) - // 2. use v0 (fixed: p1) - // By the time inst 1 is to be processed, v0 will be in p1. - // But v1 should be in p1, not v0. If v0 is moved to p1 before inst 1, - // then it will overwrite v1 and v0 will be used instead of v1. - // It's also possible that the register used by v0 could be reused - // with a def operand. - // To resolve this, v0 is moved into its spillslot before inst 1. - // Then it's moved from its spillslot into p1 after inst 1, which is the place - // where it's expected to be after the instruction. - // This is to avoid two problems: - // 1. Overwriting a vreg that uses p1 in the current instruction. - // 2. Avoiding a situation where a def reuses the register used by v0 - // and overwrites v0. - // - // It is possible for a virtual register to be used twice in the - // same instruction with different constraints. - // For example: - // 1. use v0 (fixed: stack0), use v0 (fixed: p0) - // 2. use v0 (fixed: p1) - // By the time inst 1 is to be processed, v0 will be in p1. - // But it should be in p0 and stack0. 
If stack0 is processed - // first, moves will be inserted to move from stack0 to v0's - // spillslot before inst 1 and to move from spillslot - // to p1 after the instruction: - // - // move from stack0 to stack_v0 - // 1. use v0 (fixed: stack0), use v0 (fixed: p0) - // move from stack_v0 to p1 - // 2. use v0 (fixed: p1) - // - // But when the second use is encountered, moves will be inserted again - // and mess up the dataflow: - // - // move from p0 to stack_v0 - // move from stack0 to stack_v0 - // 1. use v0 (fixed: stack0), use v0 (fixed: p0) - // move from stack_v0 to p1 - // move from stack_v0 to p1 - // 2. use v0 (fixed: p1) - // - // Assuming that after instruction 1 is processed, v0's - // location is p0, then stack0 will always overwrite it, - // and v0 is not in stack0 (it's in p0, now). - // To avoid this scenario, these moves are only inserted - // for the first encountered constraint in an instruction. - // After this, any other operands with the same virtual register - // but different constraint will simply generate a move from the - // new location to the prev_alloc. This new move is inserted before - // the original one because the new location is now where v0 is - // expected to be before the instruction. - // For example: - // - // move from stack0 to stack_v0 - // 1. use v0 (fixed: stack0), use v0 (fixed: p0) - // move from stack_v0 to p1 - // 2. use v0 (fixed: p1) - // - // When the second use is encountered, the current location for v0 becomes - // p0 and a move from p0 to stack0 is prepended to the edits: - // - // move from p0 to stack0 - // move from stack0 to stack_v0 - // 1. use v0 (fixed: stack0), use v0 (fixed: p0) - // move from stack_v0 to p1 - // 2. use v0 (fixed: p1) - - // Edits for use operands are added later. } + // Edits for use operands are added later to avoid inserting + // edits out of order. + if let Some(preg) = new_alloc.as_reg() { // Don't change the allocation. 
self.vreg_in_preg[preg.index()] = VReg::invalid(); @@ -933,10 +828,6 @@ impl<'a, F: Function> Env<'a, F> { vreg_spill, temp ); - // Assuming that vregs defined in the current branch instruction can't be - // used as branch args for successors, else inserting the moves before, instead - // of after will be wrong. But the edits are inserted before because the fuzzer - // doesn't recognize moves inserted after branch instructions. self.edits.add_move( inst, diff --git a/src/fastalloc/tests.rs b/src/fastalloc/tests.rs new file mode 100644 index 00000000..c5548b1f --- /dev/null +++ b/src/fastalloc/tests.rs @@ -0,0 +1,278 @@ +use crate::{run, Algorithm, Allocation, Block, Function, Inst, InstOrEdit, InstRange, MachineEnv, Operand, OperandPos, Output, PReg, PRegSet, ProgPoint, RegClass, RegallocOptions, SpillSlot, VReg}; +use alloc::vec; +use alloc::vec::Vec; +use crate::OperandKind::{self, *}; +use crate::OperandConstraint::{self, *}; + +#[test] +fn test_debug_locations1() { + let mach_env = mach_env(10); + let mut options = RegallocOptions::default(); + options.validate_ssa = true; + options.algorithm = Algorithm::Fastalloc; + let mut f = RealFunction::new(vec![ + BlockBuildInfo { + insts: vec![ + /* 0. */ vec![op(Def, 0, FixedReg(p(0)))], + /* 1. */ vec![op(Def, 1, FixedReg(p(0))), op(Use, 0, FixedReg(p(0))), op(Use, 0, Reg)], + /* 2. */ vec![op(Def, 2, FixedReg(p(8))), op(Use, 0, FixedReg(p(2))), op(Use, 1, FixedReg(p(0)))], + /* 3. 
*/ vec![op(Def, 3, FixedReg(p(9))), op(Use, 0, FixedReg(p(9)))] + ], + } + ]); + f.debug_value_labels = vec![ + (v(0), i(0), i(4), 32), + (v(2), i(2), i(4), 70), + (v(2), i(2), i(4), 71), + (v(3), i(3), i(4), 34), + ]; + let result = run(&f, &mach_env, &options).unwrap(); + assert_eq!(result.debug_locations, vec![ + (32, ProgPoint::after(i(0)), ProgPoint::after(i(3)), alloc(p(9))), + (34, ProgPoint::after(i(3)), ProgPoint::before(i(4)), alloc(p(9))), + (70, ProgPoint::after(i(2)), ProgPoint::before(i(3)), alloc(p(8))), + (71, ProgPoint::after(i(2)), ProgPoint::before(i(3)), alloc(p(8))), + ]); +} + +#[test] +fn test_debug_locations2() { + let mach_env = mach_env(2); + let mut options = RegallocOptions::default(); + options.validate_ssa = true; + options.algorithm = Algorithm::Fastalloc; + let mut f = RealFunction::new(vec![ + BlockBuildInfo { + insts: vec![ + /* 0. */ vec![op(Def, 2, FixedReg(p(0)))], + /* 1. */ vec![op(Def, 0, FixedReg(p(0)))], + /* 2. */ vec![op(Def, 1, FixedReg(p(1)))], + /* 3. */ vec![op(Use, 0, FixedReg(p(0))), op(Use, 0, FixedReg(p(1)))], + /* 4. 
*/ vec![op(Use, 1, FixedReg(p(1)))], + ], + } + ]); + f.debug_value_labels = vec![ + (v(0), i(1), i(4), 10), + (v(1), i(0), i(1), 11), + (v(1), i(2), i(3), 23), + ]; + let result = run(&f, &mach_env, &options).unwrap(); + assert_eq!(result.debug_locations.len(), 2); + assert_eq!( + result.debug_locations[0], + (10, ProgPoint::after(i(1)), ProgPoint::after(i(3)), alloc(p(0))) + ); + assert_eq!(result.debug_locations[1].0, 23); + assert_eq!(result.debug_locations[1].1, ProgPoint::after(i(2))); + assert_eq!(result.debug_locations[1].2, ProgPoint::after(i(4))); + assert!(matches!(result.debug_locations[1].3.as_stack(), Some(_))); +} + +impl RealFunction { + fn new(blocks: Vec) -> Self { + assert!(blocks.len() <= 2, "Just for testing purposes"); + let mut f = Self::default(); + let mut max_vreg_num_seen = 0; + for block in blocks.iter() { + f.blocks.push(RealBlock { + params: vec![], + preds: vec![], + succs: vec![], + }); + let start_inst_idx = f.insts.len(); + for inst in block.insts.iter() { + f.insts.push(RealInst { + inst: Inst::new(f.insts.len()), + kind: RealInstKind::Normal + }); + let start_op_idx = f.operands.len(); + for op in inst.iter() { + max_vreg_num_seen = max_vreg_num_seen.max(op.vreg().vreg()); + f.operands.push(*op); + } + f.operand_ranges.push((start_op_idx, f.operands.len())); + } + if !block.insts.is_empty() { + f.insts.last_mut().unwrap().kind = RealInstKind::Ret; + } + f.inst_ranges.push((start_inst_idx, f.insts.len())); + } + f.num_vregs = max_vreg_num_seen + 1; + f + } +} + +fn mach_env(no_of_regs: usize) -> MachineEnv { + MachineEnv { + preferred_regs_by_class: [ + (0..no_of_regs) + .map(|no| PReg::new(no, RegClass::Int)) + .collect(), + vec![], + vec![] + ], + non_preferred_regs_by_class: [vec![], vec![], vec![]], + scratch_by_class: [None, None, None], + fixed_stack_slots: vec![], + } +} + +fn op(kind: OperandKind, vreg_num: usize, constraint: OperandConstraint) -> Operand { + Operand::new(VReg::new(vreg_num, RegClass::Int), constraint, 
kind, match kind { + Use => OperandPos::Early, + Def => OperandPos::Late + }) +} + +fn alloc(preg: PReg) -> Allocation { + Allocation::reg(preg) +} + +fn v(vreg_num: usize) -> VReg { + VReg::new(vreg_num, RegClass::Int) +} + +fn i(inst: usize) -> Inst { + Inst::new(inst) +} + +fn p(hw_enc: usize) -> PReg { + PReg::new(hw_enc, RegClass::Int) +} + +struct BlockBuildInfo { + insts: Vec>, +} + +#[derive(Default)] +struct RealFunction { + blocks: Vec, + insts: Vec, + operands: Vec, + operand_ranges: Vec<(usize, usize)>, + inst_ranges: Vec<(usize, usize)>, + num_vregs: usize, + debug_value_labels: Vec<(VReg, Inst, Inst, u32)>, +} + +struct RealBlock { + params: Vec, + preds: Vec, + succs: Vec, +} + +struct RealInst { + inst: Inst, + kind: RealInstKind, +} + +impl RealInst { + fn is_branch(&self) -> bool { + match self.kind { + RealInstKind::Branch(_, _) => true, + _ => false, + } + } + + fn is_ret(&self) -> bool { + match self.kind { + RealInstKind::Ret => true, + _ => false, + } + } +} + +enum RealInstKind { + Normal, + Branch(Block, Vec), + Ret +} + +impl Function for RealFunction { + fn num_insts(&self) -> usize { + self.insts.len() + } + + fn num_blocks(&self) -> usize { + self.blocks.len() + } + + fn block_insns(&self, block: crate::Block) -> crate::InstRange { + let (start, end) = self.inst_ranges[block.index()]; + if start != end { + InstRange::new(self.insts[start].inst, Inst::new(self.insts[end - 1].inst.index() + 1)) + } else { + InstRange::new(Inst::new(0), Inst::new(0)) + } + } + + fn allow_multiple_vreg_defs(&self) -> bool { + false + } + + fn block_params(&self, block: crate::Block) -> &[VReg] { + &self.blocks[block.index()].params + } + + fn block_preds(&self, block: crate::Block) -> &[crate::Block] { + &self.blocks[block.index()].preds + } + + fn block_succs(&self, block: Block) -> &[Block] { + &self.blocks[block.index()].succs + } + + fn debug_value_labels(&self) -> &[(VReg, Inst, Inst, u32)] { + &self.debug_value_labels + } + + fn entry_block(&self) -> 
Block { + Block::new(0) + } + + fn inst_clobbers(&self, _insn: Inst) -> crate::PRegSet { + PRegSet::empty() + } + + fn inst_operands(&self, insn: Inst) -> &[Operand] { + let (start, end) = self.operand_ranges[insn.index()]; + &self.operands[start..end] + } + + fn is_branch(&self, insn: Inst) -> bool { + self.insts[insn.index()].is_branch() + } + + fn is_ret(&self, insn: Inst) -> bool { + self.insts[insn.index()].is_ret() + } + + fn multi_spillslot_named_by_last_slot(&self) -> bool { + false + } + + fn num_vregs(&self) -> usize { + self.num_vregs + } + + fn reftype_vregs(&self) -> &[VReg] { + &[] + } + + fn requires_refs_on_stack(&self, _: Inst) -> bool { + false + } + + fn spillslot_size(&self, regclass: crate::RegClass) -> usize { + match regclass { + RegClass::Int => 2, + RegClass::Float => 4, + RegClass::Vector => 8, + } + } + + fn branch_blockparams(&self, _block: Block, _insn: Inst, _succ_idx: usize) -> &[VReg] { + &[] + } +} From 1d59643174ddb3188a5302ede55bceec81e5f7a7 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Mon, 2 Sep 2024 12:26:46 +0100 Subject: [PATCH 70/95] formatting --- src/fastalloc/iter.rs | 138 +----------------------- src/fastalloc/mod.rs | 222 +++++++++++++++++++++++---------------- src/fastalloc/tests.rs | 121 ++++++++++++++------- src/fastalloc/vregset.rs | 11 +- src/lib.rs | 2 +- 5 files changed, 229 insertions(+), 265 deletions(-) diff --git a/src/fastalloc/iter.rs b/src/fastalloc/iter.rs index 6232bd3a..91fee49d 100644 --- a/src/fastalloc/iter.rs +++ b/src/fastalloc/iter.rs @@ -1,4 +1,4 @@ -use crate::{Operand, OperandConstraint, OperandKind, OperandPos}; +use crate::{Operand, OperandConstraint, OperandKind}; pub struct Operands<'a>(pub &'a [Operand]); @@ -26,156 +26,22 @@ impl<'a> Operands<'a> { self.matches(|op| op.kind() == OperandKind::Use) } - pub fn non_fixed_use(&self) -> impl Iterator + 'a { - self.matches(|op| { - !matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && op.kind() == OperandKind::Use - }) - } - - pub fn 
non_fixed_non_reuse_late(&self) -> impl Iterator + 'a { - self.matches(|op| { - !matches!( - op.constraint(), - OperandConstraint::FixedReg(_) | OperandConstraint::Reuse(_) - ) && op.pos() == OperandPos::Late - }) - } - - pub fn non_reuse_late_def(&self) -> impl Iterator + 'a { - self.matches(|op| { - !matches!(op.constraint(), OperandConstraint::Reuse(_)) - && op.pos() == OperandPos::Late - && op.kind() == OperandKind::Def - }) - } - - pub fn non_fixed_non_reuse_early(&self) -> impl Iterator + 'a { - self.matches(|op| { - !matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && !matches!(op.constraint(), OperandConstraint::Reuse(_)) - && op.pos() == OperandPos::Early - }) - } - pub fn reuse(&self) -> impl Iterator + 'a { self.matches(|op| matches!(op.constraint(), OperandConstraint::Reuse(_))) } - pub fn non_reuse_early_def(&self) -> impl Iterator + 'a { - self.matches(|op| { - !matches!(op.constraint(), OperandConstraint::Reuse(_)) - && op.pos() == OperandPos::Early - && op.kind() == OperandKind::Def - }) - } - pub fn fixed(&self) -> impl Iterator + 'a { - self.matches(|op| matches!(op.constraint(), OperandConstraint::FixedReg(_))) - } - - pub fn fixed_early(&self) -> impl Iterator + 'a { - self.matches(|op| { - matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && op.pos() == OperandPos::Early - }) - } - - pub fn fixed_late(&self) -> impl Iterator + 'a { self.matches(|op| { matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && op.pos() == OperandPos::Late - }) - } - - pub fn non_reuse_def(&self) -> impl Iterator + 'a { - self.matches(|op| { - !matches!(op.constraint(), OperandConstraint::Reuse(_)) && op.kind() == OperandKind::Def - }) - } - - pub fn non_fixed_def(&self) -> impl Iterator + 'a { - self.matches(|op| { - !matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && op.kind() == OperandKind::Def - }) - } - - pub fn non_fixed_non_reuse_late_use(&self) -> impl Iterator + 'a { - self.matches(|op| { - !matches!( - 
op.constraint(), - OperandConstraint::FixedReg(_) | OperandConstraint::Reuse(_) - ) && op.pos() == OperandPos::Late - && op.kind() == OperandKind::Use - }) - } - - pub fn non_fixed_non_reuse_late_def(&self) -> impl Iterator + 'a { - self.matches(|op| { - !matches!( - op.constraint(), - OperandConstraint::FixedReg(_) | OperandConstraint::Reuse(_) - ) && op.pos() == OperandPos::Late - && op.kind() == OperandKind::Def - }) - } - - pub fn non_fixed_late_use(&self) -> impl Iterator + 'a { - self.matches(|op| { - !matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && op.pos() == OperandPos::Late - && op.kind() == OperandKind::Use - }) - } - - pub fn non_fixed_late_def(&self) -> impl Iterator + 'a { - self.matches(|op| { - !matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && op.pos() == OperandPos::Late - && op.kind() == OperandKind::Def - }) - } - - pub fn non_fixed_early_use(&self) -> impl Iterator + 'a { - self.matches(|op| { - !matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && op.pos() == OperandPos::Early - && op.kind() == OperandKind::Use }) } - pub fn non_fixed_early_def(&self) -> impl Iterator + 'a { + pub fn non_fixed_use(&self) -> impl Iterator + 'a { self.matches(|op| { !matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && op.pos() == OperandPos::Early - && op.kind() == OperandKind::Def - }) - } - - pub fn late_def(&self) -> impl Iterator + 'a { - self.matches(|op| op.pos() == OperandPos::Late && op.kind() == OperandKind::Def) - } - - pub fn early_def(&self) -> impl Iterator + 'a { - self.matches(|op| op.pos() == OperandPos::Early && op.kind() == OperandKind::Def) - } - - pub fn fixed_early_use(&self) -> impl Iterator + 'a { - self.matches(|op| { - matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && op.pos() == OperandPos::Early && op.kind() == OperandKind::Use }) } - - pub fn fixed_late_def(&self) -> impl Iterator + 'a { - self.matches(|op| { - matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && 
op.pos() == OperandPos::Late - && op.kind() == OperandKind::Def - }) - } } impl<'a> core::ops::Index for Operands<'a> { diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 4a18c1f9..4fca594d 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1,7 +1,8 @@ use crate::{cfg::CFGInfo, ion::Stats, Allocation, RegAllocError}; use crate::{ssa::validate_ssa, Edit, Function, MachineEnv, Output, ProgPoint}; use crate::{ - AllocationKind, Block, Inst, InstPosition, Operand, OperandConstraint, OperandKind, OperandPos, PReg, PRegSet, RegClass, SpillSlot, VReg + AllocationKind, Block, Inst, InstPosition, Operand, OperandConstraint, OperandKind, OperandPos, + PReg, PRegSet, RegClass, SpillSlot, VReg, }; use alloc::vec::Vec; use core::convert::TryInto; @@ -111,7 +112,12 @@ struct Edits { } impl Edits { - fn new(fixed_stack_slots: PRegSet, max_operand_len: u32, num_insts: usize, dedicated_scratch_regs: PartedByRegClass>) -> Self { + fn new( + fixed_stack_slots: PRegSet, + max_operand_len: u32, + num_insts: usize, + dedicated_scratch_regs: PartedByRegClass>, + ) -> Self { // Some operands generate edits and some don't. // The operands that generate edits add no more than two. // Some edits are added due to clobbers, not operands. 
@@ -157,15 +163,22 @@ impl Edits { trace!("Move 1: {scratch_alloc:?} to {to:?}"); self.edits.push(( ProgPoint::new(inst, pos), - Edit::Move { from: scratch_alloc, to }, + Edit::Move { + from: scratch_alloc, + to, + }, )); trace!("Move 2: {from:?} to {scratch_alloc:?}"); self.edits.push(( ProgPoint::new(inst, pos), - Edit::Move { from, to: scratch_alloc }, + Edit::Move { + from, + to: scratch_alloc, + }, )); } else { - self.edits.push((ProgPoint::new(inst, pos), Edit::Move { from, to })); + self.edits + .push((ProgPoint::new(inst, pos), Edit::Move { from, to })); } } } @@ -192,11 +205,7 @@ use core::fmt; impl fmt::Display for PartedByOperandPos { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "{{ early: {}, late: {} }}", - self.items[0], self.items[1] - ) + write!(f, "{{ early: {}, late: {} }}", self.items[0], self.items[1]) } } @@ -291,7 +300,14 @@ impl<'a, F: Function> Env<'a, F> { vreg_in_preg: vec![VReg::invalid(); PReg::NUM_INDEX], stack: Stack::new(func), fixed_stack_slots, - vreg_to_live_inst_range: vec![(ProgPoint::invalid(), ProgPoint::invalid(), Allocation::none()); func.num_vregs()], + vreg_to_live_inst_range: vec![ + ( + ProgPoint::invalid(), + ProgPoint::invalid(), + Allocation::none() + ); + func.num_vregs() + ], temp_spillslots: PartedByRegClass { items: [ Vec::with_capacity(func.num_vregs()), @@ -302,13 +318,15 @@ impl<'a, F: Function> Env<'a, F> { reused_input_to_reuse_op: vec![usize::MAX; max_operand_len as usize], init_available_pregs, available_pregs: PartedByOperandPos { - items: [ - init_available_pregs, - init_available_pregs, - ] + items: [init_available_pregs, init_available_pregs], }, allocs, - edits: Edits::new(fixed_stack_slots, max_operand_len, func.num_insts(), dedicated_scratch_regs), + edits: Edits::new( + fixed_stack_slots, + max_operand_len, + func.num_insts(), + dedicated_scratch_regs, + ), stats: Stats::default(), debug_locations: Vec::with_capacity(func.debug_value_labels().len()), } @@ -327,7 +345,7 
@@ impl<'a, F: Function> Env<'a, F> { fn reset_available_pregs_and_scratch_regs(&mut self) { trace!("Resetting the available pregs"); self.available_pregs = PartedByOperandPos { - items: [self.init_available_pregs, self.init_available_pregs] + items: [self.init_available_pregs, self.init_available_pregs], }; self.edits.scratch_regs = self.edits.dedicated_scratch_regs.clone(); } @@ -340,7 +358,12 @@ impl<'a, F: Function> Env<'a, F> { .ok_or(RegAllocError::TooManyLiveRegs) } - fn reserve_reg_for_fixed_operand(&mut self, op: Operand, op_idx: usize, preg: PReg) -> Result<(), RegAllocError> { + fn reserve_reg_for_fixed_operand( + &mut self, + op: Operand, + op_idx: usize, + preg: PReg, + ) -> Result<(), RegAllocError> { trace!("Reserving register {preg} for fixed operand {op}"); let early_avail_pregs = self.available_pregs[OperandPos::Early]; let late_avail_pregs = self.available_pregs[OperandPos::Late]; @@ -364,8 +387,8 @@ impl<'a, F: Function> Env<'a, F> { self.available_pregs[OperandPos::Late].remove(preg); } _ => { - if op.as_fixed_nonallocatable().is_none() - && (!early_avail_pregs.contains(preg) || !late_avail_pregs.contains(preg)) + if op.as_fixed_nonallocatable().is_none() + && (!early_avail_pregs.contains(preg) || !late_avail_pregs.contains(preg)) { return Err(RegAllocError::TooManyLiveRegs); } @@ -376,7 +399,12 @@ impl<'a, F: Function> Env<'a, F> { Ok(()) } - fn allocd_within_constraint(&self, inst: Inst, op: Operand, fixed_spillslot: Option) -> bool { + fn allocd_within_constraint( + &self, + inst: Inst, + op: Operand, + fixed_spillslot: Option, + ) -> bool { let alloc = self.vreg_allocs[op.vreg().vreg()]; let alloc_is_clobber = if let Some(preg) = alloc.as_reg() { self.func.inst_clobbers(inst).contains(preg) @@ -436,11 +464,13 @@ impl<'a, F: Function> Env<'a, F> { false } } - OperandConstraint::Stack => if let Some(slot) = fixed_spillslot { - alloc == Allocation::stack(slot) - } else { - self.is_stack(alloc) - }, + OperandConstraint::Stack => { + if let 
Some(slot) = fixed_spillslot { + alloc == Allocation::stack(slot) + } else { + self.is_stack(alloc) + } + } // It is possible for an operand to have a fixed register constraint to // a clobber. OperandConstraint::FixedReg(preg) => alloc.is_reg() && alloc.as_reg().unwrap() == preg, @@ -491,7 +521,11 @@ impl<'a, F: Function> Env<'a, F> { } /// Allocates a physical register for the operand `op`. - fn alloc_reg_for_operand(&mut self, inst: Inst, op: Operand) -> Result { + fn alloc_reg_for_operand( + &mut self, + inst: Inst, + op: Operand, + ) -> Result { trace!("available regs: {}", self.available_pregs); trace!("Int LRU: {:?}", self.lrus[RegClass::Int]); trace!("Float LRU: {:?}", self.lrus[RegClass::Float]); @@ -502,7 +536,10 @@ impl<'a, F: Function> Env<'a, F> { return Err(RegAllocError::TooManyLiveRegs); } let Some(preg) = self.lrus[op.class()].last(self.available_pregs[op.pos()]) else { - trace!("Failed to find an available {:?} register in the LRU for operand {op}", op.class()); + trace!( + "Failed to find an available {:?} register in the LRU for operand {op}", + op.class() + ); return Err(RegAllocError::TooManyLiveRegs); }; if self.vreg_in_preg[preg.index()] != VReg::invalid() { @@ -518,10 +555,12 @@ impl<'a, F: Function> Env<'a, F> { (OperandPos::Early, OperandKind::Def) => { self.available_pregs[OperandPos::Late].remove(preg); } - (OperandPos::Late, OperandKind::Def) if matches!(op.constraint(), OperandConstraint::Reuse(_)) => { + (OperandPos::Late, OperandKind::Def) + if matches!(op.constraint(), OperandConstraint::Reuse(_)) => + { self.available_pregs[OperandPos::Early].remove(preg); } - _ => () + _ => (), }; Ok(Allocation::reg(preg)) } @@ -536,12 +575,8 @@ impl<'a, F: Function> Env<'a, F> { fixed_spillslot: Option, ) -> Result { let new_alloc = match op.constraint() { - OperandConstraint::Any => { - self.alloc_reg_for_operand(inst, op)? - } - OperandConstraint::Reg => { - self.alloc_reg_for_operand(inst, op)? 
- } + OperandConstraint::Any => self.alloc_reg_for_operand(inst, op)?, + OperandConstraint::Reg => self.alloc_reg_for_operand(inst, op)?, OperandConstraint::Stack => { let slot = if let Some(spillslot) = fixed_spillslot { spillslot @@ -595,10 +630,9 @@ impl<'a, F: Function> Env<'a, F> { if curr_alloc.is_none() { self.live_vregs.insert(op.vreg()); self.vreg_to_live_inst_range[op.vreg().vreg()].1 = match (op.pos(), op.kind()) { - (OperandPos::Late, OperandKind::Use) - | (_, OperandKind::Def) => { - // Live range ends just before the early phase of the - // next instruction. + (OperandPos::Late, OperandKind::Use) | (_, OperandKind::Def) => { + // Live range ends just before the early phase of the + // next instruction. ProgPoint::before(Inst::new(inst.index() + 1)) } (OperandPos::Early, OperandKind::Use) => { @@ -619,7 +653,10 @@ impl<'a, F: Function> Env<'a, F> { // used (in `prev_alloc`, that is). else if curr_alloc.is_some() { trace!("Move reason: Prev allocation doesn't meet constraints"); - if self.is_stack(new_alloc) && self.is_stack(curr_alloc) && self.edits.scratch_regs[op.class()].is_none() { + if self.is_stack(new_alloc) + && self.is_stack(curr_alloc) + && self.edits.scratch_regs[op.class()].is_none() + { let reg = self.get_scratch_reg(op.class())?; self.edits.scratch_regs[op.class()] = Some(reg); self.available_pregs[OperandPos::Early].remove(reg); @@ -666,7 +703,7 @@ impl<'a, F: Function> Env<'a, F> { (OperandPos::Early, OperandKind::Def) => { self.available_pregs[OperandPos::Late].remove(preg); } - _ => () + _ => (), }; } trace!( @@ -676,8 +713,14 @@ impl<'a, F: Function> Env<'a, F> { self.allocs[(inst.index(), op_idx)] ); } - trace!("Late available regs: {}", self.available_pregs[OperandPos::Late]); - trace!("Early available regs: {}", self.available_pregs[OperandPos::Early]); + trace!( + "Late available regs: {}", + self.available_pregs[OperandPos::Late] + ); + trace!( + "Early available regs: {}", + self.available_pregs[OperandPos::Early] + ); Ok(()) } 
@@ -742,7 +785,8 @@ impl<'a, F: Function> Env<'a, F> { } let succ_param_vreg = succ_params[pos]; if self.vreg_spillslots[succ_param_vreg.vreg()].is_invalid() { - self.vreg_spillslots[succ_param_vreg.vreg()] = self.stack.allocstack(&succ_param_vreg); + self.vreg_spillslots[succ_param_vreg.vreg()] = + self.stack.allocstack(&succ_param_vreg); trace!( "Block param {} is in {}", vreg, @@ -760,13 +804,8 @@ impl<'a, F: Function> Env<'a, F> { // because branches are processed last. self.edits.scratch_regs[vreg.class()] = Some(reg); } - self.edits.add_move( - inst, - temp, - param_alloc, - vreg.class(), - InstPosition::Before, - ); + self.edits + .add_move(inst, temp, param_alloc, vreg.class(), InstPosition::Before); } } @@ -828,20 +867,14 @@ impl<'a, F: Function> Env<'a, F> { vreg_spill, temp ); - - self.edits.add_move( - inst, - vreg_spill, - temp, - vreg.class(), - InstPosition::Before, - ); + + self.edits + .add_move(inst, vreg_spill, temp, vreg.class(), InstPosition::Before); // All branch arguments should be in their spillslots at the end of the function. if self.vreg_allocs[vreg.vreg()].is_none() { self.live_vregs.insert(*vreg); let slot = self.vreg_spillslots[vreg.vreg()]; - self.vreg_allocs[vreg.vreg()] = - Allocation::stack(slot); + self.vreg_allocs[vreg.vreg()] = Allocation::stack(slot); self.vreg_to_live_inst_range[vreg.vreg()].1 = ProgPoint::before(inst); } else if self.vreg_allocs[vreg.vreg()] != vreg_spill { self.edits.add_move( @@ -888,9 +921,16 @@ impl<'a, F: Function> Env<'a, F> { }; // Eviction has to be done separately to avoid using a fixed register // as a scratch register. 
- if self.vreg_in_preg[preg.index()] != VReg::invalid() && self.vreg_in_preg[preg.index()] != op.vreg() { - trace!("Evicting {} from fixed register {preg}", self.vreg_in_preg[preg.index()]); - if self.fixed_stack_slots.contains(preg) && self.edits.scratch_regs[preg.class()].is_none() { + if self.vreg_in_preg[preg.index()] != VReg::invalid() + && self.vreg_in_preg[preg.index()] != op.vreg() + { + trace!( + "Evicting {} from fixed register {preg}", + self.vreg_in_preg[preg.index()] + ); + if self.fixed_stack_slots.contains(preg) + && self.edits.scratch_regs[preg.class()].is_none() + { let reg = self.get_scratch_reg(preg.class())?; self.edits.scratch_regs[preg.class()] = Some(reg); self.available_pregs[OperandPos::Early].remove(reg); @@ -903,8 +943,13 @@ impl<'a, F: Function> Env<'a, F> { self.remove_clobbers_from_available_pregs(clobbers); for preg in clobbers { if self.vreg_in_preg[preg.index()] != VReg::invalid() { - trace!("Evicting {} from clobber {preg}", self.vreg_in_preg[preg.index()]); - if self.fixed_stack_slots.contains(preg) && self.edits.scratch_regs[preg.class()].is_none() { + trace!( + "Evicting {} from clobber {preg}", + self.vreg_in_preg[preg.index()] + ); + if self.fixed_stack_slots.contains(preg) + && self.edits.scratch_regs[preg.class()].is_none() + { let reg = self.get_scratch_reg(preg.class())?; self.edits.scratch_regs[preg.class()] = Some(reg); self.available_pregs[OperandPos::Early].remove(reg); @@ -954,12 +999,8 @@ impl<'a, F: Function> Env<'a, F> { trace!("Allocating def operands {op}"); if let OperandConstraint::Reuse(reused_idx) = op.constraint() { let reused_op = operands[reused_idx]; - let new_reuse_op = Operand::new( - op.vreg(), - reused_op.constraint(), - op.kind(), - op.pos(), - ); + let new_reuse_op = + Operand::new(op.vreg(), reused_op.constraint(), op.kind(), op.pos()); trace!("allocating reuse op {op} as {new_reuse_op}"); self.process_operand_allocation(inst, new_reuse_op, op_idx, None)?; if let Some(preg) = 
self.allocs[(inst.index(), op_idx)].as_reg() { @@ -975,11 +1016,12 @@ impl<'a, F: Function> Env<'a, F> { self.vreg_to_live_inst_range[op.vreg().vreg()].2 = Allocation::stack(slot); let curr_alloc = self.vreg_allocs[op.vreg().vreg()]; let vreg_slot = self.vreg_spillslots[op.vreg().vreg()]; - let (is_stack_to_stack, src_and_dest_are_same) = if let Some(curr_alloc) = curr_alloc.as_stack() { - (true, curr_alloc == vreg_slot) - } else { - (self.is_stack(curr_alloc), false) - }; + let (is_stack_to_stack, src_and_dest_are_same) = + if let Some(curr_alloc) = curr_alloc.as_stack() { + (true, curr_alloc == vreg_slot) + } else { + (self.is_stack(curr_alloc), false) + }; if !src_and_dest_are_same { if is_stack_to_stack && self.edits.scratch_regs[op.class()].is_none() { let reg = self.get_scratch_reg(op.class())?; @@ -1012,12 +1054,8 @@ impl<'a, F: Function> Env<'a, F> { new_reused_input_constraint = OperandConstraint::Stack; fixed_slot = Some(reuse_op_alloc.as_stack().unwrap()); } - let new_reused_input = Operand::new( - op.vreg(), - new_reused_input_constraint, - op.kind(), - op.pos(), - ); + let new_reused_input = + Operand::new(op.vreg(), new_reused_input_constraint, op.kind(), op.pos()); trace!("Allocating reused input {op} as {new_reused_input}, (fixed spillslot: {fixed_slot:?})"); self.process_operand_allocation(inst, new_reused_input, op_idx, fixed_slot)?; } else { @@ -1074,7 +1112,10 @@ impl<'a, F: Function> Env<'a, F> { block, self.func.block_params(block) ); - trace!("Available pregs: {}", self.available_pregs[OperandPos::Early]); + trace!( + "Available pregs: {}", + self.available_pregs[OperandPos::Early] + ); let mut available_regs_for_scratch = self.available_pregs[OperandPos::Early]; let first_inst = self.func.block_insns(block).first(); // We need to check for the registers that are still live. 
@@ -1221,7 +1262,11 @@ impl<'a, F: Function> Env<'a, F> { let mut v = Vec::new(); for i in 0..self.vreg_in_preg.len() { if self.vreg_in_preg[i] != VReg::invalid() { - v.push(format!("{}: {}, ", PReg::from_index(i), self.vreg_in_preg[i])); + v.push(format!( + "{}: {}, ", + PReg::from_index(i), + self.vreg_in_preg[i] + )); } } trace!("vreg_in_preg: {:?}", v); @@ -1246,11 +1291,12 @@ impl<'a, F: Function> Env<'a, F> { } fn build_debug_info(&mut self) { - trace!("Building debug location info"); + trace!("Building debug location info"); for &(vreg, start, end, label) in self.func.debug_value_labels() { let (point_start, point_end, alloc) = self.vreg_to_live_inst_range[vreg.vreg()]; if point_start.inst() <= start && end <= point_end.inst().next() { - self.debug_locations.push((label, point_start, point_end, alloc)); + self.debug_locations + .push((label, point_start, point_end, alloc)); } } self.debug_locations.sort_by_key(|loc| loc.0); diff --git a/src/fastalloc/tests.rs b/src/fastalloc/tests.rs index c5548b1f..e502c594 100644 --- a/src/fastalloc/tests.rs +++ b/src/fastalloc/tests.rs @@ -1,8 +1,11 @@ -use crate::{run, Algorithm, Allocation, Block, Function, Inst, InstOrEdit, InstRange, MachineEnv, Operand, OperandPos, Output, PReg, PRegSet, ProgPoint, RegClass, RegallocOptions, SpillSlot, VReg}; +use crate::OperandConstraint::{self, *}; +use crate::OperandKind::{self, *}; +use crate::{ + run, Algorithm, Allocation, Block, Function, Inst, InstOrEdit, InstRange, MachineEnv, Operand, + OperandPos, Output, PReg, PRegSet, ProgPoint, RegClass, RegallocOptions, SpillSlot, VReg, +}; use alloc::vec; use alloc::vec::Vec; -use crate::OperandKind::{self, *}; -use crate::OperandConstraint::{self, *}; #[test] fn test_debug_locations1() { @@ -10,16 +13,24 @@ fn test_debug_locations1() { let mut options = RegallocOptions::default(); options.validate_ssa = true; options.algorithm = Algorithm::Fastalloc; - let mut f = RealFunction::new(vec![ - BlockBuildInfo { - insts: vec![ - /* 0. 
*/ vec![op(Def, 0, FixedReg(p(0)))], - /* 1. */ vec![op(Def, 1, FixedReg(p(0))), op(Use, 0, FixedReg(p(0))), op(Use, 0, Reg)], - /* 2. */ vec![op(Def, 2, FixedReg(p(8))), op(Use, 0, FixedReg(p(2))), op(Use, 1, FixedReg(p(0)))], - /* 3. */ vec![op(Def, 3, FixedReg(p(9))), op(Use, 0, FixedReg(p(9)))] + let mut f = RealFunction::new(vec![BlockBuildInfo { + insts: vec![ + /* 0. */ vec![op(Def, 0, FixedReg(p(0)))], + /* 1. */ + vec![ + op(Def, 1, FixedReg(p(0))), + op(Use, 0, FixedReg(p(0))), + op(Use, 0, Reg), ], - } - ]); + /* 2. */ + vec![ + op(Def, 2, FixedReg(p(8))), + op(Use, 0, FixedReg(p(2))), + op(Use, 1, FixedReg(p(0))), + ], + /* 3. */ vec![op(Def, 3, FixedReg(p(9))), op(Use, 0, FixedReg(p(9)))], + ], + }]); f.debug_value_labels = vec![ (v(0), i(0), i(4), 32), (v(2), i(2), i(4), 70), @@ -27,12 +38,35 @@ fn test_debug_locations1() { (v(3), i(3), i(4), 34), ]; let result = run(&f, &mach_env, &options).unwrap(); - assert_eq!(result.debug_locations, vec![ - (32, ProgPoint::after(i(0)), ProgPoint::after(i(3)), alloc(p(9))), - (34, ProgPoint::after(i(3)), ProgPoint::before(i(4)), alloc(p(9))), - (70, ProgPoint::after(i(2)), ProgPoint::before(i(3)), alloc(p(8))), - (71, ProgPoint::after(i(2)), ProgPoint::before(i(3)), alloc(p(8))), - ]); + assert_eq!( + result.debug_locations, + vec![ + ( + 32, + ProgPoint::after(i(0)), + ProgPoint::after(i(3)), + alloc(p(9)) + ), + ( + 34, + ProgPoint::after(i(3)), + ProgPoint::before(i(4)), + alloc(p(9)) + ), + ( + 70, + ProgPoint::after(i(2)), + ProgPoint::before(i(3)), + alloc(p(8)) + ), + ( + 71, + ProgPoint::after(i(2)), + ProgPoint::before(i(3)), + alloc(p(8)) + ), + ] + ); } #[test] @@ -41,17 +75,15 @@ fn test_debug_locations2() { let mut options = RegallocOptions::default(); options.validate_ssa = true; options.algorithm = Algorithm::Fastalloc; - let mut f = RealFunction::new(vec![ - BlockBuildInfo { - insts: vec![ - /* 0. */ vec![op(Def, 2, FixedReg(p(0)))], - /* 1. */ vec![op(Def, 0, FixedReg(p(0)))], - /* 2. 
*/ vec![op(Def, 1, FixedReg(p(1)))], - /* 3. */ vec![op(Use, 0, FixedReg(p(0))), op(Use, 0, FixedReg(p(1)))], - /* 4. */ vec![op(Use, 1, FixedReg(p(1)))], - ], - } - ]); + let mut f = RealFunction::new(vec![BlockBuildInfo { + insts: vec![ + /* 0. */ vec![op(Def, 2, FixedReg(p(0)))], + /* 1. */ vec![op(Def, 0, FixedReg(p(0)))], + /* 2. */ vec![op(Def, 1, FixedReg(p(1)))], + /* 3. */ vec![op(Use, 0, FixedReg(p(0))), op(Use, 0, FixedReg(p(1)))], + /* 4. */ vec![op(Use, 1, FixedReg(p(1)))], + ], + }]); f.debug_value_labels = vec![ (v(0), i(1), i(4), 10), (v(1), i(0), i(1), 11), @@ -61,7 +93,12 @@ fn test_debug_locations2() { assert_eq!(result.debug_locations.len(), 2); assert_eq!( result.debug_locations[0], - (10, ProgPoint::after(i(1)), ProgPoint::after(i(3)), alloc(p(0))) + ( + 10, + ProgPoint::after(i(1)), + ProgPoint::after(i(3)), + alloc(p(0)) + ) ); assert_eq!(result.debug_locations[1].0, 23); assert_eq!(result.debug_locations[1].1, ProgPoint::after(i(2))); @@ -84,7 +121,7 @@ impl RealFunction { for inst in block.insts.iter() { f.insts.push(RealInst { inst: Inst::new(f.insts.len()), - kind: RealInstKind::Normal + kind: RealInstKind::Normal, }); let start_op_idx = f.operands.len(); for op in inst.iter() { @@ -110,7 +147,7 @@ fn mach_env(no_of_regs: usize) -> MachineEnv { .map(|no| PReg::new(no, RegClass::Int)) .collect(), vec![], - vec![] + vec![], ], non_preferred_regs_by_class: [vec![], vec![], vec![]], scratch_by_class: [None, None, None], @@ -119,10 +156,15 @@ fn mach_env(no_of_regs: usize) -> MachineEnv { } fn op(kind: OperandKind, vreg_num: usize, constraint: OperandConstraint) -> Operand { - Operand::new(VReg::new(vreg_num, RegClass::Int), constraint, kind, match kind { - Use => OperandPos::Early, - Def => OperandPos::Late - }) + Operand::new( + VReg::new(vreg_num, RegClass::Int), + constraint, + kind, + match kind { + Use => OperandPos::Early, + Def => OperandPos::Late, + }, + ) } fn alloc(preg: PReg) -> Allocation { @@ -186,7 +228,7 @@ impl RealInst { 
enum RealInstKind { Normal, Branch(Block, Vec), - Ret + Ret, } impl Function for RealFunction { @@ -201,7 +243,10 @@ impl Function for RealFunction { fn block_insns(&self, block: crate::Block) -> crate::InstRange { let (start, end) = self.inst_ranges[block.index()]; if start != end { - InstRange::new(self.insts[start].inst, Inst::new(self.insts[end - 1].inst.index() + 1)) + InstRange::new( + self.insts[start].inst, + Inst::new(self.insts[end - 1].inst.index() + 1), + ) } else { InstRange::new(Inst::new(0), Inst::new(0)) } diff --git a/src/fastalloc/vregset.rs b/src/fastalloc/vregset.rs index 1889286b..f26ec96e 100644 --- a/src/fastalloc/vregset.rs +++ b/src/fastalloc/vregset.rs @@ -1,8 +1,8 @@ use core::fmt; +use crate::{RegClass, VReg}; use alloc::vec; use alloc::vec::Vec; -use crate::{RegClass, VReg}; #[derive(Clone)] struct VRegNode { @@ -21,7 +21,14 @@ pub struct VRegSet { impl VRegSet { pub fn with_capacity(num_vregs: usize) -> Self { Self { - items: vec![VRegNode { prev: u32::MAX, next: u32::MAX, class: RegClass::Int }; num_vregs], + items: vec![ + VRegNode { + prev: u32::MAX, + next: u32::MAX, + class: RegClass::Int + }; + num_vregs + ], head: u32::MAX, } } diff --git a/src/lib.rs b/src/lib.rs index b6918e28..16e6faaa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -263,7 +263,7 @@ impl PRegSet { for i in 0..self.bits.len() { set[i] = !self.bits[i]; } - PRegSet{ bits: set } + PRegSet { bits: set } } pub fn is_empty(&self, regclass: RegClass) -> bool { From b94b4eaffcb77eef98102099974a2e42736a5108 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Mon, 2 Sep 2024 12:43:32 +0100 Subject: [PATCH 71/95] removed stack constraint support and reftype vreg checks --- src/fastalloc/iter.rs | 4 +-- src/fastalloc/mod.rs | 62 ++++++++---------------------------------- src/fastalloc/tests.rs | 8 ------ 3 files changed, 13 insertions(+), 61 deletions(-) diff --git a/src/fastalloc/iter.rs b/src/fastalloc/iter.rs index 91fee49d..dec74e71 100644 --- a/src/fastalloc/iter.rs +++ 
b/src/fastalloc/iter.rs @@ -31,9 +31,7 @@ impl<'a> Operands<'a> { } pub fn fixed(&self) -> impl Iterator + 'a { - self.matches(|op| { - matches!(op.constraint(), OperandConstraint::FixedReg(_)) - }) + self.matches(|op| matches!(op.constraint(), OperandConstraint::FixedReg(_))) } pub fn non_fixed_use(&self) -> impl Iterator + 'a { diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 4fca594d..09da2679 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -399,12 +399,7 @@ impl<'a, F: Function> Env<'a, F> { Ok(()) } - fn allocd_within_constraint( - &self, - inst: Inst, - op: Operand, - fixed_spillslot: Option, - ) -> bool { + fn allocd_within_constraint(&self, inst: Inst, op: Operand) -> bool { let alloc = self.vreg_allocs[op.vreg().vreg()]; let alloc_is_clobber = if let Some(preg) = alloc.as_reg() { self.func.inst_clobbers(inst).contains(preg) @@ -464,13 +459,6 @@ impl<'a, F: Function> Env<'a, F> { false } } - OperandConstraint::Stack => { - if let Some(slot) = fixed_spillslot { - alloc == Allocation::stack(slot) - } else { - self.is_stack(alloc) - } - } // It is possible for an operand to have a fixed register constraint to // a clobber. 
OperandConstraint::FixedReg(preg) => alloc.is_reg() && alloc.as_reg().unwrap() == preg, @@ -572,22 +560,10 @@ impl<'a, F: Function> Env<'a, F> { inst: Inst, op: Operand, op_idx: usize, - fixed_spillslot: Option, ) -> Result { let new_alloc = match op.constraint() { OperandConstraint::Any => self.alloc_reg_for_operand(inst, op)?, OperandConstraint::Reg => self.alloc_reg_for_operand(inst, op)?, - OperandConstraint::Stack => { - let slot = if let Some(spillslot) = fixed_spillslot { - spillslot - } else { - if self.vreg_spillslots[op.vreg().vreg()].is_invalid() { - self.vreg_spillslots[op.vreg().vreg()] = self.stack.allocstack(&op.vreg()); - } - self.vreg_spillslots[op.vreg().vreg()] - }; - Allocation::stack(slot) - } OperandConstraint::FixedReg(preg) => { trace!("The fixed preg: {} for operand {}", preg, op); @@ -611,7 +587,6 @@ impl<'a, F: Function> Env<'a, F> { inst: Inst, op: Operand, op_idx: usize, - fixed_spillslot: Option, ) -> Result<(), RegAllocError> { if let Some(preg) = op.as_fixed_nonallocatable() { self.allocs[(inst.index(), op_idx)] = Allocation::reg(preg); @@ -623,10 +598,10 @@ impl<'a, F: Function> Env<'a, F> { ); return Ok(()); } - if !self.allocd_within_constraint(inst, op, fixed_spillslot) { + if !self.allocd_within_constraint(inst, op) { trace!("{op} isn't allocated within constraints."); let curr_alloc = self.vreg_allocs[op.vreg().vreg()]; - let new_alloc = self.alloc_operand(inst, op, op_idx, fixed_spillslot)?; + let new_alloc = self.alloc_operand(inst, op, op_idx)?; if curr_alloc.is_none() { self.live_vregs.insert(op.vreg()); self.vreg_to_live_inst_range[op.vreg().vreg()].1 = match (op.pos(), op.kind()) { @@ -893,9 +868,6 @@ impl<'a, F: Function> Env<'a, F> { fn alloc_inst(&mut self, block: Block, inst: Inst) -> Result<(), RegAllocError> { trace!("Allocating instruction {:?}", inst); - if self.func.requires_refs_on_stack(inst) && !self.func.reftype_vregs().is_empty() { - panic!("Safepoint instructions aren't supported"); - } let operands = 
Operands::new(self.func.inst_operands(inst)); let clobbers = self.func.inst_clobbers(inst); @@ -1002,14 +974,9 @@ impl<'a, F: Function> Env<'a, F> { let new_reuse_op = Operand::new(op.vreg(), reused_op.constraint(), op.kind(), op.pos()); trace!("allocating reuse op {op} as {new_reuse_op}"); - self.process_operand_allocation(inst, new_reuse_op, op_idx, None)?; - if let Some(preg) = self.allocs[(inst.index(), op_idx)].as_reg() { - // The reused input is going to be processed as a fixed register for this - // preg. - self.available_pregs[OperandPos::Early].remove(preg); - } + self.process_operand_allocation(inst, new_reuse_op, op_idx)?; } else { - self.process_operand_allocation(inst, op, op_idx, None)?; + self.process_operand_allocation(inst, op, op_idx)?; } let slot = self.vreg_spillslots[op.vreg().vreg()]; if slot.is_valid() { @@ -1046,20 +1013,16 @@ impl<'a, F: Function> Env<'a, F> { if self.reused_input_to_reuse_op[op_idx] != usize::MAX { let reuse_op_idx = self.reused_input_to_reuse_op[op_idx]; let reuse_op_alloc = self.allocs[(inst.index(), reuse_op_idx)]; - let new_reused_input_constraint; - let mut fixed_slot = None; - if let Some(preg) = reuse_op_alloc.as_reg() { - new_reused_input_constraint = OperandConstraint::FixedReg(preg); - } else { - new_reused_input_constraint = OperandConstraint::Stack; - fixed_slot = Some(reuse_op_alloc.as_stack().unwrap()); - } + let Some(preg) = reuse_op_alloc.as_reg() else { + unreachable!(); + }; + let new_reused_input_constraint = OperandConstraint::FixedReg(preg); let new_reused_input = Operand::new(op.vreg(), new_reused_input_constraint, op.kind(), op.pos()); - trace!("Allocating reused input {op} as {new_reused_input}, (fixed spillslot: {fixed_slot:?})"); - self.process_operand_allocation(inst, new_reused_input, op_idx, fixed_slot)?; + trace!("Allocating reused input {op} as {new_reused_input}"); + self.process_operand_allocation(inst, new_reused_input, op_idx)?; } else { - self.process_operand_allocation(inst, op, op_idx, 
None)?; + self.process_operand_allocation(inst, op, op_idx)?; } } for (op_idx, op) in operands.use_ops() { @@ -1397,7 +1360,6 @@ pub fn run( inst_alloc_offsets: env.allocs.inst_alloc_offsets, num_spillslots: env.stack.num_spillslots as usize, debug_locations: env.debug_locations, - safepoint_slots: Vec::new(), stats: env.stats, }) } diff --git a/src/fastalloc/tests.rs b/src/fastalloc/tests.rs index e502c594..d3d1d8a8 100644 --- a/src/fastalloc/tests.rs +++ b/src/fastalloc/tests.rs @@ -301,14 +301,6 @@ impl Function for RealFunction { self.num_vregs } - fn reftype_vregs(&self) -> &[VReg] { - &[] - } - - fn requires_refs_on_stack(&self, _: Inst) -> bool { - false - } - fn spillslot_size(&self, regclass: crate::RegClass) -> usize { match regclass { RegClass::Int => 2, From 670062757b3af71624bc2f7c42b9bb56de6e4f48 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 5 Sep 2024 13:13:28 +0100 Subject: [PATCH 72/95] removed unnecessary clobber check in allocd_within_constraint --- src/fastalloc/mod.rs | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 09da2679..e584ea76 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -399,26 +399,10 @@ impl<'a, F: Function> Env<'a, F> { Ok(()) } - fn allocd_within_constraint(&self, inst: Inst, op: Operand) -> bool { + fn allocd_within_constraint(&self, op: Operand) -> bool { let alloc = self.vreg_allocs[op.vreg().vreg()]; - let alloc_is_clobber = if let Some(preg) = alloc.as_reg() { - self.func.inst_clobbers(inst).contains(preg) - } else { - false - }; match op.constraint() { OperandConstraint::Any => { - // Completely avoid assigning clobbers, if possible. - // Assigning a clobber to a def operand that lives past the - // current instruction makes it impossible to restore - // the vreg. 
- // And assigning a clobber to a use operand that is reused - // by a def operand with a reuse constraint will end up - // assigning the clobber to that def, and if it lives past - // the current instruction, then restoration will be impossible. - if alloc_is_clobber { - return false; - } if let Some(preg) = alloc.as_reg() { if !self.available_pregs[op.pos()].contains(preg) { // If a register isn't in the available pregs list, then @@ -442,9 +426,6 @@ impl<'a, F: Function> Env<'a, F> { } } OperandConstraint::Reg => { - if alloc_is_clobber { - return false; - } if self.is_stack(alloc) { return false; } @@ -598,7 +579,7 @@ impl<'a, F: Function> Env<'a, F> { ); return Ok(()); } - if !self.allocd_within_constraint(inst, op) { + if !self.allocd_within_constraint(op) { trace!("{op} isn't allocated within constraints."); let curr_alloc = self.vreg_allocs[op.vreg().vreg()]; let new_alloc = self.alloc_operand(inst, op, op_idx)?; From baae3caf6d4912658c31060202c6ef77d34ee64c Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 5 Sep 2024 13:58:46 +0100 Subject: [PATCH 73/95] simplified vregset --- src/fastalloc/vregset.rs | 110 ++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 66 deletions(-) diff --git a/src/fastalloc/vregset.rs b/src/fastalloc/vregset.rs index f26ec96e..bd44ffa8 100644 --- a/src/fastalloc/vregset.rs +++ b/src/fastalloc/vregset.rs @@ -1,21 +1,22 @@ use core::fmt; -use crate::{RegClass, VReg}; +use crate::ion::data_structures::VRegIndex; +use crate::VReg; use alloc::vec; use alloc::vec::Vec; #[derive(Clone)] struct VRegNode { - next: u32, - prev: u32, - class: RegClass, + next: VRegIndex, + prev: VRegIndex, + vreg: VReg, } // Using a non-circular doubly linked list here for fast insertion, // removal and iteration. 
pub struct VRegSet { items: Vec, - head: u32, + head: VRegIndex, } impl VRegSet { @@ -23,61 +24,43 @@ impl VRegSet { Self { items: vec![ VRegNode { - prev: u32::MAX, - next: u32::MAX, - class: RegClass::Int + prev: VRegIndex::new(num_vregs), + next: VRegIndex::new(num_vregs), + vreg: VReg::invalid() }; - num_vregs + num_vregs + 1 ], - head: u32::MAX, + head: VRegIndex::new(num_vregs), } } pub fn insert(&mut self, vreg: VReg) { // Intentionally assuming that the set doesn't already // contain `vreg`. - if self.head == u32::MAX { - self.items[vreg.vreg()] = VRegNode { - next: u32::MAX, - prev: u32::MAX, - class: vreg.class(), - }; - self.head = vreg.vreg() as u32; - } else { - let old_head_next = self.items[self.head as usize].next; - if old_head_next != u32::MAX { - self.items[old_head_next as usize].prev = vreg.vreg() as u32; - } - self.items[self.head as usize].next = vreg.vreg() as u32; - self.items[vreg.vreg()] = VRegNode { - next: old_head_next, - prev: self.head, - class: vreg.class(), - }; - } + let old_head_next = self.items[self.head.index()].next; + self.items[vreg.vreg()] = VRegNode { + next: old_head_next, + prev: self.head, + vreg, + }; + self.items[self.head.index()].next = VRegIndex::new(vreg.vreg()); + self.items[old_head_next.index()].prev = VRegIndex::new(vreg.vreg()); } pub fn remove(&mut self, vreg_num: usize) { let prev = self.items[vreg_num].prev; let next = self.items[vreg_num].next; - if prev != u32::MAX { - self.items[prev as usize].next = next; - } - if next != u32::MAX { - self.items[next as usize].prev = prev; - } - if vreg_num as u32 == self.head { - self.head = next; - } + self.items[prev.index()].next = next; + self.items[next.index()].prev = prev; } pub fn is_empty(&self) -> bool { - self.head == u32::MAX + self.items[self.head.index()].next == self.head } pub fn iter(&self) -> VRegSetIter { VRegSetIter { - curr_item: self.head, + curr_item: self.items[self.head.index()].next, head: self.head, items: &self.items, } @@ -85,8 +68,8 @@ 
impl VRegSet { } pub struct VRegSetIter<'a> { - curr_item: u32, - head: u32, + curr_item: VRegIndex, + head: VRegIndex, items: &'a [VRegNode], } @@ -94,11 +77,10 @@ impl<'a> Iterator for VRegSetIter<'a> { type Item = VReg; fn next(&mut self) -> Option { - if self.curr_item != u32::MAX { - let item = self.items[self.curr_item as usize].clone(); - let vreg = VReg::new(self.curr_item as usize, item.class); + if self.curr_item != self.head { + let item = self.items[self.curr_item.index()].clone(); self.curr_item = item.next; - Some(vreg) + Some(item.vreg) } else { None } @@ -118,39 +100,35 @@ impl fmt::Debug for VRegSet { #[cfg(test)] mod tests { use super::*; + use crate::RegClass; use RegClass::*; const VREG: fn(usize, RegClass) -> VReg = VReg::new; #[test] fn operations() { let mut set = VRegSet::with_capacity(3090); + assert!(set.is_empty()); set.insert(VREG(10, Int)); set.insert(VREG(2000, Int)); set.insert(VREG(11, Vector)); set.insert(VREG(199, Float)); set.insert(VREG(23, Int)); - let els = [ - VREG(10, Int), - VREG(23, Int), - VREG(199, Float), - VREG(11, Vector), - VREG(2000, Int), - ]; - for (actual_el, expected_el) in set.iter().zip(els.iter()) { - assert_eq!(actual_el, *expected_el); - } + let mut iter = set.iter(); + assert_eq!(iter.next(), Some(VREG(23, Int))); + assert_eq!(iter.next(), Some(VREG(199, Float))); + assert_eq!(iter.next(), Some(VREG(11, Vector))); + assert_eq!(iter.next(), Some(VREG(2000, Int))); + assert_eq!(iter.next(), Some(VREG(10, Int))); + set.remove(23); + set.remove(11); set.insert(VREG(73, Vector)); - let els = [ - VREG(10, Int), - VREG(73, Vector), - VREG(199, Float), - VREG(11, Vector), - VREG(2000, Int), - ]; - for (actual_el, expected_el) in set.iter().zip(els.iter()) { - assert_eq!(actual_el, *expected_el); - } + let mut iter = set.iter(); + assert_eq!(iter.next(), Some(VREG(73, Vector))); + assert_eq!(iter.next(), Some(VREG(199, Float))); + assert_eq!(iter.next(), Some(VREG(2000, Int))); + assert_eq!(iter.next(), 
Some(VREG(10, Int))); + assert!(!set.is_empty()); } #[test] From 004c19e4610e285f34014f9f4f383251200d8d4e Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 5 Sep 2024 14:05:10 +0100 Subject: [PATCH 74/95] simplify lru --- src/fastalloc/lru.rs | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/src/fastalloc/lru.rs b/src/fastalloc/lru.rs index cfb33538..6b16df4b 100644 --- a/src/fastalloc/lru.rs +++ b/src/fastalloc/lru.rs @@ -108,21 +108,7 @@ impl Lru { /// Get the last PReg in the LRU from the set `from`. pub fn last(&self, from: PRegSet) -> Option { trace!("Getting the last preg from the LRU in set {from}"); - if self.is_empty() { - panic!("LRU is empty"); - } - let mut last = self.data[self.head as usize].prev; - let init_last = last; - loop { - let preg = PReg::new(last as usize, self.regclass); - if from.contains(preg) { - return Some(preg); - } - last = self.data[last as usize].prev; - if last == init_last { - return None; - } - } + self.last_satisfying(|preg| from.contains(preg)) } /// Get the last PReg from the LRU for which `f` returns true. @@ -146,7 +132,7 @@ impl Lru { } /// Splices out a node from the list. - pub fn remove(&mut self, hw_enc: usize) { + fn remove(&mut self, hw_enc: usize) { trace!( "Before removing: {:?} LRU. 
head: {:?}, Actual data: {:?}", self.regclass, From a64ee575795dc86560628b9152bbb1df39b91512 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 5 Sep 2024 14:13:10 +0100 Subject: [PATCH 75/95] improved allocs initialization --- src/fastalloc/mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index e584ea76..69071136 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -30,17 +30,17 @@ struct Allocs { impl Allocs { fn new(func: &F) -> (Self, u32) { let operand_no_guess = func.num_insts() * 3; - let mut allocs = Vec::with_capacity(operand_no_guess); + let mut allocs = Vec::new(); let mut inst_alloc_offsets = Vec::with_capacity(operand_no_guess); let mut max_operand_len = 0; + let mut no_of_operands = 0; for inst in 0..func.num_insts() { let operands_len = func.inst_operands(Inst::new(inst)).len() as u32; max_operand_len = max_operand_len.max(operands_len); - inst_alloc_offsets.push(allocs.len() as u32); - for _ in 0..operands_len { - allocs.push(Allocation::none()); - } + inst_alloc_offsets.push(no_of_operands as u32); + no_of_operands += operands_len; } + allocs.resize(no_of_operands as usize, Allocation::none()); ( Self { allocs, From bb1855c26f32532d72376647f98bb6491a84a6c4 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 5 Sep 2024 14:23:00 +0100 Subject: [PATCH 76/95] added bitand and bitor impls for PRegSet --- src/lib.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 6f22acc9..60ff46ac 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -271,6 +271,26 @@ impl PRegSet { } } +impl core::ops::BitAnd for PRegSet { + type Output = PRegSet; + + fn bitand(self, rhs: PRegSet) -> Self::Output { + let mut out = self; + out.intersect_from(rhs); + out + } +} + +impl core::ops::BitOr for PRegSet { + type Output = PRegSet; + + fn bitor(self, rhs: PRegSet) -> Self::Output { + let mut out = self; + out.union_from(rhs); + out + } +} + impl 
IntoIterator for PRegSet { type Item = PReg; type IntoIter = PRegSetIter; From 8bd9a463e64c8224607b3b6dbd88619bf0aad510 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 5 Sep 2024 14:23:52 +0100 Subject: [PATCH 77/95] corrected available set issue with reg allocation from operands --- src/fastalloc/mod.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 69071136..ed194d0f 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -500,11 +500,20 @@ impl<'a, F: Function> Env<'a, F> { trace!("Float LRU: {:?}", self.lrus[RegClass::Float]); trace!("Vector LRU: {:?}", self.lrus[RegClass::Vector]); trace!(""); - if self.available_pregs[op.pos()].is_empty(op.class()) { - trace!("No registers available in class {:?}", op.class()); + let draw_from = match (op.pos(), op.kind()) { + (OperandPos::Late, OperandKind::Use) + | (OperandPos::Early, OperandKind::Def) + | (OperandPos::Late, OperandKind::Def) + if matches!(op.constraint(), OperandConstraint::Reuse(_)) => { + self.available_pregs[OperandPos::Late] & self.available_pregs[OperandPos::Early] + } + _ => self.available_pregs[op.pos()] + }; + if draw_from.is_empty(op.class()) { + trace!("No registers available for {op}"); return Err(RegAllocError::TooManyLiveRegs); } - let Some(preg) = self.lrus[op.class()].last(self.available_pregs[op.pos()]) else { + let Some(preg) = self.lrus[op.class()].last(draw_from) else { trace!( "Failed to find an available {:?} register in the LRU for operand {op}", op.class() From f464dd5cf84ac963f24cd0e0e6bc132fabc57da8 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 5 Sep 2024 15:01:40 +0100 Subject: [PATCH 78/95] removed unnecessary loop --- src/fastalloc/mod.rs | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index ed194d0f..3e3d4540 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -921,42 +921,6 @@ 
impl<'a, F: Function> Env<'a, F> { self.vreg_in_preg[preg.index()] = VReg::invalid(); } } - for (_, op) in operands.non_fixed_use() { - if op.as_fixed_nonallocatable().is_some() { - continue; - } - if let Some(preg) = self.vreg_allocs[op.vreg().vreg()].as_reg() { - trace!("Removing {op}'s current reg allocation {preg} from reg sets"); - // The current allocation, vreg_allocs[op.vreg], doesn't change, - // so it should be removed from the available reg sets to avoid - // allocating it to some other operand in the instruction. - // - // For example: - // 1. def v0 (reuse: 1), use v1, use v2 - // 2. use v1 (fixed: p0) - // - // When inst 1 is about to be processed, vreg_allocs[v1] will be p0. - // Suppose p1 is allocated to v0: this will create a fixed constraint for - // v1 and p1 will also be allocated to it. - // When it's time to process the v2 operand, vreg_allocs[v1] will still be p0 - // because it doesn't change (except by an explicit fixed reg constraint which - // will not be a problem here) and it's possible for v2 to get p0 as an allocation, - // which is wrong. That will lead to the following scenario: - // - // move from p0 to p1 // Inserted due to reuse constraints - // // (vreg_allocs[v1] == p0) - // 1. def v0 (reuse: 1), use v1, use v2 // v0: p1, v1: p1, v2: p0 - // move from stack_v0 to p0 // Eviction here because v0 is still in p0 when - // // v2's processing picked p0 from available regs - // 2. use v1 (fixed: p0) - // - // To avoid this scenario, the register is removed from the available set. 
- self.available_pregs[op.pos()].remove(preg); - if let (OperandPos::Late, OperandKind::Use) = (op.pos(), op.kind()) { - self.available_pregs[OperandPos::Early].remove(preg); - } - } - } for (op_idx, op) in operands.def_ops() { trace!("Allocating def operands {op}"); if let OperandConstraint::Reuse(reused_idx) = op.constraint() { From 05b7dd1a284cebe538026a9d4c63a4ff71f292a3 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 5 Sep 2024 15:02:45 +0100 Subject: [PATCH 79/95] removed unnecessary check in process_operand_allocation --- src/fastalloc/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 3e3d4540..c7f782dc 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -616,7 +616,7 @@ impl<'a, F: Function> Env<'a, F> { // Need to insert a move to propagate flow from the current // allocation to the subsequent places where the value was // used (in `prev_alloc`, that is). - else if curr_alloc.is_some() { + else { trace!("Move reason: Prev allocation doesn't meet constraints"); if self.is_stack(new_alloc) && self.is_stack(curr_alloc) From e5592d844c8f1bfb457df00b19a1da3b113c3b8b Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 5 Sep 2024 15:11:40 +0100 Subject: [PATCH 80/95] fixed problem with scratch register evictions --- src/fastalloc/mod.rs | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index c7f782dc..113b3da4 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -350,12 +350,16 @@ impl<'a, F: Function> Env<'a, F> { self.edits.scratch_regs = self.edits.dedicated_scratch_regs.clone(); } - fn get_scratch_reg(&self, class: RegClass) -> Result { + fn get_scratch_reg(&mut self, inst: Inst, class: RegClass) -> Result { let mut avail_regs = self.available_pregs[OperandPos::Early]; avail_regs.intersect_from(self.available_pregs[OperandPos::Late]); - self.lrus[class] - .last(avail_regs) - 
.ok_or(RegAllocError::TooManyLiveRegs) + let Some(preg) = self.lrus[class].last(avail_regs) else { + return Err(RegAllocError::TooManyLiveRegs); + }; + if self.vreg_in_preg[preg.index()] != VReg::invalid() { + self.evict_vreg_in_preg(inst, preg); + } + Ok(preg) } fn reserve_reg_for_fixed_operand( @@ -622,7 +626,7 @@ impl<'a, F: Function> Env<'a, F> { && self.is_stack(curr_alloc) && self.edits.scratch_regs[op.class()].is_none() { - let reg = self.get_scratch_reg(op.class())?; + let reg = self.get_scratch_reg(inst, op.class())?; self.edits.scratch_regs[op.class()] = Some(reg); self.available_pregs[OperandPos::Early].remove(reg); self.available_pregs[OperandPos::Late].remove(reg); @@ -764,7 +768,7 @@ impl<'a, F: Function> Env<'a, F> { next_temp_idx[vreg.class()] += 1; trace!(" Branch arg {vreg} from {temp} to {param_alloc}"); if self.edits.scratch_regs[vreg.class()].is_none() { - let reg = self.get_scratch_reg(vreg.class())?; + let reg = self.get_scratch_reg(inst, vreg.class())?; // No need to remove the scratch register from the available reg sets // because branches are processed last. 
self.edits.scratch_regs[vreg.class()] = Some(reg); @@ -893,7 +897,7 @@ impl<'a, F: Function> Env<'a, F> { if self.fixed_stack_slots.contains(preg) && self.edits.scratch_regs[preg.class()].is_none() { - let reg = self.get_scratch_reg(preg.class())?; + let reg = self.get_scratch_reg(inst, preg.class())?; self.edits.scratch_regs[preg.class()] = Some(reg); self.available_pregs[OperandPos::Early].remove(reg); self.available_pregs[OperandPos::Late].remove(reg); @@ -912,7 +916,7 @@ impl<'a, F: Function> Env<'a, F> { if self.fixed_stack_slots.contains(preg) && self.edits.scratch_regs[preg.class()].is_none() { - let reg = self.get_scratch_reg(preg.class())?; + let reg = self.get_scratch_reg(inst, preg.class())?; self.edits.scratch_regs[preg.class()] = Some(reg); self.available_pregs[OperandPos::Early].remove(reg); self.available_pregs[OperandPos::Late].remove(reg); @@ -945,7 +949,7 @@ impl<'a, F: Function> Env<'a, F> { }; if !src_and_dest_are_same { if is_stack_to_stack && self.edits.scratch_regs[op.class()].is_none() { - let reg = self.get_scratch_reg(op.class())?; + let reg = self.get_scratch_reg(inst, op.class())?; self.edits.scratch_regs[op.class()] = Some(reg); self.available_pregs[OperandPos::Early].remove(reg); self.available_pregs[OperandPos::Late].remove(reg); @@ -1074,7 +1078,7 @@ impl<'a, F: Function> Env<'a, F> { vreg ); if self.is_stack(prev_alloc) && self.edits.scratch_regs[vreg.class()].is_none() { - let reg = self.get_scratch_reg(vreg.class())?; + let reg = self.get_scratch_reg(first_inst, vreg.class())?; self.edits.scratch_regs[vreg.class()] = Some(reg); } self.edits.add_move( From 21ec7f806d64e1d2de7f5593badc9f42f5d184f6 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 5 Sep 2024 16:54:35 +0100 Subject: [PATCH 81/95] removed unnecessary operand iter function --- src/fastalloc/iter.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/fastalloc/iter.rs b/src/fastalloc/iter.rs index dec74e71..d1437559 100644 --- a/src/fastalloc/iter.rs +++ 
b/src/fastalloc/iter.rs @@ -33,13 +33,6 @@ impl<'a> Operands<'a> { pub fn fixed(&self) -> impl Iterator + 'a { self.matches(|op| matches!(op.constraint(), OperandConstraint::FixedReg(_))) } - - pub fn non_fixed_use(&self) -> impl Iterator + 'a { - self.matches(|op| { - !matches!(op.constraint(), OperandConstraint::FixedReg(_)) - && op.kind() == OperandKind::Use - }) - } } impl<'a> core::ops::Index for Operands<'a> { From 0fd7b52683d5318e01c996d4f91e7a2ca7e1a0cf Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 5 Sep 2024 17:14:22 +0100 Subject: [PATCH 82/95] removed unnecessary operand num guess in allocs initialization; refactored scratch reg allocation code --- src/fastalloc/mod.rs | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 113b3da4..01c8619c 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -29,9 +29,8 @@ struct Allocs { impl Allocs { fn new(func: &F) -> (Self, u32) { - let operand_no_guess = func.num_insts() * 3; let mut allocs = Vec::new(); - let mut inst_alloc_offsets = Vec::with_capacity(operand_no_guess); + let mut inst_alloc_offsets = Vec::with_capacity(func.num_insts()); let mut max_operand_len = 0; let mut no_of_operands = 0; for inst in 0..func.num_insts() { @@ -350,6 +349,14 @@ impl<'a, F: Function> Env<'a, F> { self.edits.scratch_regs = self.edits.dedicated_scratch_regs.clone(); } + fn alloc_scratch_reg(&mut self, inst: Inst, class: RegClass) -> Result<(), RegAllocError> { + let reg = self.get_scratch_reg(inst, class)?; + self.edits.scratch_regs[class] = Some(reg); + self.available_pregs[OperandPos::Early].remove(reg); + self.available_pregs[OperandPos::Late].remove(reg); + Ok(()) + } + fn get_scratch_reg(&mut self, inst: Inst, class: RegClass) -> Result { let mut avail_regs = self.available_pregs[OperandPos::Early]; avail_regs.intersect_from(self.available_pregs[OperandPos::Late]); @@ -626,10 +633,7 @@ impl<'a, F: Function> 
Env<'a, F> { && self.is_stack(curr_alloc) && self.edits.scratch_regs[op.class()].is_none() { - let reg = self.get_scratch_reg(inst, op.class())?; - self.edits.scratch_regs[op.class()] = Some(reg); - self.available_pregs[OperandPos::Early].remove(reg); - self.available_pregs[OperandPos::Late].remove(reg); + self.alloc_scratch_reg(inst, op.class())?; } if op.kind() == OperandKind::Def { trace!("Adding edit from {new_alloc:?} to {curr_alloc:?} after inst {inst:?} for {op}"); @@ -897,10 +901,7 @@ impl<'a, F: Function> Env<'a, F> { if self.fixed_stack_slots.contains(preg) && self.edits.scratch_regs[preg.class()].is_none() { - let reg = self.get_scratch_reg(inst, preg.class())?; - self.edits.scratch_regs[preg.class()] = Some(reg); - self.available_pregs[OperandPos::Early].remove(reg); - self.available_pregs[OperandPos::Late].remove(reg); + self.alloc_scratch_reg(inst, preg.class())?; } self.evict_vreg_in_preg(inst, preg); self.vreg_in_preg[preg.index()] = VReg::invalid(); @@ -916,10 +917,7 @@ impl<'a, F: Function> Env<'a, F> { if self.fixed_stack_slots.contains(preg) && self.edits.scratch_regs[preg.class()].is_none() { - let reg = self.get_scratch_reg(inst, preg.class())?; - self.edits.scratch_regs[preg.class()] = Some(reg); - self.available_pregs[OperandPos::Early].remove(reg); - self.available_pregs[OperandPos::Late].remove(reg); + self.alloc_scratch_reg(inst, preg.class())?; } self.evict_vreg_in_preg(inst, preg); self.vreg_in_preg[preg.index()] = VReg::invalid(); @@ -949,10 +947,7 @@ impl<'a, F: Function> Env<'a, F> { }; if !src_and_dest_are_same { if is_stack_to_stack && self.edits.scratch_regs[op.class()].is_none() { - let reg = self.get_scratch_reg(inst, op.class())?; - self.edits.scratch_regs[op.class()] = Some(reg); - self.available_pregs[OperandPos::Early].remove(reg); - self.available_pregs[OperandPos::Late].remove(reg); + self.alloc_scratch_reg(inst, op.class())?; }; self.edits.add_move( inst, From b6094fe10c8915771cd3c096450f4077eae73b2e Mon Sep 17 00:00:00 
2001 From: d-sonuga Date: Thu, 5 Sep 2024 17:16:00 +0100 Subject: [PATCH 83/95] updated outdated vregset comment --- src/fastalloc/vregset.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fastalloc/vregset.rs b/src/fastalloc/vregset.rs index bd44ffa8..af6474cd 100644 --- a/src/fastalloc/vregset.rs +++ b/src/fastalloc/vregset.rs @@ -12,7 +12,7 @@ struct VRegNode { vreg: VReg, } -// Using a non-circular doubly linked list here for fast insertion, +// Using a doubly linked list here for fast insertion, // removal and iteration. pub struct VRegSet { items: Vec, From 8df9209eb91e4e62af8bd414bb11c0e2e4caa0c1 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 5 Sep 2024 22:03:36 +0100 Subject: [PATCH 84/95] process_branch now uses parallel move resolver --- src/fastalloc/lru.rs | 18 +++- src/fastalloc/mod.rs | 209 +++++++++++++++++-------------------------- 2 files changed, 97 insertions(+), 130 deletions(-) diff --git a/src/fastalloc/lru.rs b/src/fastalloc/lru.rs index 6b16df4b..a645d9b1 100644 --- a/src/fastalloc/lru.rs +++ b/src/fastalloc/lru.rs @@ -270,12 +270,12 @@ impl fmt::Debug for Lru { } } -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct PartedByRegClass { pub items: [T; 3], } -impl Index for PartedByRegClass { +impl Index for PartedByRegClass { type Output = T; fn index(&self, index: RegClass) -> &Self::Output { @@ -283,7 +283,7 @@ impl Index for PartedByRegClass { } } -impl IndexMut for PartedByRegClass { +impl IndexMut for PartedByRegClass { fn index_mut(&mut self, index: RegClass) -> &mut Self::Output { &mut self.items[index as usize] } @@ -306,7 +306,7 @@ impl Lrus { use core::fmt::{Debug, Display}; -impl Display for PartedByRegClass { +impl Display for PartedByRegClass { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, @@ -315,3 +315,13 @@ impl Display for PartedByRegClass { ) } } + +impl Debug for PartedByRegClass { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{{ int: 
{:?}, float: {:?}, vector: {:?} }}", + self.items[0], self.items[1], self.items[2] + ) + } +} diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 01c8619c..e80c28c5 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -1,3 +1,4 @@ +use crate::moves::{MoveAndScratchResolver, ParallelMoves}; use crate::{cfg::CFGInfo, ion::Stats, Allocation, RegAllocError}; use crate::{ssa::validate_ssa, Edit, Function, MachineEnv, Output, ProgPoint}; use crate::{ @@ -80,8 +81,8 @@ impl<'a, F: Function> Stack<'a, F> { } /// Allocates a spill slot on the stack for `vreg` - fn allocstack(&mut self, vreg: &VReg) -> SpillSlot { - let size: u32 = self.func.spillslot_size(vreg.class()).try_into().unwrap(); + fn allocstack(&mut self, class: RegClass) -> SpillSlot { + let size: u32 = self.func.spillslot_size(class).try_into().unwrap(); // Rest of this function was copied verbatim // from `Env::allocate_spillslot` in src/ion/spill.rs. let mut offset = self.num_spillslots; @@ -224,8 +225,6 @@ pub struct Env<'a, F: Function> { /// `vreg_in_preg[i]` is the virtual register currently in the physical register /// with index `i`. vreg_in_preg: Vec, - /// For parallel moves from branch args to block param spillslots. - temp_spillslots: PartedByRegClass>, /// `reused_input_to_reuse_op[i]` is the operand index of the reuse operand /// that uses the `i`th operand in the current instruction as its input. 
reused_input_to_reuse_op: Vec, @@ -237,6 +236,7 @@ pub struct Env<'a, F: Function> { init_available_pregs: PRegSet, allocatable_regs: PRegSet, stack: Stack<'a, F>, + preferred_victim: PartedByRegClass, vreg_to_live_inst_range: Vec<(ProgPoint, ProgPoint, Allocation)>, fixed_stack_slots: PRegSet, @@ -307,13 +307,11 @@ impl<'a, F: Function> Env<'a, F> { ); func.num_vregs() ], - temp_spillslots: PartedByRegClass { - items: [ - Vec::with_capacity(func.num_vregs()), - Vec::with_capacity(func.num_vregs()), - Vec::with_capacity(func.num_vregs()), - ], - }, + preferred_victim: PartedByRegClass { items: [ + regs[0].last().cloned().unwrap_or(PReg::invalid()), + regs[1].last().cloned().unwrap_or(PReg::invalid()), + regs[2].last().cloned().unwrap_or(PReg::invalid()), + ] }, reused_input_to_reuse_op: vec![usize::MAX; max_operand_len as usize], init_available_pregs, available_pregs: PartedByOperandPos { @@ -466,7 +464,7 @@ impl<'a, F: Function> Env<'a, F> { trace!("The removed vreg: {}", evicted_vreg); debug_assert_ne!(evicted_vreg, VReg::invalid()); if self.vreg_spillslots[evicted_vreg.vreg()].is_invalid() { - self.vreg_spillslots[evicted_vreg.vreg()] = self.stack.allocstack(&evicted_vreg); + self.vreg_spillslots[evicted_vreg.vreg()] = self.stack.allocstack(evicted_vreg.class()); } let slot = self.vreg_spillslots[evicted_vreg.vreg()]; self.vreg_allocs[evicted_vreg.vreg()] = Allocation::stack(slot); @@ -718,132 +716,35 @@ impl<'a, F: Function> Env<'a, F> { /// If instruction `inst` is a branch in `block`, /// this function places branch arguments in the spillslots /// expected by the destination blocks. - /// - /// The process used to do this is as follows: - /// - /// 1. Move all branch arguments into corresponding temporary spillslots. - /// 2. Move values from the temporary spillslots to corresponding block param spillslots. 
- /// - /// These temporaries are used because the moves have to be parallel in the case where - /// a block parameter of the successor block is a branch argument. fn process_branch(&mut self, block: Block, inst: Inst) -> Result<(), RegAllocError> { - // Used to know which temporary spillslot should be used next. - let mut next_temp_idx = PartedByRegClass { items: [0, 0, 0] }; - - fn reset_temp_idx(next_temp_idx: &mut PartedByRegClass) { - next_temp_idx[RegClass::Int] = 0; - next_temp_idx[RegClass::Float] = 0; - next_temp_idx[RegClass::Vector] = 0; - } + trace!("Processing branch instruction {inst:?} in block {block:?}"); - // In the case where the block param of a successor is also a branch arg, - // the reading of all the block params must be done before the writing. - // This is necessary to prevent overwriting the branch arg's value before - // placing it in the corresponding branch param spillslot. + let mut int_parallel_moves = ParallelMoves::new(); + let mut float_parallel_moves = ParallelMoves::new(); + let mut vec_parallel_moves = ParallelMoves::new(); - trace!("Adding temp to block params spillslots for branch args"); for (succ_idx, succ) in self.func.block_succs(block).iter().enumerate() { - let succ_params = self.func.block_params(*succ); - - // Move from temporaries to block param spillslots. 
for (pos, vreg) in self .func .branch_blockparams(block, inst, succ_idx) .iter() .enumerate() { - if self.temp_spillslots[vreg.class()].len() == next_temp_idx[vreg.class()] { - let newslot = self.stack.allocstack(vreg); - self.temp_spillslots[vreg.class()].push(newslot); - } + let succ_params = self.func.block_params(*succ); let succ_param_vreg = succ_params[pos]; if self.vreg_spillslots[succ_param_vreg.vreg()].is_invalid() { self.vreg_spillslots[succ_param_vreg.vreg()] = - self.stack.allocstack(&succ_param_vreg); + self.stack.allocstack(succ_param_vreg.class()); trace!( "Block param {} is in {}", vreg, Allocation::stack(self.vreg_spillslots[vreg.vreg()]) ); } - let param_alloc = Allocation::stack(self.vreg_spillslots[succ_param_vreg.vreg()]); - let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; - let temp = Allocation::stack(temp_slot); - next_temp_idx[vreg.class()] += 1; - trace!(" Branch arg {vreg} from {temp} to {param_alloc}"); - if self.edits.scratch_regs[vreg.class()].is_none() { - let reg = self.get_scratch_reg(inst, vreg.class())?; - // No need to remove the scratch register from the available reg sets - // because branches are processed last. - self.edits.scratch_regs[vreg.class()] = Some(reg); - } - self.edits - .add_move(inst, temp, param_alloc, vreg.class(), InstPosition::Before); - } - } - - reset_temp_idx(&mut next_temp_idx); - - for (succ_idx, _) in self.func.block_succs(block).iter().enumerate() { - // Move from branch args spillslots to temporaries. - // - // Consider a scenario: - // - // block entry: - // goto Y(...) - // - // block Y(vp) - // goto X - // - // block X - // use vp - // goto Y(va) - // - // block X branches to block Y and block Y branches to block X. - // Block Y has block param vp and block X uses virtual register va as the branch arg for vp. - // Block X has an instruction that uses vp. 
- // In the case where branch arg va is defined in a predecessor, there is a possibility - // that, at the beginning of the block, during the reload, that va will always overwrite vp. - // This could happen because at the end of the block, va is allocated to be in vp's - // spillslot. If va isn't used throughout the block (or if all its use constraints allow it to be - // in vp's spillslot), then during reload, it will still be allocated to vp's spillslot. - // This will mean that at the beginning of the block, both va and vp will be expected to be - // in vp's spillslot. An edit will be inserted to move from va's spillslot to vp's. - // And depending on the constraints of vp's use, an edit may or may not be inserted to move - // from vp's spillslot to somewhere else. - // Either way, the correctness of the dataflow will depend on the order of edits. - // If vp is required in be on the stack, then no edit will be inserted for it (it's already on - // the stack, in its spillslot). But an edit will be inserted to move from va's spillslot - // to vp's. - // If block Y has other predecessors that define vp to be other values, then this dataflow - // is clearly wrong. - // - // To avoid this scenario, branch args are placed into their own spillslots here - // so that if they aren't moved at all throughout the block, they will not be expected to - // be in another vreg's spillslot at the block beginning. 
- for vreg in self.func.branch_blockparams(block, inst, succ_idx).iter() { if self.vreg_spillslots[vreg.vreg()].is_invalid() { - self.vreg_spillslots[vreg.vreg()] = self.stack.allocstack(vreg); - trace!( - "Block arg {} is going to be in {}", - vreg, - Allocation::stack(self.vreg_spillslots[vreg.vreg()]) - ); + self.vreg_spillslots[vreg.vreg()] = self.stack.allocstack(vreg.class()); } - let temp_slot = self.temp_spillslots[vreg.class()][next_temp_idx[vreg.class()]]; - let temp = Allocation::stack(temp_slot); - next_temp_idx[vreg.class()] += 1; let vreg_spill = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); - trace!( - "{} which is going to be in {} inserting move to {}", - vreg, - vreg_spill, - temp - ); - - self.edits - .add_move(inst, vreg_spill, temp, vreg.class(), InstPosition::Before); - // All branch arguments should be in their spillslots at the end of the function. if self.vreg_allocs[vreg.vreg()].is_none() { self.live_vregs.insert(*vreg); let slot = self.vreg_spillslots[vreg.vreg()]; @@ -852,15 +753,78 @@ impl<'a, F: Function> Env<'a, F> { } else if self.vreg_allocs[vreg.vreg()] != vreg_spill { self.edits.add_move( inst, - self.vreg_allocs[vreg.vreg()], vreg_spill, + self.vreg_allocs[vreg.vreg()], vreg.class(), InstPosition::Before, ); + self.vreg_allocs[vreg.vreg()] = vreg_spill; } + let parallel_moves = match vreg.class() { + RegClass::Int => &mut int_parallel_moves, + RegClass::Float => &mut float_parallel_moves, + RegClass::Vector => &mut vec_parallel_moves, + }; + parallel_moves.add( + Allocation::stack(self.vreg_spillslots[vreg.vreg()]), + Allocation::stack(self.vreg_spillslots[succ_param_vreg.vreg()]), + Some(*vreg), + ); } } + let resolved_int = int_parallel_moves.resolve(); + let resolved_float = float_parallel_moves.resolve(); + let resolved_vec = vec_parallel_moves.resolve(); + let mut new_scratch_reg = PartedByRegClass { items: [None; 3] }; + let mut num_spillslots = self.stack.num_spillslots; + + for (resolved, class) in [ + 
(resolved_int, RegClass::Int), + (resolved_float, RegClass::Float), + (resolved_vec, RegClass::Vector) + ] { + let scratch_resolver = MoveAndScratchResolver { + find_free_reg: || { + if let Some(reg) = self.edits.scratch_regs[class] { + Some(Allocation::reg(reg)) + } else if let Some(reg) = new_scratch_reg[class] { + Some(Allocation::reg(reg)) + } else { + use OperandPos::*; + let avail_regs = self.available_pregs[Early] & self.available_pregs[Late]; + let Some(preg) = self.lrus[class].last(avail_regs) else { + return None; + }; + new_scratch_reg[RegClass::Int] = Some(preg); + Some(Allocation::reg(preg)) + } + }, + get_stackslot: || { + let size: u32 = self.func.spillslot_size(class).try_into().unwrap(); + let mut offset = num_spillslots; + debug_assert!(size.is_power_of_two()); + offset = (offset + size - 1) & !(size - 1); + let slot = if self.func.multi_spillslot_named_by_last_slot() { + offset + size - 1 + } else { + offset + }; + offset += size; + num_spillslots = offset; + Allocation::stack(SpillSlot::new(slot as usize)) + }, + is_stack_alloc: |alloc| { + self.is_stack(alloc) + }, + borrowed_scratch_reg: self.preferred_victim[class], + }; + let moves = scratch_resolver.compute(resolved); + for (from, to, _) in moves.into_iter().rev() { + self.edits.edits.push((ProgPoint::before(inst), Edit::Move { from, to })) + } + self.stack.num_spillslots = num_spillslots; + } Ok(()) } @@ -1045,7 +1009,7 @@ impl<'a, F: Function> Env<'a, F> { continue; } if self.vreg_spillslots[vreg.vreg()].is_invalid() { - self.vreg_spillslots[vreg.vreg()] = self.stack.allocstack(&vreg); + self.vreg_spillslots[vreg.vreg()] = self.stack.allocstack(vreg.class()); } // The allocation where the vreg is expected to be before // the first instruction. 
@@ -1091,7 +1055,7 @@ impl<'a, F: Function> Env<'a, F> { vreg ); if self.vreg_spillslots[vreg.vreg()].is_invalid() { - self.vreg_spillslots[vreg.vreg()] = self.stack.allocstack(&vreg); + self.vreg_spillslots[vreg.vreg()] = self.stack.allocstack(vreg.class()); } // The allocation where the vreg is expected to be before // the first instruction. @@ -1274,14 +1238,7 @@ fn log_output<'a, F: Function>(env: &Env<'a, F>) { )); } } - let mut temp_slots = Vec::new(); - for class in [RegClass::Int, RegClass::Float, RegClass::Vector] { - for slot in env.temp_spillslots[class].iter() { - temp_slots.push(format!("{slot}")); - } - } trace!("VReg spillslots: {:?}", v); - trace!("Temp spillslots: {:?}", temp_slots); trace!("Final edits: {:?}", env.edits.edits); } From 7af8283e465f417e7acebcb96ccb98e92dcce925 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Thu, 5 Sep 2024 22:16:36 +0100 Subject: [PATCH 85/95] formatting --- src/fastalloc/mod.rs | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index e80c28c5..1b7675ac 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -307,11 +307,13 @@ impl<'a, F: Function> Env<'a, F> { ); func.num_vregs() ], - preferred_victim: PartedByRegClass { items: [ - regs[0].last().cloned().unwrap_or(PReg::invalid()), - regs[1].last().cloned().unwrap_or(PReg::invalid()), - regs[2].last().cloned().unwrap_or(PReg::invalid()), - ] }, + preferred_victim: PartedByRegClass { + items: [ + regs[0].last().cloned().unwrap_or(PReg::invalid()), + regs[1].last().cloned().unwrap_or(PReg::invalid()), + regs[2].last().cloned().unwrap_or(PReg::invalid()), + ], + }, reused_input_to_reuse_op: vec![usize::MAX; max_operand_len as usize], init_available_pregs, available_pregs: PartedByOperandPos { @@ -511,12 +513,13 @@ impl<'a, F: Function> Env<'a, F> { trace!(""); let draw_from = match (op.pos(), op.kind()) { (OperandPos::Late, OperandKind::Use) - | (OperandPos::Early, 
OperandKind::Def) - | (OperandPos::Late, OperandKind::Def) - if matches!(op.constraint(), OperandConstraint::Reuse(_)) => { + | (OperandPos::Early, OperandKind::Def) + | (OperandPos::Late, OperandKind::Def) + if matches!(op.constraint(), OperandConstraint::Reuse(_)) => + { self.available_pregs[OperandPos::Late] & self.available_pregs[OperandPos::Early] } - _ => self.available_pregs[op.pos()] + _ => self.available_pregs[op.pos()], }; if draw_from.is_empty(op.class()) { trace!("No registers available for {op}"); @@ -782,7 +785,7 @@ impl<'a, F: Function> Env<'a, F> { for (resolved, class) in [ (resolved_int, RegClass::Int), (resolved_float, RegClass::Float), - (resolved_vec, RegClass::Vector) + (resolved_vec, RegClass::Vector), ] { let scratch_resolver = MoveAndScratchResolver { find_free_reg: || { @@ -814,14 +817,14 @@ impl<'a, F: Function> Env<'a, F> { num_spillslots = offset; Allocation::stack(SpillSlot::new(slot as usize)) }, - is_stack_alloc: |alloc| { - self.is_stack(alloc) - }, + is_stack_alloc: |alloc| self.is_stack(alloc), borrowed_scratch_reg: self.preferred_victim[class], }; let moves = scratch_resolver.compute(resolved); for (from, to, _) in moves.into_iter().rev() { - self.edits.edits.push((ProgPoint::before(inst), Edit::Move { from, to })) + self.edits + .edits + .push((ProgPoint::before(inst), Edit::Move { from, to })) } self.stack.num_spillslots = num_spillslots; } From f2f2daa644aac58dea25bba54530a4b6db1694d9 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Fri, 6 Sep 2024 12:13:30 +0100 Subject: [PATCH 86/95] added loop in alloc_inst to correct scratch reg alloc issue --- src/fastalloc/iter.rs | 4 ++++ src/fastalloc/mod.rs | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/src/fastalloc/iter.rs b/src/fastalloc/iter.rs index d1437559..b4cedf04 100644 --- a/src/fastalloc/iter.rs +++ b/src/fastalloc/iter.rs @@ -33,6 +33,10 @@ impl<'a> Operands<'a> { pub fn fixed(&self) -> impl Iterator + 'a { self.matches(|op| 
matches!(op.constraint(), OperandConstraint::FixedReg(_))) } + + pub fn non_fixed(&self) -> impl Iterator + 'a { + self.matches(|op| !matches!(op.constraint(), OperandConstraint::FixedReg(_))) + } } impl<'a> core::ops::Index for Operands<'a> { diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 1b7675ac..b3f5ada4 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -852,6 +852,47 @@ impl<'a, F: Function> Env<'a, F> { self.lrus[preg.class()].poke(preg); } } + for (_, op) in operands.non_fixed() { + if op.as_fixed_nonallocatable().is_some() { + continue; + } + if let Some(preg) = self.vreg_allocs[op.vreg().vreg()].as_reg() { + trace!("Removing {op}'s current reg allocation {preg} from reg sets"); + // The current allocation, vreg_allocs[op.vreg], doesn't change, + // so it should be removed from the available reg sets to avoid + // allocating it as scratch. + // + // For example: + // + // 1. def v0 (fixed: p23), use v1 (reg) + // 2. use v1 (fixed: p0) + // v0 is in stack_v0 and v1 is in stack_v1 + // + // Suppose p23 is a fixed stack slot. Then an edit will need to + // be inserted after inst 1 to move from stack_v0 to p23 and + // a scratch register is needed. It is possible for p0 to be used + // as scratch since it hasn't been removed from the available regsets. + // If it is used as scratch, then we'll have: + // + // 1. def v0 (fixed: p23), use v1 (reg) + // move from stack_v1 to p0 // v1 is evicted + // move from p23 to p0 // v1 is overwritten by v0 + // move from p0 to stack_v0 + // 2. use v1 (fixed: p0) // v0 is used instead of v1 + // + // To avoid this scenario, the register is removed from the available set. 
+ self.available_pregs[op.pos()].remove(preg); + match (op.pos(), op.kind()) { + (OperandPos::Late, OperandKind::Use) => { + self.available_pregs[OperandPos::Early].remove(preg) + } + (OperandPos::Early, OperandKind::Def) => { + self.available_pregs[OperandPos::Late].remove(preg); + } + _ => () + }; + } + } for (_, op) in operands.fixed() { let OperandConstraint::FixedReg(preg) = op.constraint() else { unreachable!(); From 5c7c307dfbd6faa85254ddfecad1488881f1e6a9 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Fri, 6 Sep 2024 13:13:58 +0100 Subject: [PATCH 87/95] fixed issue with scratch register handling in reload_at_begin --- src/fastalloc/mod.rs | 89 +++++++++++++++++++++++++++++++++----------- 1 file changed, 67 insertions(+), 22 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index b3f5ada4..ee7ced95 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -350,16 +350,29 @@ impl<'a, F: Function> Env<'a, F> { } fn alloc_scratch_reg(&mut self, inst: Inst, class: RegClass) -> Result<(), RegAllocError> { - let reg = self.get_scratch_reg(inst, class)?; + use OperandPos::{Late, Early}; + let reg = self.get_scratch_reg( + inst, + class, + self.available_pregs[Late] & self.available_pregs[Early] + )?; self.edits.scratch_regs[class] = Some(reg); self.available_pregs[OperandPos::Early].remove(reg); self.available_pregs[OperandPos::Late].remove(reg); Ok(()) } - fn get_scratch_reg(&mut self, inst: Inst, class: RegClass) -> Result { - let mut avail_regs = self.available_pregs[OperandPos::Early]; - avail_regs.intersect_from(self.available_pregs[OperandPos::Late]); + fn get_scratch_reg_for_reload(&mut self, inst: Inst, class: RegClass, avail_regs: PRegSet) -> Result { + let Some(preg) = self.lrus[class].last(avail_regs) else { + return Err(RegAllocError::TooManyLiveRegs); + }; + if self.vreg_in_preg[preg.index()] != VReg::invalid() { + self.evict_vreg_in_preg_before_inst(inst, preg); + } + Ok(preg) + } + + fn get_scratch_reg(&mut self, inst: Inst, 
class: RegClass, avail_regs: PRegSet) -> Result { let Some(preg) = self.lrus[class].last(avail_regs) else { return Err(RegAllocError::TooManyLiveRegs); }; @@ -460,6 +473,26 @@ impl<'a, F: Function> Env<'a, F> { } } + fn evict_vreg_in_preg_before_inst(&mut self, inst: Inst, preg: PReg) { + trace!("Removing the vreg in preg {} for eviction", preg); + let evicted_vreg = self.vreg_in_preg[preg.index()]; + trace!("The removed vreg: {}", evicted_vreg); + debug_assert_ne!(evicted_vreg, VReg::invalid()); + if self.vreg_spillslots[evicted_vreg.vreg()].is_invalid() { + self.vreg_spillslots[evicted_vreg.vreg()] = self.stack.allocstack(evicted_vreg.class()); + } + let slot = self.vreg_spillslots[evicted_vreg.vreg()]; + self.vreg_allocs[evicted_vreg.vreg()] = Allocation::stack(slot); + trace!("Move reason: eviction"); + self.edits.add_move( + inst, + self.vreg_allocs[evicted_vreg.vreg()], + Allocation::reg(preg), + evicted_vreg.class(), + InstPosition::Before, + ); + } + fn evict_vreg_in_preg(&mut self, inst: Inst, preg: PReg) { trace!("Removing the vreg in preg {} for eviction", preg); let evicted_vreg = self.vreg_in_preg[preg.index()]; @@ -833,6 +866,7 @@ impl<'a, F: Function> Env<'a, F> { fn alloc_inst(&mut self, block: Block, inst: Inst) -> Result<(), RegAllocError> { trace!("Allocating instruction {:?}", inst); + self.reset_available_pregs_and_scratch_regs(); let operands = Operands::new(self.func.inst_operands(inst)); let clobbers = self.func.inst_clobbers(inst); @@ -1040,7 +1074,8 @@ impl<'a, F: Function> Env<'a, F> { "Available pregs: {}", self.available_pregs[OperandPos::Early] ); - let mut available_regs_for_scratch = self.available_pregs[OperandPos::Early]; + self.reset_available_pregs_and_scratch_regs(); + let avail_regs_for_scratch = self.available_pregs[OperandPos::Early]; let first_inst = self.func.block_insns(block).first(); // We need to check for the registers that are still live. 
// These registers are either livein or block params @@ -1066,9 +1101,7 @@ impl<'a, F: Function> Env<'a, F> { // And `vreg_allocs[i]` of a virtual register i is none for // dead vregs. self.freealloc(vreg); - if let Some(preg) = prev_alloc.as_reg() { - available_regs_for_scratch.remove(preg); - } else if slot == prev_alloc { + if slot == prev_alloc { // No need to do any movements if the spillslot is where the vreg is expected to be. trace!( "No need to reload {} because it's already in its expected allocation", @@ -1081,7 +1114,7 @@ impl<'a, F: Function> Env<'a, F> { vreg ); if self.is_stack(prev_alloc) && self.edits.scratch_regs[vreg.class()].is_none() { - let reg = self.get_scratch_reg(first_inst, vreg.class())?; + let reg = self.get_scratch_reg_for_reload(first_inst, vreg.class(), avail_regs_for_scratch)?; self.edits.scratch_regs[vreg.class()] = Some(reg); } self.edits.add_move( @@ -1111,7 +1144,6 @@ impl<'a, F: Function> Env<'a, F> { trace!("{} was in {}. Removing it", preg, vreg); // Nothing is in that preg anymore. self.vreg_in_preg[preg.index()] = VReg::invalid(); - available_regs_for_scratch.remove(preg); } if slot == prev_alloc { // No need to do any movements if the spillslot is where the vreg is expected to be. @@ -1126,15 +1158,33 @@ impl<'a, F: Function> Env<'a, F> { vreg ); if self.is_stack(prev_alloc) && self.edits.scratch_regs[vreg.class()].is_none() { - let mut avail_regs = self.available_pregs[OperandPos::Early]; - avail_regs.intersect_from(self.available_pregs[OperandPos::Late]); - let reg = self.lrus[vreg.class()] - .last(avail_regs) - .ok_or(RegAllocError::TooManyLiveRegs)?; - self.edits.scratch_regs[vreg.class()] = Some(reg); + let Some(preg) = self.lrus[vreg.class()].last(avail_regs_for_scratch) else { + return Err(RegAllocError::TooManyLiveRegs); + }; + if self.vreg_in_preg[preg.index()] != VReg::invalid() { + // Had to put `evict_reg_in_preg_before_inst` here because of borrow checker rules. 
+ trace!("Removing the vreg in preg {} for eviction", preg); + let evicted_vreg = self.vreg_in_preg[preg.index()]; + trace!("The removed vreg: {}", evicted_vreg); + debug_assert_ne!(evicted_vreg, VReg::invalid()); + if self.vreg_spillslots[evicted_vreg.vreg()].is_invalid() { + self.vreg_spillslots[evicted_vreg.vreg()] = self.stack.allocstack(evicted_vreg.class()); + } + let slot = self.vreg_spillslots[evicted_vreg.vreg()]; + self.vreg_allocs[evicted_vreg.vreg()] = Allocation::stack(slot); + trace!("Move reason: eviction"); + self.edits.add_move( + first_inst, + self.vreg_allocs[evicted_vreg.vreg()], + Allocation::reg(preg), + evicted_vreg.class(), + InstPosition::Before, + ); + } + self.edits.scratch_regs[vreg.class()] = Some(preg); } self.edits.add_move( - self.func.block_insns(block).first(), + first_inst, slot, prev_alloc, vreg.class(), @@ -1203,10 +1253,6 @@ impl<'a, F: Function> Env<'a, F> { fn alloc_block(&mut self, block: Block) -> Result<(), RegAllocError> { trace!("{:?} start", block); for inst in self.func.block_insns(block).iter().rev() { - // Reset has to be before `alloc_inst` not after because - // available pregs is needed after processing the first - // instruction in the block during `reload_at_begin`. 
- self.reset_available_pregs_and_scratch_regs(); self.alloc_inst(block, inst)?; } self.reload_at_begin(block)?; @@ -1229,7 +1275,6 @@ impl<'a, F: Function> Env<'a, F> { fn run(&mut self) -> Result<(), RegAllocError> { debug_assert_eq!(self.func.entry_block().index(), 0); for block in (0..self.func.num_blocks()).rev() { - self.reset_available_pregs_and_scratch_regs(); self.alloc_block(Block::new(block))?; } self.edits.edits.reverse(); From b26d92819a015e7397009492d1a029ddce6c6dfb Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Fri, 6 Sep 2024 13:18:46 +0100 Subject: [PATCH 88/95] formatting --- src/fastalloc/mod.rs | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index ee7ced95..6e45d88d 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -350,11 +350,11 @@ impl<'a, F: Function> Env<'a, F> { } fn alloc_scratch_reg(&mut self, inst: Inst, class: RegClass) -> Result<(), RegAllocError> { - use OperandPos::{Late, Early}; + use OperandPos::{Early, Late}; let reg = self.get_scratch_reg( inst, class, - self.available_pregs[Late] & self.available_pregs[Early] + self.available_pregs[Late] & self.available_pregs[Early], )?; self.edits.scratch_regs[class] = Some(reg); self.available_pregs[OperandPos::Early].remove(reg); @@ -362,7 +362,12 @@ impl<'a, F: Function> Env<'a, F> { Ok(()) } - fn get_scratch_reg_for_reload(&mut self, inst: Inst, class: RegClass, avail_regs: PRegSet) -> Result { + fn get_scratch_reg_for_reload( + &mut self, + inst: Inst, + class: RegClass, + avail_regs: PRegSet, + ) -> Result { let Some(preg) = self.lrus[class].last(avail_regs) else { return Err(RegAllocError::TooManyLiveRegs); }; @@ -372,7 +377,12 @@ impl<'a, F: Function> Env<'a, F> { Ok(preg) } - fn get_scratch_reg(&mut self, inst: Inst, class: RegClass, avail_regs: PRegSet) -> Result { + fn get_scratch_reg( + &mut self, + inst: Inst, + class: RegClass, + avail_regs: PRegSet, + ) -> Result { let 
Some(preg) = self.lrus[class].last(avail_regs) else { return Err(RegAllocError::TooManyLiveRegs); }; @@ -923,7 +933,7 @@ impl<'a, F: Function> Env<'a, F> { (OperandPos::Early, OperandKind::Def) => { self.available_pregs[OperandPos::Late].remove(preg); } - _ => () + _ => (), }; } } @@ -1114,7 +1124,11 @@ impl<'a, F: Function> Env<'a, F> { vreg ); if self.is_stack(prev_alloc) && self.edits.scratch_regs[vreg.class()].is_none() { - let reg = self.get_scratch_reg_for_reload(first_inst, vreg.class(), avail_regs_for_scratch)?; + let reg = self.get_scratch_reg_for_reload( + first_inst, + vreg.class(), + avail_regs_for_scratch, + )?; self.edits.scratch_regs[vreg.class()] = Some(reg); } self.edits.add_move( @@ -1168,7 +1182,8 @@ impl<'a, F: Function> Env<'a, F> { trace!("The removed vreg: {}", evicted_vreg); debug_assert_ne!(evicted_vreg, VReg::invalid()); if self.vreg_spillslots[evicted_vreg.vreg()].is_invalid() { - self.vreg_spillslots[evicted_vreg.vreg()] = self.stack.allocstack(evicted_vreg.class()); + self.vreg_spillslots[evicted_vreg.vreg()] = + self.stack.allocstack(evicted_vreg.class()); } let slot = self.vreg_spillslots[evicted_vreg.vreg()]; self.vreg_allocs[evicted_vreg.vreg()] = Allocation::stack(slot); From e365469d15d8978e46fc3ee86f011bf8c18bd927 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Fri, 6 Sep 2024 20:43:30 +0100 Subject: [PATCH 89/95] fixed issue with scratch reg function for parallel moves in process_branch --- src/fastalloc/mod.rs | 58 ++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 6e45d88d..c10ad0fd 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -82,6 +82,7 @@ impl<'a, F: Function> Stack<'a, F> { /// Allocates a spill slot on the stack for `vreg` fn allocstack(&mut self, class: RegClass) -> SpillSlot { + trace!("Allocating a spillslot for class {class:?}"); let size: u32 = 
self.func.spillslot_size(class).try_into().unwrap(); // Rest of this function was copied verbatim // from `Env::allocate_spillslot` in src/ion/spill.rs. @@ -96,6 +97,7 @@ impl<'a, F: Function> Stack<'a, F> { }; offset += size; self.num_spillslots = offset; + trace!("Allocated slot: {slot}"); SpillSlot::new(slot as usize) } } @@ -763,6 +765,7 @@ impl<'a, F: Function> Env<'a, F> { /// this function places branch arguments in the spillslots /// expected by the destination blocks. fn process_branch(&mut self, block: Block, inst: Inst) -> Result<(), RegAllocError> { + use OperandPos::*; trace!("Processing branch instruction {inst:?} in block {block:?}"); let mut int_parallel_moves = ParallelMoves::new(); @@ -781,50 +784,56 @@ impl<'a, F: Function> Env<'a, F> { if self.vreg_spillslots[succ_param_vreg.vreg()].is_invalid() { self.vreg_spillslots[succ_param_vreg.vreg()] = self.stack.allocstack(succ_param_vreg.class()); - trace!( - "Block param {} is in {}", - vreg, - Allocation::stack(self.vreg_spillslots[vreg.vreg()]) - ); } if self.vreg_spillslots[vreg.vreg()].is_invalid() { self.vreg_spillslots[vreg.vreg()] = self.stack.allocstack(vreg.class()); } let vreg_spill = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); - if self.vreg_allocs[vreg.vreg()].is_none() { + let curr_alloc = self.vreg_allocs[vreg.vreg()]; + if curr_alloc.is_none() { self.live_vregs.insert(*vreg); - let slot = self.vreg_spillslots[vreg.vreg()]; - self.vreg_allocs[vreg.vreg()] = Allocation::stack(slot); self.vreg_to_live_inst_range[vreg.vreg()].1 = ProgPoint::before(inst); - } else if self.vreg_allocs[vreg.vreg()] != vreg_spill { + } else if curr_alloc != vreg_spill { + if self.is_stack(curr_alloc) && self.edits.scratch_regs[vreg.class()].is_none() + { + let reg = self.get_scratch_reg_for_reload( + inst, + vreg.class(), + self.available_pregs[Early] & self.available_pregs[Late], + )?; + self.edits.scratch_regs[vreg.class()] = Some(reg); + self.available_pregs[OperandPos::Early].remove(reg); + 
self.available_pregs[OperandPos::Late].remove(reg); + } self.edits.add_move( inst, vreg_spill, - self.vreg_allocs[vreg.vreg()], + curr_alloc, vreg.class(), InstPosition::Before, ); - self.vreg_allocs[vreg.vreg()] = vreg_spill; } + self.vreg_allocs[vreg.vreg()] = vreg_spill; let parallel_moves = match vreg.class() { RegClass::Int => &mut int_parallel_moves, RegClass::Float => &mut float_parallel_moves, RegClass::Vector => &mut vec_parallel_moves, }; - parallel_moves.add( - Allocation::stack(self.vreg_spillslots[vreg.vreg()]), - Allocation::stack(self.vreg_spillslots[succ_param_vreg.vreg()]), - Some(*vreg), - ); + let from = Allocation::stack(self.vreg_spillslots[vreg.vreg()]); + let to = Allocation::stack(self.vreg_spillslots[succ_param_vreg.vreg()]); + trace!("Recording parallel move from {from} to {to}"); + parallel_moves.add(from, to, Some(*vreg)); } } let resolved_int = int_parallel_moves.resolve(); let resolved_float = float_parallel_moves.resolve(); let resolved_vec = vec_parallel_moves.resolve(); - let mut new_scratch_reg = PartedByRegClass { items: [None; 3] }; + let mut scratch_regs = self.edits.scratch_regs.clone(); let mut num_spillslots = self.stack.num_spillslots; + let mut avail_regs = self.available_pregs[Early] & self.available_pregs[Late]; + trace!("Resolving parallel moves"); for (resolved, class) in [ (resolved_int, RegClass::Int), (resolved_float, RegClass::Float), @@ -832,17 +841,17 @@ impl<'a, F: Function> Env<'a, F> { ] { let scratch_resolver = MoveAndScratchResolver { find_free_reg: || { - if let Some(reg) = self.edits.scratch_regs[class] { - Some(Allocation::reg(reg)) - } else if let Some(reg) = new_scratch_reg[class] { + if let Some(reg) = scratch_regs[class] { + trace!("Retrieved reg {reg} for scratch resolver"); + scratch_regs[class] = None; Some(Allocation::reg(reg)) } else { - use OperandPos::*; - let avail_regs = self.available_pregs[Early] & self.available_pregs[Late]; let Some(preg) = self.lrus[class].last(avail_regs) else { + 
trace!("Couldn't find any reg for scratch resolver"); return None; }; - new_scratch_reg[RegClass::Int] = Some(preg); + avail_regs.remove(preg); + trace!("Retrieved reg {preg} for scratch resolver"); Some(Allocation::reg(preg)) } }, @@ -858,12 +867,14 @@ impl<'a, F: Function> Env<'a, F> { }; offset += size; num_spillslots = offset; + trace!("Retrieved slot {slot} for scratch resolver"); Allocation::stack(SpillSlot::new(slot as usize)) }, is_stack_alloc: |alloc| self.is_stack(alloc), borrowed_scratch_reg: self.preferred_victim[class], }; let moves = scratch_resolver.compute(resolved); + trace!("Resolved {class:?} parallel moves"); for (from, to, _) in moves.into_iter().rev() { self.edits .edits @@ -871,6 +882,7 @@ impl<'a, F: Function> Env<'a, F> { } self.stack.num_spillslots = num_spillslots; } + trace!("Completed processing branch"); Ok(()) } From 4d6be1452818607aa3d3bb0fa39dbde4d4aa62da Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Sat, 14 Sep 2024 10:02:48 +0100 Subject: [PATCH 90/95] removed unnecessary imports in test --- src/fastalloc/tests.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fastalloc/tests.rs b/src/fastalloc/tests.rs index d3d1d8a8..41be3771 100644 --- a/src/fastalloc/tests.rs +++ b/src/fastalloc/tests.rs @@ -1,8 +1,8 @@ use crate::OperandConstraint::{self, *}; use crate::OperandKind::{self, *}; use crate::{ - run, Algorithm, Allocation, Block, Function, Inst, InstOrEdit, InstRange, MachineEnv, Operand, - OperandPos, Output, PReg, PRegSet, ProgPoint, RegClass, RegallocOptions, SpillSlot, VReg, + run, Algorithm, Allocation, Block, Function, Inst, InstRange, MachineEnv, Operand, OperandPos, + PReg, PRegSet, ProgPoint, RegClass, RegallocOptions, VReg, }; use alloc::vec; use alloc::vec::Vec; From 784fc66c98b1e4240c03d81be4bbbb4ce9ee4b73 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Sat, 14 Sep 2024 19:29:42 +0100 Subject: [PATCH 91/95] vregset insert now asserts vreg absence --- src/fastalloc/vregset.rs | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fastalloc/vregset.rs b/src/fastalloc/vregset.rs index af6474cd..77287dcc 100644 --- a/src/fastalloc/vregset.rs +++ b/src/fastalloc/vregset.rs @@ -35,8 +35,7 @@ impl VRegSet { } pub fn insert(&mut self, vreg: VReg) { - // Intentionally assuming that the set doesn't already - // contain `vreg`. + debug_assert_eq!(self.items[vreg.vreg()].vreg, VReg::invalid()); let old_head_next = self.items[self.head.index()].next; self.items[vreg.vreg()] = VRegNode { next: old_head_next, @@ -52,6 +51,7 @@ impl VRegSet { let next = self.items[vreg_num].next; self.items[prev.index()].next = next; self.items[next.index()].prev = prev; + self.items[vreg_num].vreg = VReg::invalid(); } pub fn is_empty(&self) -> bool { From e4b9fc0c10eff64ac34a79f3d5e3d067f06fa0c4 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Sat, 14 Sep 2024 19:49:14 +0100 Subject: [PATCH 92/95] removed duplicated functions --- src/fastalloc/mod.rs | 62 +++++++++++++------------------------------- 1 file changed, 18 insertions(+), 44 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index c10ad0fd..87484949 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -137,13 +137,8 @@ impl Edits { impl Edits { fn is_stack(&self, alloc: Allocation) -> bool { - if alloc.is_stack() { - return true; - } - if alloc.is_reg() { - return self.fixed_stack_slots.contains(alloc.as_reg().unwrap()); - } - false + alloc.is_stack() + || (alloc.is_reg() && self.fixed_stack_slots.contains(alloc.as_reg().unwrap())) } fn add_move( @@ -333,16 +328,6 @@ impl<'a, F: Function> Env<'a, F> { } } - fn is_stack(&self, alloc: Allocation) -> bool { - if alloc.is_stack() { - return true; - } - if alloc.is_reg() { - return self.fixed_stack_slots.contains(alloc.as_reg().unwrap()); - } - false - } - fn reset_available_pregs_and_scratch_regs(&mut self) { trace!("Resetting the available pregs"); self.available_pregs = PartedByOperandPos { @@ -462,7 +447,7 @@ impl<'a, F: 
Function> Env<'a, F> { } } OperandConstraint::Reg => { - if self.is_stack(alloc) { + if self.edits.is_stack(alloc) { return false; } if let Some(preg) = alloc.as_reg() { @@ -485,7 +470,7 @@ impl<'a, F: Function> Env<'a, F> { } } - fn evict_vreg_in_preg_before_inst(&mut self, inst: Inst, preg: PReg) { + fn base_evict_vreg_in_preg(&mut self, inst: Inst, preg: PReg, pos: InstPosition) { trace!("Removing the vreg in preg {} for eviction", preg); let evicted_vreg = self.vreg_in_preg[preg.index()]; trace!("The removed vreg: {}", evicted_vreg); @@ -501,28 +486,16 @@ impl<'a, F: Function> Env<'a, F> { self.vreg_allocs[evicted_vreg.vreg()], Allocation::reg(preg), evicted_vreg.class(), - InstPosition::Before, + pos, ); } + fn evict_vreg_in_preg_before_inst(&mut self, inst: Inst, preg: PReg) { + self.base_evict_vreg_in_preg(inst, preg, InstPosition::Before) + } + fn evict_vreg_in_preg(&mut self, inst: Inst, preg: PReg) { - trace!("Removing the vreg in preg {} for eviction", preg); - let evicted_vreg = self.vreg_in_preg[preg.index()]; - trace!("The removed vreg: {}", evicted_vreg); - debug_assert_ne!(evicted_vreg, VReg::invalid()); - if self.vreg_spillslots[evicted_vreg.vreg()].is_invalid() { - self.vreg_spillslots[evicted_vreg.vreg()] = self.stack.allocstack(evicted_vreg.class()); - } - let slot = self.vreg_spillslots[evicted_vreg.vreg()]; - self.vreg_allocs[evicted_vreg.vreg()] = Allocation::stack(slot); - trace!("Move reason: eviction"); - self.edits.add_move( - inst, - self.vreg_allocs[evicted_vreg.vreg()], - Allocation::reg(preg), - evicted_vreg.class(), - InstPosition::After, - ); + self.base_evict_vreg_in_preg(inst, preg, InstPosition::After) } fn freealloc(&mut self, vreg: VReg) { @@ -675,8 +648,8 @@ impl<'a, F: Function> Env<'a, F> { // used (in `prev_alloc`, that is). 
else { trace!("Move reason: Prev allocation doesn't meet constraints"); - if self.is_stack(new_alloc) - && self.is_stack(curr_alloc) + if self.edits.is_stack(new_alloc) + && self.edits.is_stack(curr_alloc) && self.edits.scratch_regs[op.class()].is_none() { self.alloc_scratch_reg(inst, op.class())?; @@ -794,7 +767,8 @@ impl<'a, F: Function> Env<'a, F> { self.live_vregs.insert(*vreg); self.vreg_to_live_inst_range[vreg.vreg()].1 = ProgPoint::before(inst); } else if curr_alloc != vreg_spill { - if self.is_stack(curr_alloc) && self.edits.scratch_regs[vreg.class()].is_none() + if self.edits.is_stack(curr_alloc) + && self.edits.scratch_regs[vreg.class()].is_none() { let reg = self.get_scratch_reg_for_reload( inst, @@ -870,7 +844,7 @@ impl<'a, F: Function> Env<'a, F> { trace!("Retrieved slot {slot} for scratch resolver"); Allocation::stack(SpillSlot::new(slot as usize)) }, - is_stack_alloc: |alloc| self.is_stack(alloc), + is_stack_alloc: |alloc| self.edits.is_stack(alloc), borrowed_scratch_reg: self.preferred_victim[class], }; let moves = scratch_resolver.compute(resolved); @@ -1007,7 +981,7 @@ impl<'a, F: Function> Env<'a, F> { if let Some(curr_alloc) = curr_alloc.as_stack() { (true, curr_alloc == vreg_slot) } else { - (self.is_stack(curr_alloc), false) + (self.edits.is_stack(curr_alloc), false) }; if !src_and_dest_are_same { if is_stack_to_stack && self.edits.scratch_regs[op.class()].is_none() { @@ -1135,7 +1109,7 @@ impl<'a, F: Function> Env<'a, F> { "Move reason: reload {} at begin - move from its spillslot", vreg ); - if self.is_stack(prev_alloc) && self.edits.scratch_regs[vreg.class()].is_none() { + if self.edits.is_stack(prev_alloc) && self.edits.scratch_regs[vreg.class()].is_none() { let reg = self.get_scratch_reg_for_reload( first_inst, vreg.class(), @@ -1183,7 +1157,7 @@ impl<'a, F: Function> Env<'a, F> { "Move reason: reload {} at begin - move from its spillslot", vreg ); - if self.is_stack(prev_alloc) && self.edits.scratch_regs[vreg.class()].is_none() { + if 
self.edits.is_stack(prev_alloc) && self.edits.scratch_regs[vreg.class()].is_none() { let Some(preg) = self.lrus[vreg.class()].last(avail_regs_for_scratch) else { return Err(RegAllocError::TooManyLiveRegs); }; From da22294be19772825dfe9e4b16247e32adff5848 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Sat, 14 Sep 2024 22:39:08 +0100 Subject: [PATCH 93/95] changed the edits length guess for initializing the edits vector --- src/fastalloc/mod.rs | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 87484949..0063c601 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -116,18 +116,14 @@ struct Edits { impl Edits { fn new( fixed_stack_slots: PRegSet, - max_operand_len: u32, num_insts: usize, dedicated_scratch_regs: PartedByRegClass>, ) -> Self { - // Some operands generate edits and some don't. - // The operands that generate edits add no more than two. - // Some edits are added due to clobbers, not operands. - // Anyways, I think this may be a reasonable guess. - let inst_edits_len_guess = max_operand_len as usize * 2; - let total_edits_len_guess = inst_edits_len_guess * num_insts; + // This guess is based on the sightglass benchmarks: + // The average number of edits per instruction is 1. 
+ let edits_len_guess = num_insts; Self { - edits: Vec::with_capacity(total_edits_len_guess), + edits: Vec::with_capacity(edits_len_guess), fixed_stack_slots, scratch_regs: dedicated_scratch_regs.clone(), dedicated_scratch_regs, @@ -317,12 +313,7 @@ impl<'a, F: Function> Env<'a, F> { items: [init_available_pregs, init_available_pregs], }, allocs, - edits: Edits::new( - fixed_stack_slots, - max_operand_len, - func.num_insts(), - dedicated_scratch_regs, - ), + edits: Edits::new(fixed_stack_slots, func.num_insts(), dedicated_scratch_regs), stats: Stats::default(), debug_locations: Vec::with_capacity(func.debug_value_labels().len()), } From cd5a7c23e225b5a899455e882ac52d3aaad90bec Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Sat, 21 Sep 2024 16:35:04 +0100 Subject: [PATCH 94/95] removed unnecessary loop in alloc_inst --- src/fastalloc/mod.rs | 41 ----------------------------------------- 1 file changed, 41 deletions(-) diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs index 0063c601..3c71f54b 100644 --- a/src/fastalloc/mod.rs +++ b/src/fastalloc/mod.rs @@ -873,47 +873,6 @@ impl<'a, F: Function> Env<'a, F> { self.lrus[preg.class()].poke(preg); } } - for (_, op) in operands.non_fixed() { - if op.as_fixed_nonallocatable().is_some() { - continue; - } - if let Some(preg) = self.vreg_allocs[op.vreg().vreg()].as_reg() { - trace!("Removing {op}'s current reg allocation {preg} from reg sets"); - // The current allocation, vreg_allocs[op.vreg], doesn't change, - // so it should be removed from the available reg sets to avoid - // allocating it as scratch. - // - // For example: - // - // 1. def v0 (fixed: p23), use v1 (reg) - // 2. use v1 (fixed: p0) - // v0 is in stack_v0 and v1 is in stack_v1 - // - // Suppose p23 is a fixed stack slot. Then an edit will need to - // be inserted after inst 1 to move from stack_v0 to p23 and - // a scratch register is needed. It is possible for p0 to be used - // as scratch since it hasn't been removed from the available regsets. 
- // If it is used as scratch, then we'll have: - // - // 1. def v0 (fixed: p23), use v1 (reg) - // move from stack_v1 to p0 // v1 is evicted - // move from p23 to p0 // v1 is overwritten by v0 - // move from p0 to stack_v0 - // 2. use v1 (fixed: p0) // v0 is used instead of v1 - // - // To avoid this scenario, the register is removed from the available set. - self.available_pregs[op.pos()].remove(preg); - match (op.pos(), op.kind()) { - (OperandPos::Late, OperandKind::Use) => { - self.available_pregs[OperandPos::Early].remove(preg) - } - (OperandPos::Early, OperandKind::Def) => { - self.available_pregs[OperandPos::Late].remove(preg); - } - _ => (), - }; - } - } for (_, op) in operands.fixed() { let OperandConstraint::FixedReg(preg) = op.constraint() else { unreachable!(); From afb1802646cbdf08e028176fedcc709bac7bd468 Mon Sep 17 00:00:00 2001 From: d-sonuga Date: Sat, 21 Sep 2024 17:02:54 +0100 Subject: [PATCH 95/95] removed unnecessary iter function and lru constant --- src/fastalloc/iter.rs | 4 ---- src/fastalloc/lru.rs | 2 -- 2 files changed, 6 deletions(-) diff --git a/src/fastalloc/iter.rs b/src/fastalloc/iter.rs index b4cedf04..d1437559 100644 --- a/src/fastalloc/iter.rs +++ b/src/fastalloc/iter.rs @@ -33,10 +33,6 @@ impl<'a> Operands<'a> { pub fn fixed(&self) -> impl Iterator + 'a { self.matches(|op| matches!(op.constraint(), OperandConstraint::FixedReg(_))) } - - pub fn non_fixed(&self) -> impl Iterator + 'a { - self.matches(|op| !matches!(op.constraint(), OperandConstraint::FixedReg(_))) - } } impl<'a> core::ops::Index for Operands<'a> { diff --git a/src/fastalloc/lru.rs b/src/fastalloc/lru.rs index a645d9b1..9f0679ea 100644 --- a/src/fastalloc/lru.rs +++ b/src/fastalloc/lru.rs @@ -7,8 +7,6 @@ use core::{ }; use hashbrown::HashSet; -const DUMMY_NODE_INDEX: usize = PReg::MAX + 1; - /// A least-recently-used cache organized as a linked list based on a vector. pub struct Lru { /// The list of node information.