Skip to content

Commit

Permalink
Squash guest reg loads and stores into multiple transfers
Browse files Browse the repository at this point in the history
  • Loading branch information
Grarak committed Nov 9, 2024
1 parent d60f739 commit 9d36644
Show file tree
Hide file tree
Showing 6 changed files with 166 additions and 62 deletions.
119 changes: 113 additions & 6 deletions src/jit/assembler/basic_block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,11 +242,8 @@ impl BasicBlock {
}

pub fn remove_dead_code(&mut self, asm: &mut BlockAsm) {
let mut current_node = self.insts_link.root;
let mut i = 0;
while !current_node.is_null() {
let inst_i = BlockInstList::deref(current_node).value;
let inst = &mut asm.buf.insts[inst_i];
for (i, entry) in self.insts_link.iter().enumerate() {
let inst = &mut asm.buf.insts[entry.value];
if let BlockInstKind::RestoreReg { guest_reg, .. } = &inst.kind {
if *guest_reg != Reg::CPSR {
let (_, outputs) = inst.get_io();
Expand All @@ -255,9 +252,119 @@ impl BasicBlock {
}
}
}
}
}

/// Collapses a run of consecutive single-register guest loads/stores
/// (for the contiguous guest regs `from_reg..=to_reg`) into one
/// `GuestTransferMultiple` (LDM/STM-style) instruction.
///
/// `start_i..=end_i` are the instruction indices of the run, `save` selects
/// store (write) vs. load (read), and `end_node` is the intrusive-list entry
/// of the run's last instruction; the run is walked backwards from there via
/// the `previous` links.
fn flush_reg_io_consolidation(&mut self, asm: &mut BlockAsm, from_reg: Reg, to_reg: Reg, save: bool, start_i: usize, end_i: usize, end_node: *mut BlockInstListEntry) {
    // Mark every instruction of the run as skipped and merge their live
    // ranges. Index `i` runs forward while the node walk runs backward; the
    // accumulation is order-independent, so both traversals cover the run.
    let mut current_node = end_node;
    let mut live_range_accumulated = BlockRegSet::new();
    for i in start_i..=end_i {
        let entry = BlockInstList::deref(current_node);
        let inst = &mut asm.buf.insts[entry.value];
        inst.skip = true;
        current_node = entry.previous;
        live_range_accumulated += self.regs_live_ranges[i];
    }

    // Every slot of the run now shares the merged live range, so the
    // register allocator sees identical liveness across the whole span.
    for i in start_i..=end_i {
        self.regs_live_ranges[i] = live_range_accumulated;
    }

    let end_entry = BlockInstList::deref(end_node);

    // The multiple transfer reads/writes relative to the thread-regs base.
    // When the run does not start at R0, repurpose the second-to-last
    // instruction of the run as an Add that biases the base by
    // `from_reg * 4` bytes into a temp register.
    let mut thread_regs_addr_reg = asm.thread_regs_addr_reg;
    if from_reg as u8 > 0 {
        thread_regs_addr_reg = asm.tmp_operand_imm_reg;
        let previous_entry = BlockInstList::deref(end_entry.previous);
        let previous_inst = &mut asm.buf.insts[previous_entry.value];
        // NOTE(review): this overwrites an instruction that was just marked
        // `skip = true` in the loop above — presumably the `.into()`
        // conversion produces a fresh inst with `skip` cleared; confirm.
        *previous_inst = BlockInstKind::Alu3 {
            op: BlockAluOp::Add,
            operands: [thread_regs_addr_reg.into(), asm.thread_regs_addr_reg.into(), (from_reg as u32 * 4).into()],
            set_cond: BlockAluSetCond::None,
            thumb_pc_aligned: false,
        }
        .into();
        // The temp base reg is used by the Add (slot end_i - 1) and by the
        // transfer itself (slot end_i), and must stay live into the transfer.
        self.used_regs[end_i - 1] += thread_regs_addr_reg;
        self.used_regs[end_i] += thread_regs_addr_reg;
        self.regs_live_ranges[end_i] += thread_regs_addr_reg;
    }

    // Replace the run's final instruction with the squashed transfer.
    let end_inst = &mut asm.buf.insts[end_entry.value];
    let op = if save { BlockTransferOp::Write } else { BlockTransferOp::Read };

    // Build the contiguous guest register set from_reg..=to_reg.
    let mut guest_regs = RegReserve::new();
    for reg in from_reg as u8..=to_reg as u8 {
        guest_regs += Reg::from(reg);
    }
    *end_inst = BlockInstKind::GuestTransferMultiple {
        op,
        addr_reg: thread_regs_addr_reg,
        addr_out_reg: thread_regs_addr_reg,
        gp_regs: guest_regs,
        fixed_regs: RegReserve::new(),
        write_back: false,
        pre: false,
        add_to_base: true,
    }
    .into();
    // The transfer touches all squashed guest regs; record that usage.
    self.used_regs[end_i].add_guests(guest_regs);
}

/// Scans the block for runs of consecutive `SaveReg`/`RestoreReg`
/// instructions that touch ascending, contiguous guest GP registers
/// (up to R12) and squashes each run into one multi-register transfer
/// via `flush_reg_io_consolidation`.
pub fn consolidate_reg_io(&mut self, asm: &mut BlockAsm) {
    // Number of regs accumulated past the first one of the current run.
    let mut count = 0;
    // Highest guest reg of the run (run covers target_reg-count..=target_reg).
    let mut target_reg = Reg::None;
    // Whether the current run is saves (true) or restores (false).
    let mut target_save = false;
    // Guest reg of the previous save/restore, for the contiguity check.
    let mut last_reg = Reg::None;
    // Some(true) = last inst was SaveReg, Some(false) = RestoreReg,
    // None = run broken by some other instruction.
    let mut was_save = None;
    // Instruction index at which the current run started.
    let mut start_i = 0;
    let mut i = 0;

    let mut current_node = self.insts_link.root;
    while !current_node.is_null() {
        let entry = BlockInstList::deref(current_node);
        let inst = &asm.buf.insts[entry.value];
        if !inst.skip {
            let mut flush = true;

            match &inst.kind {
                BlockInstKind::SaveReg { guest_reg, .. } => {
                    // Extend the run only when this save continues an
                    // ascending contiguous sequence of GP regs (<= R12).
                    if was_save == Some(true) && *guest_reg <= Reg::R12 && last_reg as u8 + 1 == *guest_reg as u8 {
                        count += 1;
                        flush = false;
                        target_reg = *guest_reg;
                        target_save = true;
                    }
                    last_reg = *guest_reg;
                    was_save = Some(true);
                }
                BlockInstKind::RestoreReg { guest_reg, .. } => {
                    // Same contiguity rule as saves, but for restores.
                    if was_save == Some(false) && *guest_reg <= Reg::R12 && last_reg as u8 + 1 == *guest_reg as u8 {
                        count += 1;
                        flush = false;
                        target_reg = *guest_reg;
                        target_save = false;
                    }
                    last_reg = *guest_reg;
                    was_save = Some(false);
                }
                _ => {
                    // Any other instruction breaks the current run.
                    last_reg = Reg::None;
                    was_save = None;
                }
            }

            // This inst did not extend the run: squash what was collected
            // (the run spans start_i..=i-1 and ends at the previous entry).
            // NOTE(review): a run that reaches the end of the inst list is
            // never flushed (count can still be > 0 when the loop exits) —
            // TODO confirm blocks always end with a non-save/restore inst.
            if flush && count > 0 {
                self.flush_reg_io_consolidation(asm, Reg::from(target_reg as u8 - count), target_reg, target_save, start_i, i - 1, entry.previous);
                count = 0;
            }

            // No active run: the next run (if any) starts here.
            if count == 0 {
                start_i = i;
            }
        }

        current_node = entry.next;
        i += 1;
        // NOTE(review): this second advance duplicates the one two lines up
        // (diff-scrape artifact — one of the two lines is the pre-change
        // code); as written the node would advance twice per iteration.
        // Confirm against the repository which advance is the real one.
        current_node = BlockInstList::deref(current_node).next;
    }
}

Expand Down
3 changes: 2 additions & 1 deletion src/jit/assembler/block_asm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ pub struct BlockAsm<'a> {

pub thread_regs_addr_reg: BlockReg,
pub tmp_guest_cpsr_reg: BlockReg,
tmp_operand_imm_reg: BlockReg,
pub tmp_operand_imm_reg: BlockReg,
tmp_shift_imm_reg: BlockReg,
tmp_func_call_reg: BlockReg,

Expand Down Expand Up @@ -919,6 +919,7 @@ impl<'a> BlockAsm<'a> {
}

basic_block.remove_dead_code(self);
basic_block.consolidate_reg_io(self);
}

(basic_blocks, reachable_blocks)
Expand Down
2 changes: 1 addition & 1 deletion src/jit/assembler/block_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -952,7 +952,7 @@ impl Debug for BlockInstKind {
let add_to_base = if *add_to_base { "+" } else { "-" };
write!(
f,
"{op:?}M {addr_reg:?} -> {addr_out_reg:?} gp regs: {gp_regs:?}, fixed regs: {fixed_regs:?}, write back: {write_back}, pre {pre}, {add_to_base}base"
"Guest{op:?}M {addr_reg:?} -> {addr_out_reg:?} gp regs: {gp_regs:?}, fixed regs: {fixed_regs:?}, write back: {write_back}, pre {pre}, {add_to_base}base"
)
}
BlockInstKind::SystemReg { op, operand } => write!(f, "{op:?} {operand:?}"),
Expand Down
94 changes: 45 additions & 49 deletions src/jit/assembler/block_reg_allocator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,50 +121,46 @@ impl BlockRegAllocator {
None
}

fn allocate_and_spill(&mut self, any_reg: u16, used_regs: &[BlockRegSet], allowed_regs: RegReserve) -> Option<Reg> {
let mut greatest_distance = 0;
let mut greatest_distance_reg = 0;
fn allocate_and_spill(&mut self, any_reg: u16, live_ranges: &[BlockRegSet], used_regs: &[BlockRegSet], allowed_regs: RegReserve) -> Option<Reg> {
for (i, mapped_reg) in self.stored_mapping_reverse.iter().enumerate() {
let reg = Reg::from(i as u8);

if mapped_reg.is_none() && allowed_regs.is_reserved(reg) && !live_ranges[1].contains(BlockReg::Fixed(reg)) && !used_regs[0].contains(BlockReg::Fixed(reg)) {
self.set_stored_mapping(any_reg, reg);
return Some(reg);
}
}

for (i, mapped_reg) in self.stored_mapping_reverse.iter().enumerate() {
let reg = Reg::from(i as u8);

if let &Some(mapped_reg) = mapped_reg {
let reg = Reg::from(i as u8);
if allowed_regs.is_reserved(reg) && !used_regs[0].contains(BlockReg::Any(mapped_reg)) {
for i in 1..used_regs.len() {
if used_regs[i].contains(BlockReg::Any(mapped_reg)) || used_regs[i].contains(BlockReg::Fixed(reg)) {
if i > greatest_distance {
greatest_distance = i;
greatest_distance_reg = mapped_reg;
}
break;
}
}
if allowed_regs.is_reserved(reg) && !used_regs[0].contains(BlockReg::Any(mapped_reg)) && !live_ranges[1].contains(BlockReg::Any(mapped_reg)) {
self.swap_stored_mapping(any_reg, mapped_reg);
return Some(reg);
}
}
}

if greatest_distance != 0 {
let reg = self.stored_mapping[greatest_distance_reg as usize];
self.spilled += BlockReg::Any(greatest_distance_reg);
self.gen_pre_handle_spilled_inst(greatest_distance_reg, reg, BlockTransferOp::Write);
self.swap_stored_mapping(any_reg, greatest_distance_reg);
return Some(reg);
for (i, mapped_reg) in self.stored_mapping_reverse.iter().enumerate() {
let reg = Reg::from(i as u8);

if let &Some(mapped_reg) = mapped_reg {
if allowed_regs.is_reserved(reg) && !used_regs[0].contains(BlockReg::Any(mapped_reg)) {
self.spilled += BlockReg::Any(mapped_reg);
self.gen_pre_handle_spilled_inst(mapped_reg, reg, BlockTransferOp::Write);
self.swap_stored_mapping(any_reg, mapped_reg);
return Some(reg);
}
}
}

None
}

fn allocate_local(&mut self, any_reg: u16, live_ranges: &[BlockRegSet], used_regs: &[BlockRegSet]) -> Reg {
let mut live_ranges_until_expiration = BlockRegSet::new();
let mut used_fixed_until_expiration = used_regs[0].get_fixed();
for i in 1..live_ranges.len() {
if !live_ranges[i].contains(BlockReg::Any(any_reg)) {
break;
}
live_ranges_until_expiration += live_ranges[i];
used_fixed_until_expiration += used_regs[i].get_fixed();
}

for reg in SCRATCH_REGS {
if !used_fixed_until_expiration.is_reserved(reg) && self.stored_mapping_reverse[reg as usize].is_none() && !live_ranges[1].contains(BlockReg::Fixed(reg)) {
if self.stored_mapping_reverse[reg as usize].is_none() && !live_ranges[1].contains(BlockReg::Fixed(reg)) {
self.set_stored_mapping(any_reg, reg);
return reg;
}
Expand All @@ -174,22 +170,7 @@ impl BlockRegAllocator {
return reg;
}

for (i, mapped_reg) in self.stored_mapping_reverse.iter().enumerate() {
if let &Some(mapped_reg) = mapped_reg {
let reg = Reg::from(i as u8);
if !used_regs[0].contains(BlockReg::Any(mapped_reg))
&& !live_ranges_until_expiration.contains(BlockReg::Any(mapped_reg))
&& !live_ranges_until_expiration.contains(BlockReg::Fixed(reg))
{
self.spilled += BlockReg::Any(mapped_reg);
self.gen_pre_handle_spilled_inst(mapped_reg, reg, BlockTransferOp::Write);
self.swap_stored_mapping(any_reg, mapped_reg);
return reg;
}
}
}

if let Some(reg) = self.allocate_and_spill(any_reg, used_regs, SCRATCH_REGS + ALLOCATION_REGS) {
if let Some(reg) = self.allocate_and_spill(any_reg, live_ranges, used_regs, SCRATCH_REGS + ALLOCATION_REGS) {
return reg;
}

Expand All @@ -201,7 +182,11 @@ impl BlockRegAllocator {
return reg;
}

if let Some(reg) = self.allocate_and_spill(any_reg, used_regs, ALLOCATION_REGS) {
if let Some(reg) = self.allocate_and_spill(any_reg, live_ranges, used_regs, ALLOCATION_REGS) {
return reg;
}

if let Some(reg) = self.allocate_and_spill(any_reg, live_ranges, used_regs, SCRATCH_REGS) {
return reg;
}

Expand Down Expand Up @@ -350,11 +335,17 @@ impl BlockRegAllocator {

if DEBUG && unsafe { BLOCK_LOG } {
println!("inputs: {inputs:?}, outputs: {outputs:?}");
println!("used regs {:?}", used_regs[0]);
}

self.relocate_guest_regs(inputs.get_guests().get_gp_regs(), live_ranges, &inputs, used_regs, true);
self.relocate_guest_regs(outputs.get_guests().get_gp_regs(), live_ranges, &inputs, used_regs, false);

if DEBUG && unsafe { BLOCK_LOG } {
println!("pre mapping {:?}", self.stored_mapping_reverse);
println!("pre spilled {:?}", self.spilled);
}

for any_input_reg in inputs.iter_any() {
let reg = self.get_input_reg(any_input_reg, live_ranges, used_regs);
inst.replace_input_regs(BlockReg::Any(any_input_reg), BlockReg::Fixed(reg));
Expand All @@ -370,6 +361,11 @@ impl BlockRegAllocator {
inst.replace_output_regs(BlockReg::Any(any_output_reg), BlockReg::Fixed(reg));
self.dirty_regs += reg;
}

if DEBUG && unsafe { BLOCK_LOG } {
println!("after mapping {:?}", self.stored_mapping_reverse);
println!("after spilled {:?}", self.spilled);
}
}

pub fn ensure_global_mappings(&mut self, output_regs: BlockRegSet) {
Expand Down Expand Up @@ -439,7 +435,7 @@ impl BlockRegAllocator {
self.spilled -= BlockReg::Any(output_reg);
self.gen_pre_handle_spilled_inst(output_reg, desired_reg_mapping, BlockTransferOp::Read);
} else {
panic!("required output reg must already have a value");
panic!("required output reg {output_reg:?} must already have a value");
}
self.set_stored_mapping(output_reg, desired_reg_mapping);
}
Expand Down
2 changes: 1 addition & 1 deletion src/jit/emitter/emit_transfer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ impl<const CPU: CpuType> JitAsm<'_, CPU> {
let fast_mem_mark_dirty_label = block_asm.new_label();
let continue_label = block_asm.new_label();

if is_valid && !inst_info.op.mem_is_write() && !inst_info.op.mem_transfer_user() && rlist.len() < RegReserve::gp().len() - 2 {
if is_valid && !inst_info.op.mem_is_write() && !inst_info.op.mem_transfer_user() && rlist.len() < (RegReserve::gp() + Reg::LR).len() - 2 {
let mut gp_regs = rlist.get_gp_regs();
let mut free_gp_regs = if gp_regs.is_empty() {
RegReserve::gp()
Expand Down
8 changes: 4 additions & 4 deletions src/jit/jit_asm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,12 +186,12 @@ fn emit_code_block_internal<const CPU: CpuType, const THUMB: bool>(asm: &mut Jit
}

let jit_entry = {
// unsafe { BLOCK_LOG = guest_pc == 0x2001b04 };
// unsafe { BLOCK_LOG = guest_pc == 0x2097454 };

let mut block_asm = asm.new_block_asm(false);

if DEBUG_LOG {
block_asm.call2(debug_enter_block::<CPU> as *const (), asm as *mut _ as u32, guest_pc | (THUMB as u32));
block_asm.call1(debug_enter_block::<CPU> as *const (), guest_pc | (THUMB as u32));
block_asm.restore_reg(Reg::CPSR);
}

Expand All @@ -200,7 +200,7 @@ fn emit_code_block_internal<const CPU: CpuType, const THUMB: bool>(asm: &mut Jit
asm.jit_buf.current_pc = guest_pc + (i << if THUMB { 1 } else { 2 }) as u32;
debug_println!("{CPU:?} emitting {:?} at pc: {:x}", asm.jit_buf.current_inst(), asm.jit_buf.current_pc);

// if asm.jit_buf.current_pc == 0x20a7f9c {
// if asm.jit_buf.current_pc == 0x20098ca {
// block_asm.bkpt(1);
// }

Expand Down Expand Up @@ -354,6 +354,6 @@ unsafe extern "C" fn debug_after_exec_op<const CPU: CpuType>(pc: u32, opcode: u3
debug_inst_info::<CPU>(get_regs!((*asm).emu, CPU), pc, &format!("\n\t{:?} {:?}", CPU, inst_info));
}

extern "C" fn debug_enter_block<const CPU: CpuType>(asm: *mut JitAsm<CPU>, pc: u32) {
extern "C" fn debug_enter_block<const CPU: CpuType>(pc: u32) {
println!("{CPU:?} execute {pc:x}");
}

0 comments on commit 9d36644

Please sign in to comment.