Skip to content

Commit

Permalink
Implement fastmem for multiple reads
Browse files Browse the repository at this point in the history
  • Loading branch information
Grarak committed Nov 3, 2024
1 parent cf08d39 commit fde8405
Show file tree
Hide file tree
Showing 11 changed files with 227 additions and 43 deletions.
5 changes: 5 additions & 0 deletions src/jit/assembler/arm/transfer_assembler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ pub struct LdmStm {
impl LdmStm {
#[inline]
pub fn generic(op0: Reg, regs: RegReserve, read: bool, write_back: bool, add_to_base: bool, pre: bool, cond: Cond) -> u32 {
debug_assert!(!write_back || !regs.is_reserved(op0));
u32::from(LdmStm::new(
regs.0 as u16,
u4::new(op0 as u8),
Expand All @@ -323,6 +324,7 @@ impl LdmStm {

/// Builds an `LdmStm` word with `sp` as the base register and `regs` as the
/// register list, and returns it as a raw `u32`.
///
/// The five boolean arguments handed to `LdmStm::new` are
/// `false, true, false, false, false`.
/// NOTE(review): presumably load / write-back / psr / up / pre in that order,
/// i.e. a store with write-back, decrementing, post-indexed — the field order
/// of `LdmStm` is not visible here; confirm.
///
/// Debug builds assert that `sp` is not contained in `regs`.
#[inline]
pub fn push_post(regs: RegReserve, sp: Reg, cond: Cond) -> u32 {
    debug_assert!(!regs.is_reserved(sp));
    let reg_list = regs.0 as u16;
    let base = u4::new(sp as u8);
    let encoded = LdmStm::new(reg_list, base, false, true, false, false, false, u3::new(0b100), u4::new(cond as u8));
    u32::from(encoded)
}

Expand All @@ -333,11 +335,13 @@ impl LdmStm {

/// Builds an `LdmStm` word with `sp` as the base register and `regs` as the
/// register list, and returns it as a raw `u32`.
///
/// The five boolean arguments handed to `LdmStm::new` are
/// `false, true, false, false, true`; only the last one differs from
/// `push_post`.
/// NOTE(review): presumably the last flag is the pre-index bit, making this a
/// store with write-back, decrementing, pre-indexed — the field order of
/// `LdmStm` is not visible here; confirm.
///
/// Debug builds assert that `sp` is not contained in `regs`.
#[inline]
pub fn push_pre(regs: RegReserve, sp: Reg, cond: Cond) -> u32 {
    debug_assert!(!regs.is_reserved(sp));
    let reg_list = regs.0 as u16;
    let base = u4::new(sp as u8);
    let encoded = LdmStm::new(reg_list, base, false, true, false, false, true, u3::new(0b100), u4::new(cond as u8));
    u32::from(encoded)
}

/// Builds an `LdmStm` word with `sp` as the base register and `regs` as the
/// register list, and returns it as a raw `u32`.
///
/// The five boolean arguments handed to `LdmStm::new` are
/// `true, true, false, true, false`.
/// NOTE(review): presumably load / write-back / psr / up / pre in that order,
/// i.e. a load with write-back, incrementing, post-indexed — the field order
/// of `LdmStm` is not visible here; confirm.
///
/// Debug builds assert that `sp` is not contained in `regs`.
#[inline]
pub fn pop_post(regs: RegReserve, sp: Reg, cond: Cond) -> u32 {
    debug_assert!(!regs.is_reserved(sp));
    let reg_list = regs.0 as u16;
    let base = u4::new(sp as u8);
    let encoded = LdmStm::new(reg_list, base, true, true, false, true, false, u3::new(0b100), u4::new(cond as u8));
    u32::from(encoded)
}

Expand All @@ -348,6 +352,7 @@ impl LdmStm {

/// Builds an `LdmStm` word with `sp` as the base register and `regs` as the
/// register list, and returns it as a raw `u32`.
///
/// The five boolean arguments handed to `LdmStm::new` are
/// `true, true, false, true, true`; only the last one differs from `pop_post`.
/// NOTE(review): presumably the last flag is the pre-index bit, making this a
/// load with write-back, incrementing, pre-indexed — the field order of
/// `LdmStm` is not visible here; confirm.
///
/// Debug builds assert that `sp` is not contained in `regs`.
#[inline]
pub fn pop_pre(regs: RegReserve, sp: Reg, cond: Cond) -> u32 {
    debug_assert!(!regs.is_reserved(sp));
    let reg_list = regs.0 as u16;
    let base = u4::new(sp as u8);
    let encoded = LdmStm::new(reg_list, base, true, true, false, true, true, u3::new(0b100), u4::new(cond as u8));
    u32::from(encoded)
}
}
Expand Down
22 changes: 22 additions & 0 deletions src/jit/assembler/block_asm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,28 @@ impl<'a> BlockAsm<'a> {
})
}

/// Inserts a `BlockInstKind::GuestTransferMultiple` instruction whose
/// operation is `BlockTransferOp::Read`.
///
/// `addr_reg` and `addr_out_reg` are converted to `BlockReg`; the remaining
/// arguments are stored in the instruction unchanged.
///
/// * `addr_reg` - register supplying the transfer address.
/// * `addr_out_reg` - register stored alongside it as the address output.
/// * `gp_regs` / `fixed_regs` - the two register sets carried by the instruction.
/// * `write_back`, `pre`, `add_to_base` - addressing flags forwarded as given.
pub fn guest_transfer_read_multiple(
    &mut self,
    addr_reg: impl Into<BlockReg>,
    addr_out_reg: impl Into<BlockReg>,
    gp_regs: RegReserve,
    fixed_regs: RegReserve,
    write_back: bool,
    pre: bool,
    add_to_base: bool,
) {
    // Convert in declaration order, matching the original evaluation order.
    let addr_reg: BlockReg = addr_reg.into();
    let addr_out_reg: BlockReg = addr_out_reg.into();
    let read_multiple = BlockInstKind::GuestTransferMultiple {
        op: BlockTransferOp::Read,
        addr_reg,
        addr_out_reg,
        gp_regs,
        fixed_regs,
        write_back,
        pre,
        add_to_base,
    };
    self.insert_inst(read_multiple)
}

pub fn mrs_cpsr(&mut self, operand: impl Into<BlockReg>) {
self.insert_inst(BlockInstKind::SystemReg {
op: BlockSystemRegOp::Mrs,
Expand Down
79 changes: 79 additions & 0 deletions src/jit/assembler/block_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,10 @@ impl BlockInst {
*self.io_cache.borrow_mut() = None;
self.kind.replace_output_regs(old, new);
}

/// Returns `true` when this instruction's kind is either
/// `BlockInstKind::TransferMultiple` or `BlockInstKind::GuestTransferMultiple`.
///
/// NOTE(review): presumably consulted by the register allocator to keep input
/// and output registers of multi-register transfers apart — the caller is not
/// visible here; confirm.
pub fn needs_separated_io_regs(&self) -> bool {
    let is_host_multiple = matches!(self.kind, BlockInstKind::TransferMultiple { .. });
    let is_guest_multiple = matches!(self.kind, BlockInstKind::GuestTransferMultiple { .. });
    is_host_multiple || is_guest_multiple
}
}

impl From<BlockInstKind> for BlockInst {
Expand Down Expand Up @@ -168,6 +172,16 @@ pub enum BlockInstKind {
pre: bool,
add_to_base: bool,
},
GuestTransferMultiple {
op: BlockTransferOp,
addr_reg: BlockReg,
addr_out_reg: BlockReg,
gp_regs: RegReserve,
fixed_regs: RegReserve,
write_back: bool,
pre: bool,
add_to_base: bool,
},
SystemReg {
op: BlockSystemRegOp,
operand: BlockOperand,
Expand Down Expand Up @@ -302,6 +316,26 @@ impl BlockInstKind {
),
BlockTransferOp::Write => (BlockRegSet::new_fixed(*regs) + *operand, if *write_back { block_reg_set!(Some(*operand)) } else { block_reg_set!() }),
},
BlockInstKind::GuestTransferMultiple {
op,
addr_reg,
addr_out_reg,
gp_regs,
fixed_regs,
write_back,
..
} => match op {
BlockTransferOp::Read => {
let mut outputs = BlockRegSet::new_fixed(*fixed_regs);
outputs.add_guests(*gp_regs);
(block_reg_set!(Some(*addr_reg)), if *write_back { outputs + *addr_out_reg } else { outputs })
}
BlockTransferOp::Write => {
let mut inputs = BlockRegSet::new_fixed(*fixed_regs);
inputs.add_guests(*gp_regs);
(inputs + *addr_reg, if *write_back { block_reg_set!(Some(*addr_out_reg)) } else { block_reg_set!() })
}
},
BlockInstKind::SystemReg { op, operand } => match op {
BlockSystemRegOp::Mrs => (block_reg_set!(), block_reg_set!(Some(operand.as_reg()))),
BlockSystemRegOp::Msr => (block_reg_set!(operand.try_as_reg()), block_reg_set!()),
Expand Down Expand Up @@ -443,6 +477,7 @@ impl BlockInstKind {
operands[2].replace_regs(old, new);
}
BlockInstKind::TransferMultiple { operand, .. } => Self::replace_reg(operand, old, new),
BlockInstKind::GuestTransferMultiple { addr_reg, .. } => Self::replace_reg(addr_reg, old, new),
BlockInstKind::SystemReg { op, operand } => {
if *op == BlockSystemRegOp::Msr {
Self::replace_operand(operand, old, new);
Expand Down Expand Up @@ -501,6 +536,11 @@ impl BlockInstKind {
Self::replace_reg(operand, old, new);
}
}
BlockInstKind::GuestTransferMultiple { addr_out_reg, write_back, .. } => {
if *write_back {
Self::replace_reg(addr_out_reg, old, new);
}
}
BlockInstKind::SystemReg { op, operand } => {
if *op == BlockSystemRegOp::Mrs {
Self::replace_operand(operand, old, new);
Expand Down Expand Up @@ -676,6 +716,29 @@ impl BlockInstKind {
pre,
add_to_base,
} => opcodes.push(LdmStm::generic(operand.as_fixed(), *regs, *op == BlockTransferOp::Read, *write_back, *add_to_base, *pre, Cond::AL)),
BlockInstKind::GuestTransferMultiple {
op,
addr_reg,
addr_out_reg,
gp_regs,
fixed_regs,
write_back,
pre,
add_to_base,
} => {
if *write_back && *addr_reg != *addr_out_reg {
opcodes.push(AluShiftImm::mov_al(addr_out_reg.as_fixed(), addr_reg.as_fixed()))
}
opcodes.push(LdmStm::generic(
if *write_back { addr_out_reg.as_fixed() } else { addr_reg.as_fixed() },
*gp_regs + *fixed_regs,
*op == BlockTransferOp::Read,
*write_back,
*add_to_base,
*pre,
Cond::AL,
))
}
BlockInstKind::SystemReg { op, operand } => match op {
BlockSystemRegOp::Mrs => opcodes.push(Mrs::cpsr(operand.as_reg().as_fixed(), Cond::AL)),
BlockSystemRegOp::Msr => opcodes.push(Msr::cpsr_flags(operand.as_reg().as_fixed(), Cond::AL)),
Expand Down Expand Up @@ -866,6 +929,22 @@ impl Debug for BlockInstKind {
let add_to_base = if *add_to_base { "+" } else { "-" };
write!(f, "{op:?}M {operand:?} {regs:?}, write back: {write_back}, pre {pre}, {add_to_base}base")
}
BlockInstKind::GuestTransferMultiple {
op,
addr_reg,
addr_out_reg,
gp_regs,
fixed_regs,
write_back,
pre,
add_to_base,
} => {
let add_to_base = if *add_to_base { "+" } else { "-" };
write!(
f,
"{op:?}M {addr_reg:?} -> {addr_out_reg:?} gp regs: {gp_regs:?}, fixed regs: {fixed_regs:?}, write back: {write_back}, pre {pre}, {add_to_base}base"
)
}
BlockInstKind::SystemReg { op, operand } => write!(f, "{op:?} {operand:?}"),
BlockInstKind::Bfc { operand, lsb, width } => write!(f, "Bfc {operand:?}, {lsb}, {width}"),
BlockInstKind::Bfi { operands, lsb, width } => write!(f, "Bfi {:?}, {:?}, {lsb}, {width}", operands[0], operands[1]),
Expand Down
15 changes: 10 additions & 5 deletions src/jit/assembler/block_reg_allocator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ impl BlockRegAllocator {

fn allocate_common(&mut self, any_reg: u16, live_ranges: &[BlockRegSet], used_regs: &[BlockRegSet]) -> Option<Reg> {
for reg in ALLOCATION_REGS {
if self.stored_mapping_reverse[reg as usize].is_none() {
if self.stored_mapping_reverse[reg as usize].is_none() && !used_regs[0].contains(BlockReg::Fixed(reg)) && !live_ranges[1].contains(BlockReg::Fixed(reg)) {
self.set_stored_mapping(any_reg, reg);
return Some(reg);
}
Expand Down Expand Up @@ -257,7 +257,7 @@ impl BlockRegAllocator {
for guest_reg in guest_regs {
if self.stored_mapping[guest_reg as usize] != guest_reg
// Check if reg is used as a fixed input for something else
&& (!SCRATCH_REGS.is_reserved(guest_reg) || !live_ranges[1].contains(BlockReg::Fixed(guest_reg)))
&& !live_ranges[1].contains(BlockReg::Fixed(guest_reg))
{
relocatable_regs += guest_reg;
}
Expand All @@ -266,10 +266,15 @@ impl BlockRegAllocator {
let mut spilled_regs = Vec::new();
for guest_reg in relocatable_regs {
if let Some(currently_used_by) = self.stored_mapping_reverse[guest_reg as usize] {
self.spilled += BlockReg::Any(currently_used_by);
self.gen_pre_handle_spilled_inst(currently_used_by, guest_reg, BlockTransferOp::Write);
if DEBUG && unsafe { BLOCK_LOG } {
println!("relocate guest spill {currently_used_by} for {guest_reg:?}");
}
if inputs.contains(BlockReg::Any(currently_used_by)) || live_ranges[1].contains(BlockReg::Any(currently_used_by)) {
self.spilled += BlockReg::Any(currently_used_by);
self.gen_pre_handle_spilled_inst(currently_used_by, guest_reg, BlockTransferOp::Write);
spilled_regs.push((BlockReg::Any(currently_used_by), guest_reg, self.pre_allocate_insts.len() - 1));
}
self.remove_stored_mapping(currently_used_by);
spilled_regs.push((BlockReg::Any(currently_used_by), guest_reg, self.pre_allocate_insts.len() - 1));
}
}

Expand Down
83 changes: 76 additions & 7 deletions src/jit/emitter/emit_transfer.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::core::emu::{get_mmu, get_regs};
use crate::core::CpuType;
use crate::jit::assembler::block_asm::BlockAsm;
use crate::jit::assembler::BlockOperand;
use crate::jit::assembler::{BlockOperand, BlockReg};
use crate::jit::inst_info::Operand;
use crate::jit::inst_mem_handler::{inst_mem_handler, inst_mem_handler_multiple, inst_mem_handler_swp};
use crate::jit::jit_asm::JitAsm;
Expand Down Expand Up @@ -188,10 +188,14 @@ impl<const CPU: CpuType> JitAsm<'_, CPU> {
block_asm.mov(op0, (fast_read_value_reg, ShiftType::Ror, fast_read_addr_masked_reg));
}

block_asm.nop();
block_asm.branch_fallthrough(continue_label, Cond::AL);

block_asm.label(slow_read_label);

block_asm.restore_reg(op0);
if amount == MemoryAmount::Double {
block_asm.restore_reg(Reg::from(op0 as u8 + 1));
}

block_asm.save_context();

let op0_addr = get_regs!(self.emu, CPU).get_reg(op0) as *const _ as u32;
Expand Down Expand Up @@ -246,13 +250,76 @@ impl<const CPU: CpuType> JitAsm<'_, CPU> {

let mut pre = inst_info.op.mem_transfer_pre();
let decrement = inst_info.op.mem_transfer_decrement();
if decrement {
pre = !pre;
}
let write_back = inst_info.op.mem_transfer_write_back();

let op0 = *inst_info.operands()[0].as_reg_no_shift().unwrap();

let is_valid = !rlist.is_empty() && (!write_back || !rlist.is_reserved(op0));

let slow_read_label = block_asm.new_label();
let continue_label = block_asm.new_label();

if is_valid && !inst_info.op.mem_is_write() && !inst_info.op.mem_transfer_user() && rlist.len() < RegReserve::gp().len() - 2 {
let mut gp_regs = rlist.get_gp_regs();
let mut free_gp_regs = if gp_regs.is_empty() {
RegReserve::gp()
} else {
let highest_gp_reg = gp_regs.get_highest_reg();
RegReserve::from(!((1 << (highest_gp_reg as u8 + 1)) - 1)).get_gp_regs()
};
let mut non_gp_regs = rlist - gp_regs;

while free_gp_regs.len() < non_gp_regs.len() {
let highest_gp_reg = gp_regs.get_highest_reg();
gp_regs -= highest_gp_reg;
non_gp_regs += highest_gp_reg;
free_gp_regs = if gp_regs.is_empty() {
RegReserve::gp()
} else {
RegReserve::from(!((1 << (gp_regs.get_highest_reg() as u8 + 1)) - 1)).get_gp_regs()
};
}

let mut non_gp_regs_mappings = Vec::with_capacity(non_gp_regs.len());
let mut fixed_regs = RegReserve::new();
while !free_gp_regs.is_empty() && !non_gp_regs.is_empty() {
let fixed_reg = free_gp_regs.pop().unwrap();
fixed_regs += fixed_reg;
non_gp_regs_mappings.push((non_gp_regs.pop().unwrap(), fixed_reg));
}

if non_gp_regs.is_empty() {
block_asm.branch(slow_read_label, Cond::NV);

let base_reg = block_asm.new_reg();
let base_reg_out = block_asm.new_reg();
let mmu = get_mmu!(self.emu, CPU);
let base_ptr = mmu.get_base_tcm_ptr();
block_asm.bic(base_reg, op0, 0xF0000000);
block_asm.add(base_reg, base_reg, base_ptr as u32);
block_asm.guest_transfer_read_multiple(base_reg, base_reg_out, gp_regs, fixed_regs, write_back, pre, !decrement);

for (guest_reg, fixed_reg) in non_gp_regs_mappings {
block_asm.mov(guest_reg, BlockReg::Fixed(fixed_reg));
}

if write_back {
block_asm.sub(base_reg_out, base_reg_out, base_ptr as u32);
block_asm.mov(op0, (op0.into(), ShiftType::Lsr, BlockOperand::from(0xF0000000u32.trailing_zeros())));
block_asm.bfi(base_reg_out, op0, 0xF0000000u32.trailing_zeros() as u8, 0xF0000000u32.leading_ones() as u8);
block_asm.mov(op0, base_reg_out);
}

block_asm.branch_fallthrough(continue_label, Cond::AL);

block_asm.free_reg(base_reg_out);
block_asm.free_reg(base_reg);
}
}

if decrement {
pre = !pre;
}
let func_addr: *const () = match (inst_info.op.mem_is_write(), inst_info.op.mem_transfer_user(), pre, write_back, decrement) {
(false, false, false, false, false) => inst_mem_handler_multiple::<CPU, THUMB, false, false, false, false, false> as _,
(true, false, false, false, false) => inst_mem_handler_multiple::<CPU, THUMB, true, false, false, false, false> as _,
Expand Down Expand Up @@ -288,6 +355,7 @@ impl<const CPU: CpuType> JitAsm<'_, CPU> {
(true, true, true, true, true) => inst_mem_handler_multiple::<CPU, THUMB, true, true, true, true, true> as _,
};

block_asm.label(slow_read_label);
block_asm.save_context();
block_asm.call3(
func_addr,
Expand All @@ -306,8 +374,9 @@ impl<const CPU: CpuType> JitAsm<'_, CPU> {
for reg in restore_regs {
block_asm.restore_reg(reg);
}

block_asm.restore_reg(Reg::CPSR);

block_asm.label(continue_label);
}

pub fn emit_swp(&mut self, block_asm: &mut BlockAsm) {
Expand Down
2 changes: 1 addition & 1 deletion src/jit/inst_mem_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ mod handler {
let rlist = RegReserve::from(rlist as u32);
let op0 = Reg::from(op0);

if unlikely(rlist.len() == 0) {
if unlikely(rlist.is_empty()) {
if WRITE {
*get_regs_mut!(emu, CPU).get_reg_mut(op0) -= 0x40;
} else {
Expand Down
Loading

0 comments on commit fde8405

Please sign in to comment.