diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 5f918dfe59c2..f355ffc75b96 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -544,6 +544,7 @@ impl ABIMachineImpl for AArch64MachineImpl { defs: Vec>, loc: SourceLoc, opcode: ir::Opcode, + tmp: Writable, ) -> SmallVec<[(/* is_safepoint = */ bool, Inst); 2]> { let mut insts = SmallVec::new(); match &dest { @@ -563,7 +564,7 @@ impl ABIMachineImpl for AArch64MachineImpl { insts.push(( false, Inst::LoadExtName { - rd: writable_spilltmp_reg(), + rd: tmp, name: Box::new(name.clone()), offset: 0, srcloc: loc, @@ -573,7 +574,7 @@ impl ABIMachineImpl for AArch64MachineImpl { true, Inst::CallInd { info: Box::new(CallIndInfo { - rn: spilltmp_reg(), + rn: tmp.to_reg(), uses, defs, loc, diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 4bc22357fd02..04e2b17f4321 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -1,73 +1,584 @@ //! Implementation of the standard x64 ABI. -use crate::binemit::StackMap; -use crate::ir::{self, types, ArgumentExtension, StackSlot, Type}; +use crate::ir::types::*; +use crate::ir::{self, types, SourceLoc, TrapCode, Type}; +use crate::isa; use crate::isa::{x64::inst::*, CallConv}; +use crate::machinst::abi_impl::*; use crate::machinst::*; use crate::settings; use crate::{CodegenError, CodegenResult}; use alloc::boxed::Box; use alloc::vec::Vec; use args::*; -use log::trace; -use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable}; -use std::mem; +use regalloc::{RealReg, Reg, RegClass, Set, Writable}; +use smallvec::{smallvec, SmallVec}; +use std::convert::TryFrom; /// This is the limit for the size of argument and return-value areas on the /// stack. We place a reasonable limit here to avoid integer overflow issues /// with 32-bit arithmetic: for now, 128 MB. static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024; -#[derive(Clone, Debug)] -enum ABIArg { - Reg(RealReg, ir::Type, ir::ArgumentExtension), - Stack(i64, ir::Type, ir::ArgumentExtension), +/// Try to fill a Baldrdash register, returning it if it was found. +fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option { + if call_conv.extends_baldrdash() { + match ¶m.purpose { + &ir::ArgumentPurpose::VMContext => { + // This is SpiderMonkey's `WasmTlsReg`. + Some(ABIArg::Reg( + regs::r14().to_real_reg(), + types::I64, + param.extension, + )) + } + &ir::ArgumentPurpose::SignatureId => { + // This is SpiderMonkey's `WasmTableCallSigReg`. + Some(ABIArg::Reg( + regs::r10().to_real_reg(), + types::I64, + param.extension, + )) + } + _ => None, + } + } else { + None + } } -/// X64 ABI information shared between body (callee) and caller. -struct ABISig { - /// Argument locations (regs or stack slots). Stack offsets are relative to - /// SP on entry to function. - args: Vec, - /// Return-value locations. Stack offsets are relative to the return-area - /// pointer. - rets: Vec, - /// Space on stack used to store arguments. - stack_arg_space: i64, - /// Space on stack used to store return values. - stack_ret_space: i64, - /// Index in `args` of the stack-return-value-area argument. - stack_ret_arg: Option, - /// Calling convention used. - call_conv: CallConv, -} +/// Support fo rthe x64 ABI from the callee side (within a function body). +pub type X64ABIBody = ABIBodyImpl; + +/// Support for the x64 ABI from the caller side (at a callsite). 
+pub type X64ABICall = ABICallImpl; + +/// Implementation of ABI primitives for x64. +pub struct X64ABIMachineImpl; + +impl ABIMachineImpl for X64ABIMachineImpl { + type I = Inst; + + fn compute_arg_locs( + call_conv: isa::CallConv, + params: &[ir::AbiParam], + args_or_rets: ArgsOrRets, + add_ret_area_ptr: bool, + ) -> CodegenResult<(Vec, i64, Option)> { + let is_baldrdash = call_conv.extends_baldrdash(); + + let mut next_gpr = 0; + let mut next_vreg = 0; + let mut next_stack: u64 = 0; + let mut ret = vec![]; + + for i in 0..params.len() { + // Process returns backward, according to the SpiderMonkey ABI (which we + // adopt internally if `is_baldrdash` is set). + let param = match (args_or_rets, is_baldrdash) { + (ArgsOrRets::Args, _) => ¶ms[i], + (ArgsOrRets::Rets, false) => ¶ms[i], + (ArgsOrRets::Rets, true) => ¶ms[params.len() - 1 - i], + }; + + // Validate "purpose". + match ¶m.purpose { + &ir::ArgumentPurpose::VMContext + | &ir::ArgumentPurpose::Normal + | &ir::ArgumentPurpose::StackLimit + | &ir::ArgumentPurpose::SignatureId => {} + _ => panic!( + "Unsupported argument purpose {:?} in signature: {:?}", + param.purpose, params + ), + } + + let intreg = in_int_reg(param.value_type); + let vecreg = in_vec_reg(param.value_type); + debug_assert!(intreg || vecreg); + debug_assert!(!(intreg && vecreg)); + + let (next_reg, candidate) = if intreg { + let candidate = match args_or_rets { + ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr), + ArgsOrRets::Rets => get_intreg_for_retval_systemv(&call_conv, next_gpr, i), + }; + debug_assert!(candidate + .map(|r| r.get_class() == RegClass::I64) + .unwrap_or(true)); + (&mut next_gpr, candidate) + } else { + let candidate = match args_or_rets { + ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg), + ArgsOrRets::Rets => get_fltreg_for_retval_systemv(&call_conv, next_vreg, i), + }; + debug_assert!(candidate + .map(|r| r.get_class() == RegClass::V128) + .unwrap_or(true)); + (&mut next_vreg, candidate) + }; + + if let Some(param) = try_fill_baldrdash_reg(call_conv, param) { + assert!(intreg); + ret.push(param); + } else if let Some(reg) = candidate { + ret.push(ABIArg::Reg( + reg.to_real_reg(), + param.value_type, + param.extension, + )); + *next_reg += 1; + } else { + // Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte + // stack alignment happens separately after all args.) + let size = (param.value_type.bits() / 8) as u64; + let size = std::cmp::max(size, 8); + // Align. + debug_assert!(size.is_power_of_two()); + next_stack = (next_stack + size - 1) & !(size - 1); + ret.push(ABIArg::Stack( + next_stack as i64, + param.value_type, + param.extension, + )); + next_stack += size; + } + } + + if args_or_rets == ArgsOrRets::Rets && is_baldrdash { + ret.reverse(); + } + + let extra_arg = if add_ret_area_ptr { + debug_assert!(args_or_rets == ArgsOrRets::Args); + if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) { + ret.push(ABIArg::Reg( + reg.to_real_reg(), + types::I64, + ir::ArgumentExtension::None, + )); + } else { + ret.push(ABIArg::Stack( + next_stack as i64, + types::I64, + ir::ArgumentExtension::None, + )); + next_stack += 8; + } + Some(ret.len() - 1) + } else { + None + }; + + next_stack = (next_stack + 15) & !15; + + // To avoid overflow issues, limit the arg/return size to something reasonable. 
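To make the layout arithmetic in `compute_arg_locs` concrete before the overflow guard below, here is a minimal standalone sketch; the `align_up` helper and the example argument list are illustrative only, not part of the patch.

```rust
/// Round `x` up to a multiple of `align`, which must be a power of two.
fn align_up(x: u64, align: u64) -> u64 {
    debug_assert!(align.is_power_of_two());
    (x + align - 1) & !(align - 1)
}

fn main() {
    const STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;

    // Three hypothetical stack-passed args: i32, i64, f32. Every arg takes at
    // least an 8-byte slot, so they land at offsets 0, 8, and 16.
    let mut next_stack: u64 = 0;
    for bits in [32u64, 64, 32] {
        let size = (bits / 8).max(8);
        next_stack = align_up(next_stack, size);
        println!("arg at stack offset {}", next_stack);
        next_stack += size;
    }

    // Round the whole area to 16 bytes, then apply the 128 MB guard.
    next_stack = align_up(next_stack, 16);
    assert!(next_stack <= STACK_ARG_RET_SIZE_LIMIT);
    println!("total stack arg space = {} bytes", next_stack); // 32
}
```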
+ if next_stack > STACK_ARG_RET_SIZE_LIMIT { + return Err(CodegenError::ImplLimitExceeded); + } + + Ok((ret, next_stack as i64, extra_arg)) + } + + fn fp_to_arg_offset(call_conv: isa::CallConv, flags: &settings::Flags) -> i64 { + if call_conv.extends_baldrdash() { + let num_words = flags.baldrdash_prologue_words() as i64; + debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words"); + num_words * 8 + } else { + 16 // frame pointer + return address. + } + } + + fn gen_load_stack(mem: StackAMode, into_reg: Writable, ty: Type) -> Self::I { + let (is_int, ext_mode) = match ty { + types::B1 | types::B8 | types::I8 => (true, Some(ExtMode::BQ)), + types::B16 | types::I16 => (true, Some(ExtMode::WQ)), + types::B32 | types::I32 => (true, Some(ExtMode::LQ)), + types::B64 | types::I64 | types::R64 => (true, None), + types::F32 | types::F64 => (false, None), + _ => panic!("load_stack({})", ty), + }; + + let mem = SyntheticAmode::from(mem); + + if is_int { + match ext_mode { + Some(ext_mode) => Inst::movsx_rm_r( + ext_mode, + RegMem::mem(mem), + into_reg, + /* infallible load */ None, + ), + None => Inst::mov64_m_r(mem, into_reg, None /* infallible */), + } + } else { + let sse_op = match ty { + types::F32 => SseOpcode::Movss, + types::F64 => SseOpcode::Movsd, + _ => unreachable!(), + }; + Inst::xmm_mov( + sse_op, + RegMem::mem(mem), + into_reg, + None, /* infallible */ + ) + } + } + + fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I { + let (is_int, size) = match ty { + types::B1 | types::B8 | types::I8 => (true, 1), + types::B16 | types::I16 => (true, 2), + types::B32 | types::I32 => (true, 4), + types::B64 | types::I64 | types::R64 => (true, 8), + types::F32 => (false, 4), + types::F64 => (false, 8), + _ => unimplemented!("store_stack({})", ty), + }; + + let mem = SyntheticAmode::from(mem); + + if is_int { + Inst::mov_r_m(size, from_reg, mem, /* infallible store */ None) + } else { + let sse_op = match size { + 4 => SseOpcode::Movss, + 8 => SseOpcode::Movsd, + _ => unreachable!(), + }; + Inst::xmm_mov_r_m(sse_op, from_reg, mem, /* infallible store */ None) + } + } + + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Self::I { + Inst::gen_move(to_reg, from_reg, ty) + } + + /// Generate an integer-extend operation. + fn gen_extend( + to_reg: Writable, + from_reg: Reg, + is_signed: bool, + from_bits: u8, + to_bits: u8, + ) -> Self::I { + let ext_mode = match from_bits { + 1 | 8 => ExtMode::BQ, + 16 => ExtMode::WQ, + 32 => ExtMode::LQ, + _ => panic!("Bad extension: {} bits to {} bits", from_bits, to_bits), + }; + if is_signed { + Inst::movsx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg, None) + } else { + Inst::movzx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg, None) + } + } + + fn gen_ret() -> Self::I { + Inst::Ret + } + + fn gen_epilogue_placeholder() -> Self::I { + Inst::EpiloguePlaceholder + } + + fn gen_add_imm(into_reg: Writable, from_reg: Reg, imm: u64) -> SmallVec<[Self::I; 4]> { + let imm = u32::try_from(imm).expect("Immediate too large"); + let mut ret = SmallVec::new(); + if from_reg != into_reg.to_reg() { + ret.push(Inst::gen_move(into_reg, from_reg, I64)); + } + ret.push(Inst::alu_rmi_r( + true, + AluRmiROpcode::Add, + RegMemImm::imm(imm), + into_reg, + )); + ret + } + + fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Self::I; 2]> { + smallvec![ + Inst::cmp_rmi_r(/* bytes = */ 8, RegMemImm::reg(regs::rsp()), limit_reg), + Inst::TrapIf { + // NBE == "> unsigned"; args above are reversed; this tests limit_reg > rsp. 
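Spelling out the condition that `CC::NBE` encodes below, as a host-level sketch of the semantics only (not backend code):

```rust
/// Model of `cmp rsp, limit` followed by a trap on NBE: with the operands
/// reversed as the comment above notes, the trap fires exactly when
/// limit_reg > rsp, i.e. the stack pointer has dropped below the limit.
fn stack_limit_ok(rsp: u64, limit_reg: u64) -> bool {
    limit_reg <= rsp
}

fn main() {
    assert!(stack_limit_ok(0x7fff_0000, 0x7ffe_0000)); // headroom left
    assert!(!stack_limit_ok(0x7ffd_0000, 0x7ffe_0000)); // would trap: StackOverflow
    println!("stack-limit condition behaves as documented");
}
```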
+ cc: CC::NBE, + srcloc: SourceLoc::default(), + trap_code: TrapCode::StackOverflow, + }, + ] + } + + fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable, _ty: Type) -> Self::I { + let mem: SyntheticAmode = mem.into(); + Inst::lea(mem, into_reg) + } + + fn get_fixed_tmp_reg() -> Reg { + // Use a caller-save register for this. Note that we need not exclude it + // from regalloc on x64 because `gen_add_imm()` above never clobbers a + // scratch register. Thus the sequence ends up being: gen stack limit + // into r10, always ending its previous live range just as we start a + // new one with single-instruction GV ops (loads or add-imms); then cmp + // r10 against rsp. + regs::r10() + } + + fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i64, ty: Type) -> Self::I { + assert_eq!(ty, I64); // only ever used for I64s. + let offset = i32::try_from(offset).expect("Offset out of range"); + let simm32 = offset as u32; + let mem = Amode::imm_reg(simm32, base); + Inst::mov64_m_r(mem, into_reg, None) + } + + fn gen_store_base_offset(base: Reg, offset: i64, from_reg: Reg, ty: Type) -> Self::I { + assert_eq!(ty, I64); // only ever used for I64s. + let offset = i32::try_from(offset).expect("Offset out of range"); + let simm32 = offset as u32; + let mem = Amode::imm_reg(simm32, base); + Inst::mov_r_m(/* bytes = */ 8, from_reg, mem, None) + } + + fn gen_sp_reg_adjust(amount: i64) -> SmallVec<[Self::I; 2]> { + let (alu_op, amount) = if amount >= 0 { + (AluRmiROpcode::Add, amount) + } else { + (AluRmiROpcode::Sub, -amount) + }; + + let amount = u32::try_from(amount).expect("Adjustment amount out of range"); + + smallvec![Inst::alu_rmi_r( + true, + alu_op, + RegMemImm::imm(amount), + Writable::from_reg(regs::rsp()), + )] + } + + fn gen_nominal_sp_adj(offset: i64) -> Self::I { + Inst::VirtualSPOffsetAdj { offset } + } + + fn gen_prologue_frame_setup() -> SmallVec<[Self::I; 2]> { + let r_rsp = regs::rsp(); + let r_rbp = regs::rbp(); + let w_rbp = Writable::from_reg(r_rbp); + let mut insts = SmallVec::new(); + // The "traditional" pre-preamble + // RSP before the call will be 0 % 16. So here, it is 8 % 16. + insts.push(Inst::push64(RegMemImm::reg(r_rbp))); + // RSP is now 0 % 16 + insts.push(Inst::mov_r_r(true, r_rsp, w_rbp)); + insts + } + + fn gen_epilogue_frame_restore() -> SmallVec<[Self::I; 2]> { + let mut insts = SmallVec::new(); + // Undo the "traditional" pre-preamble + insts.push(Inst::mov_r_r( + true, + regs::rbp(), + Writable::from_reg(regs::rsp()), + )); + insts.push(Inst::pop64(Writable::from_reg(regs::rbp()))); + insts + } + + fn gen_clobber_save( + call_conv: isa::CallConv, + clobbers: &Set>, + ) -> (u64, SmallVec<[Self::I; 16]>) { + let mut insts = SmallVec::new(); + let mut clobber_size = 0; + // Save callee saved registers that we trash. Keep track of how much space we've used, so + // as to know what we have to do to get the base of the spill area 0 % 16. + let clobbered = get_callee_saves(&call_conv, clobbers); + for reg in &clobbered { + let r_reg = reg.to_reg(); + match r_reg.get_class() { + RegClass::I64 => { + insts.push(Inst::push64(RegMemImm::reg(r_reg.to_reg()))); + clobber_size += 8; + } + // No XMM regs are callee-save, so we do not need to implement + // this. + _ => unimplemented!(), + } + } + // If the number of clobbered regs is odd, we need to push an extra 0 to + // maintain RSP alignment (to 16 bytes). 
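A small worked example of the parity rule that the branch below applies (the helper and the loop are illustrative; the real code simply pushes one extra 8-byte zero when the count is odd):

```rust
/// Padding needed after pushing `n` 8-byte callee-saves so RSP stays
/// 16-byte aligned.
fn clobber_padding(n_callee_saves: usize) -> u64 {
    if n_callee_saves % 2 == 1 { 8 } else { 0 }
}

fn main() {
    // After the frame setup, RSP is 0 mod 16, so each *pair* of 8-byte pushes
    // preserves alignment; an odd count needs one extra slot.
    for n in 0..4usize {
        let clobber_size = 8 * n as u64 + clobber_padding(n);
        assert_eq!(clobber_size % 16, 0);
        println!("{} callee-saves -> {} bytes reserved", n, clobber_size);
    }
}
```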
+ if clobbered.len() % 2 == 1 { + insts.push(Inst::push64(RegMemImm::imm(0))); + clobber_size += 8; + } + + (clobber_size, insts) + } + + fn gen_clobber_restore( + call_conv: isa::CallConv, + clobbers: &Set>, + ) -> SmallVec<[Self::I; 16]> { + let mut insts = SmallVec::new(); + // Restore regs. + let clobbered = get_callee_saves(&call_conv, clobbers); + if clobbered.len() % 2 == 1 { + insts.push(Inst::alu_rmi_r( + true, + AluRmiROpcode::Add, + RegMemImm::imm(8), + Writable::from_reg(regs::rsp()), + )); + } + for wreg in clobbered.into_iter().rev() { + let rreg = wreg.to_reg(); + match rreg.get_class() { + RegClass::I64 => { + // TODO: make these conversion sequences less cumbersome. + insts.push(Inst::pop64(Writable::from_reg(rreg.to_reg()))); + } + _ => unimplemented!(), + } + } + insts + } + + /// Generate a call instruction/sequence. + fn gen_call( + dest: &CallDest, + uses: Vec, + defs: Vec>, + loc: SourceLoc, + opcode: ir::Opcode, + tmp: Writable, + ) -> SmallVec<[(/* is_safepoint = */ bool, Self::I); 2]> { + let mut insts = SmallVec::new(); + match dest { + &CallDest::ExtName(ref name, RelocDistance::Near) => { + insts.push(( + /* is_safepoint = */ true, + Inst::call_known(name.clone(), uses, defs, loc, opcode), + )); + } + &CallDest::ExtName(ref name, RelocDistance::Far) => { + insts.push(( + /* is_safepoint = */ false, + Inst::LoadExtName { + dst: tmp, + name: Box::new(name.clone()), + offset: 0, + srcloc: loc, + }, + )); + insts.push(( + /* is_safepoint = */ true, + Inst::call_unknown(RegMem::reg(tmp.to_reg()), uses, defs, loc, opcode), + )); + } + &CallDest::Reg(reg) => { + insts.push(( + /* is_safepoint = */ true, + Inst::call_unknown(RegMem::reg(reg), uses, defs, loc, opcode), + )); + } + } + insts + } -pub(crate) struct X64ABIBody { - sig: ABISig, + fn get_spillslot_size(rc: RegClass, ty: Type) -> u32 { + // We allocate in terms of 8-byte slots. + match (rc, ty) { + (RegClass::I64, _) => 1, + (RegClass::V128, types::F32) | (RegClass::V128, types::F64) => 1, + (RegClass::V128, _) => 2, + _ => panic!("Unexpected register class!"), + } + } - /// Offsets to each stack slot. - stack_slots: Vec, + fn get_virtual_sp_offset_from_state(s: &::State) -> i64 { + s.virtual_sp_offset + } - /// Total stack size of all the stack slots. - stack_slots_size: usize, + fn get_nominal_sp_to_fp(s: &::State) -> i64 { + s.nominal_sp_to_fp + } - /// The register holding the return-area pointer, if needed. - ret_area_ptr: Option>, + fn get_caller_saves(call_conv: isa::CallConv) -> Vec> { + let mut caller_saved = Vec::new(); - /// Clobbered registers, as indicated by regalloc. - clobbered: Set>, + // Systemv calling convention: + // - GPR: all except RBX, RBP, R12 to R15 (which are callee-saved). + caller_saved.push(Writable::from_reg(regs::rsi())); + caller_saved.push(Writable::from_reg(regs::rdi())); + caller_saved.push(Writable::from_reg(regs::rax())); + caller_saved.push(Writable::from_reg(regs::rcx())); + caller_saved.push(Writable::from_reg(regs::rdx())); + caller_saved.push(Writable::from_reg(regs::r8())); + caller_saved.push(Writable::from_reg(regs::r9())); + caller_saved.push(Writable::from_reg(regs::r10())); + caller_saved.push(Writable::from_reg(regs::r11())); - /// Total number of spill slots, as indicated by regalloc. - num_spill_slots: Option, + if call_conv.extends_baldrdash() { + caller_saved.push(Writable::from_reg(regs::r12())); + caller_saved.push(Writable::from_reg(regs::r13())); + // Not r14; implicitly preserved in the entry. 
+ caller_saved.push(Writable::from_reg(regs::r15())); + caller_saved.push(Writable::from_reg(regs::rbx())); + } - /// Calculated while creating the prologue, and used when creating the epilogue. Amount by - /// which RSP is adjusted downwards to allocate the spill area. - frame_size_bytes: Option, + // - XMM: all the registers! + caller_saved.push(Writable::from_reg(regs::xmm0())); + caller_saved.push(Writable::from_reg(regs::xmm1())); + caller_saved.push(Writable::from_reg(regs::xmm2())); + caller_saved.push(Writable::from_reg(regs::xmm3())); + caller_saved.push(Writable::from_reg(regs::xmm4())); + caller_saved.push(Writable::from_reg(regs::xmm5())); + caller_saved.push(Writable::from_reg(regs::xmm6())); + caller_saved.push(Writable::from_reg(regs::xmm7())); + caller_saved.push(Writable::from_reg(regs::xmm8())); + caller_saved.push(Writable::from_reg(regs::xmm9())); + caller_saved.push(Writable::from_reg(regs::xmm10())); + caller_saved.push(Writable::from_reg(regs::xmm11())); + caller_saved.push(Writable::from_reg(regs::xmm12())); + caller_saved.push(Writable::from_reg(regs::xmm13())); + caller_saved.push(Writable::from_reg(regs::xmm14())); + caller_saved.push(Writable::from_reg(regs::xmm15())); - call_conv: CallConv, + caller_saved + } +} - /// The settings controlling this function's compilation. - flags: settings::Flags, +impl From for SyntheticAmode { + fn from(amode: StackAMode) -> Self { + match amode { + StackAMode::FPOffset(off, _ty) => { + let off = i32::try_from(off).unwrap(); + let simm32 = off as u32; + SyntheticAmode::Real(Amode::ImmReg { + simm32, + base: regs::rbp(), + }) + } + StackAMode::NominalSPOffset(off, _ty) => { + let off = i32::try_from(off).unwrap(); + let simm32 = off as u32; + SyntheticAmode::nominal_sp_offset(simm32) + } + StackAMode::SPOffset(off, _ty) => { + let off = i32::try_from(off).unwrap(); + let simm32 = off as u32; + SyntheticAmode::Real(Amode::ImmReg { + simm32, + base: regs::rsp(), + }) + } + } + } } fn in_int_reg(ty: types::Type) -> bool { @@ -202,1095 +713,25 @@ fn is_callee_save_baldrdash(r: RealReg) -> bool { } } -fn get_callee_saves(call_conv: &CallConv, regs: Vec>) -> Vec> { - match call_conv { +fn get_callee_saves(call_conv: &CallConv, regs: &Set>) -> Vec> { + let mut regs: Vec> = match call_conv { CallConv::BaldrdashSystemV => regs - .into_iter() + .iter() + .cloned() .filter(|r| is_callee_save_baldrdash(r.to_reg())) .collect(), CallConv::BaldrdashWindows => { todo!("baldrdash windows"); } CallConv::Fast | CallConv::Cold | CallConv::SystemV => regs - .into_iter() + .iter() + .cloned() .filter(|r| is_callee_save_systemv(r.to_reg())) .collect(), CallConv::WindowsFastcall => todo!("windows fastcall"), CallConv::Probestack => todo!("probestack?"), - } -} - -impl X64ABIBody { - /// Create a new body ABI instance. - pub(crate) fn new(f: &ir::Function, flags: settings::Flags) -> CodegenResult { - let sig = ABISig::from_func_sig(&f.signature)?; - - let call_conv = f.signature.call_conv; - debug_assert!( - call_conv == CallConv::SystemV || call_conv.extends_baldrdash(), - "unsupported or unimplemented calling convention {}", - call_conv - ); - - // Compute stackslot locations and total stackslot size. 
- let mut stack_offset: usize = 0; - let mut stack_slots = vec![]; - for (stackslot, data) in f.stack_slots.iter() { - let off = stack_offset; - stack_offset += data.size as usize; - stack_offset = (stack_offset + 7) & !7; - debug_assert_eq!(stackslot.as_u32() as usize, stack_slots.len()); - stack_slots.push(off); - } - - Ok(Self { - sig, - stack_slots, - stack_slots_size: stack_offset, - ret_area_ptr: None, - clobbered: Set::empty(), - num_spill_slots: None, - frame_size_bytes: None, - call_conv: f.signature.call_conv.clone(), - flags, - }) - } - - /// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return - /// address, and maybe other standard elements depending on ABI (e.g. Wasm TLS reg). - fn fp_to_arg_offset(&self) -> i64 { - if self.call_conv.extends_baldrdash() { - let num_words = self.flags.baldrdash_prologue_words() as i64; - debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words"); - num_words * 8 - } else { - 16 // frame pointer + return address. - } - } -} - -impl ABIBody for X64ABIBody { - type I = Inst; - - fn temp_needed(&self) -> bool { - self.sig.stack_ret_arg.is_some() - } - - fn init(&mut self, maybe_tmp: Option>) { - if self.sig.stack_ret_arg.is_some() { - assert!(maybe_tmp.is_some()); - self.ret_area_ptr = maybe_tmp; - } - } - - fn flags(&self) -> &settings::Flags { - &self.flags - } - - fn num_args(&self) -> usize { - self.sig.args.len() - } - fn num_retvals(&self) -> usize { - self.sig.rets.len() - } - fn num_stackslots(&self) -> usize { - self.stack_slots.len() - } - - fn liveins(&self) -> Set { - let mut set: Set = Set::empty(); - for arg in &self.sig.args { - if let &ABIArg::Reg(r, ..) = arg { - set.insert(r); - } - } - set - } - - fn liveouts(&self) -> Set { - let mut set: Set = Set::empty(); - for ret in &self.sig.rets { - if let &ABIArg::Reg(r, ..) 
= ret { - set.insert(r); - } - } - set - } - - fn gen_copy_arg_to_reg(&self, idx: usize, to_reg: Writable) -> Inst { - match &self.sig.args[idx] { - ABIArg::Reg(from_reg, ty, _) => Inst::gen_move(to_reg, from_reg.to_reg(), *ty), - &ABIArg::Stack(off, ty, _) => { - assert!( - self.fp_to_arg_offset() + off <= u32::max_value() as i64, - "large offset nyi" - ); - load_stack( - Amode::imm_reg((self.fp_to_arg_offset() + off) as u32, regs::rbp()), - to_reg, - ty, - ) - } - } - } - - fn gen_retval_area_setup(&self) -> Option { - if let Some(i) = self.sig.stack_ret_arg { - let inst = self.gen_copy_arg_to_reg(i, self.ret_area_ptr.unwrap()); - trace!( - "gen_retval_area_setup: inst {:?}; ptr reg is {:?}", - inst, - self.ret_area_ptr.unwrap().to_reg() - ); - Some(inst) - } else { - trace!("gen_retval_area_setup: not needed"); - None - } - } - - fn gen_copy_reg_to_retval(&self, idx: usize, from_reg: Writable) -> Vec { - let mut ret = Vec::new(); - match &self.sig.rets[idx] { - &ABIArg::Reg(r, ty, ext) => { - let from_bits = ty.bits() as u8; - let ext_mode = match from_bits { - 1 | 8 => Some(ExtMode::BQ), - 16 => Some(ExtMode::WQ), - 32 => Some(ExtMode::LQ), - 64 | 128 => None, - _ => unreachable!(), - }; - - let dest_reg = Writable::from_reg(r.to_reg()); - match (ext, ext_mode) { - (ArgumentExtension::Uext, Some(ext_mode)) => { - ret.push(Inst::movzx_rm_r( - ext_mode, - RegMem::reg(from_reg.to_reg()), - dest_reg, - /* infallible load */ None, - )); - } - (ArgumentExtension::Sext, Some(ext_mode)) => { - ret.push(Inst::movsx_rm_r( - ext_mode, - RegMem::reg(from_reg.to_reg()), - dest_reg, - /* infallible load */ None, - )); - } - _ => ret.push(Inst::gen_move(dest_reg, from_reg.to_reg(), ty)), - }; - } - - &ABIArg::Stack(off, ty, ext) => { - let from_bits = ty.bits() as u8; - let ext_mode = match from_bits { - 1 | 8 => Some(ExtMode::BQ), - 16 => Some(ExtMode::WQ), - 32 => Some(ExtMode::LQ), - 64 => None, - _ => unreachable!(), - }; - - // Trash the from_reg; it should be its last use. 
- match (ext, ext_mode) { - (ArgumentExtension::Uext, Some(ext_mode)) => { - ret.push(Inst::movzx_rm_r( - ext_mode, - RegMem::reg(from_reg.to_reg()), - from_reg, - /* infallible load */ None, - )); - } - (ArgumentExtension::Sext, Some(ext_mode)) => { - ret.push(Inst::movsx_rm_r( - ext_mode, - RegMem::reg(from_reg.to_reg()), - from_reg, - /* infallible load */ None, - )); - } - _ => {} - }; - - assert!( - off < u32::max_value() as i64, - "large stack return offset nyi" - ); - - let mem = Amode::imm_reg(off as u32, self.ret_area_ptr.unwrap().to_reg()); - ret.push(store_stack(mem, from_reg.to_reg(), ty)) - } - } - - ret - } - - fn gen_ret(&self) -> Inst { - Inst::ret() - } - - fn gen_epilogue_placeholder(&self) -> Inst { - Inst::epilogue_placeholder() - } - - fn set_num_spillslots(&mut self, slots: usize) { - self.num_spill_slots = Some(slots); - } - - fn set_clobbered(&mut self, clobbered: Set>) { - self.clobbered = clobbered; - } - - fn stackslot_addr(&self, slot: StackSlot, offset: u32, dst: Writable) -> Inst { - let stack_off = self.stack_slots[slot.as_u32() as usize] as i64; - let sp_off: i64 = stack_off + (offset as i64); - Inst::lea(SyntheticAmode::nominal_sp_offset(sp_off as u32), dst) - } - - fn load_stackslot( - &self, - _slot: StackSlot, - _offset: u32, - _ty: Type, - _into_reg: Writable, - ) -> Inst { - unimplemented!("load_stackslot") - } - - fn store_stackslot(&self, _slot: StackSlot, _offset: u32, _ty: Type, _from_reg: Reg) -> Inst { - unimplemented!("store_stackslot") - } - - fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable) -> Inst { - // Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size. - let islot = slot.get() as i64; - let spill_off = islot * 8; - let sp_off = self.stack_slots_size as i64 + spill_off; - debug_assert!(sp_off <= u32::max_value() as i64, "large spill offsets NYI"); - trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off); - load_stack( - SyntheticAmode::nominal_sp_offset(sp_off as u32), - into_reg, - ty, - ) - } - - fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Inst { - // Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size. - let islot = slot.get() as i64; - let spill_off = islot * 8; - let sp_off = self.stack_slots_size as i64 + spill_off; - debug_assert!(sp_off <= u32::max_value() as i64, "large spill offsets NYI"); - trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off); - store_stack( - SyntheticAmode::nominal_sp_offset(sp_off as u32), - from_reg, - ty, - ) - } - - fn spillslots_to_stack_map(&self, slots: &[SpillSlot], state: &EmitState) -> StackMap { - assert!(state.virtual_sp_offset >= 0); - trace!( - "spillslots_to_stack_map: slots = {:?}, state = {:?}", - slots, - state - ); - let map_size = (state.virtual_sp_offset + state.nominal_sp_to_fp) as u32; - let map_words = (map_size + 7) / 8; - let mut bits = std::iter::repeat(false) - .take(map_words as usize) - .collect::>(); - - let first_spillslot_word = (self.stack_slots_size + state.virtual_sp_offset as usize) / 8; - for &slot in slots { - let slot = slot.get() as usize; - bits[first_spillslot_word + slot] = true; - } - - StackMap::from_slice(&bits[..]) - } - - fn gen_prologue(&mut self) -> Vec { - let r_rsp = regs::rsp(); - - let mut insts = vec![]; - - // Baldrdash generates its own prologue sequence, so we don't have to. 
- if !self.call_conv.extends_baldrdash() { - let r_rbp = regs::rbp(); - let w_rbp = Writable::from_reg(r_rbp); - - // The "traditional" pre-preamble - // RSP before the call will be 0 % 16. So here, it is 8 % 16. - insts.push(Inst::push64(RegMemImm::reg(r_rbp))); - // RSP is now 0 % 16 - insts.push(Inst::mov_r_r(true, r_rsp, w_rbp)); - } - - let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec()); - let callee_saved_used: usize = clobbered - .iter() - .map(|reg| match reg.to_reg().get_class() { - RegClass::I64 => 8, - _ => todo!(), - }) - .sum(); - - let mut total_stacksize = self.stack_slots_size + 8 * self.num_spill_slots.unwrap(); - if self.call_conv.extends_baldrdash() { - // Baldrdash expects the stack to take at least the number of words set in - // baldrdash_prologue_words; count them here. - debug_assert!( - !self.flags.enable_probestack(), - "baldrdash does not expect cranelift to emit stack probes" - ); - total_stacksize += self.flags.baldrdash_prologue_words() as usize * 8; - } - - // Now make sure the frame stack is aligned, so RSP == 0 % 16 in the function's body. - let padding = (16 - ((total_stacksize + callee_saved_used) % 16)) & 15; - let frame_size = total_stacksize + padding; - debug_assert!( - frame_size <= u32::max_value() as usize, - "gen_prologue(x86): total_stacksize >= 2G" - ); - debug_assert_eq!((frame_size + callee_saved_used) % 16, 0, "misaligned stack"); - - if !self.call_conv.extends_baldrdash() { - // Explicitly allocate the frame. - let w_rsp = Writable::from_reg(r_rsp); - if frame_size > 0 { - insts.push(Inst::alu_rmi_r( - true, - AluRmiROpcode::Sub, - RegMemImm::imm(frame_size as u32), - w_rsp, - )); - } - } - - // Save callee saved registers that we trash. Keep track of how much space we've used, so - // as to know what we have to do to get the base of the spill area 0 % 16. - let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec()); - for reg in clobbered { - let r_reg = reg.to_reg(); - match r_reg.get_class() { - RegClass::I64 => { - insts.push(Inst::push64(RegMemImm::reg(r_reg.to_reg()))); - } - _ => unimplemented!(), - } - } - - if callee_saved_used > 0 { - insts.push(Inst::VirtualSPOffsetAdj { - offset: callee_saved_used as i64, - }); - } - - // Stash this value. We'll need it for the epilogue. - debug_assert!(self.frame_size_bytes.is_none()); - self.frame_size_bytes = Some(frame_size); - - insts - } - - fn gen_epilogue(&self) -> Vec { - let mut insts = vec![]; - - // Undo what we did in the prologue. - - // Restore regs. - let clobbered = get_callee_saves(&self.call_conv, self.clobbered.to_vec()); - for wreg in clobbered.into_iter().rev() { - let rreg = wreg.to_reg(); - match rreg.get_class() { - RegClass::I64 => { - // TODO: make these conversion sequences less cumbersome. - insts.push(Inst::pop64(Writable::from_reg(rreg.to_reg()))); - } - _ => unimplemented!(), - } - } - - // No need to adjust the virtual sp offset here: - // - this would create issues when there's a return in the middle of a function, - // - and nothing in this sequence may try to access stack slots from the nominal SP. - - // Clear the spill area and the 16-alignment padding below it. - if !self.call_conv.extends_baldrdash() { - let frame_size = self.frame_size_bytes.unwrap(); - if frame_size > 0 { - let r_rsp = regs::rsp(); - let w_rsp = Writable::from_reg(r_rsp); - insts.push(Inst::alu_rmi_r( - true, - AluRmiROpcode::Add, - RegMemImm::imm(frame_size as u32), - w_rsp, - )); - } - } - - // Baldrdash generates its own preamble. 
- if !self.call_conv.extends_baldrdash() { - // Undo the "traditional" pre-preamble - // RSP before the call will be 0 % 16. So here, it is 8 % 16. - insts.push(Inst::pop64(Writable::from_reg(regs::rbp()))); - insts.push(Inst::ret()); - } - - insts - } - - fn frame_size(&self) -> u32 { - self.frame_size_bytes - .expect("frame size not computed before prologue generation") as u32 - } - - fn stack_args_size(&self) -> u32 { - unimplemented!("I need to be computed!") - } - - fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 { - // We allocate in terms of 8-byte slots. - match (rc, ty) { - (RegClass::I64, _) => 1, - (RegClass::V128, types::F32) | (RegClass::V128, types::F64) => 1, - (RegClass::V128, _) => 2, - _ => panic!("Unexpected register class!"), - } - } - - fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option) -> Inst { - let ty = ty_from_ty_hint_or_reg_class(from_reg.to_reg(), ty); - self.store_spillslot(to_slot, ty, from_reg.to_reg()) - } - - fn gen_reload( - &self, - to_reg: Writable, - from_slot: SpillSlot, - ty: Option, - ) -> Inst { - let ty = ty_from_ty_hint_or_reg_class(to_reg.to_reg().to_reg(), ty); - self.load_spillslot(from_slot, ty, to_reg.map(|r| r.to_reg())) - } -} - -/// Return a type either from an optional type hint, or if not, from the default -/// type associated with the given register's class. This is used to generate -/// loads/spills appropriately given the type of value loaded/stored (which may -/// be narrower than the spillslot). We usually have the type because the -/// regalloc usually provides the vreg being spilled/reloaded, and we know every -/// vreg's type. However, the regalloc *can* request a spill/reload without an -/// associated vreg when needed to satisfy a safepoint (which requires all -/// ref-typed values, even those in real registers in the original vcode, to be -/// in spillslots). -fn ty_from_ty_hint_or_reg_class(r: Reg, ty: Option) -> Type { - match (ty, r.get_class()) { - // If the type is provided - (Some(t), _) => t, - // If no type is provided, this should be a register spill for a - // safepoint, so we only expect I64 (integer) registers. - (None, RegClass::I64) => types::I64, - _ => panic!("Unexpected register class!"), - } -} - -fn get_caller_saves(call_conv: CallConv) -> Vec> { - let mut caller_saved = Vec::new(); - - // Systemv calling convention: - // - GPR: all except RBX, RBP, R12 to R15 (which are callee-saved). - caller_saved.push(Writable::from_reg(regs::rsi())); - caller_saved.push(Writable::from_reg(regs::rdi())); - caller_saved.push(Writable::from_reg(regs::rax())); - caller_saved.push(Writable::from_reg(regs::rcx())); - caller_saved.push(Writable::from_reg(regs::rdx())); - caller_saved.push(Writable::from_reg(regs::r8())); - caller_saved.push(Writable::from_reg(regs::r9())); - caller_saved.push(Writable::from_reg(regs::r10())); - caller_saved.push(Writable::from_reg(regs::r11())); - - if call_conv.extends_baldrdash() { - caller_saved.push(Writable::from_reg(regs::r12())); - caller_saved.push(Writable::from_reg(regs::r13())); - // Not r14; implicitly preserved in the entry. - caller_saved.push(Writable::from_reg(regs::r15())); - caller_saved.push(Writable::from_reg(regs::rbx())); - } - - // - XMM: all the registers! 
- caller_saved.push(Writable::from_reg(regs::xmm0())); - caller_saved.push(Writable::from_reg(regs::xmm1())); - caller_saved.push(Writable::from_reg(regs::xmm2())); - caller_saved.push(Writable::from_reg(regs::xmm3())); - caller_saved.push(Writable::from_reg(regs::xmm4())); - caller_saved.push(Writable::from_reg(regs::xmm5())); - caller_saved.push(Writable::from_reg(regs::xmm6())); - caller_saved.push(Writable::from_reg(regs::xmm7())); - caller_saved.push(Writable::from_reg(regs::xmm8())); - caller_saved.push(Writable::from_reg(regs::xmm9())); - caller_saved.push(Writable::from_reg(regs::xmm10())); - caller_saved.push(Writable::from_reg(regs::xmm11())); - caller_saved.push(Writable::from_reg(regs::xmm12())); - caller_saved.push(Writable::from_reg(regs::xmm13())); - caller_saved.push(Writable::from_reg(regs::xmm14())); - caller_saved.push(Writable::from_reg(regs::xmm15())); - - caller_saved -} - -fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec>) { - // Compute uses: all arg regs. - let mut uses = Vec::new(); - for arg in &sig.args { - match arg { - &ABIArg::Reg(reg, ..) => uses.push(reg.to_reg()), - _ => {} - } - } - - // Compute defs: all retval regs, and all caller-save (clobbered) regs. - let mut defs = get_caller_saves(sig.call_conv); - for ret in &sig.rets { - match ret { - &ABIArg::Reg(reg, ..) => defs.push(Writable::from_reg(reg.to_reg())), - _ => {} - } - } - - (uses, defs) -} - -/// Try to fill a Baldrdash register, returning it if it was found. -fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option { - if call_conv.extends_baldrdash() { - match ¶m.purpose { - &ir::ArgumentPurpose::VMContext => { - // This is SpiderMonkey's `WasmTlsReg`. - Some(ABIArg::Reg( - regs::r14().to_real_reg(), - types::I64, - param.extension, - )) - } - &ir::ArgumentPurpose::SignatureId => { - // This is SpiderMonkey's `WasmTableCallSigReg`. - Some(ABIArg::Reg( - regs::r10().to_real_reg(), - types::I64, - param.extension, - )) - } - _ => None, - } - } else { - None - } -} - -/// Are we computing information about arguments or return values? Much of the -/// handling is factored out into common routines; this enum allows us to -/// distinguish which case we're handling. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -enum ArgsOrRets { - Args, - Rets, -} - -/// Process a list of parameters or return values and allocate them to X-regs, -/// V-regs, and stack slots. -/// -/// Returns the list of argument locations, the stack-space used (rounded up -/// to a 16-byte-aligned boundary), and if `add_ret_area_ptr` was passed, the -/// index of the extra synthetic arg that was added. -fn compute_arg_locs( - call_conv: CallConv, - params: &[ir::AbiParam], - args_or_rets: ArgsOrRets, - add_ret_area_ptr: bool, -) -> CodegenResult<(Vec, i64, Option)> { - let is_baldrdash = call_conv.extends_baldrdash(); - - let mut next_gpr = 0; - let mut next_vreg = 0; - let mut next_stack: u64 = 0; - let mut ret = vec![]; - - for i in 0..params.len() { - // Process returns backward, according to the SpiderMonkey ABI (which we - // adopt internally if `is_baldrdash` is set). - let param = match (args_or_rets, is_baldrdash) { - (ArgsOrRets::Args, _) => ¶ms[i], - (ArgsOrRets::Rets, false) => ¶ms[i], - (ArgsOrRets::Rets, true) => ¶ms[params.len() - 1 - i], - }; - - // Validate "purpose". 
- match ¶m.purpose { - &ir::ArgumentPurpose::VMContext - | &ir::ArgumentPurpose::Normal - | &ir::ArgumentPurpose::StackLimit - | &ir::ArgumentPurpose::SignatureId => {} - _ => panic!( - "Unsupported argument purpose {:?} in signature: {:?}", - param.purpose, params - ), - } - - let intreg = in_int_reg(param.value_type); - let vecreg = in_vec_reg(param.value_type); - debug_assert!(intreg || vecreg); - debug_assert!(!(intreg && vecreg)); - - let (next_reg, candidate) = if intreg { - let candidate = match args_or_rets { - ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr), - ArgsOrRets::Rets => get_intreg_for_retval_systemv(&call_conv, next_gpr, i), - }; - debug_assert!(candidate - .map(|r| r.get_class() == RegClass::I64) - .unwrap_or(true)); - (&mut next_gpr, candidate) - } else { - let candidate = match args_or_rets { - ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg), - ArgsOrRets::Rets => get_fltreg_for_retval_systemv(&call_conv, next_vreg, i), - }; - debug_assert!(candidate - .map(|r| r.get_class() == RegClass::V128) - .unwrap_or(true)); - (&mut next_vreg, candidate) - }; - - if let Some(param) = try_fill_baldrdash_reg(call_conv, param) { - assert!(intreg); - ret.push(param); - } else if let Some(reg) = candidate { - ret.push(ABIArg::Reg( - reg.to_real_reg(), - param.value_type, - param.extension, - )); - *next_reg += 1; - } else { - // Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte - // stack alignment happens separately after all args.) - let size = (param.value_type.bits() / 8) as u64; - let size = std::cmp::max(size, 8); - // Align. - debug_assert!(size.is_power_of_two()); - next_stack = (next_stack + size - 1) & !(size - 1); - ret.push(ABIArg::Stack( - next_stack as i64, - param.value_type, - param.extension, - )); - next_stack += size; - } - } - - if args_or_rets == ArgsOrRets::Rets && is_baldrdash { - ret.reverse(); - } - - let extra_arg = if add_ret_area_ptr { - debug_assert!(args_or_rets == ArgsOrRets::Args); - if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) { - ret.push(ABIArg::Reg( - reg.to_real_reg(), - types::I64, - ir::ArgumentExtension::None, - )); - } else { - ret.push(ABIArg::Stack( - next_stack as i64, - types::I64, - ir::ArgumentExtension::None, - )); - next_stack += 8; - } - Some(ret.len() - 1) - } else { - None }; - - next_stack = (next_stack + 15) & !15; - - // To avoid overflow issues, limit the arg/return size to something reasonable. - if next_stack > STACK_ARG_RET_SIZE_LIMIT { - return Err(CodegenError::ImplLimitExceeded); - } - - Ok((ret, next_stack as i64, extra_arg)) -} - -impl ABISig { - fn from_func_sig(sig: &ir::Signature) -> CodegenResult { - // Compute args and retvals from signature. Handle retvals first, - // because we may need to add a return-area arg to the args. 
- let (rets, stack_ret_space, _) = compute_arg_locs( - sig.call_conv, - &sig.returns, - ArgsOrRets::Rets, - /* extra ret-area ptr = */ false, - )?; - let need_stack_return_area = stack_ret_space > 0; - let (args, stack_arg_space, stack_ret_arg) = compute_arg_locs( - sig.call_conv, - &sig.params, - ArgsOrRets::Args, - need_stack_return_area, - )?; - - trace!( - "ABISig: sig {:?} => args = {:?} rets = {:?} arg stack = {} ret stack = {} stack_ret_arg = {:?}", - sig, - args, - rets, - stack_arg_space, - stack_ret_space, - stack_ret_arg - ); - - Ok(ABISig { - args, - rets, - stack_arg_space, - stack_ret_space, - stack_ret_arg, - call_conv: sig.call_conv, - }) - } -} - -enum CallDest { - ExtName(ir::ExternalName, RelocDistance), - Reg(Reg), -} - -fn adjust_stack>(ctx: &mut C, amount: u64, is_sub: bool) { - if amount == 0 { - return; - } - - let (alu_op, sp_adjustment) = if is_sub { - (AluRmiROpcode::Sub, amount as i64) - } else { - (AluRmiROpcode::Add, -(amount as i64)) - }; - - ctx.emit(Inst::VirtualSPOffsetAdj { - offset: sp_adjustment, - }); - - if amount <= u32::max_value() as u64 { - ctx.emit(Inst::alu_rmi_r( - true, - alu_op, - RegMemImm::imm(amount as u32), - Writable::from_reg(regs::rsp()), - )); - } else { - // TODO will require a scratch register. - unimplemented!("adjust stack with large offset"); - } -} - -fn load_stack(mem: impl Into, into_reg: Writable, ty: Type) -> Inst { - let (is_int, ext_mode) = match ty { - types::B1 | types::B8 | types::I8 => (true, Some(ExtMode::BQ)), - types::B16 | types::I16 => (true, Some(ExtMode::WQ)), - types::B32 | types::I32 => (true, Some(ExtMode::LQ)), - types::B64 | types::I64 | types::R64 => (true, None), - types::F32 | types::F64 => (false, None), - _ => panic!("load_stack({})", ty), - }; - - let mem = mem.into(); - - if is_int { - match ext_mode { - Some(ext_mode) => Inst::movsx_rm_r( - ext_mode, - RegMem::mem(mem), - into_reg, - /* infallible load */ None, - ), - None => Inst::mov64_m_r(mem, into_reg, None /* infallible */), - } - } else { - let sse_op = match ty { - types::F32 => SseOpcode::Movss, - types::F64 => SseOpcode::Movsd, - _ => unreachable!(), - }; - Inst::xmm_mov( - sse_op, - RegMem::mem(mem), - into_reg, - None, /* infallible */ - ) - } -} - -fn store_stack(mem: impl Into, from_reg: Reg, ty: Type) -> Inst { - let (is_int, size) = match ty { - types::B1 | types::B8 | types::I8 => (true, 1), - types::B16 | types::I16 => (true, 2), - types::B32 | types::I32 => (true, 4), - types::B64 | types::I64 | types::R64 => (true, 8), - types::F32 => (false, 4), - types::F64 => (false, 8), - _ => unimplemented!("store_stack({})", ty), - }; - let mem = mem.into(); - if is_int { - Inst::mov_r_m(size, from_reg, mem, /* infallible store */ None) - } else { - let sse_op = match size { - 4 => SseOpcode::Movss, - 8 => SseOpcode::Movsd, - _ => unreachable!(), - }; - Inst::xmm_mov_r_m(sse_op, from_reg, mem, /* infallible store */ None) - } -} - -/// X64 ABI object for a function call. -pub struct X64ABICall { - sig: ABISig, - uses: Vec, - defs: Vec>, - dest: CallDest, - loc: ir::SourceLoc, - opcode: ir::Opcode, -} - -impl X64ABICall { - /// Create a callsite ABI object for a call directly to the specified function. 
- pub fn from_func( - sig: &ir::Signature, - extname: &ir::ExternalName, - dist: RelocDistance, - loc: ir::SourceLoc, - ) -> CodegenResult { - let sig = ABISig::from_func_sig(sig)?; - let (uses, defs) = abisig_to_uses_and_defs(&sig); - Ok(Self { - sig, - uses, - defs, - dest: CallDest::ExtName(extname.clone(), dist), - loc, - opcode: ir::Opcode::Call, - }) - } - - /// Create a callsite ABI object for a call to a function pointer with the - /// given signature. - pub fn from_ptr( - sig: &ir::Signature, - ptr: Reg, - loc: ir::SourceLoc, - opcode: ir::Opcode, - ) -> CodegenResult { - let sig = ABISig::from_func_sig(sig)?; - let (uses, defs) = abisig_to_uses_and_defs(&sig); - Ok(Self { - sig, - uses, - defs, - dest: CallDest::Reg(ptr), - loc, - opcode, - }) - } -} - -impl ABICall for X64ABICall { - type I = Inst; - - fn num_args(&self) -> usize { - if self.sig.stack_ret_arg.is_some() { - self.sig.args.len() - 1 - } else { - self.sig.args.len() - } - } - - fn emit_stack_pre_adjust>(&self, ctx: &mut C) { - let off = self.sig.stack_arg_space + self.sig.stack_ret_space; - adjust_stack(ctx, off as u64, /* is_sub = */ true) - } - - fn emit_stack_post_adjust>(&self, ctx: &mut C) { - let off = self.sig.stack_arg_space + self.sig.stack_ret_space; - adjust_stack(ctx, off as u64, /* is_sub = */ false) - } - - fn emit_copy_reg_to_arg>( - &self, - ctx: &mut C, - idx: usize, - from_reg: Reg, - ) { - match &self.sig.args[idx] { - &ABIArg::Reg(reg, ty, ext) if ext != ir::ArgumentExtension::None && ty.bits() < 64 => { - assert_eq!(RegClass::I64, reg.get_class()); - let dest_reg = Writable::from_reg(reg.to_reg()); - let ext_mode = match ty.bits() { - 1 | 8 => ExtMode::BQ, - 16 => ExtMode::WQ, - 32 => ExtMode::LQ, - _ => unreachable!(), - }; - match ext { - ir::ArgumentExtension::Uext => { - ctx.emit(Inst::movzx_rm_r( - ext_mode, - RegMem::reg(from_reg), - dest_reg, - /* infallible load */ None, - )); - } - ir::ArgumentExtension::Sext => { - ctx.emit(Inst::movsx_rm_r( - ext_mode, - RegMem::reg(from_reg), - dest_reg, - /* infallible load */ None, - )); - } - _ => unreachable!(), - }; - } - &ABIArg::Reg(reg, ty, _) => ctx.emit(Inst::gen_move( - Writable::from_reg(reg.to_reg()), - from_reg, - ty, - )), - &ABIArg::Stack(off, ty, ext) => { - if ext != ir::ArgumentExtension::None && ty.bits() < 64 { - assert_eq!(RegClass::I64, from_reg.get_class()); - let dest_reg = Writable::from_reg(from_reg); - let ext_mode = match ty.bits() { - 1 | 8 => ExtMode::BQ, - 16 => ExtMode::WQ, - 32 => ExtMode::LQ, - _ => unreachable!(), - }; - // Extend in place in the source register. Our convention is to - // treat high bits as undefined for values in registers, so this - // is safe, even for an argument that is nominally read-only. 
- match ext { - ir::ArgumentExtension::Uext => { - ctx.emit(Inst::movzx_rm_r( - ext_mode, - RegMem::reg(from_reg), - dest_reg, - /* infallible load */ None, - )); - } - ir::ArgumentExtension::Sext => { - ctx.emit(Inst::movsx_rm_r( - ext_mode, - RegMem::reg(from_reg), - dest_reg, - /* infallible load */ None, - )); - } - _ => unreachable!(), - }; - } - - debug_assert!(off <= u32::max_value() as i64); - debug_assert!(off >= 0); - ctx.emit(store_stack( - Amode::imm_reg(off as u32, regs::rsp()), - from_reg, - ty, - )) - } - } - } - - fn emit_copy_retval_to_reg>( - &self, - ctx: &mut C, - idx: usize, - into_reg: Writable, - ) { - match &self.sig.rets[idx] { - &ABIArg::Reg(reg, ty, _) => ctx.emit(Inst::gen_move(into_reg, reg.to_reg(), ty)), - &ABIArg::Stack(off, ty, _) => { - let ret_area_base = self.sig.stack_arg_space; - let sp_offset = off + ret_area_base; - // TODO handle offsets bigger than u32::max - debug_assert!(sp_offset >= 0); - debug_assert!(sp_offset <= u32::max_value() as i64); - ctx.emit(load_stack( - Amode::imm_reg(sp_offset as u32, regs::rsp()), - into_reg, - ty, - )); - } - } - } - - fn emit_call>(&mut self, ctx: &mut C) { - let (uses, defs) = ( - mem::replace(&mut self.uses, Default::default()), - mem::replace(&mut self.defs, Default::default()), - ); - - if let Some(i) = self.sig.stack_ret_arg { - let dst = ctx.alloc_tmp(RegClass::I64, types::I64); - let ret_area_base = self.sig.stack_arg_space; - debug_assert!( - ret_area_base <= u32::max_value() as i64, - "large offset for ret area NYI" - ); - ctx.emit(Inst::lea( - Amode::imm_reg(ret_area_base as u32, regs::rsp()), - dst, - )); - self.emit_copy_reg_to_arg(ctx, i, dst.to_reg()); - } - - match &self.dest { - &CallDest::ExtName(ref name, RelocDistance::Near) => ctx.emit_safepoint( - Inst::call_known(name.clone(), uses, defs, self.loc, self.opcode), - ), - &CallDest::ExtName(ref name, RelocDistance::Far) => { - let tmp = ctx.alloc_tmp(RegClass::I64, types::I64); - ctx.emit(Inst::LoadExtName { - dst: tmp, - name: Box::new(name.clone()), - offset: 0, - srcloc: self.loc, - }); - ctx.emit_safepoint(Inst::call_unknown( - RegMem::reg(tmp.to_reg()), - uses, - defs, - self.loc, - self.opcode, - )); - } - &CallDest::Reg(reg) => ctx.emit_safepoint(Inst::call_unknown( - RegMem::reg(reg), - uses, - defs, - self.loc, - self.opcode, - )), - } - } + // Sort for determinism. + regs.sort_by_key(|r| r.to_reg().get_index()); + regs } diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs index fdf9409ae56a..a91562b42560 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -224,8 +224,10 @@ pub trait ABIMachineImpl { /// SP-based offset). fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable, ty: Type) -> Self::I; - /// Get a fixed (not used by regalloc) temp. This is needed for certain - /// sequences generated after the register allocator has already run. + /// Get a fixed temp. This is needed for certain sequences generated after the register + /// allocator has already run. Note that `gen_add_imm()` must work correctly if this register + /// is an input and/or output; hence, it cannot clobber this scratch register. It also should + /// be a caller-saved register, i.e., one that be clobbered at the start of the prologue. fn get_fixed_tmp_reg() -> Reg; /// Generate a store to the given [base+offset] address. 
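The contract just documented for `get_fixed_tmp_reg()` is easiest to see in the stack-limit sequence it exists for. Below is a toy model, not Cranelift code: every step folds into the same temp with a single instruction, which is why `gen_add_imm()` may treat the register as both input and output and never needs a second scratch (r10 being the fixed, caller-saved choice on x64).

```rust
enum Step {
    LoadLimit,   // e.g. load the stack-limit global value out of vmctx
    AddImm(u64), // e.g. add a constant offset to it
}

/// Emit a pseudo-instruction listing for the stack-limit check. Each step
/// reads and writes the same fixed temp, so its previous live range ends
/// exactly where the next single-instruction op begins.
fn lower_stack_limit(steps: &[Step]) -> Vec<String> {
    let tmp = "r10"; // fixed, caller-saved temp (see get_fixed_tmp_reg)
    let mut insts = Vec::new();
    for step in steps {
        match step {
            Step::LoadLimit => insts.push(format!("mov {tmp}, [vmctx + limit]")),
            Step::AddImm(imm) => insts.push(format!("add {tmp}, {imm}")),
        }
    }
    insts.push(format!("cmp/trap if {tmp} > rsp  ; StackOverflow"));
    insts
}

fn main() {
    for inst in lower_stack_limit(&[Step::LoadLimit, Step::AddImm(4096)]) {
        println!("{inst}");
    }
}
```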
@@ -272,13 +274,15 @@ pub trait ABIMachineImpl { clobbers: &Set>, ) -> SmallVec<[Self::I; 16]>; - /// Generate a call instruction/sequence. + /// Generate a call instruction/sequence. This method is provided one + /// temporary register to use to synthesize the called address, if needed. fn gen_call( dest: &CallDest, uses: Vec, defs: Vec>, loc: SourceLoc, opcode: ir::Opcode, + tmp: Writable, ) -> SmallVec<[(/* is_safepoint = */ bool, Self::I); 2]>; /// Get the number of spillslots required for the given register-class and @@ -1147,8 +1151,9 @@ impl ABICall for ABICallImpl { )); self.emit_copy_reg_to_arg(ctx, i, rd.to_reg()); } + let tmp = ctx.alloc_tmp(RegClass::I64, I64); for (is_safepoint, inst) in - M::gen_call(&self.dest, uses, defs, self.loc, self.opcode).into_iter() + M::gen_call(&self.dest, uses, defs, self.loc, self.opcode, tmp).into_iter() { if is_safepoint { ctx.emit_safepoint(inst); diff --git a/cranelift/filetests/filetests/vcode/aarch64/call.clif b/cranelift/filetests/filetests/vcode/aarch64/call.clif index 28d75cb36a14..9787d76358be 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/call.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/call.clif @@ -11,8 +11,8 @@ block0(v0: i64): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp -; nextln: ldr x16, 8 ; b 12 ; data -; nextln: blr x16 +; nextln: ldr x1, 8 ; b 12 ; data +; nextln: blr x1 ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret @@ -28,8 +28,8 @@ block0(v0: i32): ; check: stp fp, lr, [sp, #-16]! ; nextln: mov fp, sp ; nextln: mov w0, w0 -; nextln: ldr x16, 8 ; b 12 ; data -; nextln: blr x16 +; nextln: ldr x1, 8 ; b 12 ; data +; nextln: blr x1 ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret @@ -57,8 +57,8 @@ block0(v0: i32): ; check: stp fp, lr, [sp, #-16]! 
; nextln: mov fp, sp ; nextln: sxtw x0, w0 -; nextln: ldr x16, 8 ; b 12 ; data -; nextln: blr x16 +; nextln: ldr x1, 8 ; b 12 ; data +; nextln: blr x1 ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret diff --git a/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif b/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif index 2458516cfc59..97234a7da0d1 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/reftypes.clif @@ -83,12 +83,12 @@ block3(v7: r64, v8: r64): ; nextln: mov x19, x0 ; nextln: mov x20, x1 ; nextln: mov x0, x19 -; nextln: ldr x16, 8 ; b 12 ; data +; nextln: ldr x1, 8 ; b 12 ; data ; nextln: stur x0, [sp, #24] ; nextln: stur x19, [sp, #32] ; nextln: stur x20, [sp, #40] ; nextln: (safepoint: slots [S0, S1, S2] -; nextln: blr x16 +; nextln: blr x1 ; nextln: ldur x19, [sp, #32] ; nextln: ldur x20, [sp, #40] ; nextln: add x1, sp, #16 diff --git a/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif b/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif index 8ad03936694b..63584805fc81 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif @@ -44,8 +44,8 @@ block0(v0: i64): ; nextln: mov fp, sp ; nextln: subs xzr, sp, x0 ; nextln: b.hs 8 ; udf -; nextln: ldr x16 -; nextln: blr x16 +; nextln: ldr x0 +; nextln: blr x0 ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret @@ -67,8 +67,8 @@ block0(v0: i64): ; nextln: ldur x16, [x16, #4] ; nextln: subs xzr, sp, x16 ; nextln: b.hs 8 ; udf -; nextln: ldr x16 -; nextln: blr x16 +; nextln: ldr x0 +; nextln: blr x0 ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret
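The filetest updates above are the visible effect of the new `tmp` parameter: the address of a far external name is now materialized into a register chosen at the callsite via `ctx.alloc_tmp(...)` (`x1` in these tests) instead of the fixed spill temp `x16`. A rough, self-contained sketch of the shape of such a `gen_call`, with simplified stand-in types rather than the real Cranelift ones:

```rust
enum RelocDistance {
    Near,
    Far,
}

enum CallDest {
    ExtName(String, RelocDistance),
    Reg(&'static str),
}

#[derive(Debug)]
enum Inst {
    CallKnown { name: String },
    LoadExtName { dst: &'static str, name: String },
    CallInd { rn: &'static str },
}

/// Returns (is_safepoint, inst) pairs: only the call itself is a safepoint;
/// the address load that precedes a far call is not.
fn gen_call(dest: &CallDest, tmp: &'static str) -> Vec<(bool, Inst)> {
    match dest {
        CallDest::ExtName(name, RelocDistance::Near) => {
            vec![(true, Inst::CallKnown { name: name.clone() })]
        }
        CallDest::ExtName(name, RelocDistance::Far) => vec![
            (false, Inst::LoadExtName { dst: tmp, name: name.clone() }),
            (true, Inst::CallInd { rn: tmp }),
        ],
        CallDest::Reg(r) => vec![(true, Inst::CallInd { rn: *r })],
    }
}

fn main() {
    let far = CallDest::ExtName("f0".to_string(), RelocDistance::Far);
    let near = CallDest::ExtName("f1".to_string(), RelocDistance::Near);
    let indirect = CallDest::Reg("x7");
    // `tmp` comes from the register allocator at the callsite, not from a
    // fixed spilltmp; hence x16 -> x1 in the aarch64 expectations above.
    for dest in [&far, &near, &indirect] {
        for (is_safepoint, inst) in gen_call(dest, "x1") {
            println!("safepoint={} {:?}", is_safepoint, inst);
        }
    }
}
```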