From 1c6f4976caaf3cf77243103b49115f01ca844f02 Mon Sep 17 00:00:00 2001
From: Nick Fitzgerald
Date: Thu, 27 Apr 2023 15:15:04 -0700
Subject: [PATCH] Cranelift: Implement tail calls for x86_64

Co-Authored-By: Jamey Sharp
---
 cranelift/codegen/src/isa/aarch64/abi.rs      |  13 +
 .../codegen/src/isa/aarch64/lower/isle.rs     |  21 +
 cranelift/codegen/src/isa/riscv64/abi.rs      |  13 +
 cranelift/codegen/src/isa/riscv64/lower.isle  |   1 -
 .../codegen/src/isa/riscv64/lower/isle.rs     |  21 +
 cranelift/codegen/src/isa/s390x/abi.rs        |  13 +
 cranelift/codegen/src/isa/s390x/inst.isle     |   3 -
 cranelift/codegen/src/isa/s390x/lower/isle.rs |  21 +
 cranelift/codegen/src/isa/x64/abi.rs          |  61 ++-
 cranelift/codegen/src/isa/x64/encoding/rex.rs |   2 +-
 cranelift/codegen/src/isa/x64/inst.isle       |  42 ++
 cranelift/codegen/src/isa/x64/inst/emit.rs    | 215 +++++++-
 cranelift/codegen/src/isa/x64/inst/mod.rs     |  99 ++++
 cranelift/codegen/src/isa/x64/lower.isle      |   8 +
 cranelift/codegen/src/isa/x64/lower/isle.rs   | 141 ++++-
 cranelift/codegen/src/machinst/abi.rs         |  72 ++-
 cranelift/codegen/src/machinst/buffer.rs      |  44 +-
 cranelift/codegen/src/machinst/isle.rs        |  28 +-
 cranelift/codegen/src/machinst/mod.rs         |   2 +
 cranelift/codegen/src/machinst/vcode.rs       |   4 +-
 cranelift/codegen/src/prelude_lower.isle      |   7 +
 .../isa/x64/return-call-indirect.clif         | 230 ++++++++
 .../filetests/isa/x64/return-call.clif        | 510 ++++++++++++++++++
 .../runtests/return-call-indirect.clif        |   7 +-
 .../filetests/runtests/return-call-loop.clif  | 142 +++++
 .../filetests/runtests/return-call.clif       | 105 +++-
 .../filetests/runtests/tail-call-conv.clif    |  42 --
 27 files changed, 1768 insertions(+), 99 deletions(-)
 create mode 100644 cranelift/filetests/filetests/isa/x64/return-call-indirect.clif
 create mode 100644 cranelift/filetests/filetests/isa/x64/return-call.clif
 create mode 100644 cranelift/filetests/filetests/runtests/return-call-loop.clif

diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs
index 5c786eced9d3..7a0d3404c052 100644
--- a/cranelift/codegen/src/isa/aarch64/abi.rs
+++ b/cranelift/codegen/src/isa/aarch64/abi.rs
@@ -1047,6 +1047,19 @@ impl ABIMachineSpec for AArch64MachineDeps {
         insts
     }
 
+    fn gen_return_call(
+        _callee: CallDest,
+        _new_stack_arg_size: u32,
+        _old_stack_arg_size: u32,
+        _ret_addr: Reg,
+        _fp: Reg,
+        _tmp: Writable<Reg>,
+        _tmp2: Writable<Reg>,
+        _uses: abi::CallArgList,
+    ) -> SmallVec<[Self::I; 2]> {
+        todo!();
+    }
+
     fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
         call_conv: isa::CallConv,
         dst: Reg,
diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
index 729f1ad23dfa..1ad1de7abdb4 100644
--- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
@@ -86,6 +86,27 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
         AArch64CallSite
     );
 
+    fn gen_return_call(
+        &mut self,
+        callee_sig: SigRef,
+        callee: ExternalName,
+        distance: RelocDistance,
+        args: ValueSlice,
+    ) -> InstOutput {
+        let _ = (callee_sig, callee, distance, args);
+        todo!()
+    }
+
+    fn gen_return_call_indirect(
+        &mut self,
+        callee_sig: SigRef,
+        callee: Value,
+        args: ValueSlice,
+    ) -> InstOutput {
+        let _ = (callee_sig, callee, args);
+        todo!()
+    }
+
     fn sign_return_address_disabled(&mut self) -> Option<()> {
         if self.backend.isa_flags.sign_return_address() {
             None
diff --git a/cranelift/codegen/src/isa/riscv64/abi.rs b/cranelift/codegen/src/isa/riscv64/abi.rs
index 5eb0145cda87..01639ab97f88 100644
--- a/cranelift/codegen/src/isa/riscv64/abi.rs
+++ 
b/cranelift/codegen/src/isa/riscv64/abi.rs
@@ -584,6 +584,19 @@ impl ABIMachineSpec for Riscv64MachineDeps {
         insts
     }
 
+    fn gen_return_call(
+        _callee: CallDest,
+        _new_stack_arg_size: u32,
+        _old_stack_arg_size: u32,
+        _ret_addr: Reg,
+        _fp: Reg,
+        _tmp: Writable<Reg>,
+        _tmp2: Writable<Reg>,
+        _uses: abi::CallArgList,
+    ) -> SmallVec<[Self::I; 2]> {
+        todo!();
+    }
+
     fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
         call_conv: isa::CallConv,
         dst: Reg,
diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle
index d8193998064c..450632178933 100644
--- a/cranelift/codegen/src/isa/riscv64/lower.isle
+++ b/cranelift/codegen/src/isa/riscv64/lower.isle
@@ -1564,7 +1564,6 @@
 (rule (lower (return args))
       (lower_return args))
 
-
 ;;; Rules for `get_{frame,stack}_pointer` and `get_return_address` ;;;;;;;;;;;;;
 
 (rule (lower (get_frame_pointer))
diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs
index c03a93640131..c030f532bdc0 100644
--- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs
@@ -72,6 +72,27 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend>
     isle_lower_prelude_methods!();
     isle_prelude_caller_methods!(Riscv64MachineDeps, Riscv64ABICallSite);
 
+    fn gen_return_call(
+        &mut self,
+        callee_sig: SigRef,
+        callee: ExternalName,
+        distance: RelocDistance,
+        args: ValueSlice,
+    ) -> InstOutput {
+        let _ = (callee_sig, callee, distance, args);
+        todo!()
+    }
+
+    fn gen_return_call_indirect(
+        &mut self,
+        callee_sig: SigRef,
+        callee: Value,
+        args: ValueSlice,
+    ) -> InstOutput {
+        let _ = (callee_sig, callee, args);
+        todo!()
+    }
+
     fn vreg_new(&mut self, r: Reg) -> VReg {
         VReg::new(r).unwrap()
     }
diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs
index 1482e29c9c4a..6a6f03dfc80e 100644
--- a/cranelift/codegen/src/isa/s390x/abi.rs
+++ b/cranelift/codegen/src/isa/s390x/abi.rs
@@ -764,6 +764,19 @@ impl ABIMachineSpec for S390xMachineDeps {
         unreachable!();
     }
 
+    fn gen_return_call(
+        _callee: CallDest,
+        _new_stack_arg_size: u32,
+        _old_stack_arg_size: u32,
+        _ret_addr: Reg,
+        _fp: Reg,
+        _tmp: Writable<Reg>,
+        _tmp2: Writable<Reg>,
+        _uses: abi::CallArgList,
+    ) -> SmallVec<[Self::I; 2]> {
+        todo!();
+    }
+
     fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
         _call_conv: isa::CallConv,
         _dst: Reg,
diff --git a/cranelift/codegen/src/isa/s390x/inst.isle b/cranelift/codegen/src/isa/s390x/inst.isle
index acea782aba21..30e65266ef23 100644
--- a/cranelift/codegen/src/isa/s390x/inst.isle
+++ b/cranelift/codegen/src/isa/s390x/inst.isle
@@ -3518,9 +3518,6 @@
 
 ;; Helpers for generating `call` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-;; List of argument registers for a call instruction.
-(type CallArgList extern (enum))
-
 ;; Partial (mutable) argument list in the process of being created.
 (type CallArgListBuilder extern (enum))
diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs
index f32d38b79a61..2eff2423487b 100644
--- a/cranelift/codegen/src/isa/s390x/lower/isle.rs
+++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs
@@ -81,6 +81,27 @@ pub(crate) fn lower_branch(
 impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> {
     isle_lower_prelude_methods!();
 
+    fn gen_return_call(
+        &mut self,
+        callee_sig: SigRef,
+        callee: ExternalName,
+        distance: RelocDistance,
+        args: ValueSlice,
+    ) -> InstOutput {
+        let _ = (callee_sig, callee, distance, args);
+        todo!()
+    }
+
+    fn gen_return_call_indirect(
+        &mut self,
+        callee_sig: SigRef,
+        callee: Value,
+        args: ValueSlice,
+    ) -> InstOutput {
+        let _ = (callee_sig, callee, args);
+        todo!()
+    }
+
     #[inline]
     fn args_builder_new(&mut self) -> CallArgListBuilder {
         Cell::new(CallArgList::new())
diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs
index 1ae2de4b785f..af227d1200a3 100644
--- a/cranelift/codegen/src/isa/x64/abi.rs
+++ b/cranelift/codegen/src/isa/x64/abi.rs
@@ -700,6 +700,60 @@ impl ABIMachineSpec for X64ABIMachineSpec {
         insts
     }
 
+    fn gen_return_call(
+        callee: CallDest,
+        new_stack_arg_size: u32,
+        old_stack_arg_size: u32,
+        ret_addr: Reg,
+        fp: Reg,
+        tmp: Writable<Reg>,
+        tmp2: Writable<Reg>,
+        uses: abi::CallArgList,
+    ) -> SmallVec<[Self::I; 2]> {
+        let ret_addr = Gpr::new(ret_addr).unwrap();
+        let fp = Gpr::new(fp).unwrap();
+        let tmp = WritableGpr::from_writable_reg(tmp).unwrap();
+        match callee {
+            CallDest::ExtName(callee, RelocDistance::Near) => smallvec![Inst::ReturnCallKnown {
+                callee,
+                new_stack_arg_size,
+                old_stack_arg_size,
+                ret_addr,
+                fp,
+                tmp,
+                uses,
+            }],
+            CallDest::ExtName(callee, RelocDistance::Far) => {
+                smallvec![
+                    Inst::LoadExtName {
+                        dst: tmp2,
+                        name: Box::new(callee.clone()),
+                        offset: 0,
+                        distance: RelocDistance::Far,
+                    },
+                    Inst::ReturnCallUnknown {
+                        callee: tmp2.into(),
+                        new_stack_arg_size,
+                        old_stack_arg_size,
+                        ret_addr,
+                        fp,
+                        tmp,
+                        uses,
+                    }
+                ]
+            }
+            CallDest::Reg(callee) => smallvec![Inst::ReturnCallUnknown {
+                callee: callee.into(),
+                new_stack_arg_size,
+                old_stack_arg_size,
+                ret_addr,
+                fp,
+                tmp,
+                uses,
+            }],
+        }
+    }
+
     fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
         call_conv: isa::CallConv,
         dst: Reg,
@@ -878,10 +932,9 @@ fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Optio
             7 => Some(regs::r9()),
             8 => Some(regs::r10()),
             9 => Some(regs::r11()),
-            10 => Some(regs::r12()),
-            11 => Some(regs::r13()),
-            12 => Some(regs::r14()),
-            // NB: `r15` is reserved as a scratch register.
+            // NB: `r12`, `r13`, `r14` and `r15` are reserved for indirect
+            // callee addresses and temporaries required for our tail call
+            // sequence (fp, ret_addr, tmp).
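+            //
+            // Illustrative note (an assumption reconstructed from these
+            // match arms and the `return_call` filetests below, not text
+            // from the original sources): the resulting integer-argument
+            // order for the `tail` convention is
+            //
+            //     0..=9 => rax, rcx, rdx, rbx, rsi, rdi, r8, r9, r10, r11
+            //
+            // so an eleventh integer argument goes into the stack-argument
+            // area that the tail-call sequence copies into place.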
             _ => None,
         };
     }
diff --git a/cranelift/codegen/src/isa/x64/encoding/rex.rs b/cranelift/codegen/src/isa/x64/encoding/rex.rs
index 3586b8fc6046..70a61425cbcc 100644
--- a/cranelift/codegen/src/isa/x64/encoding/rex.rs
+++ b/cranelift/codegen/src/isa/x64/encoding/rex.rs
@@ -49,7 +49,7 @@ pub(crate) fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 {
 #[inline(always)]
 pub(crate) fn int_reg_enc(reg: impl Into<Reg>) -> u8 {
     let reg = reg.into();
-    debug_assert!(reg.is_real());
+    debug_assert!(reg.is_real(), "reg = {reg:?}");
     debug_assert_eq!(reg.class(), RegClass::Int);
     reg.to_real_reg().unwrap().hw_enc()
 }
diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle
index 2d2a87ed42c7..b23458e4bb4f 100644
--- a/cranelift/codegen/src/isa/x64/inst.isle
+++ b/cranelift/codegen/src/isa/x64/inst.isle
@@ -493,6 +493,48 @@
     (CallUnknown (dest RegMem)
                  (info BoxCallInfo))
 
+    ;; Tail call to a direct destination.
+    (ReturnCallKnown
+      ;; The function we are calling.
+      (callee ExternalName)
+      ;; The size of the new stack frame's stack arguments. This is necessary
+      ;; for copying the frame over our current frame. It must already be
+      ;; allocated on the stack.
+      (new_stack_arg_size u32)
+      ;; The size of the current/old stack frame's stack arguments.
+      (old_stack_arg_size u32)
+      ;; The return address. Needs to be written into the correct stack slot
+      ;; after the new stack frame is copied into place.
+      (ret_addr Gpr)
+      ;; A copy of the frame pointer, because we will overwrite the current
+      ;; `rbp`.
+      (fp Gpr)
+      ;; A temporary register.
+      (tmp WritableGpr)
+      ;; The in-register arguments and their constraints.
+      (uses CallArgList))
+
+    ;; Tail call to an indirect destination.
+    (ReturnCallUnknown
+      ;; The function we are calling.
+      (callee RegMem)
+      ;; The size of the new stack frame's stack arguments. This is necessary
+      ;; for copying the frame over our current frame. It must already be
+      ;; allocated on the stack.
+      (new_stack_arg_size u32)
+      ;; The size of the current/old stack frame's stack arguments.
+      (old_stack_arg_size u32)
+      ;; The return address. Needs to be written into the correct stack slot
+      ;; after the new stack frame is copied into place.
+      (ret_addr Gpr)
+      ;; A copy of the frame pointer, because we will overwrite the current
+      ;; `rbp`.
+      (fp Gpr)
+      ;; A temporary register.
+      (tmp WritableGpr)
+      ;; The in-register arguments and their constraints.
+      (uses CallArgList))
+
     ;; A pseudo-instruction that captures register arguments in vregs.
     (Args
       (args VecArgPair))
diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs
index 37d1e258ebfd..2c876a83d4d7 100644
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -1,7 +1,8 @@
 use crate::binemit::{Addend, Reloc};
+use crate::ir;
 use crate::ir::immediates::{Ieee32, Ieee64};
 use crate::ir::TrapCode;
-use crate::ir::{KnownSymbol, LibCall};
+use crate::ir::{KnownSymbol, LibCall, MemFlags};
 use crate::isa::x64::encoding::evex::{EvexInstruction, EvexVectorLength, RegisterOrAmode};
 use crate::isa::x64::encoding::rex::{
     emit_simm, emit_std_enc_enc, emit_std_enc_mem, emit_std_reg_mem, emit_std_reg_reg, int_reg_enc,
@@ -1584,6 +1585,65 @@ pub(crate) fn emit(
             state.adjust_virtual_sp_offset(-callee_pop_size);
         }
 
+        Inst::ReturnCallKnown {
+            callee,
+            new_stack_arg_size,
+            old_stack_arg_size,
+            ret_addr,
+            fp,
+            tmp,
+            uses,
+        } => {
+            emit_return_call_common_sequence(
+                allocs,
+                sink,
+                info,
+                state,
+                *new_stack_arg_size,
+                *old_stack_arg_size,
+                *ret_addr,
+                *fp,
+                *tmp,
+                uses,
+            );
+
+            // Finally, jump to the callee!
+            sink.put1(0xE9);
+            // The addend adjusts for the difference between the end of the instruction and the
+            // beginning of the immediate field.
+            emit_reloc(sink, Reloc::X86CallPCRel4, &callee, -4);
+            sink.put4(0);
+            sink.add_call_site(ir::Opcode::ReturnCall);
+        }
+
+        Inst::ReturnCallUnknown {
+            callee,
+            new_stack_arg_size,
+            old_stack_arg_size,
+            ret_addr,
+            fp,
+            tmp,
+            uses,
+        } => {
+            let callee = callee.with_allocs(allocs);
+
+            emit_return_call_common_sequence(
+                allocs,
+                sink,
+                info,
+                state,
+                *new_stack_arg_size,
+                *old_stack_arg_size,
+                *ret_addr,
+                *fp,
+                *tmp,
+                uses,
+            );
+
+            Inst::JmpUnknown { target: callee }.emit(&[], sink, info, state);
+            sink.add_call_site(ir::Opcode::ReturnCallIndirect);
+        }
+
         Inst::CallUnknown {
             dest,
             info: call_info,
@@ -4089,3 +4149,156 @@ pub(crate) fn emit(
 
     state.clear_post_insn();
 }
+
+/// Emit the common sequence used for both direct and indirect tail calls:
+///
+/// * Copy the new frame's stack arguments over the top of our current frame.
+///
+/// * Restore the old frame pointer.
+///
+/// * Initialize the tail callee's stack pointer (simultaneously deallocating
+///   the temporary stack space we allocated when creating the new frame's
+///   stack arguments).
+///
+/// * Move the return address into its stack slot.
+fn emit_return_call_common_sequence(
+    allocs: &mut AllocationConsumer<'_>,
+    sink: &mut MachBuffer<Inst>,
+    info: &EmitInfo,
+    state: &mut EmitState,
+    new_stack_arg_size: u32,
+    old_stack_arg_size: u32,
+    ret_addr: Gpr,
+    fp: Gpr,
+    tmp: WritableGpr,
+    uses: &CallArgList,
+) {
+    assert!(
+        info.flags.preserve_frame_pointers(),
+        "frame pointers aren't fundamentally required for tail calls, \
+         but the current implementation relies on them being present"
+    );
+
+    for u in uses {
+        let _ = allocs.next(u.vreg);
+    }
+
+    let ret_addr = allocs.next(*ret_addr);
+    let ret_addr = Gpr::new(ret_addr).unwrap();
+
+    let fp = allocs.next(*fp);
+
+    let tmp = allocs.next(tmp.to_reg().to_reg());
+    let tmp = Gpr::new(tmp).unwrap();
+    let tmp_w = WritableGpr::from_reg(tmp);
+
+    // Copy the new frame (`new_stack_arg_size` bytes of stack arguments,
+    // sitting just above the SP) onto our current frame, using only
+    // volatile, non-argument registers.
+    //
+    // The current stack layout is the following:
+    //
+    //                    | ...                 |
+    //                    +---------------------+
+    //                    | ...                 |
+    //                    | stack arguments     |
+    //                    | ...                 |
+    //            current | return address      |
+    //            frame   | old FP              | <-- FP
+    //                    | ...
| + // | old stack slots | + // | ... | + // +---------------------+ + // | ... | + // new | new stack arguments | + // frame | ... | <-- SP + // +---------------------+ + // + // We need to pop FP, copy the new stack arguments over the old + // stack arguments, write the return address into the correct slot + // just after the new stack arguments, adjust SP to point to the new + // return address, and then jump to the callee (which will push the + // old FP again). + + // "Pop" the old FP into `rbp`. + Inst::Mov64MR { + src: SyntheticAmode::Real(Amode::ImmReg { + simm32: 0, + base: fp, + flags: MemFlags::trusted(), + }), + dst: Writable::from_reg(Gpr::new(regs::rbp()).unwrap()), + } + .emit(&[], sink, info, state); + + // The new lowest address (top of stack) -- relative to FP -- for + // our tail callee. We compute this now so that we can move our + // stack arguments into place. + let callee_sp_relative_to_fp = old_stack_arg_size.wrapping_sub(new_stack_arg_size); + + // Copy over each word, using `tmp` as a temporary register. + // + // Note that we have to do this from stack slots with the highest + // address to lowest address because in the case of when the tail + // callee has more stack arguments than we do, we might otherwise + // overwrite some of our stack arguments before they've been copied + // into place. + assert_eq!( + new_stack_arg_size % 8, + 0, + "stack argument space sizes should always be 8-byte aligned" + ); + for i in (0..new_stack_arg_size / 8).rev() { + Inst::Mov64MR { + src: SyntheticAmode::Real(Amode::ImmReg { + simm32: i * 8, + base: regs::rsp(), + flags: MemFlags::trusted(), + }), + dst: tmp_w, + } + .emit(&[], sink, info, state); + Inst::MovRM { + size: OperandSize::Size64, + src: tmp, + dst: SyntheticAmode::Real(Amode::ImmReg { + // Add 2 because we need to skip over the old FP and the + // return address. + simm32: callee_sp_relative_to_fp.wrapping_add((i + 2) * 8), + base: fp, + flags: MemFlags::trusted(), + }), + } + .emit(&[], sink, info, state); + } + + // Initialize SP for the tail callee, deallocating the temporary + // stack arguments space at the same time. + Inst::LoadEffectiveAddress { + size: OperandSize::Size64, + addr: SyntheticAmode::Real(Amode::ImmReg { + // NB: We add a word to `callee_sp_relative_to_fp` here because the + // callee will push FP, not us. + simm32: callee_sp_relative_to_fp.wrapping_add(8), + base: fp, + flags: MemFlags::trusted(), + }), + dst: Writable::from_reg(Gpr::new(regs::rsp()).unwrap()), + } + .emit(&[], sink, info, state); + + state.adjust_virtual_sp_offset(-i64::from(new_stack_arg_size)); + + // Write the return address into the correct stack slot. + Inst::MovRM { + size: OperandSize::Size64, + src: ret_addr, + dst: SyntheticAmode::Real(Amode::ImmReg { + simm32: 0, + base: regs::rsp(), + flags: MemFlags::trusted(), + }), + } + .emit(&[], sink, info, state); +} diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 88164567fcfd..7d2805adbd5b 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -80,6 +80,8 @@ impl Inst { | Inst::Bswap { .. } | Inst::CallKnown { .. } | Inst::CallUnknown { .. } + | Inst::ReturnCallKnown { .. } + | Inst::ReturnCallUnknown { .. } | Inst::CheckedSRemSeq { .. } | Inst::CheckedSRemSeq8 { .. } | Inst::Cmove { .. 
} @@ -1576,6 +1578,65 @@ impl PrettyPrint for Inst { format!("{op} *{dest}") } + Inst::ReturnCallKnown { + callee, + new_stack_arg_size, + old_stack_arg_size, + ret_addr, + fp, + tmp, + uses, + } => { + let ret_addr = regs::show_reg(ret_addr.to_reg()); + let fp = regs::show_reg(fp.to_reg()); + let tmp = regs::show_reg(tmp.to_reg().to_reg()); + let mut s = format!( + "return_call_known \ + {callee:?} \ + new_stack_arg_size:{new_stack_arg_size} \ + old_stack_arg_size:{old_stack_arg_size} \ + ret_addr:{ret_addr} \ + fp:{fp} \ + tmp:{tmp}" + ); + for ret in uses { + let preg = regs::show_reg(ret.preg); + let vreg = pretty_print_reg(ret.vreg, 8, allocs); + write!(&mut s, " {vreg}={preg}").unwrap(); + } + s + } + + Inst::ReturnCallUnknown { + callee, + new_stack_arg_size, + old_stack_arg_size, + ret_addr, + fp, + tmp, + uses, + } => { + let callee = callee.pretty_print(8, allocs); + let ret_addr = regs::show_reg(ret_addr.to_reg()); + let fp = regs::show_reg(fp.to_reg()); + let tmp = regs::show_reg(tmp.to_reg().to_reg()); + let mut s = format!( + "return_call_unknown \ + {callee} \ + new_stack_arg_size:{new_stack_arg_size} \ + old_stack_arg_size:{old_stack_arg_size} \ + ret_addr:{ret_addr} \ + fp:{fp} \ + tmp:{tmp}" + ); + for ret in uses { + let preg = regs::show_reg(ret.preg); + let vreg = pretty_print_reg(ret.vreg, 8, allocs); + write!(&mut s, " {vreg}={preg}").unwrap(); + } + s + } + Inst::Args { args } => { let mut s = "args".to_string(); for arg in args { @@ -2212,6 +2273,41 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol collector.reg_clobbers(info.clobbers); } + Inst::ReturnCallKnown { + callee, + ret_addr, + fp, + tmp, + uses, + .. + } => { + // Same as in the `Inst::CallKnown` branch. + debug_assert_ne!(*callee, ExternalName::LibCall(LibCall::Probestack)); + for u in uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + collector.reg_use(**ret_addr); + collector.reg_use(**fp); + collector.reg_early_def(tmp.to_writable_reg()); + } + + Inst::ReturnCallUnknown { + callee, + ret_addr, + fp, + tmp, + uses, + .. + } => { + callee.get_operands(collector); + for u in uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + collector.reg_use(**ret_addr); + collector.reg_use(**fp); + collector.reg_early_def(tmp.to_writable_reg()); + } + Inst::JmpTableSeq { ref idx, ref tmp1, @@ -2393,6 +2489,9 @@ impl MachInst for Inst { match self { // Interesting cases. &Self::Ret { .. } => MachTerminator::Ret, + &Self::ReturnCallKnown { .. } | &Self::ReturnCallUnknown { .. } => { + MachTerminator::RetCall + } &Self::JmpKnown { .. } => MachTerminator::Uncond, &Self::JmpCond { .. } => MachTerminator::Cond, &Self::JmpTableSeq { .. 
} => MachTerminator::Indirect, diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 788927e7f8a7..250397f9a48a 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -3188,6 +3188,14 @@ (rule (lower (call_indirect sig_ref val inputs)) (gen_call_indirect sig_ref val inputs)) +;;;; Rules for `return_call` and `return_call_indirect` ;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (return_call (func_ref_data sig_ref extname dist) args)) + (gen_return_call sig_ref extname dist args)) + +(rule (lower (return_call_indirect sig_ref callee args)) + (gen_return_call_indirect sig_ref callee args)) + ;;;; Rules for `get_{frame,stack}_pointer` and `get_return_address` ;;;;;;;;;;;; (rule (lower (get_frame_pointer)) diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 3935b1869e88..de185bca13d1 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -5,9 +5,9 @@ pub(crate) mod generated_code; use crate::{ ir::types, ir::AtomicRmwOp, - machinst::{InputSourceInst, Reg, Writable}, + isa, isle_common_prelude_methods, isle_lower_prelude_methods, + machinst::{CallArgList, InputSourceInst, Reg, Writable}, }; -use crate::{isle_common_prelude_methods, isle_lower_prelude_methods}; use generated_code::{Context, MInst, RegisterClass}; // Types that the generated ISLE code uses via `use super::*`. @@ -79,6 +79,143 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> { isle_lower_prelude_methods!(); isle_prelude_caller_methods!(X64ABIMachineSpec, X64CallSite); + fn gen_return_call_indirect( + &mut self, + callee_sig: SigRef, + callee: Value, + args: ValueSlice, + ) -> InstOutput { + let caller_conv = isa::CallConv::Tail; + debug_assert_eq!( + self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()), + caller_conv, + "Can only do `return_call`s from within a `tail` calling convention function" + ); + + let callee = self.put_in_reg(callee); + + let mut call_site = X64CallSite::from_ptr( + self.lower_ctx.sigs(), + callee_sig, + callee, + Opcode::ReturnCallIndirect, + caller_conv, + self.backend.flags().clone(), + ) + .unwrap(); + + // First, load the return address, because copying our new stack frame + // over our current stack frame might overwrite it, and we'll need to + // place it in the correct location after we do that copy. + let fp = self.temp_writable_gpr(); + let rbp = self.preg_rbp(); + self.lower_ctx + .emit(MInst::MovFromPReg { src: rbp, dst: fp }); + let ret_addr = self.temp_writable_gpr(); + self.lower_ctx.emit(MInst::Mov64MR { + src: SyntheticAmode::Real(Amode::ImmReg { + simm32: 8, + base: fp.to_reg().to_reg(), + flags: MemFlags::trusted(), + }), + dst: ret_addr, + }); + + // Next, allocate additional stack space for the new stack frame. We + // will build it in the newly allocated space, but then copy it over our + // current frame at the last moment. + let new_stack_arg_size = call_site.emit_allocate_tail_call_frame(self.lower_ctx); + let old_stack_arg_size = self.lower_ctx.abi().stack_args_size(self.lower_ctx.sigs()); + + // Put all arguments in registers and stack slots (within that newly + // allocated stack space). + self.gen_call_common_args(&mut call_site, args); + + // Finally, emit the macro instruction to copy the new stack frame over + // our current one and do the actual tail call! 
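+        //
+        // Schematically (an illustrative sketch of what
+        // `emit_return_call_common_sequence` in emit.rs produces, not code
+        // from the original patch), the final machine sequence is:
+        //
+        //     mov rbp, [fp]                       ;; restore caller's saved RBP
+        //     for i in (0..new_stack_arg_size / 8).rev():
+        //         mov tmp, [rsp + i*8]            ;; read arg from temp area
+        //         mov [fp + delta + (i+2)*8], tmp ;; skip old FP and ret addr
+        //     lea rsp, [fp + delta + 8]           ;; the callee's SP
+        //     mov [rsp], ret_addr                 ;; return address into place
+        //     jmp *callee
+        //
+        // where delta = old_stack_arg_size - new_stack_arg_size.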
+        let tmp = self.temp_writable_gpr();
+        let tmp2 = self.temp_writable_gpr();
+        call_site.emit_return_call(
+            self.lower_ctx,
+            new_stack_arg_size,
+            old_stack_arg_size,
+            ret_addr.to_reg().to_reg(),
+            fp.to_reg().to_reg(),
+            tmp.to_writable_reg(),
+            tmp2.to_writable_reg(),
+        );
+
+        InstOutput::new()
+    }
+
+    fn gen_return_call(
+        &mut self,
+        callee_sig: SigRef,
+        callee: ExternalName,
+        distance: RelocDistance,
+        args: ValueSlice,
+    ) -> InstOutput {
+        let caller_conv = isa::CallConv::Tail;
+        debug_assert_eq!(
+            self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()),
+            caller_conv,
+            "Can only do `return_call`s from within a `tail` calling convention function"
+        );
+
+        let mut call_site = X64CallSite::from_func(
+            self.lower_ctx.sigs(),
+            callee_sig,
+            &callee,
+            distance,
+            caller_conv,
+            self.backend.flags().clone(),
+        )
+        .unwrap();
+
+        // First, load the return address, because copying our new stack frame
+        // over our current stack frame might overwrite it, and we'll need to
+        // place it in the correct location after we do that copy.
+        let fp = self.temp_writable_gpr();
+        let rbp = self.preg_rbp();
+        self.lower_ctx
+            .emit(MInst::MovFromPReg { src: rbp, dst: fp });
+        let ret_addr = self.temp_writable_gpr();
+        self.lower_ctx.emit(MInst::Mov64MR {
+            src: SyntheticAmode::Real(Amode::ImmReg {
+                simm32: 8,
+                base: fp.to_reg().to_reg(),
+                flags: MemFlags::trusted(),
+            }),
+            dst: ret_addr,
+        });
+
+        // Next, allocate additional stack space for the new stack frame. We
+        // will build it in the newly allocated space, but then copy it over
+        // our current frame at the last moment.
+        let new_stack_arg_size = call_site.emit_allocate_tail_call_frame(self.lower_ctx);
+        let old_stack_arg_size = self.lower_ctx.abi().stack_args_size(self.lower_ctx.sigs());
+
+        // Put all arguments in registers and stack slots (within that newly
+        // allocated stack space).
+        self.gen_call_common_args(&mut call_site, args);
+
+        // Finally, emit the macro instruction to copy the new stack frame over
+        // our current one and do the actual tail call!
+        let tmp = self.temp_writable_gpr();
+        let tmp2 = self.temp_writable_gpr();
+        call_site.emit_return_call(
+            self.lower_ctx,
+            new_stack_arg_size,
+            old_stack_arg_size,
+            ret_addr.to_reg().to_reg(),
+            fp.to_reg().to_reg(),
+            tmp.to_writable_reg(),
+            tmp2.to_writable_reg(),
+        );
+
+        InstOutput::new()
+    }
+
     #[inline]
     fn operand_size_of_type_32_64(&mut self, ty: Type) -> OperandSize {
         if ty.bits() == 64 {
diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs
index 54dc09c2c90d..118b3e415193 100644
--- a/cranelift/codegen/src/machinst/abi.rs
+++ b/cranelift/codegen/src/machinst/abi.rs
@@ -589,6 +589,17 @@
         callee_pop_size: u32,
     ) -> SmallVec<[Self::I; 2]>;
 
+    fn gen_return_call(
+        callee: CallDest,
+        new_stack_arg_size: u32,
+        old_stack_arg_size: u32,
+        ret_addr: Reg,
+        fp: Reg,
+        tmp: Writable<Reg>,
+        tmp2: Writable<Reg>,
+        uses: abi::CallArgList,
+    ) -> SmallVec<[Self::I; 2]>;
+
     /// Generate a memcpy invocation. Used to set up struct
     /// args. Takes `src`, `dst` as read-only inputs and passes a temporary
     /// allocator.
@@ -2167,6 +2178,27 @@ impl<M: ABIMachineSpec> CallSite<M> {
         sigs.num_args(self.sig)
     }
 
+    /// Allocate space for building a `return_call`'s temporary frame before we
+    /// copy it over the current frame.
+    pub fn emit_allocate_tail_call_frame(&self, ctx: &mut Lower<M::I>) -> u32 {
+        // The necessary stack space is:
+        //
+        //     sizeof(callee_sig.stack_args)
+        //
+        // Note that any stack return space conceptually belongs to our caller
+        // and the function we are tail calling to has the same return type and
+        // will reuse that stack return space.
+        //
+        // The return address is pushed later on, after the stack arguments are
+        // filled in.
+        let frame_size = ctx.sigs()[self.sig].sized_stack_arg_space;
+
+        let adjustment = -i32::try_from(frame_size).unwrap();
+        adjust_stack_and_nominal_sp::<M>(ctx, adjustment);
+
+        frame_size
+    }
+
     /// Emit code to pre-adjust the stack, prior to argument copies and call.
     pub fn emit_stack_pre_adjust(&self, ctx: &mut Lower<M::I>) {
         let sig = &ctx.sigs()[self.sig];
@@ -2192,11 +2224,12 @@
         adjust_stack_and_nominal_sp::<M>(ctx, stack_space);
     }
 
-    /// Emit a copy of a large argument into its associated stack buffer, if any.
-    /// We must be careful to perform all these copies (as necessary) before setting
-    /// up the argument registers, since we may have to invoke memcpy(), which could
-    /// clobber any registers already set up. The back-end should call this routine
-    /// for all arguments before calling emit_copy_regs_to_arg for all arguments.
+    /// Emit a copy of a large argument into its associated stack buffer, if
+    /// any. We must be careful to perform all these copies (as necessary)
+    /// before setting up the argument registers, since we may have to invoke
+    /// memcpy(), which could clobber any registers already set up. The
+    /// back-end should call this routine for all arguments before calling
+    /// `gen_arg` for all arguments.
     pub fn emit_copy_regs_to_buffer(
         &self,
         ctx: &mut Lower<M::I>,
@@ -2510,6 +2543,35 @@
             ctx.emit(inst);
         }
     }
+
+    /// Emit a tail call sequence.
+    ///
+    /// The returned instruction should have a proper use-set (arg registers
+    /// are uses) according to the argument registers of this function
+    /// signature in this ABI.
+    pub fn emit_return_call(
+        self,
+        ctx: &mut Lower<M::I>,
+        new_stack_arg_size: u32,
+        old_stack_arg_size: u32,
+        ret_addr: Reg,
+        fp: Reg,
+        tmp: Writable<Reg>,
+        tmp2: Writable<Reg>,
+    ) {
+        for inst in M::gen_return_call(
+            self.dest,
+            new_stack_arg_size,
+            old_stack_arg_size,
+            ret_addr,
+            fp,
+            tmp,
+            tmp2,
+            self.uses,
+        ) {
+            ctx.emit(inst);
+        }
+    }
 }
 
 #[cfg(test)]
diff --git a/cranelift/codegen/src/machinst/buffer.rs b/cranelift/codegen/src/machinst/buffer.rs
index 147cb82e4e19..e60123b65488 100644
--- a/cranelift/codegen/src/machinst/buffer.rs
+++ b/cranelift/codegen/src/machinst/buffer.rs
@@ -406,7 +406,7 @@ impl<I: VCodeInst> MachBuffer<I> {
 
     /// Add a byte.
     pub fn put1(&mut self, value: u8) {
-        trace!("MachBuffer: put byte @ {}: {:x}", self.cur_offset(), value);
+        // trace!("MachBuffer: put byte @ {}: {:x}", self.cur_offset(), value);
         self.data.push(value);
 
         // Post-invariant: conceptual-labels_at_tail contains a complete and
@@ -421,11 +421,11 @@
 
     /// Add 2 bytes.
     pub fn put2(&mut self, value: u16) {
-        trace!(
-            "MachBuffer: put 16-bit word @ {}: {:x}",
-            self.cur_offset(),
-            value
-        );
+        // trace!(
+        //     "MachBuffer: put 16-bit word @ {}: {:x}",
+        //     self.cur_offset(),
+        //     value
+        // );
         let bytes = value.to_le_bytes();
         self.data.extend_from_slice(&bytes[..]);
 
@@ -434,11 +434,11 @@
 
     /// Add 4 bytes.
pub fn put4(&mut self, value: u32) { - trace!( - "MachBuffer: put 32-bit word @ {}: {:x}", - self.cur_offset(), - value - ); + // trace!( + // "MachBuffer: put 32-bit word @ {}: {:x}", + // self.cur_offset(), + // value + // ); let bytes = value.to_le_bytes(); self.data.extend_from_slice(&bytes[..]); @@ -447,11 +447,11 @@ impl MachBuffer { /// Add 8 bytes. pub fn put8(&mut self, value: u64) { - trace!( - "MachBuffer: put 64-bit word @ {}: {:x}", - self.cur_offset(), - value - ); + // trace!( + // "MachBuffer: put 64-bit word @ {}: {:x}", + // self.cur_offset(), + // value + // ); let bytes = value.to_le_bytes(); self.data.extend_from_slice(&bytes[..]); @@ -460,11 +460,11 @@ impl MachBuffer { /// Add a slice of bytes. pub fn put_data(&mut self, data: &[u8]) { - trace!( - "MachBuffer: put data @ {}: len {}", - self.cur_offset(), - data.len() - ); + // trace!( + // "MachBuffer: put data @ {}: len {}", + // self.cur_offset(), + // data.len() + // ); self.data.extend_from_slice(data); // Post-invariant: as for `put1()`. @@ -472,7 +472,7 @@ impl MachBuffer { /// Reserve appended space and return a mutable slice referring to it. pub fn get_appended_space(&mut self, len: usize) -> &mut [u8] { - trace!("MachBuffer: put data @ {}: len {}", self.cur_offset(), len); + // trace!("MachBuffer: put data @ {}: len {}", self.cur_offset(), len); let off = self.data.len(); let new_len = self.data.len() + len; self.data.resize(new_len, 0); diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 088c04a8db0e..d97a54c9fc32 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -737,16 +737,8 @@ macro_rules! isle_prelude_caller_methods { #[doc(hidden)] macro_rules! isle_prelude_method_helpers { ($abicaller:ty) => { - fn gen_call_common( - &mut self, - abi: Sig, - num_rets: usize, - mut caller: $abicaller, - (inputs, off): ValueSlice, - ) -> InstOutput { - caller.emit_stack_pre_adjust(self.lower_ctx); - - let num_args = self.lower_ctx.sigs().num_args(abi); + fn gen_call_common_args(&mut self, call_site: &mut $abicaller, (inputs, off): ValueSlice) { + let num_args = call_site.num_args(self.lower_ctx.sigs()); assert_eq!( inputs.len(&self.lower_ctx.dfg().value_lists) - off, @@ -760,13 +752,25 @@ macro_rules! isle_prelude_method_helpers { arg_regs.push(self.put_in_regs(input)); } for (i, arg_regs) in arg_regs.iter().enumerate() { - caller.emit_copy_regs_to_buffer(self.lower_ctx, i, *arg_regs); + call_site.emit_copy_regs_to_buffer(self.lower_ctx, i, *arg_regs); } for (i, arg_regs) in arg_regs.iter().enumerate() { - for inst in caller.gen_arg(self.lower_ctx, i, *arg_regs) { + for inst in call_site.gen_arg(self.lower_ctx, i, *arg_regs) { self.lower_ctx.emit(inst); } } + } + + fn gen_call_common( + &mut self, + abi: Sig, + num_rets: usize, + mut caller: $abicaller, + args: ValueSlice, + ) -> InstOutput { + caller.emit_stack_pre_adjust(self.lower_ctx); + + self.gen_call_common_args(&mut caller, args); // Handle retvals prior to emitting call, so the // constraints are on the call instruction; but buffer the diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index ba0a5484c63f..e08384c30ab2 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -263,6 +263,8 @@ pub enum MachTerminator { None, /// A return instruction. Ret, + /// A tail call. + RetCall, /// An unconditional branch to another block. 
Uncond, /// A conditional branch to one of two other blocks. diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 5fd6255e92da..a727ad53766a 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -1255,8 +1255,8 @@ impl RegallocFunction for VCode { match self.insts[insn.index()].is_term() { // We treat blocks terminated by an unconditional trap like a return for regalloc. MachTerminator::None => self.insts[insn.index()].is_trap(), - MachTerminator::Ret => true, - _ => false, + MachTerminator::Ret | MachTerminator::RetCall => true, + MachTerminator::Uncond | MachTerminator::Cond | MachTerminator::Indirect => false, } } diff --git a/cranelift/codegen/src/prelude_lower.isle b/cranelift/codegen/src/prelude_lower.isle index 4d44745618af..82737edf6e19 100644 --- a/cranelift/codegen/src/prelude_lower.isle +++ b/cranelift/codegen/src/prelude_lower.isle @@ -180,6 +180,7 @@ (type RelocDistance (primitive RelocDistance)) (type VecArgPair extern (enum)) (type VecRetPair extern (enum)) +(type CallArgList extern (enum)) ;;;; Helper Clif Extractors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -999,6 +1000,12 @@ (decl gen_return (ValueSlice) Unit) (extern constructor gen_return gen_return) +(decl gen_return_call (SigRef ExternalName RelocDistance ValueSlice) InstOutput) +(extern constructor gen_return_call gen_return_call) + +(decl gen_return_call_indirect (SigRef Value ValueSlice) InstOutput) +(extern constructor gen_return_call_indirect gen_return_call_indirect) + ;; Helper for extracting an immediate that's not 0 and not -1 from an imm64. (decl pure partial safe_divisor_from_imm64 (Type Imm64) u64) (extern constructor safe_divisor_from_imm64 safe_divisor_from_imm64) diff --git a/cranelift/filetests/filetests/isa/x64/return-call-indirect.clif b/cranelift/filetests/filetests/isa/x64/return-call-indirect.clif new file mode 100644 index 000000000000..aceac0c2b081 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/return-call-indirect.clif @@ -0,0 +1,230 @@ +test compile precise-output +set preserve_frame_pointers=true +target x86_64 + +;;;; Test passing `i64`s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %callee_i64(i64) -> i64 tail { +block0(v0: i64): + v1 = iadd_imm.i64 v0, 10 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lea 10(%rax), %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; addq $0xa, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + +function %call_i64(i64) -> i64 tail { + fn0 = %callee_i64(i64) -> i64 tail + ; sig0 = (i64) -> i64 tail + +block0(v0: i64): + v1 = func_addr.i64 fn0 + return_call_indirect sig0, v1(v0) +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_ext_name %callee_i64+0, %r8 +; movq %rbp, %rcx +; movq 8(%rcx), %r9 +; return_call_unknown %r8 new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:%v195 fp:%v194 tmp:%v196 %rax=%rax +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movabsq $0, %r8 ; reloc_external Abs8 %callee_i64 0 +; movq %rbp, %rcx +; movq 8(%rcx), %r9 +; movq (%rcx), %rbp +; leaq 8(%rcx), %rsp +; movq %r9, (%rsp) +; jmpq *%r8 + +;;;; Test colocated tail calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %colocated_i64(i64) -> i64 tail { + fn0 = colocated %callee_i64(i64) -> i64 tail + ; sig0 = (i64) -> i64 tail + 
+block0(v0: i64): + v1 = func_addr.i64 fn0 + return_call_indirect sig0, v1(v0) +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_ext_name %callee_i64+0, %r8 +; movq %rbp, %rcx +; movq 8(%rcx), %r9 +; return_call_unknown %r8 new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:%v195 fp:%v194 tmp:%v196 %rax=%rax +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; leaq (%rip), %r8 ; reloc_external CallPCRel4 %callee_i64 -4 +; movq %rbp, %rcx +; movq 8(%rcx), %r9 +; movq (%rcx), %rbp +; leaq 8(%rcx), %rsp +; movq %r9, (%rsp) +; jmpq *%r8 + +;;;; Test passing `f64`s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %callee_f64(f64) -> f64 tail { +block0(v0: f64): + v1 = f64const 0x10.0 + v2 = fadd.f64 v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; addsd %xmm0, const(0), %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; addsd 0x14(%rip), %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %dh, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) + +function %call_f64(f64) -> f64 tail { + fn0 = %callee_f64(f64) -> f64 tail + ; sig0 = (f64) -> f64 tail + +block0(v0: f64): + v1 = func_addr.i64 fn0 + return_call_indirect sig0, v1(v0) +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_ext_name %callee_f64+0, %r8 +; movq %rbp, %rcx +; movq 8(%rcx), %r9 +; return_call_unknown %r8 new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:%v195 fp:%v194 tmp:%v196 %xmm0=%xmm0 +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movabsq $0, %r8 ; reloc_external Abs8 %callee_f64 0 +; movq %rbp, %rcx +; movq 8(%rcx), %r9 +; movq (%rcx), %rbp +; leaq 8(%rcx), %rsp +; movq %r9, (%rsp) +; jmpq *%r8 + +;;;; Test passing `i8`s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %callee_i8(i8) -> i8 tail { +block0(v0: i8): + v1 = iconst.i8 0 + v2 = icmp eq v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; testb %al, %al +; setz %al +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; testb %al, %al +; sete %al +; movq %rbp, %rsp +; popq %rbp +; retq + +function %call_i8(i8) -> i8 tail { + fn0 = %callee_i8(i8) -> i8 tail + ; sig0 = (i8) -> i8 tail + +block0(v0: i8): + v1 = func_addr.i64 fn0 + return_call_indirect sig0, v1(v0) +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_ext_name %callee_i8+0, %r8 +; movq %rbp, %rcx +; movq 8(%rcx), %r9 +; return_call_unknown %r8 new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:%v195 fp:%v194 tmp:%v196 %rax=%rax +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movabsq $0, %r8 ; reloc_external Abs8 %callee_i8 0 +; movq %rbp, %rcx +; movq 8(%rcx), %r9 +; movq (%rcx), %rbp +; leaq 8(%rcx), %rsp +; movq %r9, (%rsp) +; jmpq *%r8 + diff --git a/cranelift/filetests/filetests/isa/x64/return-call.clif b/cranelift/filetests/filetests/isa/x64/return-call.clif new file mode 100644 index 000000000000..6bc5bbcd385a --- /dev/null +++ 
b/cranelift/filetests/filetests/isa/x64/return-call.clif @@ -0,0 +1,510 @@ +test compile precise-output +set preserve_frame_pointers=true +target x86_64 + +;;;; Test passing `i64`s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %callee_i64(i64) -> i64 tail { +block0(v0: i64): + v1 = iadd_imm.i64 v0, 10 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lea 10(%rax), %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; addq $0xa, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + +function %call_i64(i64) -> i64 tail { + fn0 = %callee_i64(i64) -> i64 tail + +block0(v0: i64): + return_call fn0(v0) +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rbp, %rcx +; movq 8(%rcx), %rdx +; load_ext_name %callee_i64+0, %r10 +; return_call_unknown %r10 new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:%v194 fp:%v193 tmp:%v195 %rax=%rax +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rbp, %rcx +; movq 8(%rcx), %rdx +; movabsq $0, %r10 ; reloc_external Abs8 %callee_i64 0 +; movq (%rcx), %rbp +; leaq 8(%rcx), %rsp +; movq %rdx, (%rsp) +; jmpq *%r10 + +;;;; Test colocated tail calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %colocated_i64(i64) -> i64 tail { + fn0 = colocated %callee_i64(i64) -> i64 tail + +block0(v0: i64): + return_call fn0(v0) +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rbp, %rcx +; movq 8(%rcx), %rdx +; return_call_known TestCase(%callee_i64) new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:%v194 fp:%v193 tmp:%v195 %rax=%rax +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rbp, %rcx +; movq 8(%rcx), %rdx +; movq (%rcx), %rbp +; leaq 8(%rcx), %rsp +; movq %rdx, (%rsp) +; jmp 0x1b ; reloc_external CallPCRel4 %callee_i64 -4 + +;;;; Test passing `f64`s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %callee_f64(f64) -> f64 tail { +block0(v0: f64): + v1 = f64const 0x10.0 + v2 = fadd.f64 v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; addsd %xmm0, const(0), %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; addsd 0x14(%rip), %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %dh, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) + +function %call_f64(f64) -> f64 tail { + fn0 = %callee_f64(f64) -> f64 tail + +block0(v0: f64): + return_call fn0(v0) +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rbp, %rax +; movq 8(%rax), %rdx +; load_ext_name %callee_f64+0, %r10 +; return_call_unknown %r10 new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:%v194 fp:%v193 tmp:%v195 %xmm0=%xmm0 +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rbp, %rax +; movq 8(%rax), %rdx +; movabsq $0, %r10 ; reloc_external Abs8 %callee_f64 0 +; movq (%rax), %rbp +; leaq 8(%rax), %rsp +; movq %rdx, (%rsp) +; jmpq *%r10 + +;;;; Test passing `i8`s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %callee_i8(i8) -> i8 tail { 
+block0(v0: i8): + v1 = iconst.i8 0 + v2 = icmp eq v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; testb %al, %al +; setz %al +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; testb %al, %al +; sete %al +; movq %rbp, %rsp +; popq %rbp +; retq + +function %call_i8(i8) -> i8 tail { + fn0 = %callee_i8(i8) -> i8 tail + +block0(v0: i8): + return_call fn0(v0) +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rbp, %rcx +; movq 8(%rcx), %rdx +; load_ext_name %callee_i8+0, %r10 +; return_call_unknown %r10 new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:%v194 fp:%v193 tmp:%v195 %rax=%rax +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rbp, %rcx +; movq 8(%rcx), %rdx +; movabsq $0, %r10 ; reloc_external Abs8 %callee_i8 0 +; movq (%rcx), %rbp +; leaq 8(%rcx), %rsp +; movq %rdx, (%rsp) +; jmpq *%r10 + +;;;; Test passing many arguments on stack ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %tail_callee_stack_args(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i64 tail { +block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v8: i64, v9: i64, v10: i64, v11: i64, v12: i64, v13: i64, v14: i64, v15: i64, v16: i64, v17: i64, v18: i64, v19: i64, v20: i64, v21: i64, v22: i64, v23: i64, v24: i64, v25: i64): + return v25 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq 16(%rbp), %rax +; movq 24(%rbp), %rdx +; movq 32(%rbp), %r9 +; movq 40(%rbp), %r11 +; movq 48(%rbp), %rdi +; movq 56(%rbp), %rcx +; movq 64(%rbp), %r8 +; movq 72(%rbp), %r10 +; movq 80(%rbp), %rsi +; movq 88(%rbp), %rax +; movq 96(%rbp), %rdx +; movq 104(%rbp), %r9 +; movq 112(%rbp), %r11 +; movq 120(%rbp), %rdi +; movq 128(%rbp), %rcx +; movq 136(%rbp), %rax +; movq %rbp, %rsp +; popq %rbp +; ret 128 +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq 0x10(%rbp), %rax +; movq 0x18(%rbp), %rdx +; movq 0x20(%rbp), %r9 +; movq 0x28(%rbp), %r11 +; movq 0x30(%rbp), %rdi +; movq 0x38(%rbp), %rcx +; movq 0x40(%rbp), %r8 +; movq 0x48(%rbp), %r10 +; movq 0x50(%rbp), %rsi +; movq 0x58(%rbp), %rax +; movq 0x60(%rbp), %rdx +; movq 0x68(%rbp), %r9 +; movq 0x70(%rbp), %r11 +; movq 0x78(%rbp), %rdi +; movq 0x80(%rbp), %rcx +; movq 0x88(%rbp), %rax +; movq %rbp, %rsp +; popq %rbp +; retq $0x80 + +function %tail_caller_stack_args() -> i64 tail { + fn0 = %tail_callee_stack_args(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i64 tail + +block0: + v0 = iconst.i64 10 + v1 = iconst.i64 15 + v2 = iconst.i64 20 + v3 = iconst.i64 25 + v4 = iconst.i64 30 + v5 = iconst.i64 35 + v6 = iconst.i64 40 + v7 = iconst.i64 45 + v8 = iconst.i64 50 + v9 = iconst.i64 55 + v10 = iconst.i64 60 + v11 = iconst.i64 65 + v12 = iconst.i64 70 + v13 = iconst.i64 75 + v14 = iconst.i64 80 + v15 = iconst.i64 85 + v16 = iconst.i64 90 + v17 = iconst.i64 95 + v18 = iconst.i64 100 + v19 = iconst.i64 105 + v20 = iconst.i64 110 + v21 = iconst.i64 115 + v22 = iconst.i64 120 + v23 = iconst.i64 125 + v24 = iconst.i64 130 + v25 = iconst.i64 135 + return_call fn0(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25) +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; 
subq %rsp, $112, %rsp +; block0: +; movl $10, %eax +; movq %rax, rsp(104 + virtual offset) +; movl $15, %ecx +; movq %rcx, rsp(96 + virtual offset) +; movl $20, %edx +; movq %rdx, rsp(88 + virtual offset) +; movl $25, %ebx +; movq %rbx, rsp(80 + virtual offset) +; movl $30, %esi +; movq %rsi, rsp(72 + virtual offset) +; movl $35, %edi +; movq %rdi, rsp(64 + virtual offset) +; movl $40, %r8d +; movq %r8, rsp(56 + virtual offset) +; movl $45, %r9d +; movq %r9, rsp(48 + virtual offset) +; movl $50, %r10d +; movq %r10, rsp(40 + virtual offset) +; movl $55, %r11d +; movq %r11, rsp(32 + virtual offset) +; movl $60, %r15d +; movl $65, %r13d +; movl $70, %r14d +; movl $75, %eax +; movl $80, %ecx +; movl $85, %edx +; movl $90, %ebx +; movl $95, %esi +; movl $100, %r8d +; movq %r8, rsp(24 + virtual offset) +; movl $105, %r8d +; movl $110, %r9d +; movl $115, %r10d +; movl $120, %r11d +; movl $125, %r12d +; movq %r12, rsp(16 + virtual offset) +; movl $130, %r12d +; movq %r12, rsp(8 + virtual offset) +; movl $135, %r12d +; movq %r12, rsp(0 + virtual offset) +; movq %rbp, %r12 +; movq 8(%r12), %rdi +; subq %rsp, $128, %rsp +; virtual_sp_offset_adjust 128 +; movq %r15, 0(%rsp) +; movq %r13, 8(%rsp) +; movq %r14, 16(%rsp) +; movq %rax, 24(%rsp) +; movq %rcx, 32(%rsp) +; movq %rdx, 40(%rsp) +; movq %rbx, 48(%rsp) +; movq %rsi, 56(%rsp) +; movq rsp(24 + virtual offset), %rax +; movq %rax, 64(%rsp) +; movq %r8, 72(%rsp) +; movq %r9, 80(%rsp) +; movq %r10, 88(%rsp) +; movq %r11, 96(%rsp) +; movq rsp(16 + virtual offset), %rax +; movq %rax, 104(%rsp) +; movq rsp(8 + virtual offset), %rax +; movq %rax, 112(%rsp) +; movq rsp(0 + virtual offset), %rax +; movq %rax, 120(%rsp) +; load_ext_name %tail_callee_stack_args+0, %r15 +; movq %rdi, %r14 +; movq rsp(32 + virtual offset), %r11 +; movq rsp(40 + virtual offset), %r10 +; movq rsp(48 + virtual offset), %r9 +; movq rsp(56 + virtual offset), %r8 +; movq rsp(64 + virtual offset), %rdi +; movq rsp(72 + virtual offset), %rsi +; movq rsp(80 + virtual offset), %rbx +; movq rsp(88 + virtual offset), %rdx +; movq rsp(96 + virtual offset), %rcx +; movq rsp(104 + virtual offset), %rax +; return_call_unknown %r15 new_stack_arg_size:128 old_stack_arg_size:0 ret_addr:%v219 fp:%v218 tmp:%v220 %rax=%rax %rcx=%rcx %rdx=%rdx %rbx=%rbx %rsi=%rsi %rdi=%rdi %r8=%r8 %r9=%r9 %r10=%r10 %r11=%r11 +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; subq $0x70, %rsp +; block1: ; offset 0x8 +; movl $0xa, %eax +; movq %rax, 0x68(%rsp) +; movl $0xf, %ecx +; movq %rcx, 0x60(%rsp) +; movl $0x14, %edx +; movq %rdx, 0x58(%rsp) +; movl $0x19, %ebx +; movq %rbx, 0x50(%rsp) +; movl $0x1e, %esi +; movq %rsi, 0x48(%rsp) +; movl $0x23, %edi +; movq %rdi, 0x40(%rsp) +; movl $0x28, %r8d +; movq %r8, 0x38(%rsp) +; movl $0x2d, %r9d +; movq %r9, 0x30(%rsp) +; movl $0x32, %r10d +; movq %r10, 0x28(%rsp) +; movl $0x37, %r11d +; movq %r11, 0x20(%rsp) +; movl $0x3c, %r15d +; movl $0x41, %r13d +; movl $0x46, %r14d +; movl $0x4b, %eax +; movl $0x50, %ecx +; movl $0x55, %edx +; movl $0x5a, %ebx +; movl $0x5f, %esi +; movl $0x64, %r8d +; movq %r8, 0x18(%rsp) +; movl $0x69, %r8d +; movl $0x6e, %r9d +; movl $0x73, %r10d +; movl $0x78, %r11d +; movl $0x7d, %r12d +; movq %r12, 0x10(%rsp) +; movl $0x82, %r12d +; movq %r12, 8(%rsp) +; movl $0x87, %r12d +; movq %r12, (%rsp) +; movq %rbp, %r12 +; movq 8(%r12), %rdi +; subq $0x80, %rsp +; movq %r15, (%rsp) +; movq %r13, 8(%rsp) +; movq %r14, 0x10(%rsp) +; movq %rax, 0x18(%rsp) +; movq %rcx, 0x20(%rsp) +; movq %rdx, 0x28(%rsp) +; movq %rbx, 
0x30(%rsp) +; movq %rsi, 0x38(%rsp) +; movq 0x98(%rsp), %rax +; movq %rax, 0x40(%rsp) +; movq %r8, 0x48(%rsp) +; movq %r9, 0x50(%rsp) +; movq %r10, 0x58(%rsp) +; movq %r11, 0x60(%rsp) +; movq 0x90(%rsp), %rax +; movq %rax, 0x68(%rsp) +; movq 0x88(%rsp), %rax +; movq %rax, 0x70(%rsp) +; movq 0x80(%rsp), %rax +; movq %rax, 0x78(%rsp) +; movabsq $0, %r15 ; reloc_external Abs8 %tail_callee_stack_args 0 +; movq %rdi, %r14 +; movq 0xa0(%rsp), %r11 +; movq 0xa8(%rsp), %r10 +; movq 0xb0(%rsp), %r9 +; movq 0xb8(%rsp), %r8 +; movq 0xc0(%rsp), %rdi +; movq 0xc8(%rsp), %rsi +; movq 0xd0(%rsp), %rbx +; movq 0xd8(%rsp), %rdx +; movq 0xe0(%rsp), %rcx +; movq 0xe8(%rsp), %rax +; movq (%r12), %rbp +; movq 0x78(%rsp), %r13 +; movq %r13, 8(%r12) +; movq 0x70(%rsp), %r13 +; movq %r13, (%r12) +; movq 0x68(%rsp), %r13 +; movq %r13, -8(%r12) +; movq 0x60(%rsp), %r13 +; movq %r13, -0x10(%r12) +; movq 0x58(%rsp), %r13 +; movq %r13, -0x18(%r12) +; movq 0x50(%rsp), %r13 +; movq %r13, -0x20(%r12) +; movq 0x48(%rsp), %r13 +; movq %r13, -0x28(%r12) +; movq 0x40(%rsp), %r13 +; movq %r13, -0x30(%r12) +; movq 0x38(%rsp), %r13 +; movq %r13, -0x38(%r12) +; movq 0x30(%rsp), %r13 +; movq %r13, -0x40(%r12) +; movq 0x28(%rsp), %r13 +; movq %r13, -0x48(%r12) +; movq 0x20(%rsp), %r13 +; movq %r13, -0x50(%r12) +; movq 0x18(%rsp), %r13 +; movq %r13, -0x58(%r12) +; movq 0x10(%rsp), %r13 +; movq %r13, -0x60(%r12) +; movq 8(%rsp), %r13 +; movq %r13, -0x68(%r12) +; movq (%rsp), %r13 +; movq %r13, -0x70(%r12) +; leaq -0x78(%r12), %rsp +; movq %r14, (%rsp) +; jmpq *%r15 + diff --git a/cranelift/filetests/filetests/runtests/return-call-indirect.clif b/cranelift/filetests/filetests/runtests/return-call-indirect.clif index 4277aaa28803..698ead772d93 100644 --- a/cranelift/filetests/filetests/runtests/return-call-indirect.clif +++ b/cranelift/filetests/filetests/runtests/return-call-indirect.clif @@ -1,6 +1,9 @@ test interpret -;; test run -;; target x86_64 +test run + +set preserve_frame_pointers=true + +target x86_64 ;; target aarch64 ;; target aarch64 sign_return_address ;; target aarch64 has_pauth sign_return_address diff --git a/cranelift/filetests/filetests/runtests/return-call-loop.clif b/cranelift/filetests/filetests/runtests/return-call-loop.clif new file mode 100644 index 000000000000..7778b61fa382 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/return-call-loop.clif @@ -0,0 +1,142 @@ +test run +set preserve_frame_pointers=true +target x86_64 +;; target aarch64 +;; target aarch64 sign_return_address +;; target aarch64 has_pauth sign_return_address +;; target s390x + +;;;; Tail-Recursive Loop ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %loop(i64, i64) -> i8 tail { + fn0 = colocated %loop(i64, i64) -> i8 tail + +block0(v0: i64, v1: i64): + brif v0, block2, block1 + +;; Loop counter reached zero, we're done iterating. Return a comparison of the +;; current SP and the first loop iteration's SP. +block1: + v2 = get_stack_pointer.i64 + v3 = icmp eq v1, v2 + return v3 + +;; Another iteration of the loop. Decrement the loop counter and, if this was +;; our first iteration, grab the initial SP. +block2: + v4 = iadd_imm v0, -1 + brif v1, block4(v1), block3 + +;; Grab the initial SP. +block3: + v5 = get_stack_pointer.i64 + jump block4(v5) + +;; Continue the loop. 
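+;;
+;; (Illustrative note, an editorial addition rather than part of the original
+;; test: with 1,000,000 iterations, any per-call stack growth would quickly
+;; overflow the stack, and the `icmp eq` of the first and last iterations'
+;; stack pointers in block1 additionally checks that every `return_call`
+;; reuses exactly the same frame.)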
+block4(v6: i64): + return_call fn0(v4, v6) +} + +; run: %loop(1_000_000, 0) == 1 + +;;;; Tail-Recursive Loop With Stack Arguments ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %loop_stack_args(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i8 tail { + fn0 = %loop_stack_args(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i8 tail + +block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v8: i64, v9: i64, v10: i64, v11: i64, v12: i64, v13: i64, v14: i64, v15: i64, v16: i64, v17: i64, v18: i64, v19: i64, v20: i64, v21: i64, v22: i64, v23: i64, v24: i64, v25: i64): + brif v0, block2, block1 + +;; Loop counter reached zero, we're done iterating. Return a comparison of the +;; current SP and the first loop iteration's SP. +block1: + v26 = get_stack_pointer.i64 + v27 = icmp eq v25, v26 + return v27 + +;; Another iteration of the loop. Decrement the loop counter and, if this was +;; our first iteration, grab the initial SP. +block2: + v28 = iadd_imm v0, -1 + brif v25, block4(v25), block3 + +;; Grab the initial SP. +block3: + v29 = get_stack_pointer.i64 + jump block4(v29) + +;; Continue the loop. +block4(v30: i64): + return_call fn0(v28, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v30) +} + +; run: %loop_stack_args(1_000_000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) == 1 + +;;;; Mutually-Recursive Loop With Different #s of Stack Arguments ;;;;;;;;;;;;;; + +function %mutual_loop_1(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i8 tail { + fn0 = %mutual_loop_2(i64, i64) -> i8 tail + +block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v8: i64, v9: i64, v10: i64, v11: i64, v12: i64, v13: i64, v14: i64, v15: i64, v16: i64, v17: i64, v18: i64, v19: i64, v20: i64, v21: i64, v22: i64, v23: i64, v24: i64, v25: i64): + brif v0, block2, block1 + +;; Loop counter reached zero, we're done iterating. Return a comparison of the +;; current SP and the first loop iteration's SP. +block1: + v26 = get_stack_pointer.i64 + v27 = icmp eq v1, v26 + return v27 + +;; Another iteration of the loop. If this was our first iteration, grab the +;; initial SP. +block2: + brif v1, block4(v1), block3 + +;; Grab the initial SP. +block3: + v29 = get_stack_pointer.i64 + jump block4(v29) + +;; Continue the loop. 
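+;;
+;; (Illustrative note, an editorial addition rather than part of the original
+;; test: %mutual_loop_1 takes 26 arguments but tail-calls the 2-argument
+;; %mutual_loop_2, which tail-calls back with 26, so each hop exercises both a
+;; shrinking and a growing stack-argument area, i.e. both signs of
+;; `old_stack_arg_size - new_stack_arg_size` in the frame-copy sequence.)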
+
+;;;; Tail-Recursive Loop With Stack Arguments ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+function %loop_stack_args(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i8 tail {
+    fn0 = %loop_stack_args(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i8 tail
+
+block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v8: i64, v9: i64, v10: i64, v11: i64, v12: i64, v13: i64, v14: i64, v15: i64, v16: i64, v17: i64, v18: i64, v19: i64, v20: i64, v21: i64, v22: i64, v23: i64, v24: i64, v25: i64):
+    brif v0, block2, block1
+
+;; Loop counter reached zero; we're done iterating. Return a comparison of the
+;; current SP and the first loop iteration's SP.
+block1:
+    v26 = get_stack_pointer.i64
+    v27 = icmp eq v25, v26
+    return v27
+
+;; Another iteration of the loop. Decrement the loop counter and, if this was
+;; our first iteration, grab the initial SP.
+block2:
+    v28 = iadd_imm v0, -1
+    brif v25, block4(v25), block3
+
+;; Grab the initial SP.
+block3:
+    v29 = get_stack_pointer.i64
+    jump block4(v29)
+
+;; Continue the loop.
+block4(v30: i64):
+    return_call fn0(v28, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v30)
+}
+
+; run: %loop_stack_args(1_000_000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) == 1
+
+;;;; Mutually-Recursive Loop With Different #s of Stack Arguments ;;;;;;;;;;;;;;
+
+function %mutual_loop_1(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i8 tail {
+    fn0 = %mutual_loop_2(i64, i64) -> i8 tail
+
+block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v8: i64, v9: i64, v10: i64, v11: i64, v12: i64, v13: i64, v14: i64, v15: i64, v16: i64, v17: i64, v18: i64, v19: i64, v20: i64, v21: i64, v22: i64, v23: i64, v24: i64, v25: i64):
+    brif v0, block2, block1
+
+;; Loop counter reached zero; we're done iterating. Return a comparison of the
+;; current SP and the first loop iteration's SP.
+block1:
+    v26 = get_stack_pointer.i64
+    v27 = icmp eq v1, v26
+    return v27
+
+;; Another iteration of the loop. If this was our first iteration, grab the
+;; initial SP.
+block2:
+    brif v1, block4(v1), block3
+
+;; Grab the initial SP.
+block3:
+    v29 = get_stack_pointer.i64
+    jump block4(v29)
+
+;; Continue the loop.
+block4(v30: i64):
+    return_call fn0(v0, v30)
+}
+
+function %mutual_loop_2(i64, i64) -> i8 tail {
+    fn0 = %mutual_loop_1(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i8 tail
+
+block0(v0: i64, v1: i64):
+    v2 = iadd_imm v0, -1
+    v3 = iconst.i64 42
+    return_call fn0(v2, v1, v3, v3, v3, v3, v3, v3, v3, v3, v3, v3, v3, v3, v3, v3, v3, v3, v3, v3, v3, v3, v3, v3, v3, v3)
+}
+
+; run: %mutual_loop_2(1_000_000, 0) == 1
+
+;;;; Indirect Return-Call Loop ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+function %indirect_main(i64) -> i8 tail {
+    sig0 = (i64) -> i8 tail
+    fn0 = %indirect_break(i64) -> i8 tail
+    fn1 = %indirect_continue(i64) -> i8 tail
+block0(v0: i64):
+    v1 = func_addr.i64 fn0
+    v2 = func_addr.i64 fn1
+    v3 = select v0, v2, v1
+    return_call_indirect sig0, v3(v0)
+}
+
+function %indirect_break(i64) -> i8 tail {
+block0(v0: i64):
+    v1 = iconst.i8 42
+    return v1
+}
+
+function %indirect_continue(i64) -> i8 tail {
+    fn0 = %indirect_main(i64) -> i8 tail
+block0(v0: i64):
+    v1 = iadd_imm v0, -1
+    return_call fn0(v1)
+}
+
+; run: %indirect_main(1_000_000) == 42
diff --git a/cranelift/filetests/filetests/runtests/return-call.clif b/cranelift/filetests/filetests/runtests/return-call.clif
index 4f317a41eed7..3ecea5a929b8 100644
--- a/cranelift/filetests/filetests/runtests/return-call.clif
+++ b/cranelift/filetests/filetests/runtests/return-call.clif
@@ -1,6 +1,9 @@
 test interpret
-;; test run
-;; target x86_64
+test run
+
+set preserve_frame_pointers=true
+
+target x86_64
 ;; target aarch64
 ;; target aarch64 sign_return_address
 ;; target aarch64 has_pauth sign_return_address
@@ -66,3 +69,101 @@ block0(v0: i8):
 }
 ; run: %call_i8(1) == 0
 ; run: %call_i8(0) == 1
+
+;;;; Test passing many arguments on stack ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+function %tail_callee_stack_args(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i64 tail {
+block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v8: i64, v9: i64, v10: i64, v11: i64, v12: i64, v13: i64, v14: i64, v15: i64, v16: i64, v17: i64, v18: i64, v19: i64, v20: i64, v21: i64, v22: i64, v23: i64, v24: i64, v25: i64):
+    return v25
+}
+
+function %tail_caller_stack_args() -> i64 tail {
+    fn0 = %tail_callee_stack_args(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i64 tail
+
+block0:
+    v0 = iconst.i64 10
+    v1 = iconst.i64 15
+    v2 = iconst.i64 20
+    v3 = iconst.i64 25
+    v4 = iconst.i64 30
+    v5 = iconst.i64 35
+    v6 = iconst.i64 40
+    v7 = iconst.i64 45
+    v8 = iconst.i64 50
+    v9 = iconst.i64 55
+    v10 = iconst.i64 60
+    v11 = iconst.i64 65
+    v12 = iconst.i64 70
+    v13 = iconst.i64 75
+    v14 = iconst.i64 80
+    v15 = iconst.i64 85
+    v16 = iconst.i64 90
+    v17 = iconst.i64 95
+    v18 = iconst.i64 100
+    v19 = iconst.i64 105
+    v20 = iconst.i64 110
+    v21 = iconst.i64 115
+    v22 = iconst.i64 120
+    v23 = iconst.i64 125
+    v24 = iconst.i64 130
+    v25 = iconst.i64 135
+    return_call fn0(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25)
+}
+
+; run: %tail_caller_stack_args() == 135
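+
+;; With 26 `i64` arguments under the `tail` calling convention on x86_64, only
+;; the first ten travel in registers (see the register reloads in the ISA test
+;; above); the remaining sixteen are passed on the stack, exercising the
+;; stack-argument copy in the `return_call` lowering.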
+
+;;;; Test return calls with different #s of stack args in different blocks ;;;;
+
+function %different_callee1(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i64 tail {
+block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v8: i64, v9: i64, v10: i64, v11: i64, v12: i64, v13: i64, v14: i64, v15: i64, v16: i64, v17: i64, v18: i64, v19: i64, v20: i64, v21: i64, v22: i64, v23: i64, v24: i64, v25: i64):
+    return v25
+}
+
+function %different_callee2(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i64 tail {
+block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v8: i64, v9: i64, v10: i64, v11: i64, v12: i64, v13: i64, v14: i64, v15: i64, v16: i64, v17: i64, v18: i64, v19: i64, v20: i64, v21: i64, v22: i64, v23: i64, v24: i64, v25: i64, v26: i64):
+    return v26
+}
+
+function %caller_of_different_callees(i64) -> i64 tail {
+    fn0 = %different_callee1(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i64 tail
+    fn1 = %different_callee2(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i64 tail
+
+block0(v99: i64):
+    v0 = iconst.i64 10
+    v1 = iconst.i64 15
+    v2 = iconst.i64 20
+    v3 = iconst.i64 25
+    v4 = iconst.i64 30
+    v5 = iconst.i64 35
+    v6 = iconst.i64 40
+    v7 = iconst.i64 45
+    v8 = iconst.i64 50
+    v9 = iconst.i64 55
+    v10 = iconst.i64 60
+    v11 = iconst.i64 65
+    v12 = iconst.i64 70
+    v13 = iconst.i64 75
+    v14 = iconst.i64 80
+    v15 = iconst.i64 85
+    v16 = iconst.i64 90
+    v17 = iconst.i64 95
+    v18 = iconst.i64 100
+    v19 = iconst.i64 105
+    v20 = iconst.i64 110
+    v21 = iconst.i64 115
+    v22 = iconst.i64 120
+    v23 = iconst.i64 125
+    v24 = iconst.i64 130
+    v25 = iconst.i64 135
+    brif v99, block1, block2
+
+block1:
+    return_call fn0(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25)
+
+block2:
+    v26 = iconst.i64 140
+    return_call fn1(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26)
+}
+
+; run: %caller_of_different_callees(1) == 135
+; run: %caller_of_different_callees(0) == 140
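+
+;; The two return calls need differently sized stack-argument areas (sixteen
+;; slots for fn0, seventeen for fn1), so the lowering must compute the frame
+;; rewrite per call site rather than once per function.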
diff --git a/cranelift/filetests/filetests/runtests/tail-call-conv.clif b/cranelift/filetests/filetests/runtests/tail-call-conv.clif
index efa9431d6e6b..84965b3a3e77 100644
--- a/cranelift/filetests/filetests/runtests/tail-call-conv.clif
+++ b/cranelift/filetests/filetests/runtests/tail-call-conv.clif
@@ -124,45 +124,3 @@ block0:
 }
 
 ; run: %tail_caller_stack_rets() == 135
-
-;; Test the `tail` calling convention with non-tail calls and both stack
-;; arguments and stack returns.
-
-function %tail_callee_stack_args_and_rets(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 tail {
-block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v8: i64, v9: i64, v10: i64, v11: i64, v12: i64, v13: i64, v14: i64, v15: i64, v16: i64, v17: i64, v18: i64, v19: i64, v20: i64, v21: i64, v22: i64, v23: i64, v24: i64, v25: i64):
-    return v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25
-}
-
-function %tail_caller_stack_args_and_rets() -> i64 tail {
-    fn0 = %tail_callee_stack_args_and_rets(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 tail
-
-block0:
-    v0 = iconst.i64 10
-    v1 = iconst.i64 15
-    v2 = iconst.i64 20
-    v3 = iconst.i64 25
-    v4 = iconst.i64 30
-    v5 = iconst.i64 35
-    v6 = iconst.i64 40
-    v7 = iconst.i64 45
-    v8 = iconst.i64 50
-    v9 = iconst.i64 55
-    v10 = iconst.i64 60
-    v11 = iconst.i64 65
-    v12 = iconst.i64 70
-    v13 = iconst.i64 75
-    v14 = iconst.i64 80
-    v15 = iconst.i64 85
-    v16 = iconst.i64 90
-    v17 = iconst.i64 95
-    v18 = iconst.i64 100
-    v19 = iconst.i64 105
-    v20 = iconst.i64 110
-    v21 = iconst.i64 115
-    v22 = iconst.i64 120
-    v23 = iconst.i64 125
-    v24 = iconst.i64 130
-    v25 = iconst.i64 135
-    v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51 = call fn0(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25)
-    return v51
-}