Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cranelift: Do not have any callee-save registers for the tail calling convention #6608

Merged
merged 1 commit into from
Jun 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions cranelift/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ similar = { workspace = true }
toml = { workspace = true }
serde = { workspace = true }
fxhash = "0.2.1"
# Note that this just enables `trace-log` for `clif-util` and doesn't turn it on
# for all of Cranelift, which would be bad.
regalloc2 = { workspace = true, features = ["trace-log"] }
fitzgen marked this conversation as resolved.
Show resolved Hide resolved

[features]
default = ["disas", "wasm", "cranelift-codegen/all-arch", "cranelift-codegen/trace-log", "souper-harvest"]
Expand Down
224 changes: 216 additions & 8 deletions cranelift/codegen/src/isa/aarch64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@ impl ABIMachineSpec for AArch64MachineDeps {
where
I: IntoIterator<Item = &'a ir::AbiParam>,
{
if call_conv == isa::CallConv::Tail {
return compute_arg_locs_tail(params, add_ret_area_ptr, args);
}

let is_apple_cc = call_conv.extends_apple_aarch64();

// See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#64parameter-passing), sections 6.4.
Expand Down Expand Up @@ -896,15 +900,16 @@ impl ABIMachineSpec for AArch64MachineDeps {
}

fn gen_clobber_restore(
_call_conv: isa::CallConv,
call_conv: isa::CallConv,
sig: &Signature,
flags: &settings::Flags,
clobbers: &[Writable<RealReg>],
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> SmallVec<[Inst; 16]> {
let mut insts = SmallVec::new();
let (clobbered_int, clobbered_vec) = get_regs_restored_in_epilogue(flags, sig, clobbers);
let (clobbered_int, clobbered_vec) =
get_regs_restored_in_epilogue(call_conv, flags, sig, clobbers);

// Free the fixed frame if necessary.
if fixed_frame_storage_size > 0 {
Expand Down Expand Up @@ -1107,8 +1112,12 @@ impl ABIMachineSpec for AArch64MachineDeps {
s.nominal_sp_to_fp
}

fn get_regs_clobbered_by_call(_call_conv_of_callee: isa::CallConv) -> PRegSet {
DEFAULT_AAPCS_CLOBBERS
/// Return the set of registers treated as clobbered across a call to a
/// function with the given calling convention.
///
/// The `tail` convention has no callee-saves at all, so it clobbers the
/// full allocatable set; every other convention uses the standard AAPCS64
/// caller-saved set.
fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> PRegSet {
    match call_conv_of_callee {
        isa::CallConv::Tail => TAIL_CLOBBERS,
        _ => DEFAULT_AAPCS_CLOBBERS,
    }
}

fn get_ext_mode(
Expand All @@ -1119,15 +1128,17 @@ impl ABIMachineSpec for AArch64MachineDeps {
}

fn get_clobbered_callee_saves(
_call_conv: isa::CallConv,
call_conv: isa::CallConv,
flags: &settings::Flags,
sig: &Signature,
regs: &[Writable<RealReg>],
) -> Vec<Writable<RealReg>> {
let mut regs: Vec<Writable<RealReg>> = regs
.iter()
.cloned()
.filter(|r| is_reg_saved_in_prologue(flags.enable_pinned_reg(), sig, r.to_reg()))
.filter(|r| {
is_reg_saved_in_prologue(call_conv, flags.enable_pinned_reg(), sig, r.to_reg())
})
.collect();

// Sort registers for deterministic code output. We can do an unstable
Expand All @@ -1151,6 +1162,127 @@ impl ABIMachineSpec for AArch64MachineDeps {
}
}

/// Assign argument locations (registers or stack slots) for the `tail`
/// calling convention on AArch64.
///
/// Integer arguments are drawn from the allocatable `x` registers in
/// `TAIL_CLOBBERS` (skipping `x0` and `x1`, reserved below), and float/vector
/// arguments from the `v` registers; once a class runs out, further arguments
/// of that class spill to stack slots.
///
/// Returns the total size in bytes of the stack-argument area (rounded up to
/// a 16-byte multiple) and, when `add_ret_area_ptr` is set, the index of the
/// synthesized return-area-pointer argument (always passed in `x0`).
///
/// Errors with `CodegenError::ImplLimitExceeded` if the stack-argument area
/// exceeds `STACK_ARG_RET_SIZE_LIMIT`.
fn compute_arg_locs_tail<'a, I>(
    params: I,
    add_ret_area_ptr: bool,
    mut args: ArgsAccumulator<'_>,
) -> CodegenResult<(u32, Option<usize>)>
where
    I: IntoIterator<Item = &'a ir::AbiParam>,
{
    let mut xregs = TAIL_CLOBBERS
        .into_iter()
        .filter(|r| r.class() == RegClass::Int)
        // We reserve `x0` for the return area pointer. For simplicity, we
        // reserve it even when there is no return area pointer needed. This
        // also means that identity functions don't have to shuffle arguments to
        // different return registers because we shifted all argument register
        // numbers down by one to make space for the return area pointer.
        //
        // Also, we cannot use all allocatable GPRs as arguments because we need
        // at least one allocatable register for holding the callee address in
        // indirect calls. So skip `x1` also, reserving it for that role.
        .skip(2);

    let mut vregs = TAIL_CLOBBERS
        .into_iter()
        .filter(|r| r.class() == RegClass::Float);

    // Running total of stack-argument bytes allocated so far.
    let mut next_stack: u32 = 0;

    // Get the next stack slot for the given type.
    // Each slot is aligned to the type's natural (byte-size) alignment.
    let stack = |next_stack: &mut u32, ty: ir::Type| {
        *next_stack = align_to(*next_stack, ty.bytes());
        let offset = i64::from(*next_stack);
        *next_stack += ty.bytes();
        ABIArgSlot::Stack {
            offset,
            ty,
            extension: ir::ArgumentExtension::None,
        }
    };

    // Get the next `x` register available, or a stack slot if all are in use.
    let mut xreg = |next_stack: &mut u32, ty| {
        xregs
            .next()
            .map(|reg| ABIArgSlot::Reg {
                reg: reg.into(),
                ty,
                extension: ir::ArgumentExtension::None,
            })
            .unwrap_or_else(|| stack(next_stack, ty))
    };

    // Get the next `v` register available, or a stack slot if all are in use.
    let mut vreg = |next_stack: &mut u32, ty| {
        vregs
            .next()
            .map(|reg| ABIArgSlot::Reg {
                reg: reg.into(),
                ty,
                extension: ir::ArgumentExtension::None,
            })
            .unwrap_or_else(|| stack(next_stack, ty))
    };

    for param in params {
        assert!(
            legal_type_for_machine(param.value_type),
            "Invalid type for AArch64: {:?}",
            param.value_type
        );

        // Only plain and VMContext arguments are supported so far; the
        // struct/stack-limit purposes need dedicated lowering that hasn't
        // been written for `tail` yet.
        match param.purpose {
            ir::ArgumentPurpose::Normal | ir::ArgumentPurpose::VMContext => {}
            ir::ArgumentPurpose::StructArgument(_)
            | ir::ArgumentPurpose::StructReturn
            | ir::ArgumentPurpose::StackLimit => unimplemented!(
                "support for {:?} parameters is not implemented for the `tail` \
                 calling convention yet",
                param.purpose,
            ),
        }

        // A value may need multiple slots (one per machine register class
        // entry returned by `rc_for_type`); assign each slot independently.
        let (reg_classes, reg_types) = Inst::rc_for_type(param.value_type)?;
        args.push(ABIArg::Slots {
            slots: reg_classes
                .iter()
                .zip(reg_types)
                .map(|(cls, ty)| match cls {
                    RegClass::Int => xreg(&mut next_stack, *ty),
                    RegClass::Float => vreg(&mut next_stack, *ty),
                    RegClass::Vector => unreachable!(),
                })
                .collect(),
            purpose: param.purpose,
        });
    }

    // Synthesize the hidden return-area pointer argument in `x0` (reserved
    // above) when the caller asked for one.
    let ret_ptr = if add_ret_area_ptr {
        let idx = args.args().len();
        args.push(ABIArg::reg(
            xreg_preg(0).into(),
            types::I64,
            ir::ArgumentExtension::None,
            ir::ArgumentPurpose::Normal,
        ));
        Some(idx)
    } else {
        None
    };

    // Keep the overall stack-argument area 16-byte aligned.
    next_stack = align_to(next_stack, 16);

    // To avoid overflow issues, limit the arg/return size to something
    // reasonable -- here, 128 MB.
    if next_stack > STACK_ARG_RET_SIZE_LIMIT {
        return Err(CodegenError::ImplLimitExceeded);
    }

    Ok((next_stack, ret_ptr))
}

/// Is this type supposed to be seen on this machine? E.g. references of the
/// wrong width are invalid.
fn legal_type_for_machine(ty: Type) -> bool {
Expand All @@ -1162,7 +1294,16 @@ fn legal_type_for_machine(ty: Type) -> bool {

/// Is the given register saved in the prologue if clobbered, i.e., is it a
/// callee-save?
fn is_reg_saved_in_prologue(enable_pinned_reg: bool, sig: &Signature, r: RealReg) -> bool {
fn is_reg_saved_in_prologue(
call_conv: isa::CallConv,
enable_pinned_reg: bool,
sig: &Signature,
r: RealReg,
) -> bool {
if call_conv == isa::CallConv::Tail {
return false;
}

// FIXME: We need to inspect whether a function is returning Z or P regs too.
let save_z_regs = sig
.params
Expand Down Expand Up @@ -1204,14 +1345,15 @@ fn is_reg_saved_in_prologue(enable_pinned_reg: bool, sig: &Signature, r: RealReg
/// prologue and restored in the epilogue, given the set of all registers
/// written by the function's body.
fn get_regs_restored_in_epilogue(
call_conv: isa::CallConv,
flags: &settings::Flags,
sig: &Signature,
regs: &[Writable<RealReg>],
) -> (Vec<Writable<RealReg>>, Vec<Writable<RealReg>>) {
let mut int_saves = vec![];
let mut vec_saves = vec![];
for &reg in regs {
if is_reg_saved_in_prologue(flags.enable_pinned_reg(), sig, reg.to_reg()) {
if is_reg_saved_in_prologue(call_conv, flags.enable_pinned_reg(), sig, reg.to_reg()) {
match reg.to_reg().class() {
RegClass::Int => int_saves.push(reg),
RegClass::Float => vec_saves.push(reg),
Expand Down Expand Up @@ -1297,3 +1439,69 @@ const fn default_aapcs_clobbers() -> PRegSet {
}

const DEFAULT_AAPCS_CLOBBERS: PRegSet = default_aapcs_clobbers();

// NB: The `tail` calling convention clobbers all allocatable registers.
/// Build the clobber set for the `tail` calling convention: every
/// allocatable register, since `tail` has no callee-saves.
///
/// Written as a `const fn` + `const` pair for consistency with
/// `default_aapcs_clobbers`/`DEFAULT_AAPCS_CLOBBERS` above.
const fn tail_clobbers() -> PRegSet {
    PRegSet::empty()
        .with(xreg_preg(0))
        .with(xreg_preg(1))
        .with(xreg_preg(2))
        .with(xreg_preg(3))
        .with(xreg_preg(4))
        .with(xreg_preg(5))
        .with(xreg_preg(6))
        .with(xreg_preg(7))
        .with(xreg_preg(8))
        .with(xreg_preg(9))
        .with(xreg_preg(10))
        .with(xreg_preg(11))
        .with(xreg_preg(12))
        .with(xreg_preg(13))
        .with(xreg_preg(14))
        .with(xreg_preg(15))
        // Cranelift reserves x16 and x17 as unallocatable scratch registers.
        //
        // x18 can be used by the platform and therefore is not allocatable.
        .with(xreg_preg(19))
        .with(xreg_preg(20))
        .with(xreg_preg(21))
        .with(xreg_preg(22))
        .with(xreg_preg(23))
        .with(xreg_preg(24))
        .with(xreg_preg(25))
        .with(xreg_preg(26))
        .with(xreg_preg(27))
        .with(xreg_preg(28))
        // NB: x29 is the FP, x30 is the link register, and x31 is the SP. None of
        // these are allocatable.
        .with(vreg_preg(0))
        .with(vreg_preg(1))
        .with(vreg_preg(2))
        .with(vreg_preg(3))
        .with(vreg_preg(4))
        .with(vreg_preg(5))
        .with(vreg_preg(6))
        .with(vreg_preg(7))
        .with(vreg_preg(8))
        .with(vreg_preg(9))
        .with(vreg_preg(10))
        .with(vreg_preg(11))
        .with(vreg_preg(12))
        .with(vreg_preg(13))
        .with(vreg_preg(14))
        .with(vreg_preg(15))
        .with(vreg_preg(16))
        .with(vreg_preg(17))
        .with(vreg_preg(18))
        .with(vreg_preg(19))
        .with(vreg_preg(20))
        .with(vreg_preg(21))
        .with(vreg_preg(22))
        .with(vreg_preg(23))
        .with(vreg_preg(24))
        .with(vreg_preg(25))
        .with(vreg_preg(26))
        .with(vreg_preg(27))
        .with(vreg_preg(28))
        .with(vreg_preg(29))
        .with(vreg_preg(30))
        .with(vreg_preg(31))
}

// NB: The `tail` calling convention clobbers all allocatable registers.
const TAIL_CLOBBERS: PRegSet = tail_clobbers();
Loading