Skip to content

Commit

Permalink
cranelift: Implement scalar FMA on x86
Browse files Browse the repository at this point in the history
x86 does not have dedicated instructions for scalar FMA, so lower it
to a libcall, which seems to be what LLVM does.
  • Loading branch information
afonso360 committed Jul 16, 2022
1 parent 439f566 commit 48c77b7
Show file tree
Hide file tree
Showing 13 changed files with 181 additions and 54 deletions.
65 changes: 64 additions & 1 deletion cranelift/codegen/src/ir/libcall.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
//! Naming well-known routines in the runtime library.

use crate::ir::{types, ExternalName, FuncRef, Function, Opcode, Type};
use crate::ir::{
types, AbiParam, ArgumentPurpose, ExternalName, FuncRef, Function, Opcode, Signature, Type,
};
use crate::isa::CallConv;
use core::fmt;
use core::str::FromStr;
#[cfg(feature = "enable-serde")]
Expand Down Expand Up @@ -50,6 +53,10 @@ pub enum LibCall {
NearestF32,
/// nearest.f64
NearestF64,
/// fma.f32
FmaF32,
/// fma.f64
FmaF64,
/// libc.memcpy
Memcpy,
/// libc.memset
Expand Down Expand Up @@ -91,6 +98,8 @@ impl FromStr for LibCall {
"TruncF64" => Ok(Self::TruncF64),
"NearestF32" => Ok(Self::NearestF32),
"NearestF64" => Ok(Self::NearestF64),
"FmaF32" => Ok(Self::FmaF32),
"FmaF64" => Ok(Self::FmaF64),
"Memcpy" => Ok(Self::Memcpy),
"Memset" => Ok(Self::Memset),
"Memmove" => Ok(Self::Memmove),
Expand Down Expand Up @@ -124,13 +133,15 @@ impl LibCall {
Opcode::Floor => Self::FloorF32,
Opcode::Trunc => Self::TruncF32,
Opcode::Nearest => Self::NearestF32,
Opcode::Fma => Self::FmaF32,
_ => return None,
},
types::F64 => match opcode {
Opcode::Ceil => Self::CeilF64,
Opcode::Floor => Self::FloorF64,
Opcode::Trunc => Self::TruncF64,
Opcode::Nearest => Self::NearestF64,
Opcode::Fma => Self::FmaF64,
_ => return None,
},
_ => return None,
Expand All @@ -157,13 +168,65 @@ impl LibCall {
TruncF64,
NearestF32,
NearestF64,
FmaF32,
FmaF64,
Memcpy,
Memset,
Memmove,
Memcmp,
ElfTlsGetAddr,
]
}

/// Build the [Signature] of the runtime routine that this [LibCall] refers to.
///
/// Every routine handled here takes one or more arguments of a single type
/// and returns one value of that same type. When `call_conv` extends
/// baldrdash, a trailing `I64` parameter with the
/// [ArgumentPurpose::VMContext] purpose is appended after the regular
/// arguments.
///
/// # Panics
///
/// Panics through `unimplemented!()` for `Probestack`, `Memcpy`, `Memset`,
/// `Memmove`, `Memcmp` and `ElfTlsGetAddr`.
pub fn signature(&self, call_conv: CallConv) -> Signature {
    use types::*;

    let mut sig = Signature::new(call_conv);

    // Operand type and operand count of the routine; the single return
    // value always has the same type as the operands.
    let (ty, arity) = match self {
        LibCall::UdivI64
        | LibCall::SdivI64
        | LibCall::UremI64
        | LibCall::SremI64
        | LibCall::IshlI64
        | LibCall::UshrI64
        | LibCall::SshrI64 => (I64, 2),
        LibCall::CeilF32 | LibCall::FloorF32 | LibCall::TruncF32 | LibCall::NearestF32 => {
            (F32, 1)
        }
        LibCall::CeilF64 | LibCall::FloorF64 | LibCall::TruncF64 | LibCall::NearestF64 => {
            (F64, 1)
        }
        LibCall::FmaF32 => (F32, 3),
        LibCall::FmaF64 => (F64, 3),
        LibCall::Probestack
        | LibCall::Memcpy
        | LibCall::Memset
        | LibCall::Memmove
        | LibCall::Memcmp
        | LibCall::ElfTlsGetAddr => unimplemented!(),
    };

    sig.params.extend((0..arity).map(|_| AbiParam::new(ty)));
    sig.returns.push(AbiParam::new(ty));

    if call_conv.extends_baldrdash() {
        // Adds the special VMContext parameter to the signature.
        let vmctx = AbiParam::special(I64, ArgumentPurpose::VMContext);
        sig.params.push(vmctx);
    }

    sig
}
}

/// Get a function reference for the probestack function in `func`.
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/aarch64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1551,7 +1551,7 @@ impl LowerBackend for AArch64Backend {
type MInst = Inst;

fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
lower_inst::lower_insn_to_regs(ctx, ir_inst, &self.flags, &self.isa_flags)
lower_inst::lower_insn_to_regs(ctx, ir_inst, &self.triple, &self.flags, &self.isa_flags)
}

fn lower_branch_group<C: LowerCtx<I = Inst>>(
Expand Down
14 changes: 11 additions & 3 deletions cranelift/codegen/src/isa/aarch64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ use crate::{
use std::boxed::Box;
use std::convert::TryFrom;
use std::vec::Vec;
use target_lexicon::Triple;

type BoxCallInfo = Box<CallInfo>;
type BoxCallIndInfo = Box<CallIndInfo>;
Expand All @@ -38,6 +39,7 @@ type BoxExternalName = Box<ExternalName>;
/// The main entry point for lowering with ISLE.
pub(crate) fn lower<C>(
lower_ctx: &mut C,
triple: &Triple,
flags: &Flags,
isa_flags: &IsaFlags,
outputs: &[InsnOutput],
Expand All @@ -46,9 +48,15 @@ pub(crate) fn lower<C>(
where
C: LowerCtx<I = MInst>,
{
lower_common(lower_ctx, flags, isa_flags, outputs, inst, |cx, insn| {
generated_code::constructor_lower(cx, insn)
})
lower_common(
lower_ctx,
triple,
flags,
isa_flags,
outputs,
inst,
|cx, insn| generated_code::constructor_lower(cx, insn),
)
}

pub struct ExtendedValue {
Expand Down
4 changes: 3 additions & 1 deletion cranelift/codegen/src/isa/aarch64/lower_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@ use crate::{CodegenError, CodegenResult};
use alloc::boxed::Box;
use alloc::vec::Vec;
use core::convert::TryFrom;
use target_lexicon::Triple;

/// Actually codegen an instruction's results into registers.
pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx: &mut C,
insn: IRInst,
triple: &Triple,
flags: &Flags,
isa_flags: &aarch64_settings::Flags,
) -> CodegenResult<()> {
Expand All @@ -33,7 +35,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
None
};

if let Ok(()) = super::lower::isle::lower(ctx, flags, isa_flags, &outputs, insn) {
if let Ok(()) = super::lower::isle::lower(ctx, triple, flags, isa_flags, &outputs, insn) {
return Ok(());
}

Expand Down
12 changes: 9 additions & 3 deletions cranelift/codegen/src/isa/s390x/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,14 @@ impl LowerBackend for S390xBackend {
None
};

if let Ok(()) =
super::lower::isle::lower(ctx, &self.flags, &self.isa_flags, &outputs, ir_inst)
{
if let Ok(()) = super::lower::isle::lower(
ctx,
&self.triple,
&self.flags,
&self.isa_flags,
&outputs,
ir_inst,
) {
return Ok(());
}

Expand Down Expand Up @@ -294,6 +299,7 @@ impl LowerBackend for S390xBackend {
// the second branch (if any) by emitting a two-way conditional branch.
if let Ok(()) = super::lower::isle::lower_branch(
ctx,
&self.triple,
&self.flags,
&self.isa_flags,
branches[0],
Expand Down
27 changes: 21 additions & 6 deletions cranelift/codegen/src/isa/s390x/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use std::boxed::Box;
use std::cell::Cell;
use std::convert::TryFrom;
use std::vec::Vec;
use target_lexicon::Triple;

type BoxCallInfo = Box<CallInfo>;
type BoxCallIndInfo = Box<CallIndInfo>;
Expand All @@ -36,6 +37,7 @@ type VecMInstBuilder = Cell<Vec<MInst>>;
/// The main entry point for lowering with ISLE.
pub(crate) fn lower<C>(
lower_ctx: &mut C,
triple: &Triple,
flags: &Flags,
isa_flags: &IsaFlags,
outputs: &[InsnOutput],
Expand All @@ -44,14 +46,21 @@ pub(crate) fn lower<C>(
where
C: LowerCtx<I = MInst>,
{
lower_common(lower_ctx, flags, isa_flags, outputs, inst, |cx, insn| {
generated_code::constructor_lower(cx, insn)
})
lower_common(
lower_ctx,
triple,
flags,
isa_flags,
outputs,
inst,
|cx, insn| generated_code::constructor_lower(cx, insn),
)
}

/// The main entry point for branch lowering with ISLE.
pub(crate) fn lower_branch<C>(
lower_ctx: &mut C,
triple: &Triple,
flags: &Flags,
isa_flags: &IsaFlags,
branch: Inst,
Expand All @@ -60,9 +69,15 @@ pub(crate) fn lower_branch<C>(
where
C: LowerCtx<I = MInst>,
{
lower_common(lower_ctx, flags, isa_flags, &[], branch, |cx, insn| {
generated_code::constructor_lower_branch(cx, insn, &targets.to_vec())
})
lower_common(
lower_ctx,
triple,
flags,
isa_flags,
&[],
branch,
|cx, insn| generated_code::constructor_lower_branch(cx, insn, &targets.to_vec()),
)
}

impl<C> generated_code::Context for IsleContext<'_, C, Flags, IsaFlags, 6>
Expand Down
11 changes: 11 additions & 0 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -3023,3 +3023,14 @@
(decl synthetic_amode_to_xmm_mem (SyntheticAmode) XmmMem)
(rule (synthetic_amode_to_xmm_mem amode)
(synthetic_amode_to_reg_mem amode))


;;;; Helpers for Emitting LibCalls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `LibCall` is defined outside of ISLE (`extern`); only the `FmaF32` and
;; `FmaF64` variants are made visible to the rules here.
(type LibCall extern
(enum
FmaF32
FmaF64))

;; Constructor term taking a `LibCall` and three `Reg` operands and producing
;; a single `Reg`. It is implemented externally by a function of the same
;; name.
(decl libcall_3_ret_1 (LibCall Reg Reg Reg) Reg)
(extern constructor libcall_3_ret_1 libcall_3_ret_1)
8 changes: 8 additions & 0 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2542,6 +2542,14 @@
(rule (lower (has_type $F64X2 (fmax_pseudo x y)))
(x64_maxpd y x))

;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Scalar `fma` on `$F32` is lowered through `libcall_3_ret_1` with the
;; `FmaF32` libcall, passing `x`, `y` and `z` as the three operands.
(rule (lower (has_type $F32 (fma x y z)))
(libcall_3_ret_1 (LibCall.FmaF32) x y z))
;; Same as above for `$F64`, using the `FmaF64` libcall.
(rule (lower (has_type $F64 (fma x y z)))
(libcall_3_ret_1 (LibCall.FmaF64) x y z))


;; Rules for `load*` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; In order to load a value from memory to a GPR register, we may need to extend
Expand Down
Loading

0 comments on commit 48c77b7

Please sign in to comment.