From dcd3168c97c6024b24f80abbb2407c81d9b0a25a Mon Sep 17 00:00:00 2001 From: beetrees Date: Fri, 23 May 2025 15:51:51 +0100 Subject: [PATCH 1/8] Use correct sign extension on `__powi*f2` arguments --- src/abi/mod.rs | 28 ++++++++++++++++++++++++++++ src/intrinsics/mod.rs | 3 ++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/abi/mod.rs b/src/abi/mod.rs index 70d29596e..5f7bf3821 100644 --- a/src/abi/mod.rs +++ b/src/abi/mod.rs @@ -828,3 +828,31 @@ pub(crate) fn codegen_drop<'tcx>( } } } + +pub(crate) fn lib_call_arg_param(tcx: TyCtxt<'_>, ty: Type, is_signed: bool) -> AbiParam { + let param = AbiParam::new(ty); + if ty.is_int() && u64::from(ty.bits()) < tcx.data_layout.pointer_size.bits() { + match (&*tcx.sess.target.arch, &*tcx.sess.target.vendor) { + ("x86_64", _) | ("aarch64", "apple") => match (ty, is_signed) { + (types::I8 | types::I16, true) => param.sext(), + (types::I8 | types::I16, false) => param.uext(), + _ => param, + }, + ("aarch64", _) => param, + ("riscv64", _) => match (ty, is_signed) { + (types::I32, _) | (_, true) => param.sext(), + _ => param.uext(), + }, + ("s390x", _) => { + if is_signed { + param.sext() + } else { + param.uext() + } + } + _ => unimplemented!("{:?}", tcx.sess.target.arch), + } + } else { + param + } +} diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index 25e224ebf..7c18922d9 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -416,7 +416,8 @@ fn codegen_float_intrinsic_call<'tcx>( // These intrinsics aren't supported natively by Cranelift. // Lower them to a libcall. sym::powif32 | sym::powif64 => { - let input_tys: Vec<_> = vec![AbiParam::new(clif_ty), AbiParam::new(types::I32)]; + let input_tys: Vec<_> = + vec![AbiParam::new(clif_ty), lib_call_arg_param(fx.tcx, types::I32, true)]; let ret_val = fx.lib_call(name, input_tys, vec![AbiParam::new(clif_ty)], &args)[0]; CValue::by_val(ret_val, fx.layout_of(ty)) } From f0fb19ccc87e18402079fbe3106d8090cc8d9f64 Mon Sep 17 00:00:00 2001 From: beetrees Date: Sat, 24 May 2025 13:45:12 +0100 Subject: [PATCH 2/8] Add missing float libcalls to `compiler_builtins.rs` --- src/compiler_builtins.rs | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/compiler_builtins.rs b/src/compiler_builtins.rs index 5b6d0ef6d..8886317c1 100644 --- a/src/compiler_builtins.rs +++ b/src/compiler_builtins.rs @@ -46,15 +46,49 @@ builtin_functions! 
{ fn __rust_u128_mulo(a: u128, b: u128, oflow: &mut i32) -> u128; fn __rust_i128_mulo(a: i128, b: i128, oflow: &mut i32) -> i128; - // floats + // integer -> float fn __floattisf(i: i128) -> f32; fn __floattidf(i: i128) -> f64; fn __floatuntisf(i: u128) -> f32; fn __floatuntidf(i: u128) -> f64; + // float -> integer fn __fixsfti(f: f32) -> i128; fn __fixdfti(f: f64) -> i128; fn __fixunssfti(f: f32) -> u128; fn __fixunsdfti(f: f64) -> u128; + // float binops + fn fmodf(a: f32, b: f32) -> f32; + fn fmod(a: f64, b: f64) -> f64; + // Cranelift float libcalls + fn fmaf(a: f32, b: f32, c: f32) -> f32; + fn fma(a: f64, b: f64, c: f64) -> f64; + fn floorf(f: f32) -> f32; + fn floor(f: f64) -> f64; + fn ceilf(f: f32) -> f32; + fn ceil(f: f64) -> f64; + fn truncf(f: f32) -> f32; + fn trunc(f: f64) -> f64; + fn nearbyintf(f: f32) -> f32; + fn nearbyint(f: f64) -> f64; + // float intrinsics + fn __powisf2(a: f32, b: i32) -> f32; + fn __powidf2(a: f64, b: i32) -> f64; + fn powf(a: f32, b: f32) -> f32; + fn pow(a: f64, b: f64) -> f64; + fn expf(f: f32) -> f32; + fn exp(f: f64) -> f64; + fn exp2f(f: f32) -> f32; + fn exp2(f: f64) -> f64; + fn logf(f: f32) -> f32; + fn log(f: f64) -> f64; + fn log2f(f: f32) -> f32; + fn log2(f: f64) -> f64; + fn log10f(f: f32) -> f32; + fn log10(f: f64) -> f64; + fn sinf(f: f32) -> f32; + fn sin(f: f64) -> f64; + fn cosf(f: f32) -> f32; + fn cos(f: f64) -> f64; // allocator // NOTE: These need to be mentioned here despite not being part of compiler_builtins because From 87c425b9e1ce015c4e3dad7729e8d54bc7b6b27b Mon Sep 17 00:00:00 2001 From: beetrees Date: Fri, 23 May 2025 15:51:51 +0100 Subject: [PATCH 3/8] Add basic support for `f16`/`f128` values --- src/abi/pass_mode.rs | 2 ++ src/common.rs | 8 ++++---- src/value_and_place.rs | 14 +++++++++++++- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/abi/pass_mode.rs b/src/abi/pass_mode.rs index 06d89bc9e..6d8614aca 100644 --- a/src/abi/pass_mode.rs +++ b/src/abi/pass_mode.rs @@ -22,8 +22,10 @@ fn reg_to_abi_param(reg: Reg) -> AbiParam { (RegKind::Integer, 3..=4) => types::I32, (RegKind::Integer, 5..=8) => types::I64, (RegKind::Integer, 9..=16) => types::I128, + (RegKind::Float, 2) => types::F16, (RegKind::Float, 4) => types::F32, (RegKind::Float, 8) => types::F64, + (RegKind::Float, 16) => types::F128, (RegKind::Vector, size) => types::I8.by(u32::try_from(size).unwrap()).unwrap(), _ => unreachable!("{:?}", reg), }; diff --git a/src/common.rs b/src/common.rs index abe2972ba..2f11b2d2d 100644 --- a/src/common.rs +++ b/src/common.rs @@ -33,10 +33,10 @@ pub(crate) fn scalar_to_clif_type(tcx: TyCtxt<'_>, scalar: Scalar) -> Type { Integer::I128 => types::I128, }, Primitive::Float(float) => match float { - Float::F16 => unimplemented!("f16_f128"), + Float::F16 => types::F16, Float::F32 => types::F32, Float::F64 => types::F64, - Float::F128 => unimplemented!("f16_f128"), + Float::F128 => types::F128, }, // FIXME(erikdesjardins): handle non-default addrspace ptr sizes Primitive::Pointer(_) => pointer_ty(tcx), @@ -64,10 +64,10 @@ fn clif_type_from_ty<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Option types::I32, ty::Float(size) => match size { - FloatTy::F16 => unimplemented!("f16_f128"), + FloatTy::F16 => types::F16, FloatTy::F32 => types::F32, FloatTy::F64 => types::F64, - FloatTy::F128 => unimplemented!("f16_f128"), + FloatTy::F128 => types::F128, }, ty::FnPtr(..) 
=> pointer_ty(tcx), ty::RawPtr(pointee_ty, _) | ty::Ref(_, pointee_ty, _) => { diff --git a/src/value_and_place.rs b/src/value_and_place.rs index 0938c9905..cbfb215a8 100644 --- a/src/value_and_place.rs +++ b/src/value_and_place.rs @@ -325,7 +325,7 @@ impl<'tcx> CValue<'tcx> { const_val: ty::ScalarInt, ) -> CValue<'tcx> { assert_eq!(const_val.size(), layout.size, "{:#?}: {:?}", const_val, layout); - use cranelift_codegen::ir::immediates::{Ieee32, Ieee64}; + use cranelift_codegen::ir::immediates::{Ieee16, Ieee32, Ieee64, Ieee128}; let clif_ty = fx.clif_type(layout.ty).unwrap(); @@ -346,12 +346,24 @@ impl<'tcx> CValue<'tcx> { let raw_val = const_val.size().truncate(const_val.to_bits(layout.size)); fx.bcx.ins().iconst(clif_ty, raw_val as i64) } + ty::Float(FloatTy::F16) => { + fx.bcx.ins().f16const(Ieee16::with_bits(u16::try_from(const_val).unwrap())) + } ty::Float(FloatTy::F32) => { fx.bcx.ins().f32const(Ieee32::with_bits(u32::try_from(const_val).unwrap())) } ty::Float(FloatTy::F64) => { fx.bcx.ins().f64const(Ieee64::with_bits(u64::try_from(const_val).unwrap())) } + ty::Float(FloatTy::F128) => { + let value = fx + .bcx + .func + .dfg + .constants + .insert(Ieee128::with_bits(u128::try_from(const_val).unwrap()).into()); + fx.bcx.ins().f128const(value) + } _ => panic!( "CValue::const_val for non bool/char/float/integer/pointer type {:?} is not allowed", layout.ty From 27a9590d3db284a12143dafd03bf7343b59b6808 Mon Sep 17 00:00:00 2001 From: beetrees Date: Fri, 23 May 2025 15:51:51 +0100 Subject: [PATCH 4/8] Add `f16`/`f128` `+`/`-`/`*`/`/`/`%` support --- build_system/build_backend.rs | 2 +- src/base.rs | 11 +++++- src/codegen_f16_f128.rs | 64 +++++++++++++++++++++++++++++++++++ src/compiler_builtins.rs | 8 +++++ src/lib.rs | 4 +++ src/num.rs | 53 ++++++++++++++++++++++++++--- 6 files changed, 135 insertions(+), 7 deletions(-) create mode 100644 src/codegen_f16_f128.rs diff --git a/build_system/build_backend.rs b/build_system/build_backend.rs index 72bc42252..bf7cf1c0a 100644 --- a/build_system/build_backend.rs +++ b/build_system/build_backend.rs @@ -18,7 +18,7 @@ pub(crate) fn build_backend( let mut cmd = CG_CLIF.build(&bootstrap_host_compiler, dirs); let mut rustflags = rustflags_from_env("RUSTFLAGS"); - rustflags.push("-Zallow-features=rustc_private".to_owned()); + rustflags.push("-Zallow-features=rustc_private,f16,f128".to_owned()); rustflags_to_cmd_env(&mut cmd, "RUSTFLAGS", &rustflags); if env::var("CG_CLIF_EXPENSIVE_CHECKS").is_ok() { diff --git a/src/base.rs b/src/base.rs index c1f4c065c..461730410 100644 --- a/src/base.rs +++ b/src/base.rs @@ -15,9 +15,9 @@ use rustc_middle::ty::print::with_no_trimmed_paths; use crate::constant::ConstantCx; use crate::debuginfo::{FunctionDebugContext, TypeDebugContext}; -use crate::enable_verifier; use crate::prelude::*; use crate::pretty_clif::CommentWriter; +use crate::{codegen_f16_f128, enable_verifier}; pub(crate) struct CodegenedFunction { symbol_name: String, @@ -635,6 +635,15 @@ fn codegen_stmt<'tcx>(fx: &mut FunctionCx<'_, '_, 'tcx>, cur_block: Block, stmt: let val = operand.load_scalar(fx); match layout.ty.kind() { ty::Int(_) => CValue::by_val(fx.bcx.ins().ineg(val), layout), + // FIXME(bytecodealliance/wasmtime#8312): Remove + // once backend lowerings have been added to + // Cranelift. 
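+                        // `neg_f16`/`neg_f128` negate by flipping the sign
+                        // bit through integer bitcasts instead of using
+                        // Cranelift's `fneg`.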
+ ty::Float(FloatTy::F16) => { + CValue::by_val(codegen_f16_f128::neg_f16(fx, val), layout) + } + ty::Float(FloatTy::F128) => { + CValue::by_val(codegen_f16_f128::neg_f128(fx, val), layout) + } ty::Float(_) => CValue::by_val(fx.bcx.ins().fneg(val), layout), _ => unreachable!("un op Neg for {:?}", layout.ty), } diff --git a/src/codegen_f16_f128.rs b/src/codegen_f16_f128.rs new file mode 100644 index 000000000..c6f0779b8 --- /dev/null +++ b/src/codegen_f16_f128.rs @@ -0,0 +1,64 @@ +use crate::prelude::*; + +pub(crate) fn f16_to_f32(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { + let (value, arg_ty) = + if fx.tcx.sess.target.vendor == "apple" && fx.tcx.sess.target.arch == "x86_64" { + ( + fx.bcx.ins().bitcast(types::I16, MemFlags::new(), value), + lib_call_arg_param(fx.tcx, types::I16, false), + ) + } else { + (value, AbiParam::new(types::F16)) + }; + fx.lib_call("__extendhfsf2", vec![arg_ty], vec![AbiParam::new(types::F32)], &[value])[0] +} + +pub(crate) fn f32_to_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { + let ret_ty = if fx.tcx.sess.target.vendor == "apple" && fx.tcx.sess.target.arch == "x86_64" { + types::I16 + } else { + types::F16 + }; + let ret = fx.lib_call( + "__truncsfhf2", + vec![AbiParam::new(types::F32)], + vec![AbiParam::new(ret_ty)], + &[value], + )[0]; + if ret_ty == types::I16 { fx.bcx.ins().bitcast(types::F16, MemFlags::new(), ret) } else { ret } +} + +pub(crate) fn codegen_f128_binop( + fx: &mut FunctionCx<'_, '_, '_>, + bin_op: BinOp, + lhs: Value, + rhs: Value, +) -> Value { + let name = match bin_op { + BinOp::Add => "__addtf3", + BinOp::Sub => "__subtf3", + BinOp::Mul => "__multf3", + BinOp::Div => "__divtf3", + _ => unreachable!("handled in `codegen_float_binop`"), + }; + fx.lib_call( + name, + vec![AbiParam::new(types::F128), AbiParam::new(types::F128)], + vec![AbiParam::new(types::F128)], + &[lhs, rhs], + )[0] +} + +pub(crate) fn neg_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { + let bits = fx.bcx.ins().bitcast(types::I16, MemFlags::new(), value); + let bits = fx.bcx.ins().bxor_imm(bits, 0x8000); + fx.bcx.ins().bitcast(types::F16, MemFlags::new(), bits) +} + +pub(crate) fn neg_f128(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { + let bits = fx.bcx.ins().bitcast(types::I128, MemFlags::new(), value); + let (low, high) = fx.bcx.ins().isplit(bits); + let high = fx.bcx.ins().bxor_imm(high, 0x8000_0000_0000_0000_u64 as i64); + let bits = fx.bcx.ins().iconcat(low, high); + fx.bcx.ins().bitcast(types::F128, MemFlags::new(), bits) +} diff --git a/src/compiler_builtins.rs b/src/compiler_builtins.rs index 8886317c1..0c75df845 100644 --- a/src/compiler_builtins.rs +++ b/src/compiler_builtins.rs @@ -56,9 +56,17 @@ builtin_functions! 
{
 fn __fixdfti(f: f64) -> i128;
 fn __fixunssfti(f: f32) -> u128;
 fn __fixunsdfti(f: f64) -> u128;
+    // float -> float
+    fn __extendhfsf2(f: f16) -> f32;
+    fn __truncsfhf2(f: f32) -> f16;
     // float binops
+    fn __addtf3(a: f128, b: f128) -> f128;
+    fn __subtf3(a: f128, b: f128) -> f128;
+    fn __multf3(a: f128, b: f128) -> f128;
+    fn __divtf3(a: f128, b: f128) -> f128;
     fn fmodf(a: f32, b: f32) -> f32;
     fn fmod(a: f64, b: f64) -> f64;
+    fn fmodf128(a: f128, b: f128) -> f128;
     // Cranelift float libcalls
     fn fmaf(a: f32, b: f32, c: f32) -> f32;
     fn fma(a: f64, b: f64, c: f64) -> f64;
diff --git a/src/lib.rs b/src/lib.rs
index d41a80819..03dfd4956 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -6,6 +6,9 @@
 #![cfg_attr(doc, feature(rustdoc_internals))]
 // Note: please avoid adding other feature gates where possible
 #![feature(rustc_private)]
+// Only used to define intrinsics in `compiler_builtins.rs`.
+#![feature(f16)]
+#![feature(f128)]
 // Note: please avoid adding other feature gates where possible
 #![warn(rust_2018_idioms)]
 #![warn(unreachable_pub)]
@@ -57,6 +60,7 @@ mod allocator;
 mod analyze;
 mod base;
 mod cast;
+mod codegen_f16_f128;
 mod codegen_i128;
 mod common;
 mod compiler_builtins;
diff --git a/src/num.rs b/src/num.rs
index 90627f806..3ed276267 100644
--- a/src/num.rs
+++ b/src/num.rs
@@ -1,5 +1,6 @@
 //! Various operations on integer and floating-point numbers
 
+use crate::codegen_f16_f128;
 use crate::prelude::*;
 
 fn bin_op_to_intcc(bin_op: BinOp, signed: bool) -> IntCC {
@@ -350,25 +351,60 @@ pub(crate) fn codegen_float_binop<'tcx>(
     let lhs = in_lhs.load_scalar(fx);
     let rhs = in_rhs.load_scalar(fx);
 
+    // FIXME(bytecodealliance/wasmtime#8312): Remove once backend lowerings have
+    // been added to Cranelift.
+    let (lhs, rhs) = if *in_lhs.layout().ty.kind() == ty::Float(FloatTy::F16) {
+        (codegen_f16_f128::f16_to_f32(fx, lhs), codegen_f16_f128::f16_to_f32(fx, rhs))
+    } else {
+        (lhs, rhs)
+    };
     let b = fx.bcx.ins();
     let res = match bin_op {
+        // FIXME(bytecodealliance/wasmtime#8312): Remove once backend lowerings
+        // have been added to Cranelift.
+        BinOp::Add | BinOp::Sub | BinOp::Mul | BinOp::Div
+            if *in_lhs.layout().ty.kind() == ty::Float(FloatTy::F128) =>
+        {
+            codegen_f16_f128::codegen_f128_binop(fx, bin_op, lhs, rhs)
+        }
         BinOp::Add => b.fadd(lhs, rhs),
         BinOp::Sub => b.fsub(lhs, rhs),
         BinOp::Mul => b.fmul(lhs, rhs),
        BinOp::Div => b.fdiv(lhs, rhs),
         BinOp::Rem => {
-            let (name, ty) = match in_lhs.layout().ty.kind() {
-                ty::Float(FloatTy::F32) => ("fmodf", types::F32),
-                ty::Float(FloatTy::F64) => ("fmod", types::F64),
+            let (name, ty, lhs, rhs) = match in_lhs.layout().ty.kind() {
+                ty::Float(FloatTy::F16) => (
+                    "fmodf",
+                    types::F32,
+                    // FIXME(bytecodealliance/wasmtime#8312): `lhs` and `rhs`
+                    // were already promoted to `f32` by the conversion above,
+                    // so the promotions below are not needed yet:
+                    // fx.bcx.ins().fpromote(types::F32, lhs),
+                    // fx.bcx.ins().fpromote(types::F32, rhs),
+                    lhs,
+                    rhs,
+                ),
+                ty::Float(FloatTy::F32) => ("fmodf", types::F32, lhs, rhs),
+                ty::Float(FloatTy::F64) => ("fmod", types::F64, lhs, rhs),
+                ty::Float(FloatTy::F128) => ("fmodf128", types::F128, lhs, rhs),
                 _ => bug!(),
             };
-            fx.lib_call(
+            let ret_val = fx.lib_call(
                 name,
                 vec![AbiParam::new(ty), AbiParam::new(ty)],
                 vec![AbiParam::new(ty)],
                 &[lhs, rhs],
-            )[0]
+            )[0];
+
+            let ret_val = if *in_lhs.layout().ty.kind() == ty::Float(FloatTy::F16) {
+                // FIXME(bytecodealliance/wasmtime#8312): Use the native
+                // Cranelift operation once Cranelift backend lowerings have
+                // been implemented.
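+                // `fmodf` computed the remainder in `f32`, so demote the
+                // result back to `f16` here.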
+ codegen_f16_f128::f32_to_f16(fx, ret_val) + } else { + ret_val + }; + return CValue::by_val(ret_val, in_lhs.layout()); } BinOp::Eq | BinOp::Lt | BinOp::Le | BinOp::Ne | BinOp::Ge | BinOp::Gt => { let fltcc = match bin_op { @@ -386,6 +422,13 @@ pub(crate) fn codegen_float_binop<'tcx>( _ => unreachable!("{:?}({:?}, {:?})", bin_op, in_lhs, in_rhs), }; + // FIXME(bytecodealliance/wasmtime#8312): Remove once backend lowerings have + // been added to Cranelift. + let res = if *in_lhs.layout().ty.kind() == ty::Float(FloatTy::F16) { + codegen_f16_f128::f32_to_f16(fx, res) + } else { + res + }; CValue::by_val(res, in_lhs.layout()) } From 38d48dbe083b680b54e39c2ead2a92d76001786c Mon Sep 17 00:00:00 2001 From: beetrees Date: Fri, 23 May 2025 15:51:51 +0100 Subject: [PATCH 5/8] Add `f16`/`f128` comparison support --- src/codegen_f16_f128.rs | 54 +++++++++++++++++++++++++ src/compiler_builtins.rs | 9 +++++ src/intrinsics/mod.rs | 87 ++++++++++++++++++++++++++++++++++++++++ src/num.rs | 17 +++++--- 4 files changed, 162 insertions(+), 5 deletions(-) diff --git a/src/codegen_f16_f128.rs b/src/codegen_f16_f128.rs index c6f0779b8..c570fbbd9 100644 --- a/src/codegen_f16_f128.rs +++ b/src/codegen_f16_f128.rs @@ -28,6 +28,42 @@ pub(crate) fn f32_to_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value if ret_ty == types::I16 { fx.bcx.ins().bitcast(types::F16, MemFlags::new(), ret) } else { ret } } +pub(crate) fn fcmp(fx: &mut FunctionCx<'_, '_, '_>, cc: FloatCC, lhs: Value, rhs: Value) -> Value { + let ty = fx.bcx.func.dfg.value_type(lhs); + match ty { + types::F32 | types::F64 => fx.bcx.ins().fcmp(cc, lhs, rhs), + types::F16 => { + let lhs = f16_to_f32(fx, lhs); + let rhs = f16_to_f32(fx, rhs); + fx.bcx.ins().fcmp(cc, lhs, rhs) + } + types::F128 => { + let (name, int_cc) = match cc { + FloatCC::Equal => ("__eqtf2", IntCC::Equal), + FloatCC::NotEqual => ("__netf2", IntCC::NotEqual), + FloatCC::LessThan => ("__lttf2", IntCC::SignedLessThan), + FloatCC::LessThanOrEqual => ("__letf2", IntCC::SignedLessThanOrEqual), + FloatCC::GreaterThan => ("__gttf2", IntCC::SignedGreaterThan), + FloatCC::GreaterThanOrEqual => ("__getf2", IntCC::SignedGreaterThanOrEqual), + _ => unreachable!("not currently used in rustc_codegen_cranelift: {cc:?}"), + }; + let res = fx.lib_call( + name, + vec![AbiParam::new(types::F128), AbiParam::new(types::F128)], + // FIXME(rust-lang/compiler-builtins#919): This should be `I64` on non-AArch64 + // architectures, but switching it before compiler-builtins is fixed causes test + // failures. 
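+            // The `__*tf2` comparison libcalls return a negative, zero or
+            // positive integer; comparing it against zero below recovers the
+            // boolean result.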
+ vec![AbiParam::new(types::I32)], + &[lhs, rhs], + )[0]; + let zero = fx.bcx.ins().iconst(types::I32, 0); + let res = fx.bcx.ins().icmp(int_cc, res, zero); + res + } + _ => unreachable!("{ty:?}"), + } +} + pub(crate) fn codegen_f128_binop( fx: &mut FunctionCx<'_, '_, '_>, bin_op: BinOp, @@ -62,3 +98,21 @@ pub(crate) fn neg_f128(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { let bits = fx.bcx.ins().iconcat(low, high); fx.bcx.ins().bitcast(types::F128, MemFlags::new(), bits) } + +pub(crate) fn fmin_f128(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value { + fx.lib_call( + "fminimumf128", + vec![AbiParam::new(types::F128), AbiParam::new(types::F128)], + vec![AbiParam::new(types::F128)], + &[a, b], + )[0] +} + +pub(crate) fn fmax_f128(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value { + fx.lib_call( + "fmaximumf128", + vec![AbiParam::new(types::F128), AbiParam::new(types::F128)], + vec![AbiParam::new(types::F128)], + &[a, b], + )[0] +} diff --git a/src/compiler_builtins.rs b/src/compiler_builtins.rs index 0c75df845..017a1370a 100644 --- a/src/compiler_builtins.rs +++ b/src/compiler_builtins.rs @@ -67,6 +67,15 @@ builtin_functions! { fn fmodf(a: f32, b: f32) -> f32; fn fmod(a: f64, b: f64) -> f64; fn fmodf128(a: f128, b: f128) -> f128; + // float comparison + fn __eqtf2(a: f128, b: f128) -> i32; + fn __netf2(a: f128, b: f128) -> i32; + fn __lttf2(a: f128, b: f128) -> i32; + fn __letf2(a: f128, b: f128) -> i32; + fn __gttf2(a: f128, b: f128) -> i32; + fn __getf2(a: f128, b: f128) -> i32; + fn fminimumf128(a: f128, b: f128) -> f128; + fn fmaximumf128(a: f128, b: f128) -> f128; // Cranelift float libcalls fn fmaf(a: f32, b: f32, c: f32) -> f32; fn fma(a: f64, b: f64, c: f64) -> f64; diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index 7c18922d9..6481d2112 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -27,6 +27,7 @@ use rustc_span::{Symbol, sym}; pub(crate) use self::llvm::codegen_llvm_intrinsic_call; use crate::cast::clif_intcast; +use crate::codegen_f16_f128; use crate::prelude::*; fn bug_on_incorrect_arg_count(intrinsic: impl std::fmt::Display) -> ! { @@ -1118,6 +1119,20 @@ fn codegen_regular_intrinsic_call<'tcx>( ret.write_cvalue(fx, old); } + sym::minimumf16 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + // FIXME(bytecodealliance/wasmtime#8312): Use `fmin` directly once + // Cranelift backend lowerings are implemented. + let a = codegen_f16_f128::f16_to_f32(fx, a); + let b = codegen_f16_f128::f16_to_f32(fx, b); + let val = fx.bcx.ins().fmin(a, b); + let val = codegen_f16_f128::f32_to_f16(fx, val); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f16)); + ret.write_cvalue(fx, val); + } sym::minimumf32 => { intrinsic_args!(fx, args => (a, b); intrinsic); let a = a.load_scalar(fx); @@ -1136,6 +1151,31 @@ fn codegen_regular_intrinsic_call<'tcx>( let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64)); ret.write_cvalue(fx, val); } + sym::minimumf128 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + // FIXME(bytecodealliance/wasmtime#8312): Use `fmin` once Cranelift + // backend lowerings are implemented. 
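+            // `fminimumf128` comes from compiler-builtins and implements the
+            // NaN-propagating IEEE 754-2019 `minimum` operation.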
+ let val = codegen_f16_f128::fmin_f128(fx, a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f128)); + ret.write_cvalue(fx, val); + } + sym::maximumf16 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + // FIXME(bytecodealliance/wasmtime#8312): Use `fmax` directly once + // Cranelift backend lowerings are implemented. + let a = codegen_f16_f128::f16_to_f32(fx, a); + let b = codegen_f16_f128::f16_to_f32(fx, b); + let val = fx.bcx.ins().fmax(a, b); + let val = codegen_f16_f128::f32_to_f16(fx, val); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f16)); + ret.write_cvalue(fx, val); + } sym::maximumf32 => { intrinsic_args!(fx, args => (a, b); intrinsic); let a = a.load_scalar(fx); @@ -1154,7 +1194,27 @@ fn codegen_regular_intrinsic_call<'tcx>( let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64)); ret.write_cvalue(fx, val); } + sym::maximumf128 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + // FIXME(bytecodealliance/wasmtime#8312): Use `fmax` once Cranelift + // backend lowerings are implemented. + let val = codegen_f16_f128::fmax_f128(fx, a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f128)); + ret.write_cvalue(fx, val); + } + + sym::minnumf16 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + let val = crate::num::codegen_float_min(fx, a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f16)); + ret.write_cvalue(fx, val); + } sym::minnumf32 => { intrinsic_args!(fx, args => (a, b); intrinsic); let a = a.load_scalar(fx); @@ -1173,6 +1233,24 @@ fn codegen_regular_intrinsic_call<'tcx>( let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64)); ret.write_cvalue(fx, val); } + sym::minnumf128 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + let val = crate::num::codegen_float_min(fx, a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f128)); + ret.write_cvalue(fx, val); + } + sym::maxnumf16 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + let val = crate::num::codegen_float_max(fx, a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f16)); + ret.write_cvalue(fx, val); + } sym::maxnumf32 => { intrinsic_args!(fx, args => (a, b); intrinsic); let a = a.load_scalar(fx); @@ -1191,6 +1269,15 @@ fn codegen_regular_intrinsic_call<'tcx>( let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64)); ret.write_cvalue(fx, val); } + sym::maxnumf128 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + let val = crate::num::codegen_float_max(fx, a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f128)); + ret.write_cvalue(fx, val); + } sym::catch_unwind => { intrinsic_args!(fx, args => (f, data, catch_fn); intrinsic); diff --git a/src/num.rs b/src/num.rs index 3ed276267..f53045df6 100644 --- a/src/num.rs +++ b/src/num.rs @@ -416,7 +416,10 @@ pub(crate) fn codegen_float_binop<'tcx>( BinOp::Gt => FloatCC::GreaterThan, _ => unreachable!(), }; - let val = fx.bcx.ins().fcmp(fltcc, lhs, rhs); + // FIXME(bytecodealliance/wasmtime#8312): Replace with Cranelift + // `fcmp` once `f16`/`f128` backend lowerings have been added to + // Cranelift. 
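+            // `codegen_f16_f128::fcmp` emits a native `fcmp` for `f32`/`f64`
+            // (any `f16` operands were already promoted above) and lowers
+            // `f128` comparisons to libcalls.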
+ let val = codegen_f16_f128::fcmp(fx, fltcc, lhs, rhs); return CValue::by_val(val, fx.layout_of(fx.tcx.types.bool)); } _ => unreachable!("{:?}({:?}, {:?})", bin_op, in_lhs, in_rhs), @@ -500,15 +503,19 @@ fn codegen_ptr_binop<'tcx>( // and `a.is_nan() ? b : (a <= b ? b : a)` for `maxnumf*`. NaN checks are done by comparing // a float against itself. Only in case of NaN is it not equal to itself. pub(crate) fn codegen_float_min(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value { - let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a); - let a_ge_b = fx.bcx.ins().fcmp(FloatCC::GreaterThanOrEqual, a, b); + // FIXME(bytecodealliance/wasmtime#8312): Replace with Cranelift `fcmp` once + // `f16`/`f128` backend lowerings have been added to Cranelift. + let a_is_nan = codegen_f16_f128::fcmp(fx, FloatCC::NotEqual, a, a); + let a_ge_b = codegen_f16_f128::fcmp(fx, FloatCC::GreaterThanOrEqual, a, b); let temp = fx.bcx.ins().select(a_ge_b, b, a); fx.bcx.ins().select(a_is_nan, b, temp) } pub(crate) fn codegen_float_max(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value { - let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a); - let a_le_b = fx.bcx.ins().fcmp(FloatCC::LessThanOrEqual, a, b); + // FIXME(bytecodealliance/wasmtime#8312): Replace with Cranelift `fcmp` once + // `f16`/`f128` backend lowerings have been added to Cranelift. + let a_is_nan = codegen_f16_f128::fcmp(fx, FloatCC::NotEqual, a, a); + let a_le_b = codegen_f16_f128::fcmp(fx, FloatCC::LessThanOrEqual, a, b); let temp = fx.bcx.ins().select(a_le_b, b, a); fx.bcx.ins().select(a_is_nan, b, temp) } From e46186f9944cf10c35b228e38ba6312903587896 Mon Sep 17 00:00:00 2001 From: beetrees Date: Fri, 23 May 2025 15:51:51 +0100 Subject: [PATCH 6/8] Add support for casting to and from `f16`/`f128` --- src/cast.rs | 21 ++++++- src/codegen_f16_f128.rs | 123 +++++++++++++++++++++++++++++++++++++++ src/compiler_builtins.rs | 19 ++++++ 3 files changed, 161 insertions(+), 2 deletions(-) diff --git a/src/cast.rs b/src/cast.rs index e23463242..8a725680e 100644 --- a/src/cast.rs +++ b/src/cast.rs @@ -1,5 +1,6 @@ //! Various number casting functions +use crate::codegen_f16_f128; use crate::prelude::*; pub(crate) fn clif_intcast( @@ -36,6 +37,14 @@ pub(crate) fn clif_int_or_float_cast( ) -> Value { let from_ty = fx.bcx.func.dfg.value_type(from); + // FIXME(bytecodealliance/wasmtime#8312): Remove in favour of native + // Cranelift operations once Cranelift backends have lowerings for them. 
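+    // Route every cast involving an `f16` or `f128` value through the
+    // libcall-based helper instead of native Cranelift instructions.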
+ if matches!(from_ty, types::F16 | types::F128) + || matches!(to_ty, types::F16 | types::F128) && from_ty != to_ty + { + return codegen_f16_f128::codegen_cast(fx, from, from_signed, to_ty, to_signed); + } + if from_ty.is_int() && to_ty.is_int() { // int-like -> int-like clif_intcast( @@ -58,8 +67,10 @@ pub(crate) fn clif_int_or_float_cast( "__float{sign}ti{flt}f", sign = if from_signed { "" } else { "un" }, flt = match to_ty { + types::F16 => "h", types::F32 => "s", types::F64 => "d", + types::F128 => "t", _ => unreachable!("{:?}", to_ty), }, ); @@ -90,8 +101,10 @@ pub(crate) fn clif_int_or_float_cast( "__fix{sign}{flt}fti", sign = if to_signed { "" } else { "uns" }, flt = match from_ty { + types::F16 => "h", types::F32 => "s", types::F64 => "d", + types::F128 => "t", _ => unreachable!("{:?}", to_ty), }, ); @@ -145,8 +158,12 @@ pub(crate) fn clif_int_or_float_cast( } else if from_ty.is_float() && to_ty.is_float() { // float -> float match (from_ty, to_ty) { - (types::F32, types::F64) => fx.bcx.ins().fpromote(types::F64, from), - (types::F64, types::F32) => fx.bcx.ins().fdemote(types::F32, from), + (types::F16, types::F32 | types::F64 | types::F128) + | (types::F32, types::F64 | types::F128) + | (types::F64, types::F128) => fx.bcx.ins().fpromote(to_ty, from), + (types::F128, types::F64 | types::F32 | types::F16) + | (types::F64, types::F32 | types::F16) + | (types::F32, types::F16) => fx.bcx.ins().fdemote(to_ty, from), _ => from, } } else { diff --git a/src/codegen_f16_f128.rs b/src/codegen_f16_f128.rs index c570fbbd9..a887341ce 100644 --- a/src/codegen_f16_f128.rs +++ b/src/codegen_f16_f128.rs @@ -13,6 +13,11 @@ pub(crate) fn f16_to_f32(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value fx.lib_call("__extendhfsf2", vec![arg_ty], vec![AbiParam::new(types::F32)], &[value])[0] } +fn f16_to_f64(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { + let ret = f16_to_f32(fx, value); + fx.bcx.ins().fpromote(types::F64, ret) +} + pub(crate) fn f32_to_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { let ret_ty = if fx.tcx.sess.target.vendor == "apple" && fx.tcx.sess.target.arch == "x86_64" { types::I16 @@ -28,6 +33,21 @@ pub(crate) fn f32_to_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value if ret_ty == types::I16 { fx.bcx.ins().bitcast(types::F16, MemFlags::new(), ret) } else { ret } } +fn f64_to_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { + let ret_ty = if fx.tcx.sess.target.vendor == "apple" && fx.tcx.sess.target.arch == "x86_64" { + types::I16 + } else { + types::F16 + }; + let ret = fx.lib_call( + "__truncdfhf2", + vec![AbiParam::new(types::F64)], + vec![AbiParam::new(ret_ty)], + &[value], + )[0]; + if ret_ty == types::I16 { fx.bcx.ins().bitcast(types::F16, MemFlags::new(), ret) } else { ret } +} + pub(crate) fn fcmp(fx: &mut FunctionCx<'_, '_, '_>, cc: FloatCC, lhs: Value, rhs: Value) -> Value { let ty = fx.bcx.func.dfg.value_type(lhs); match ty { @@ -99,6 +119,109 @@ pub(crate) fn neg_f128(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { fx.bcx.ins().bitcast(types::F128, MemFlags::new(), bits) } +pub(crate) fn codegen_cast( + fx: &mut FunctionCx<'_, '_, '_>, + from: Value, + from_signed: bool, + to_ty: Type, + to_signed: bool, +) -> Value { + let from_ty = fx.bcx.func.dfg.value_type(from); + if from_ty.is_float() && to_ty.is_float() { + let name = match (from_ty, to_ty) { + (types::F16, types::F32) => return f16_to_f32(fx, from), + (types::F16, types::F64) => return f16_to_f64(fx, from), + (types::F16, types::F128) => 
"__extendhftf2", + (types::F32, types::F128) => "__extendsftf2", + (types::F64, types::F128) => "__extenddftf2", + (types::F128, types::F64) => "__trunctfdf2", + (types::F128, types::F32) => "__trunctfsf2", + (types::F128, types::F16) => "__trunctfhf2", + (types::F64, types::F16) => return f64_to_f16(fx, from), + (types::F32, types::F16) => return f32_to_f16(fx, from), + _ => unreachable!("{from_ty:?} -> {to_ty:?}"), + }; + fx.lib_call(name, vec![AbiParam::new(from_ty)], vec![AbiParam::new(to_ty)], &[from])[0] + } else if from_ty.is_int() && to_ty == types::F16 { + let res = clif_int_or_float_cast(fx, from, from_signed, types::F32, false); + f32_to_f16(fx, res) + } else if from_ty == types::F16 && to_ty.is_int() { + let from = f16_to_f32(fx, from); + clif_int_or_float_cast(fx, from, false, to_ty, to_signed) + } else if from_ty.is_int() && to_ty == types::F128 { + let (from, from_ty) = if from_ty.bits() < 32 { + (clif_int_or_float_cast(fx, from, from_signed, types::I32, from_signed), types::I32) + } else { + (from, from_ty) + }; + let name = format!( + "__float{sign}{size}itf", + sign = if from_signed { "" } else { "un" }, + size = match from_ty { + types::I32 => 's', + types::I64 => 'd', + types::I128 => 't', + _ => unreachable!("{from_ty:?}"), + }, + ); + fx.lib_call( + &name, + vec![lib_call_arg_param(fx.tcx, from_ty, from_signed)], + vec![AbiParam::new(to_ty)], + &[from], + )[0] + } else if from_ty == types::F128 && to_ty.is_int() { + let ret_ty = if to_ty.bits() < 32 { types::I32 } else { to_ty }; + let name = format!( + "__fix{sign}tf{size}i", + sign = if from_signed { "" } else { "un" }, + size = match ret_ty { + types::I32 => 's', + types::I64 => 'd', + types::I128 => 't', + _ => unreachable!("{from_ty:?}"), + }, + ); + let ret = + fx.lib_call(&name, vec![AbiParam::new(from_ty)], vec![AbiParam::new(to_ty)], &[from]) + [0]; + let val = if ret_ty == to_ty { + ret + } else { + let (min, max) = match (to_ty, to_signed) { + (types::I8, false) => (0, i64::from(u8::MAX)), + (types::I16, false) => (0, i64::from(u16::MAX)), + (types::I8, true) => (i64::from(i8::MIN as u32), i64::from(i8::MAX as u32)), + (types::I16, true) => (i64::from(i16::MIN as u32), i64::from(i16::MAX as u32)), + _ => unreachable!("{to_ty:?}"), + }; + let min_val = fx.bcx.ins().iconst(types::I32, min); + let max_val = fx.bcx.ins().iconst(types::I32, max); + + let val = if to_signed { + let has_underflow = fx.bcx.ins().icmp_imm(IntCC::SignedLessThan, ret, min); + let has_overflow = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThan, ret, max); + let bottom_capped = fx.bcx.ins().select(has_underflow, min_val, ret); + fx.bcx.ins().select(has_overflow, max_val, bottom_capped) + } else { + let has_overflow = fx.bcx.ins().icmp_imm(IntCC::UnsignedGreaterThan, ret, max); + fx.bcx.ins().select(has_overflow, max_val, ret) + }; + fx.bcx.ins().ireduce(to_ty, val) + }; + + if let Some(false) = fx.tcx.sess.opts.unstable_opts.saturating_float_casts { + return val; + } + + let is_not_nan = fcmp(fx, FloatCC::Equal, from, from); + let zero = type_zero_value(&mut fx.bcx, to_ty); + fx.bcx.ins().select(is_not_nan, val, zero) + } else { + unreachable!("{from_ty:?} -> {to_ty:?}"); + } +} + pub(crate) fn fmin_f128(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value { fx.lib_call( "fminimumf128", diff --git a/src/compiler_builtins.rs b/src/compiler_builtins.rs index 017a1370a..d9cbffd7a 100644 --- a/src/compiler_builtins.rs +++ b/src/compiler_builtins.rs @@ -49,15 +49,34 @@ builtin_functions! 
{ // integer -> float fn __floattisf(i: i128) -> f32; fn __floattidf(i: i128) -> f64; + fn __floatsitf(i: i32) -> f128; + fn __floatditf(i: i64) -> f128; + fn __floattitf(i: i128) -> f128; fn __floatuntisf(i: u128) -> f32; fn __floatuntidf(i: u128) -> f64; + fn __floatunsitf(i: u32) -> f128; + fn __floatunditf(i: u64) -> f128; + fn __floatuntitf(i: u128) -> f128; // float -> integer fn __fixsfti(f: f32) -> i128; fn __fixdfti(f: f64) -> i128; + fn __fixtfsi(f: f128) -> i32; + fn __fixtfdi(f: f128) -> i64; + fn __fixtfti(f: f128) -> i128; fn __fixunssfti(f: f32) -> u128; fn __fixunsdfti(f: f64) -> u128; + fn __fixunstfsi(f: f128) -> u32; + fn __fixunstfdi(f: f128) -> u64; + fn __fixunstfti(f: f128) -> u128; // float -> float fn __extendhfsf2(f: f16) -> f32; + fn __extendhftf2(f: f16) -> f128; + fn __extendsftf2(f: f32) -> f128; + fn __extenddftf2(f: f64) -> f128; + fn __trunctfdf2(f: f128) -> f64; + fn __trunctfsf2(f: f128) -> f32; + fn __trunctfhf2(f: f128) -> f16; + fn __truncdfhf2(f: f64) -> f16; fn __truncsfhf2(f: f32) -> f16; // float binops fn __addtf3(a: f128, b: f128) -> f128; From 2055f01323fbeada8857f9a0725c12567891c377 Mon Sep 17 00:00:00 2001 From: beetrees Date: Fri, 23 May 2025 15:51:51 +0100 Subject: [PATCH 7/8] Add `f16`/`f128` intrinsic support --- src/codegen_f16_f128.rs | 43 ++++++++++++++++++++++ src/compiler_builtins.rs | 15 ++++++++ src/intrinsics/mod.rs | 79 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 136 insertions(+), 1 deletion(-) diff --git a/src/codegen_f16_f128.rs b/src/codegen_f16_f128.rs index a887341ce..1e202be1f 100644 --- a/src/codegen_f16_f128.rs +++ b/src/codegen_f16_f128.rs @@ -119,6 +119,41 @@ pub(crate) fn neg_f128(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { fx.bcx.ins().bitcast(types::F128, MemFlags::new(), bits) } +pub(crate) fn abs_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { + let bits = fx.bcx.ins().bitcast(types::I16, MemFlags::new(), value); + let bits = fx.bcx.ins().band_imm(bits, 0x7fff); + fx.bcx.ins().bitcast(types::F16, MemFlags::new(), bits) +} + +pub(crate) fn abs_f128(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { + let bits = fx.bcx.ins().bitcast(types::I128, MemFlags::new(), value); + let (low, high) = fx.bcx.ins().isplit(bits); + let high = fx.bcx.ins().band_imm(high, 0x7fff_ffff_ffff_ffff_u64 as i64); + let bits = fx.bcx.ins().iconcat(low, high); + fx.bcx.ins().bitcast(types::F128, MemFlags::new(), bits) +} + +pub(crate) fn copysign_f16(fx: &mut FunctionCx<'_, '_, '_>, lhs: Value, rhs: Value) -> Value { + let lhs = fx.bcx.ins().bitcast(types::I16, MemFlags::new(), lhs); + let rhs = fx.bcx.ins().bitcast(types::I16, MemFlags::new(), rhs); + let res = fx.bcx.ins().band_imm(lhs, 0x7fff); + let sign = fx.bcx.ins().band_imm(rhs, 0x8000); + let res = fx.bcx.ins().bor(res, sign); + fx.bcx.ins().bitcast(types::F16, MemFlags::new(), res) +} + +pub(crate) fn copysign_f128(fx: &mut FunctionCx<'_, '_, '_>, lhs: Value, rhs: Value) -> Value { + let lhs = fx.bcx.ins().bitcast(types::I128, MemFlags::new(), lhs); + let rhs = fx.bcx.ins().bitcast(types::I128, MemFlags::new(), rhs); + let (low, lhs_high) = fx.bcx.ins().isplit(lhs); + let (_, rhs_high) = fx.bcx.ins().isplit(rhs); + let high = fx.bcx.ins().band_imm(lhs_high, 0x7fff_ffff_ffff_ffff_u64 as i64); + let sign = fx.bcx.ins().band_imm(rhs_high, 0x8000_0000_0000_0000_u64 as i64); + let high = fx.bcx.ins().bor(high, sign); + let res = fx.bcx.ins().iconcat(low, high); + fx.bcx.ins().bitcast(types::F128, MemFlags::new(), res) +} + pub(crate) fn 
codegen_cast( fx: &mut FunctionCx<'_, '_, '_>, from: Value, @@ -222,6 +257,14 @@ pub(crate) fn codegen_cast( } } +pub(crate) fn fma_f16(fx: &mut FunctionCx<'_, '_, '_>, x: Value, y: Value, z: Value) -> Value { + let x = f16_to_f64(fx, x); + let y = f16_to_f64(fx, y); + let z = f16_to_f64(fx, z); + let res = fx.bcx.ins().fma(x, y, z); + f64_to_f16(fx, res) +} + pub(crate) fn fmin_f128(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value { fx.lib_call( "fminimumf128", diff --git a/src/compiler_builtins.rs b/src/compiler_builtins.rs index d9cbffd7a..6eea19211 100644 --- a/src/compiler_builtins.rs +++ b/src/compiler_builtins.rs @@ -109,6 +109,8 @@ builtin_functions! { // float intrinsics fn __powisf2(a: f32, b: i32) -> f32; fn __powidf2(a: f64, b: i32) -> f64; + // FIXME(f16_f128): `compiler-builtins` doesn't currently support `__powitf2` on MSVC. + // fn __powitf2(a: f128, b: i32) -> f128; fn powf(a: f32, b: f32) -> f32; fn pow(a: f64, b: f64) -> f64; fn expf(f: f32) -> f32; @@ -125,6 +127,19 @@ builtin_functions! { fn sin(f: f64) -> f64; fn cosf(f: f32) -> f32; fn cos(f: f64) -> f64; + fn fmaf128(a: f128, b: f128, c: f128) -> f128; + fn floorf16(f: f16) -> f16; + fn floorf128(f: f128) -> f128; + fn ceilf16(f: f16) -> f16; + fn ceilf128(f: f128) -> f128; + fn truncf16(f: f16) -> f16; + fn truncf128(f: f128) -> f128; + fn rintf16(f: f16) -> f16; + fn rintf128(f: f128) -> f128; + fn sqrtf16(f: f16) -> f16; + fn sqrtf128(f: f128) -> f128; + // FIXME(f16_f128): Add other float intrinsics as compiler-builtins gains support (meaning they + // are available on all targets). // allocator // NOTE: These need to be mentioned here despite not being part of compiler_builtins because diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index 6481d2112..b21ca32c9 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -249,8 +249,10 @@ fn bool_to_zero_or_max_uint<'tcx>( let ty = fx.clif_type(ty).unwrap(); let int_ty = match ty { + types::F16 => types::I16, types::F32 => types::I32, types::F64 => types::I64, + types::F128 => types::I128, ty => ty, }; @@ -309,45 +311,83 @@ fn codegen_float_intrinsic_call<'tcx>( ret: CPlace<'tcx>, ) -> bool { let (name, arg_count, ty, clif_ty) = match intrinsic { + sym::expf16 => ("expf16", 1, fx.tcx.types.f16, types::F16), sym::expf32 => ("expf", 1, fx.tcx.types.f32, types::F32), sym::expf64 => ("exp", 1, fx.tcx.types.f64, types::F64), + sym::expf128 => ("expf128", 1, fx.tcx.types.f128, types::F128), + sym::exp2f16 => ("exp2f16", 1, fx.tcx.types.f16, types::F16), sym::exp2f32 => ("exp2f", 1, fx.tcx.types.f32, types::F32), sym::exp2f64 => ("exp2", 1, fx.tcx.types.f64, types::F64), + sym::exp2f128 => ("exp2f128", 1, fx.tcx.types.f128, types::F128), + sym::sqrtf16 => ("sqrtf16", 1, fx.tcx.types.f16, types::F16), sym::sqrtf32 => ("sqrtf", 1, fx.tcx.types.f32, types::F32), sym::sqrtf64 => ("sqrt", 1, fx.tcx.types.f64, types::F64), + sym::sqrtf128 => ("sqrtf128", 1, fx.tcx.types.f128, types::F128), + sym::powif16 => ("__powisf2", 2, fx.tcx.types.f16, types::F16), // compiler-builtins sym::powif32 => ("__powisf2", 2, fx.tcx.types.f32, types::F32), // compiler-builtins sym::powif64 => ("__powidf2", 2, fx.tcx.types.f64, types::F64), // compiler-builtins + sym::powif128 => ("__powitf2", 2, fx.tcx.types.f128, types::F128), // compiler-builtins + sym::powf16 => ("powf16", 2, fx.tcx.types.f16, types::F16), sym::powf32 => ("powf", 2, fx.tcx.types.f32, types::F32), sym::powf64 => ("pow", 2, fx.tcx.types.f64, types::F64), + sym::powf128 => ("powf128", 2, 
fx.tcx.types.f128, types::F128),
+        sym::logf16 => ("logf16", 1, fx.tcx.types.f16, types::F16),
         sym::logf32 => ("logf", 1, fx.tcx.types.f32, types::F32),
         sym::logf64 => ("log", 1, fx.tcx.types.f64, types::F64),
+        sym::logf128 => ("logf128", 1, fx.tcx.types.f128, types::F128),
+        sym::log2f16 => ("log2f16", 1, fx.tcx.types.f16, types::F16),
         sym::log2f32 => ("log2f", 1, fx.tcx.types.f32, types::F32),
         sym::log2f64 => ("log2", 1, fx.tcx.types.f64, types::F64),
+        sym::log2f128 => ("log2f128", 1, fx.tcx.types.f128, types::F128),
+        sym::log10f16 => ("log10f16", 1, fx.tcx.types.f16, types::F16),
         sym::log10f32 => ("log10f", 1, fx.tcx.types.f32, types::F32),
         sym::log10f64 => ("log10", 1, fx.tcx.types.f64, types::F64),
+        sym::log10f128 => ("log10f128", 1, fx.tcx.types.f128, types::F128),
+        sym::fabsf16 => ("fabsf16", 1, fx.tcx.types.f16, types::F16),
         sym::fabsf32 => ("fabsf", 1, fx.tcx.types.f32, types::F32),
         sym::fabsf64 => ("fabs", 1, fx.tcx.types.f64, types::F64),
+        sym::fabsf128 => ("fabsf128", 1, fx.tcx.types.f128, types::F128),
+        sym::fmaf16 => ("fmaf16", 3, fx.tcx.types.f16, types::F16),
         sym::fmaf32 => ("fmaf", 3, fx.tcx.types.f32, types::F32),
         sym::fmaf64 => ("fma", 3, fx.tcx.types.f64, types::F64),
+        sym::fmaf128 => ("fmaf128", 3, fx.tcx.types.f128, types::F128),
         // FIXME: calling `fma` from libc without FMA target feature uses expensive software emulation
+        sym::fmuladdf16 => ("fmaf16", 3, fx.tcx.types.f16, types::F16), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f16
         sym::fmuladdf32 => ("fmaf", 3, fx.tcx.types.f32, types::F32), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f32
         sym::fmuladdf64 => ("fma", 3, fx.tcx.types.f64, types::F64), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f64
+        sym::fmuladdf128 => ("fmaf128", 3, fx.tcx.types.f128, types::F128), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f128
+        sym::copysignf16 => ("copysignf16", 2, fx.tcx.types.f16, types::F16),
         sym::copysignf32 => ("copysignf", 2, fx.tcx.types.f32, types::F32),
         sym::copysignf64 => ("copysign", 2, fx.tcx.types.f64, types::F64),
+        sym::copysignf128 => ("copysignf128", 2, fx.tcx.types.f128, types::F128),
+        sym::floorf16 => ("floorf16", 1, fx.tcx.types.f16, types::F16),
         sym::floorf32 => ("floorf", 1, fx.tcx.types.f32, types::F32),
         sym::floorf64 => ("floor", 1, fx.tcx.types.f64, types::F64),
+        sym::floorf128 => ("floorf128", 1, fx.tcx.types.f128, types::F128),
+        sym::ceilf16 => ("ceilf16", 1, fx.tcx.types.f16, types::F16),
         sym::ceilf32 => ("ceilf", 1, fx.tcx.types.f32, types::F32),
         sym::ceilf64 => ("ceil", 1, fx.tcx.types.f64, types::F64),
+        sym::ceilf128 => ("ceilf128", 1, fx.tcx.types.f128, types::F128),
+        sym::truncf16 => ("truncf16", 1, fx.tcx.types.f16, types::F16),
         sym::truncf32 => ("truncf", 1, fx.tcx.types.f32, types::F32),
         sym::truncf64 => ("trunc", 1, fx.tcx.types.f64, types::F64),
+        sym::truncf128 => ("truncf128", 1, fx.tcx.types.f128, types::F128),
+        sym::round_ties_even_f16 => ("rintf16", 1, fx.tcx.types.f16, types::F16),
         sym::round_ties_even_f32 => ("rintf", 1, fx.tcx.types.f32, types::F32),
         sym::round_ties_even_f64 => ("rint", 1, fx.tcx.types.f64, types::F64),
+        sym::round_ties_even_f128 => ("rintf128", 1, fx.tcx.types.f128, types::F128),
+        sym::roundf16 => ("roundf16", 1, fx.tcx.types.f16, types::F16),
         sym::roundf32 => ("roundf", 1, fx.tcx.types.f32, types::F32),
         sym::roundf64 => ("round", 1, fx.tcx.types.f64, types::F64),
+        sym::roundf128 => ("roundf128", 1, fx.tcx.types.f128, types::F128),
+        sym::sinf16 => ("sinf16", 1, fx.tcx.types.f16,
types::F16), sym::sinf32 => ("sinf", 1, fx.tcx.types.f32, types::F32), sym::sinf64 => ("sin", 1, fx.tcx.types.f64, types::F64), + sym::sinf128 => ("sinf128", 1, fx.tcx.types.f128, types::F128), + sym::cosf16 => ("cosf16", 1, fx.tcx.types.f16, types::F16), sym::cosf32 => ("cosf", 1, fx.tcx.types.f32, types::F32), sym::cosf64 => ("cos", 1, fx.tcx.types.f64, types::F64), + sym::cosf128 => ("cosf128", 1, fx.tcx.types.f128, types::F128), _ => return false, }; @@ -380,13 +420,26 @@ fn codegen_float_intrinsic_call<'tcx>( }; let layout = fx.layout_of(ty); + // FIXME(bytecodealliance/wasmtime#8312): Use native Cranelift operations + // for `f16` and `f128` once the lowerings have been implemented in Cranelift. let res = match intrinsic { + sym::fmaf16 | sym::fmuladdf16 => { + CValue::by_val(codegen_f16_f128::fma_f16(fx, args[0], args[1], args[2]), layout) + } sym::fmaf32 | sym::fmaf64 | sym::fmuladdf32 | sym::fmuladdf64 => { CValue::by_val(fx.bcx.ins().fma(args[0], args[1], args[2]), layout) } + sym::copysignf16 => { + CValue::by_val(codegen_f16_f128::copysign_f16(fx, args[0], args[1]), layout) + } + sym::copysignf128 => { + CValue::by_val(codegen_f16_f128::copysign_f128(fx, args[0], args[1]), layout) + } sym::copysignf32 | sym::copysignf64 => { CValue::by_val(fx.bcx.ins().fcopysign(args[0], args[1]), layout) } + sym::fabsf16 => CValue::by_val(codegen_f16_f128::abs_f16(fx, args[0]), layout), + sym::fabsf128 => CValue::by_val(codegen_f16_f128::abs_f128(fx, args[0]), layout), sym::fabsf32 | sym::fabsf64 | sym::floorf32 @@ -416,12 +469,36 @@ fn codegen_float_intrinsic_call<'tcx>( // These intrinsics aren't supported natively by Cranelift. // Lower them to a libcall. - sym::powif32 | sym::powif64 => { + sym::powif16 | sym::powif32 | sym::powif64 | sym::powif128 => { + let temp; + let (clif_ty, args) = if intrinsic == sym::powif16 { + temp = [codegen_f16_f128::f16_to_f32(fx, args[0]), args[1]]; + (types::F32, temp.as_slice()) + } else { + (clif_ty, args) + }; let input_tys: Vec<_> = vec![AbiParam::new(clif_ty), lib_call_arg_param(fx.tcx, types::I32, true)]; let ret_val = fx.lib_call(name, input_tys, vec![AbiParam::new(clif_ty)], &args)[0]; + let ret_val = if intrinsic == sym::powif16 { + codegen_f16_f128::f32_to_f16(fx, ret_val) + } else { + ret_val + }; CValue::by_val(ret_val, fx.layout_of(ty)) } + sym::powf16 => { + // FIXME(f16_f128): Rust `compiler-builtins` doesn't export `powf16` yet. 
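+            // As a stopgap, promote both operands to `f32`, call `powf` and
+            // demote the result back to `f16`.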
+            let x = codegen_f16_f128::f16_to_f32(fx, args[0]);
+            let y = codegen_f16_f128::f16_to_f32(fx, args[1]);
+            let ret_val = fx.lib_call(
+                "powf",
+                vec![AbiParam::new(types::F32), AbiParam::new(types::F32)],
+                vec![AbiParam::new(types::F32)],
+                &[x, y],
+            )[0];
+            CValue::by_val(codegen_f16_f128::f32_to_f16(fx, ret_val), fx.layout_of(ty))
+        }
         _ => {
             let input_tys: Vec<_> = args.iter().map(|_| AbiParam::new(clif_ty)).collect();
             let ret_val = fx.lib_call(name, input_tys, vec![AbiParam::new(clif_ty)], &args)[0];

From 02195f56c718defd1df04a452530b14cc96155c9 Mon Sep 17 00:00:00 2001
From: beetrees
Date: Fri, 23 May 2025 15:51:51 +0100
Subject: [PATCH 8/8] Enable tests and `compiler-builtins` for `f16`/`f128`

---
 build_system/build_sysroot.rs |  2 +-
 scripts/setup_rust_fork.sh    |  2 +-
 scripts/test_rustc_tests.sh   |  8 --------
 src/lib.rs                    | 32 +++++++++++++++++++++++++++-----
 4 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/build_system/build_sysroot.rs b/build_system/build_sysroot.rs
index a6e956c51..00955998e 100644
--- a/build_system/build_sysroot.rs
+++ b/build_system/build_sysroot.rs
@@ -235,7 +235,7 @@ fn build_clif_sysroot_for_triple(
     compiler.rustflags.extend(rustflags);
     let mut build_cmd = STANDARD_LIBRARY.build(&compiler, dirs);
     build_cmd.arg("--release");
-    build_cmd.arg("--features").arg("backtrace panic-unwind compiler-builtins-no-f16-f128");
+    build_cmd.arg("--features").arg("backtrace panic-unwind");
     build_cmd.arg(format!("-Zroot-dir={}", STDLIB_SRC.to_path(dirs).display()));
     build_cmd.env("CARGO_PROFILE_RELEASE_DEBUG", "true");
     build_cmd.env("__CARGO_DEFAULT_LIB_METADATA", "cg_clif");
diff --git a/scripts/setup_rust_fork.sh b/scripts/setup_rust_fork.sh
index ca6426f2b..532702bb1 100644
--- a/scripts/setup_rust_fork.sh
+++ b/scripts/setup_rust_fork.sh
@@ -43,7 +43,7 @@ verbose-tests = false
 # disabled bootstrap will crash trying to copy llvm tools for the bootstrap
 # compiler.
 llvm-tools = false
-std-features = ["panic-unwind", "compiler-builtins-no-f16-f128"]
+std-features = ["panic-unwind"]
 
 EOF
diff --git a/scripts/test_rustc_tests.sh b/scripts/test_rustc_tests.sh
index d014b6881..32c71f433 100755
--- a/scripts/test_rustc_tests.sh
+++ b/scripts/test_rustc_tests.sh
@@ -72,14 +72,6 @@ rm tests/ui/consts/precise-drop-with-coverage.rs
 rm tests/ui/issues/issue-85461.rs
 rm -r tests/ui/instrument-coverage/
 
-# missing f16/f128 support
-rm tests/ui/half-open-range-patterns/half-open-range-pats-semantics.rs
-rm tests/ui/asm/aarch64/type-f16.rs
-rm tests/ui/float/conv-bits-runtime-const.rs
-rm tests/ui/consts/const-eval/float_methods.rs
-rm tests/ui/match/match-float.rs
-rm tests/ui/float/target-has-reliable-nightly-float.rs
-
 # optimization tests
 # ==================
 rm tests/ui/codegen/issue-28950.rs # depends on stack size optimizations
diff --git a/src/lib.rs b/src/lib.rs
index 03dfd4956..8ef623cde 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -201,14 +201,36 @@ impl CodegenBackend for CraneliftCodegenBackend {
         // FIXME do `unstable_target_features` properly
         let unstable_target_features = target_features.clone();
 
+        // FIXME(f16_f128): LLVM 20 (currently used by `rustc`) passes `f128` in XMM registers on
+        // Windows, whereas LLVM 21+ and Cranelift pass it indirectly. This means that `f128` won't
+        // work when linking against an LLVM-built sysroot.
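+        // `is_like_windows` covers both the MSVC and GNU Windows targets
+        // affected by the ABI difference described above.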
+        let has_reliable_f128 = !sess.target.is_like_windows;
+        let has_reliable_f16 = match &*sess.target.arch {
+            // FIXME(f16_f128): LLVM 20 does not support `f16` on s390x, meaning the required
+            // builtins are not available in `compiler-builtins`.
+            "s390x" => false,
+            // FIXME(f16_f128): `rustc_codegen_llvm` currently disables support on Windows GNU
+            // targets due to GCC using a different ABI than LLVM. Therefore `f16` won't be
+            // available when using an LLVM-built sysroot.
+            "x86_64"
+                if sess.target.os == "windows"
+                    && sess.target.env == "gnu"
+                    && sess.target.abi != "llvm" =>
+            {
+                false
+            }
+            _ => true,
+        };
+
         TargetConfig {
             target_features,
             unstable_target_features,
-            // Cranelift does not yet support f16 or f128
-            has_reliable_f16: false,
-            has_reliable_f16_math: false,
-            has_reliable_f128: false,
-            has_reliable_f128_math: false,
+            // `rustc_codegen_cranelift` polyfills functionality not yet
+            // available in Cranelift.
+            has_reliable_f16,
+            has_reliable_f16_math: has_reliable_f16,
+            has_reliable_f128,
+            has_reliable_f128_math: has_reliable_f128,
         }
     }