Skip to content

Commit

Permalink
Make have_fma a Julia intrinsic.
Browse files Browse the repository at this point in the history
  • Loading branch information
maleadt committed Nov 18, 2021

Verified

This commit was signed with the committer’s verified signature.
TLATER Tristan Maat
1 parent 416ce92 commit 3b2685c
Showing 9 changed files with 41 additions and 7 deletions.
1 change: 1 addition & 0 deletions base/compiler/optimize.jl
Original file line number Diff line number Diff line change
@@ -416,6 +416,7 @@ function is_pure_intrinsic_infer(f::IntrinsicFunction)
f === Intrinsics.arraylen || # this one is volatile
f === Intrinsics.sqrt_llvm || # this one may differ at runtime (by a few ulps)
f === Intrinsics.sqrt_llvm_fast || # this one may differ at runtime (by a few ulps)
f === Intrinsics.have_fma || # this one depends on the runtime environment
f === Intrinsics.cglobal) # cglobal lookup answer changes at runtime
end

3 changes: 2 additions & 1 deletion base/compiler/tfuncs.jl
Original file line number Diff line number Diff line change
@@ -10,7 +10,7 @@ const _NAMEDTUPLE_NAME = NamedTuple.body.body.name

const INT_INF = typemax(Int) # integer infinity

const N_IFUNC = reinterpret(Int32, arraylen) + 1
const N_IFUNC = reinterpret(Int32, have_fma) + 1
const T_IFUNC = Vector{Tuple{Int, Int, Any}}(undef, N_IFUNC)
const T_IFUNC_COST = Vector{Int}(undef, N_IFUNC)
const T_FFUNC_KEY = Vector{Any}()
@@ -214,6 +214,7 @@ cglobal_tfunc(@nospecialize(fptr)) = Ptr{Cvoid}
cglobal_tfunc(@nospecialize(fptr), @nospecialize(t)) = (isType(t) ? Ptr{t.parameters[1]} : Ptr)
cglobal_tfunc(@nospecialize(fptr), t::Const) = (isa(t.val, Type) ? Ptr{t.val} : Ptr)
add_tfunc(Core.Intrinsics.cglobal, 1, 2, cglobal_tfunc, 5)
add_tfunc(Core.Intrinsics.have_fma, 1, 1, @nospecialize(x)->Bool, 1)

function ifelse_tfunc(@nospecialize(cnd), @nospecialize(x), @nospecialize(y))
if isa(cnd, Const)
7 changes: 2 additions & 5 deletions base/floatfuncs.jl
Original file line number Diff line number Diff line change
@@ -409,11 +409,8 @@ fma_llvm(x::Float64, y::Float64, z::Float64) = fma_float(x, y, z)

# Disable LLVM's fma if it is incorrect, e.g. because LLVM falls back
# onto a broken system libm; if so, use a software emulated fma
have_fma(::Type) = false
have_fma(::Type{Float32}) = ccall("extern julia.cpu.have_fma.f32", llvmcall, Int, ()) == 1
have_fma(::Type{Float64}) = ccall("extern julia.cpu.have_fma.f64", llvmcall, Int, ()) == 1
fma(x::Float32, y::Float32, z::Float32) = have_fma(Float32) ? fma_llvm(x,y,z) : fma_emulated(x,y,z)
fma(x::Float64, y::Float64, z::Float64) = have_fma(Float64) ? fma_llvm(x,y,z) : fma_emulated(x,y,z)
fma(x::Float32, y::Float32, z::Float32) = Core.Intrinsics.have_fma(Float32) ? fma_llvm(x,y,z) : fma_emulated(x,y,z)
fma(x::Float64, y::Float64, z::Float64) = Core.Intrinsics.have_fma(Float64) ? fma_llvm(x,y,z) : fma_emulated(x,y,z)

function fma(a::Float16, b::Float16, c::Float16)
Float16(muladd(Float32(a), Float32(b), Float32(c))) #don't use fma if the hardware doesn't have it.
21 changes: 21 additions & 0 deletions src/intrinsics.cpp
Original file line number Diff line number Diff line change
@@ -1146,6 +1146,27 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
return mark_julia_type(ctx, ans, false, x.typ);
}

case have_fma: {
assert(nargs == 1);
const jl_cgval_t &x = argv[0];
if (!x.constant || !jl_is_datatype(x.constant))
return emit_runtime_call(ctx, f, argv, nargs);
jl_datatype_t *dt = (jl_datatype_t*) x.constant;

// select the appropriate overloaded intrinsic
std::string intr_name = "julia.cpu.have_fma.";
if (dt == jl_float32_type)
intr_name += "f32";
else if (dt == jl_float64_type)
intr_name += "f64";
else
return emit_runtime_call(ctx, f, argv, nargs);

FunctionCallee intr = jl_Module->getOrInsertFunction(intr_name, T_int1);
auto ret = ctx.builder.CreateCall(intr);
return mark_julia_type(ctx, ret, false, jl_bool_type);
}

default: {
assert(nargs >= 1 && "invalid nargs for intrinsic call");
const jl_cgval_t &xinfo = argv[0];
2 changes: 2 additions & 0 deletions src/intrinsics.h
Original file line number Diff line number Diff line change
@@ -103,6 +103,8 @@
ALIAS(llvmcall, llvmcall) \
/* object access */ \
ADD_I(arraylen, 1) \
/* cpu feature tests */ \
ADD_I(have_fma, 1) \
/* hidden intrinsics */ \
ADD_HIDDEN(cglobal_auto, 1)

1 change: 1 addition & 0 deletions src/jl_exported_funcs.inc
Original file line number Diff line number Diff line change
@@ -32,6 +32,7 @@
XX(jl_array_grow_end) \
XX(jl_array_isassigned) \
XX(jl_arraylen) \
XX(jl_have_fma) \
XX(jl_array_ptr) \
XX(jl_array_ptr_1d_append) \
XX(jl_array_ptr_1d_push) \
1 change: 1 addition & 0 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
@@ -1240,6 +1240,7 @@ JL_DLLEXPORT jl_value_t *jl_copysign_float(jl_value_t *a, jl_value_t *b);
JL_DLLEXPORT jl_value_t *jl_flipsign_int(jl_value_t *a, jl_value_t *b);

JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a);
JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *a);
JL_DLLEXPORT int jl_stored_inline(jl_value_t *el_type);
JL_DLLEXPORT jl_value_t *(jl_array_data_owner)(jl_array_t *a);
JL_DLLEXPORT int jl_array_isassigned(jl_array_t *a, size_t i);
5 changes: 4 additions & 1 deletion src/llvm-cpufeatures.cpp
Original file line number Diff line number Diff line change
@@ -5,7 +5,10 @@
// specific CPU features.
//
// The following intrinsics are supported:
// - julia.cpu.have_fma.$typ: returns 1 if the platform supports hardware-accelerated FMA
// - julia.cpu.have_fma.$typ: returns 1 if the platform supports hardware-accelerated FMA.
//
// Some of these intrinsics are overloaded, i.e., they are suffixed with a type name.
// To extend support, make sure codegen (in intrinsics.cpp) knows how to emit them.
//
// XXX: can / do we want to make this a codegen pass to enable querying TargetPassConfig
// instead of using the global target machine?
7 changes: 7 additions & 0 deletions src/runtime_intrinsics.c
Original file line number Diff line number Diff line change
@@ -1349,3 +1349,10 @@ JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a)
JL_TYPECHK(arraylen, array, a);
return jl_box_long(jl_array_len((jl_array_t*)a));
}

// Runtime fallback for the have_fma intrinsic. Codegen (emit_intrinsic in
// intrinsics.cpp) emits the julia.cpu.have_fma.* query only when the type
// argument is a compile-time Float32/Float64 constant; otherwise it calls
// into this runtime stub via emit_runtime_call.
// Conservatively answers "no hardware FMA" for every type.
JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *typ)
{
JL_TYPECHK(have_fma, datatype, typ);
// TODO: run-time feature check?
return jl_false;
}

0 comments on commit 3b2685c

Please sign in to comment.