Skip to content

Commit

Permalink
codegen: explicitly handle __extendhfsf2 and friends
Browse files Browse the repository at this point in the history
A partial fix to JuliaLang#44829
  • Loading branch information
vtjnash committed May 6, 2022
1 parent 902a5c1 commit 10064bf
Show file tree
Hide file tree
Showing 9 changed files with 91 additions and 41 deletions.
4 changes: 2 additions & 2 deletions base/compiler/ssair/slot2ssa.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ function scan_slot_def_use(nargs::Int, ci::CodeInfo, code::Vector{Any})
nslots = length(ci.slotflags)
result = SlotInfo[SlotInfo() for i = 1:nslots]
# Set defs for arguments
for var in result[1:nargs]
push!(var.defs, 0)
for var in 1:nargs
push!(result[var].defs, 0)
end
for idx in 1:length(code)
stmt = code[idx]
Expand Down
6 changes: 3 additions & 3 deletions src/APInt-C.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ void LLVMByteSwap(unsigned numbits, integerPart *pa, integerPart *pr) {
void LLVMFPtoInt(unsigned numbits, void *pa, unsigned onumbits, integerPart *pr, bool isSigned, bool *isExact) {
double Val;
if (numbits == 16)
Val = __gnu_h2f_ieee(*(uint16_t*)pa);
Val = julia__gnu_h2f_ieee(*(uint16_t*)pa);
else if (numbits == 32)
Val = *(float*)pa;
else if (numbits == 64)
Expand Down Expand Up @@ -391,7 +391,7 @@ void LLVMSItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPar
val = a.roundToDouble(true);
}
if (onumbits == 16)
*(uint16_t*)pr = __gnu_f2h_ieee(val);
*(uint16_t*)pr = julia__gnu_f2h_ieee(val);
else if (onumbits == 32)
*(float*)pr = val;
else if (onumbits == 64)
Expand All @@ -408,7 +408,7 @@ void LLVMUItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPar
val = a.roundToDouble(false);
}
if (onumbits == 16)
*(uint16_t*)pr = __gnu_f2h_ieee(val);
*(uint16_t*)pr = julia__gnu_f2h_ieee(val);
else if (onumbits == 32)
*(float*)pr = val;
else if (onumbits == 64)
Expand Down
43 changes: 38 additions & 5 deletions src/aotcompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/Analysis/TargetTransformInfo.h>
#include <llvm/IR/DataLayout.h>
#include <llvm/CodeGen/RuntimeLibcalls.h>
#include <llvm/CodeGen/TargetLowering.h>
#if JL_LLVM_VERSION >= 140000
#include <llvm/MC/TargetRegistry.h>
#else
Expand Down Expand Up @@ -481,7 +483,7 @@ void jl_dump_native_impl(void *native_code,
));

legacy::PassManager PM;
addTargetPasses(&PM, TM->getTargetTriple(), TM->getTargetIRAnalysis());
addTargetPasses(&PM, *TM, TM->getTargetTriple(), TM->getTargetIRAnalysis());

// set up optimization passes
SmallVector<char, 0> bc_Buffer;
Expand Down Expand Up @@ -595,9 +597,40 @@ void jl_dump_native_impl(void *native_code,
delete data;
}

void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis)
// Register the target-dependent analysis passes on `PM`.
//
//   PM       - pass manager that receives the passes
//   TM       - target machine used to construct the TargetLowering object
//              whose F16 libcall names are overridden below
//   triple   - target triple used to seed the TargetLibraryInfo
//   analysis - target IR analysis, wrapped into a TargetTransformInfo pass
//
// Exactly one TargetLibraryInfoWrapperPass and one TargetTransformInfo
// wrapper pass are added.
void addTargetPasses(legacy::PassManagerBase *PM, TargetMachine &TM, const Triple &triple, TargetIRAnalysis analysis)
{
    TargetLowering TLI(TM);
    // polyfill the F16 libcall names since the calling convention used is sometimes wrong otherwise
#if !defined(_OS_DARWIN_)
    TLI.setLibcallName(RTLIB::FPEXT_F16_F64, "julia__extendhfdf2");
    // `A` is the RTLIB enumerator and `n` the default symbol name; `#n`
    // stringifies the symbol so it concatenates with the "julia" prefix.
#define HANDLE_LIBCALL(A, n) TLI.setLibcallName(RTLIB::A, "julia" #n);
    //HANDLE_LIBCALL(FPEXT_F16_F128, __extendhftf2)
    //HANDLE_LIBCALL(FPEXT_F16_F80, __extendhfxf2)
    // HANDLE_LIBCALL(FPEXT_F16_F64, __extendhfdf2)
    // HANDLE_LIBCALL(FPEXT_F16_F32, __gnu_h2f_ieee) *
    // HANDLE_LIBCALL(FPROUND_F32_F16, __gnu_f2h_ieee) *
    // HANDLE_LIBCALL(FPROUND_F64_F16, __truncdfhf2) *
    // //HANDLE_LIBCALL(FPROUND_F80_F16, __truncxfhf2)
    // //HANDLE_LIBCALL(FPROUND_F128_F16, __trunctfhf2)
    // //HANDLE_LIBCALL(FPROUND_PPCF128_F16, __trunctfhf2)
    // HANDLE_LIBCALL(FPTOSINT_F16_I32, __fixhfsi)
    // HANDLE_LIBCALL(FPTOSINT_F16_I64, __fixhfdi)
    // //HANDLE_LIBCALL(FPTOSINT_F16_I128, __fixhfti)
    // HANDLE_LIBCALL(FPTOUINT_F16_I32, __fixunshfsi)
    // HANDLE_LIBCALL(FPTOUINT_F16_I64, __fixunshfdi)
    // //HANDLE_LIBCALL(FPTOUINT_F16_I128, __fixunshfti)
    // HANDLE_LIBCALL(SINTTOFP_I32_F16, __floatsihf)
    // HANDLE_LIBCALL(SINTTOFP_I64_F16, __floatdihf)
    // //HANDLE_LIBCALL(SINTTOFP_I128_F16, __floattihf)
    // HANDLE_LIBCALL(UINTTOFP_I32_F16, __floatunsihf)
    // HANDLE_LIBCALL(UINTTOFP_I64_F16, __floatundihf)
    // //HANDLE_LIBCALL(UINTTOFP_I128_F16, __floatuntihf)
#undef HANDLE_LIBCALL
#endif
    // TODO: `TLI` is a function-local object and nothing below consumes it,
    // so the libcall renames above do not reach code generation yet.
    // now how do I instruct X86Subtarget::getCallLowering to return our new, improved TLI object?
    TargetLibraryInfoImpl BaselineInfoImpl(triple);
    //BaselineInfoImpl.addVectorizableFunctions({});
    PM->add(new TargetLibraryInfoWrapperPass(BaselineInfoImpl));
    PM->add(createTargetTransformInfoWrapperPass(std::move(analysis)));
}

Expand Down Expand Up @@ -857,7 +890,7 @@ class JuliaPipeline : public Pass {
(void)jl_init_llvm();
PMTopLevelManager *TPM = Stack.top()->getTopLevelManager();
TPMAdapter Adapter(TPM);
addTargetPasses(&Adapter, jl_ExecutionEngine->getTargetTriple(), jl_ExecutionEngine->getTargetIRAnalysis());
addTargetPasses(&Adapter, *jl_ExecutionEngine->cloneTargetMachine(), jl_ExecutionEngine->getTargetTriple(), jl_ExecutionEngine->getTargetIRAnalysis());
addOptimizationPasses(&Adapter, OptLevel, true, dump_native, true);
addMachinePasses(&Adapter, OptLevel);
}
Expand Down Expand Up @@ -993,7 +1026,7 @@ void *jl_get_llvmf_defn_impl(jl_method_instance_t *mi, size_t world, char getwra
static legacy::PassManager *PM;
if (!PM) {
PM = new legacy::PassManager();
addTargetPasses(PM, jl_ExecutionEngine->getTargetTriple(), jl_ExecutionEngine->getTargetIRAnalysis());
addTargetPasses(PM, *jl_ExecutionEngine->cloneTargetMachine(), jl_ExecutionEngine->getTargetTriple(), jl_ExecutionEngine->getTargetIRAnalysis());
addOptimizationPasses(PM, jl_options.opt_level);
addMachinePasses(PM, jl_options.opt_level);
}
Expand Down
2 changes: 1 addition & 1 deletion src/disasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1212,7 +1212,7 @@ jl_value_t *jl_dump_function_asm_impl(void *F, char raw_mc, const char* asm_vari
auto TMBase = jl_ExecutionEngine->cloneTargetMachine();
LLVMTargetMachine *TM = static_cast<LLVMTargetMachine*>(TMBase.get());
legacy::PassManager PM;
addTargetPasses(&PM, TM->getTargetTriple(), TM->getTargetIRAnalysis());
addTargetPasses(&PM, *TM, TM->getTargetTriple(), TM->getTargetIRAnalysis());
if (raw_mc) {
raw_svector_ostream obj_OS(ObjBufferSV);
if (TM->addPassesToEmitFile(PM, obj_OS, nullptr, CGFT_ObjectFile, false, nullptr))
Expand Down
2 changes: 1 addition & 1 deletion src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -901,7 +901,7 @@ namespace {
}
std::unique_ptr<legacy::PassManager> operator()() {
auto PM = std::make_unique<legacy::PassManager>();
addTargetPasses(PM.get(), TM->getTargetTriple(), TM->getTargetIRAnalysis());
addTargetPasses(PM.get(), *TM, TM->getTargetTriple(), TM->getTargetIRAnalysis());
addOptimizationPasses(PM.get(), optlevel);
addMachinePasses(PM.get(), optlevel);
return PM;
Expand Down
2 changes: 1 addition & 1 deletion src/jitlayers.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ using namespace llvm;

extern "C" jl_cgparams_t jl_default_cgparams;

void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis);
void addTargetPasses(legacy::PassManagerBase *PM, TargetMachine &TM, const Triple &triple, TargetIRAnalysis analysis);
void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, bool lower_intrinsics=true, bool dump_native=false, bool external_use=false);
void addMachinePasses(legacy::PassManagerBase *PM, int optlevel);
void jl_finalize_module(orc::ThreadSafeModule m);
Expand Down
6 changes: 0 additions & 6 deletions src/julia.expmap
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,6 @@
environ;
__progname;

/* compiler run-time intrinsics */
__gnu_h2f_ieee;
__extendhfsf2;
__gnu_f2h_ieee;
__truncdfhf2;

local:
*;
};
14 changes: 12 additions & 2 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1523,8 +1523,18 @@ jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT;
#define JL_GC_ASSERT_LIVE(x) (void)(x)
#endif

float __gnu_h2f_ieee(uint16_t param) JL_NOTSAFEPOINT;
uint16_t __gnu_f2h_ieee(float param) JL_NOTSAFEPOINT;
// Half-precision conversion helpers, exported under a `julia` prefix.
// Half values are passed and returned as raw `uint16_t`.
// The names mirror the compiler run-time libcalls they stand in for
// (__gnu_h2f_ieee, __gnu_f2h_ieee, __truncdfhf2, __extendhfdf2, __fix*hf*, __float*hf).
JL_DLLEXPORT float julia__gnu_h2f_ieee(uint16_t param) JL_NOTSAFEPOINT;
JL_DLLEXPORT uint16_t julia__gnu_f2h_ieee(float param) JL_NOTSAFEPOINT;
JL_DLLEXPORT uint16_t julia__truncdfhf2(double param) JL_NOTSAFEPOINT;
JL_DLLEXPORT double julia__extendhfdf2(uint16_t n) JL_NOTSAFEPOINT;
// half -> signed/unsigned 32- and 64-bit integers
JL_DLLEXPORT int32_t julia__fixhfsi(uint16_t n) JL_NOTSAFEPOINT;
JL_DLLEXPORT int64_t julia__fixhfdi(uint16_t n) JL_NOTSAFEPOINT;
JL_DLLEXPORT uint32_t julia__fixunshfsi(uint16_t n) JL_NOTSAFEPOINT;
JL_DLLEXPORT uint64_t julia__fixunshfdi(uint16_t n) JL_NOTSAFEPOINT;
// signed/unsigned 32- and 64-bit integers -> half
JL_DLLEXPORT uint16_t julia__floatsihf(int32_t n) JL_NOTSAFEPOINT;
JL_DLLEXPORT uint16_t julia__floatdihf(int64_t n) JL_NOTSAFEPOINT;
JL_DLLEXPORT uint16_t julia__floatunsihf(uint32_t n) JL_NOTSAFEPOINT;
JL_DLLEXPORT uint16_t julia__floatundihf(uint64_t n) JL_NOTSAFEPOINT;

#ifdef __cplusplus
}
Expand Down
53 changes: 33 additions & 20 deletions src/runtime_intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,22 +188,17 @@ static inline uint16_t float_to_half(float param) JL_NOTSAFEPOINT
return h;
}

JL_DLLEXPORT float __gnu_h2f_ieee(uint16_t param)
// Exported entry point: convert the half-precision value stored in `param`
// to single precision by delegating to half_to_float().
JL_DLLEXPORT float julia__gnu_h2f_ieee(uint16_t param)
{
    float widened = half_to_float(param);
    return widened;
}

JL_DLLEXPORT float __extendhfsf2(uint16_t param)
{
return half_to_float(param);
}

JL_DLLEXPORT uint16_t __gnu_f2h_ieee(float param)
// Exported entry point: convert the single-precision `param` to a
// half-precision value (returned as uint16_t) by delegating to float_to_half().
JL_DLLEXPORT uint16_t julia__gnu_f2h_ieee(float param)
{
    uint16_t narrowed = float_to_half(param);
    return narrowed;
}

JL_DLLEXPORT uint16_t __truncdfhf2(double param)
JL_DLLEXPORT uint16_t julia__truncdfhf2(double param)
{
float res = (float)param;
uint32_t resi;
Expand All @@ -225,6 +220,24 @@ JL_DLLEXPORT uint16_t __truncdfhf2(double param)
return float_to_half(res);
}

// Half <-> double / integer conversions. Every helper goes through single
// precision: half inputs are first widened with julia__gnu_h2f_ieee(), and
// integer inputs are first cast to float and then narrowed with
// julia__gnu_f2h_ieee().
// NOTE(review): the float-to-integer casts below are plain C casts, which are
// undefined for NaN/Inf or out-of-range values — confirm callers never rely
// on a particular result for those inputs.

// half -> double
JL_DLLEXPORT double julia__extendhfdf2(uint16_t n)
{
    float single = julia__gnu_h2f_ieee(n);
    return (double)single;
}

// half -> int32_t
JL_DLLEXPORT int32_t julia__fixhfsi(uint16_t n)
{
    float single = julia__gnu_h2f_ieee(n);
    return (int32_t)single;
}

// half -> int64_t
JL_DLLEXPORT int64_t julia__fixhfdi(uint16_t n)
{
    float single = julia__gnu_h2f_ieee(n);
    return (int64_t)single;
}

// half -> uint32_t
JL_DLLEXPORT uint32_t julia__fixunshfsi(uint16_t n)
{
    float single = julia__gnu_h2f_ieee(n);
    return (uint32_t)single;
}

// half -> uint64_t
JL_DLLEXPORT uint64_t julia__fixunshfdi(uint16_t n)
{
    float single = julia__gnu_h2f_ieee(n);
    return (uint64_t)single;
}

// int32_t -> half
JL_DLLEXPORT uint16_t julia__floatsihf(int32_t n)
{
    float single = (float)n;
    return julia__gnu_f2h_ieee(single);
}

// int64_t -> half
JL_DLLEXPORT uint16_t julia__floatdihf(int64_t n)
{
    float single = (float)n;
    return julia__gnu_f2h_ieee(single);
}

// uint32_t -> half
JL_DLLEXPORT uint16_t julia__floatunsihf(uint32_t n)
{
    float single = (float)n;
    return julia__gnu_f2h_ieee(single);
}

// uint64_t -> half
JL_DLLEXPORT uint16_t julia__floatundihf(uint64_t n)
{
    float single = (float)n;
    return julia__gnu_f2h_ieee(single);
}
//HANDLE_LIBCALL(F16, F128, __extendhftf2)
//HANDLE_LIBCALL(F16, F80, __extendhfxf2)
//HANDLE_LIBCALL(F80, F16, __truncxfhf2)
//HANDLE_LIBCALL(F128, F16, __trunctfhf2)
//HANDLE_LIBCALL(PPCF128, F16, __trunctfhf2)
//HANDLE_LIBCALL(F16, I128, __fixhfti)
//HANDLE_LIBCALL(F16, I128, __fixunshfti)
//HANDLE_LIBCALL(I128, F16, __floattihf)
//HANDLE_LIBCALL(I128, F16, __floatuntihf)
#endif

// run time version of bitcast intrinsic
Expand Down Expand Up @@ -597,11 +610,11 @@ static inline void name(unsigned osize, void *pa, void *pr) JL_NOTSAFEPOINT \
static inline void name(unsigned osize, void *pa, void *pr) JL_NOTSAFEPOINT \
{ \
uint16_t a = *(uint16_t*)pa; \
float A = __gnu_h2f_ieee(a); \
float A = julia__gnu_h2f_ieee(a); \
if (osize == 16) { \
float R; \
OP(&R, A); \
*(uint16_t*)pr = __gnu_f2h_ieee(R); \
*(uint16_t*)pr = julia__gnu_f2h_ieee(R); \
} else { \
OP((uint16_t*)pr, A); \
} \
Expand All @@ -625,11 +638,11 @@ static void jl_##name##16(unsigned runtime_nbits, void *pa, void *pb, void *pr)
{ \
uint16_t a = *(uint16_t*)pa; \
uint16_t b = *(uint16_t*)pb; \
float A = __gnu_h2f_ieee(a); \
float B = __gnu_h2f_ieee(b); \
float A = julia__gnu_h2f_ieee(a); \
float B = julia__gnu_h2f_ieee(b); \
runtime_nbits = 16; \
float R = OP(A, B); \
*(uint16_t*)pr = __gnu_f2h_ieee(R); \
*(uint16_t*)pr = julia__gnu_f2h_ieee(R); \
}

// float or integer inputs, bool output
Expand All @@ -650,8 +663,8 @@ static int jl_##name##16(unsigned runtime_nbits, void *pa, void *pb) JL_NOTSAFEP
{ \
uint16_t a = *(uint16_t*)pa; \
uint16_t b = *(uint16_t*)pb; \
float A = __gnu_h2f_ieee(a); \
float B = __gnu_h2f_ieee(b); \
float A = julia__gnu_h2f_ieee(a); \
float B = julia__gnu_h2f_ieee(b); \
runtime_nbits = 16; \
return OP(A, B); \
}
Expand Down Expand Up @@ -691,12 +704,12 @@ static void jl_##name##16(unsigned runtime_nbits, void *pa, void *pb, void *pc,
uint16_t a = *(uint16_t*)pa; \
uint16_t b = *(uint16_t*)pb; \
uint16_t c = *(uint16_t*)pc; \
float A = __gnu_h2f_ieee(a); \
float B = __gnu_h2f_ieee(b); \
float C = __gnu_h2f_ieee(c); \
float A = julia__gnu_h2f_ieee(a); \
float B = julia__gnu_h2f_ieee(b); \
float C = julia__gnu_h2f_ieee(c); \
runtime_nbits = 16; \
float R = OP(A, B, C); \
*(uint16_t*)pr = __gnu_f2h_ieee(R); \
*(uint16_t*)pr = julia__gnu_f2h_ieee(R); \
}


Expand Down Expand Up @@ -1367,7 +1380,7 @@ cvt_iintrinsic(LLVMFPtoUI, fptoui)
if (!(osize < 8 * sizeof(a))) \
jl_error("fptrunc: output bitsize must be < input bitsize"); \
else if (osize == 16) \
*(uint16_t*)pr = __gnu_f2h_ieee(a); \
*(uint16_t*)pr = julia__gnu_f2h_ieee(a); \
else if (osize == 32) \
*(float*)pr = a; \
else if (osize == 64) \
Expand Down

0 comments on commit 10064bf

Please sign in to comment.