Skip to content

Commit eece6ba

Browse files
committed
IR: Add llvm.ldexp and llvm.experimental.constrained.ldexp intrinsics
AMDGPU has native instructions and target intrinsics for this, but these really should be subject to legalization and generic optimizations. This will enable legalization of f16->f32 on targets without f16 support. Implement a somewhat horrible inline expansion for targets without libcall support. This could be better if we could introduce control flow (GlobalISel version not yet implemented). Support for strictfp legalization is less complete but works for the simple cases.
1 parent 5d361ad commit eece6ba

File tree

71 files changed

+3780
-422
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

71 files changed

+3780
-422
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17146,8 +17146,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1714617146
return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
1714717147
case AMDGPU::BI__builtin_amdgcn_ldexp:
1714817148
case AMDGPU::BI__builtin_amdgcn_ldexpf:
17149-
case AMDGPU::BI__builtin_amdgcn_ldexph:
17150-
return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
17149+
case AMDGPU::BI__builtin_amdgcn_ldexph: {
17150+
llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
17151+
llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
17152+
llvm::Function *F =
17153+
CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
17154+
return Builder.CreateCall(F, {Src0, Src1});
17155+
}
1715117156
case AMDGPU::BI__builtin_amdgcn_frexp_mant:
1715217157
case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
1715317158
case AMDGPU::BI__builtin_amdgcn_frexp_manth:

clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ void test_cos_f16(global half* out, half a)
5252
}
5353

5454
// CHECK-LABEL: @test_ldexp_f16
55-
// CHECK: call half @llvm.amdgcn.ldexp.f16
55+
// CHECK: call half @llvm.ldexp.f16.i32
5656
void test_ldexp_f16(global half* out, half a, int b)
5757
{
5858
*out = __builtin_amdgcn_ldexph(a, b);

clang/test/CodeGenOpenCL/builtins-amdgcn.cl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,14 +180,14 @@ void test_log_clamp_f32(global float* out, float a)
180180
}
181181

182182
// CHECK-LABEL: @test_ldexp_f32
183-
// CHECK: call float @llvm.amdgcn.ldexp.f32
183+
// CHECK: call float @llvm.ldexp.f32.i32
184184
void test_ldexp_f32(global float* out, float a, int b)
185185
{
186186
*out = __builtin_amdgcn_ldexpf(a, b);
187187
}
188188

189189
// CHECK-LABEL: @test_ldexp_f64
190-
// CHECK: call double @llvm.amdgcn.ldexp.f64
190+
// CHECK: call double @llvm.ldexp.f64.i32
191191
void test_ldexp_f64(global double* out, double a, int b)
192192
{
193193
*out = __builtin_amdgcn_ldexp(a, b);

llvm/docs/LangRef.rst

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14713,6 +14713,47 @@ trapping or setting ``errno``.
1471314713
When specified with the fast-math-flag 'afn', the result may be approximated
1471414714
using a less accurate calculation.
1471514715

14716+
'``llvm.ldexp.*``' Intrinsic
14717+
^^^^^^^^^^^^^^^^^^^^^^^^^^^
14718+
14719+
Syntax:
14720+
"""""""
14721+
14722+
This is an overloaded intrinsic. You can use ``llvm.ldexp`` on any
14723+
floating point or vector of floating point type. Not all targets support
14724+
all types, however.
14725+
14726+
::
14727+
14728+
declare float @llvm.ldexp.f32.i32(float %Val, i32 %Exp)
14729+
declare double @llvm.ldexp.f64.i32(double %Val, i32 %Exp)
14730+
declare x86_fp80 @llvm.ldexp.f80.i32(x86_fp80 %Val, i32 %Exp)
14731+
declare fp128 @llvm.ldexp.f128.i32(fp128 %Val, i32 %Exp)
14732+
declare ppc_fp128 @llvm.ldexp.ppcf128.i32(ppc_fp128 %Val, i32 %Exp)
14733+
declare <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %Val, <2 x i32> %Exp)
14734+
14735+
Overview:
14736+
"""""""""
14737+
14738+
The '``llvm.ldexp.*``' intrinsics perform the ldexp function.
14739+
14740+
Arguments:
14741+
""""""""""
14742+
14743+
The first argument and the return value are :ref:`floating-point
14744+
<t_floating>` or :ref:`vector <t_vector>` of floating-point values of
14745+
the same type. The second argument is an integer or vector of integers with the same number
14746+
of elements.
14747+
14748+
Semantics:
14749+
""""""""""
14750+
14751+
This function multiplies the first argument by 2 raised to the second
14752+
argument's power. If the first argument is NaN or infinite, the same
14753+
value is returned. If the result underflows, a zero with the same sign
14754+
is returned. If the result overflows, the result is an infinity with
14755+
the same sign.
14756+
1471614757
'``llvm.log.*``' Intrinsic
1471714758
^^^^^^^^^^^^^^^^^^^^^^^^^^
1471814759

@@ -24306,6 +24347,47 @@ This function returns the first value raised to the second power with an
2430624347
unspecified sequence of rounding operations.
2430724348

2430824349

24350+
'``llvm.experimental.constrained.ldexp``' Intrinsic
24351+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
24352+
24353+
Syntax:
24354+
"""""""
24355+
24356+
::
24357+
24358+
declare <type0>
24359+
@llvm.experimental.constrained.ldexp(<type0> <op1>, <type1> <op2>,
24360+
metadata <rounding mode>,
24361+
metadata <exception behavior>)
24362+
24363+
Overview:
24364+
"""""""""
24365+
24366+
The '``llvm.experimental.constrained.ldexp``' intrinsic performs the ldexp function.
24367+
24368+
24369+
Arguments:
24370+
""""""""""
24371+
24372+
The first argument and the return value are :ref:`floating-point
24373+
<t_floating>` or :ref:`vector <t_vector>` of floating-point values of
24374+
the same type. The second argument is an integer or vector of integers with the same number
24375+
of elements.
24376+
24377+
24378+
The third and fourth arguments specify the rounding mode and exception
24379+
behavior as described above.
24380+
24381+
Semantics:
24382+
""""""""""
24383+
24384+
This function multiplies the first argument by 2 raised to the second
24385+
argument's power. If the first argument is NaN or infinite, the same
24386+
value is returned. If the result underflows, a zero with the same sign
24387+
is returned. If the result overflows, the result is an infinity with
24388+
the same sign.
24389+
24390+
2430924391
'``llvm.experimental.constrained.sin``' Intrinsic
2431024392
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2431124393

llvm/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ Changes to the LLVM IR
6161
* The ``nofpclass`` attribute was introduced. This allows more
6262
optimizations around special floating point value comparisons.
6363

64+
* Introduced new ``llvm.ldexp`` and ``llvm.experimental.constrained.ldexp`` intrinsics.
65+
6466
* The constant expression variants of the following instructions have been
6567
removed:
6668

llvm/include/llvm/Analysis/TargetLibraryInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,7 @@ class TargetLibraryInfo {
378378
case LibFunc_trunc: case LibFunc_truncf: case LibFunc_truncl:
379379
case LibFunc_log2: case LibFunc_log2f: case LibFunc_log2l:
380380
case LibFunc_exp2: case LibFunc_exp2f: case LibFunc_exp2l:
381+
case LibFunc_ldexp: case LibFunc_ldexpf: case LibFunc_ldexpl:
381382
case LibFunc_memcpy: case LibFunc_memset: case LibFunc_memmove:
382383
case LibFunc_memcmp: case LibFunc_bcmp: case LibFunc_strcmp:
383384
case LibFunc_strcpy: case LibFunc_stpcpy: case LibFunc_strlen:

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,7 @@ class LegalizerHelper {
357357
LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
358358
LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
359359
LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
360+
LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
360361

361362
/// Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
362363
LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,

llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1825,6 +1825,13 @@ class MachineIRBuilder {
18251825
return buildInstr(TargetOpcode::G_FPOW, {Dst}, {Src0, Src1}, Flags);
18261826
}
18271827

1828+
/// Build and insert \p Dst = G_FLDEXP \p Src0, \p Src1
1829+
MachineInstrBuilder
1830+
buildFLdexp(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1,
1831+
std::optional<unsigned> Flags = std::nullopt) {
1832+
return buildInstr(TargetOpcode::G_FLDEXP, {Dst}, {Src0, Src1}, Flags);
1833+
}
1834+
18281835
/// Build and insert \p Res = G_FCOPYSIGN \p Op0, \p Op1
18291836
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0,
18301837
const SrcOp &Src1) {

llvm/include/llvm/CodeGen/ISDOpcodes.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,7 @@ enum NodeType {
411411
STRICT_FSQRT,
412412
STRICT_FPOW,
413413
STRICT_FPOWI,
414+
STRICT_FLDEXP,
414415
STRICT_FSIN,
415416
STRICT_FCOS,
416417
STRICT_FEXP,
@@ -926,8 +927,10 @@ enum NodeType {
926927
FCBRT,
927928
FSIN,
928929
FCOS,
929-
FPOWI,
930930
FPOW,
931+
FPOWI,
932+
/// FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
933+
FLDEXP,
931934
FLOG,
932935
FLOG2,
933936
FLOG10,

llvm/include/llvm/CodeGen/RuntimeLibcalls.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,10 @@ namespace RTLIB {
7070
/// UNKNOWN_LIBCALL if there is none.
7171
Libcall getPOWI(EVT RetVT);
7272

73+
/// getLDEXP - Return the LDEXP_* value for the given types, or
74+
/// UNKNOWN_LIBCALL if there is none.
75+
Libcall getLDEXP(EVT RetVT);
76+
7377
/// Return the SYNC_FETCH_AND_* value for the given opcode and type, or
7478
/// UNKNOWN_LIBCALL if there is none.
7579
Libcall getSYNC(unsigned Opc, MVT VT);

0 commit comments

Comments
 (0)