Skip to content

[SYCL][libclc][CUDA] Add native math extension #5747

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions clang/include/clang/Basic/BuiltinsNVPTX.def
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,8 @@ BUILTIN(__nvvm_saturate_d, "dd", "")
BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "")
BUILTIN(__nvvm_ex2_approx_f, "ff", "")
BUILTIN(__nvvm_ex2_approx_d, "dd", "")
// Half and packed-half variants of ex2.approx. Unlike the plain BUILTIN
// entries above, these carry the AND(SM_75, PTX70) feature requirement.
TARGET_BUILTIN(__nvvm_ex2_approx_f16, "hh", "", AND(SM_75, PTX70))
TARGET_BUILTIN(__nvvm_ex2_approx_f16x2, "V2hV2h", "", AND(SM_75, PTX70))

BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "")
BUILTIN(__nvvm_lg2_approx_f, "ff", "")
Expand All @@ -218,6 +220,12 @@ BUILTIN(__nvvm_sin_approx_f, "ff", "")
BUILTIN(__nvvm_cos_approx_ftz_f, "ff", "")
BUILTIN(__nvvm_cos_approx_f, "ff", "")

// Tanh
// Approximate tanh builtins for float ("ff"), half ("hh") and packed
// two-element half ("V2hV2h") operands. All three carry the SM_75 and PTX70
// feature requirement.

TARGET_BUILTIN(__nvvm_tanh_approx_f, "ff", "", AND(SM_75,PTX70))
TARGET_BUILTIN(__nvvm_tanh_approx_f16, "hh", "", AND(SM_75, PTX70))
TARGET_BUILTIN(__nvvm_tanh_approx_f16x2, "V2hV2h", "", AND(SM_75, PTX70))

// Fma

BUILTIN(__nvvm_fma_rn_ftz_f, "ffff", "")
Expand Down
44 changes: 27 additions & 17 deletions libclc/generic/include/clcmacro.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,7 @@
#ifndef __CLC_MACRO_H
#define __CLC_MACRO_H

#define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \
return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \
} \
\
#define _CLC_UNARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \
return (RET_TYPE##3)(FUNCTION(x.x), FUNCTION(x.y), FUNCTION(x.z)); \
} \
Expand All @@ -30,12 +26,14 @@
return (RET_TYPE##16)(FUNCTION(x.lo), FUNCTION(x.hi)); \
}

#define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
ARG2_TYPE) \
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) { \
return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y)); \
// Defines the two-element overload of FUNCTION by applying FUNCTION to each
// component of the argument, then expands _CLC_UNARY_VECTORIZE_HAVE2 with the
// same arguments to supply the remaining overloads.
#define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE)          \
  DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 v) {                              \
    return (RET_TYPE##2)(FUNCTION(v.s0), FUNCTION(v.s1));                      \
  }                                                                            \
  _CLC_UNARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE)

#define _CLC_BINARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
ARG2_TYPE) \
DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) { \
return (RET_TYPE##3)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y), \
FUNCTION(x.z, y.z)); \
Expand All @@ -53,6 +51,14 @@
return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
}

// Defines the two-element overload of a two-argument FUNCTION by applying
// FUNCTION pairwise to matching components, then expands
// _CLC_BINARY_VECTORIZE_HAVE2 with the same arguments to supply the remaining
// overloads.
#define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,         \
                              ARG2_TYPE)                                       \
  DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 a, ARG2_TYPE##2 b) {              \
    return (RET_TYPE##2)(FUNCTION(a.s0, b.s0), FUNCTION(a.s1, b.s1));          \
  }                                                                            \
  _CLC_BINARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,         \
                              ARG2_TYPE)

#define _CLC_V_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
ARG2_TYPE) \
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE##2 y) { \
Expand All @@ -76,13 +82,8 @@
return (RET_TYPE##16)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \
}

#define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
ARG2_TYPE, ARG3_TYPE) \
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y, \
ARG3_TYPE##2 z) { \
return (RET_TYPE##2)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y)); \
} \
\
#define _CLC_TERNARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
ARG2_TYPE, ARG3_TYPE) \
DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y, \
ARG3_TYPE##3 z) { \
return (RET_TYPE##3)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y), \
Expand All @@ -107,6 +108,15 @@
FUNCTION(x.hi, y.hi, z.hi)); \
}

// Defines the two-element overload of a three-argument FUNCTION by applying
// FUNCTION to matching components of all three operands, then expands
// _CLC_TERNARY_VECTORIZE_HAVE2 with the same arguments to supply the
// remaining overloads.
#define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,        \
                               ARG2_TYPE, ARG3_TYPE)                           \
  DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 a, ARG2_TYPE##2 b,                \
                                ARG3_TYPE##2 c) {                              \
    return (RET_TYPE##2)(FUNCTION(a.s0, b.s0, c.s0),                           \
                         FUNCTION(a.s1, b.s1, c.s1));                          \
  }                                                                            \
  _CLC_TERNARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,        \
                               ARG2_TYPE, ARG3_TYPE)

#define _CLC_V_S_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
ARG2_TYPE, ARG3_TYPE) \
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##2 z) { \
Expand Down
43 changes: 43 additions & 0 deletions libclc/generic/include/spirv/spirv_builtins.h
Original file line number Diff line number Diff line change
Expand Up @@ -15776,6 +15776,21 @@ _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_fp32_t
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp32_t
__spirv_ocl_native_exp2(__clc_vec16_fp32_t);

// fp16 overloads of __clc_native_exp2: one scalar declaration plus the 2-, 3-,
// 4-, 8- and 16-element vector forms. Only declared when cl_khr_fp16 is
// defined.
#ifdef cl_khr_fp16
_CLC_OVERLOAD
_CLC_DECL _CLC_CONSTFN __clc_fp16_t __clc_native_exp2(__clc_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp16_t
__clc_native_exp2(__clc_vec2_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_fp16_t
__clc_native_exp2(__clc_vec3_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_fp16_t
__clc_native_exp2(__clc_vec4_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_fp16_t
__clc_native_exp2(__clc_vec8_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t
__clc_native_exp2(__clc_vec16_fp16_t);
#endif

_CLC_OVERLOAD
_CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_native_log(__clc_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t
Expand Down Expand Up @@ -19077,6 +19092,34 @@ _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t
__spirv_ocl_tanh(__clc_vec16_fp16_t);
#endif

// fp32 overloads of __clc_native_tanh: one scalar declaration plus the 2-, 3-,
// 4-, 8- and 16-element vector forms. These are declared unconditionally.
_CLC_OVERLOAD
_CLC_DECL _CLC_CONSTFN __clc_fp32_t __clc_native_tanh(__clc_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t
__clc_native_tanh(__clc_vec2_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_fp32_t
__clc_native_tanh(__clc_vec3_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_fp32_t
__clc_native_tanh(__clc_vec4_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_fp32_t
__clc_native_tanh(__clc_vec8_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp32_t
__clc_native_tanh(__clc_vec16_fp32_t);

// fp16 overloads of __clc_native_tanh: one scalar declaration plus the 2-, 3-,
// 4-, 8- and 16-element vector forms. Only declared when cl_khr_fp16 is
// defined.
#ifdef cl_khr_fp16
_CLC_OVERLOAD
_CLC_DECL _CLC_CONSTFN __clc_fp16_t __clc_native_tanh(__clc_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp16_t
__clc_native_tanh(__clc_vec2_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_fp16_t
__clc_native_tanh(__clc_vec3_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_fp16_t
__clc_native_tanh(__clc_vec4_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_fp16_t
__clc_native_tanh(__clc_vec8_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t
__clc_native_tanh(__clc_vec16_fp16_t);
#endif

_CLC_OVERLOAD
_CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_tanpi(__clc_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t
Expand Down
60 changes: 60 additions & 0 deletions libclc/generic/libspirv/float16.cl
Original file line number Diff line number Diff line change
Expand Up @@ -4344,6 +4344,36 @@ __spirv_ocl_exp2(__clc_vec16_float16_t args_0) {
return __spirv_ocl_exp2(as_half16(args_0));
}

// Scalar __clc_float16_t entry point: reinterprets the argument with as_half
// and forwards it to the __clc_native_exp2 overload for that type.
_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_fp16_t
__clc_native_exp2(__clc_float16_t args_0) {
  const half reinterpreted = as_half(args_0);
  return __clc_native_exp2(reinterpreted);
}

// Two-element __clc_vec2_float16_t entry point: reinterprets the argument with
// as_half2 and forwards it to the matching __clc_native_exp2 overload.
_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec2_fp16_t
__clc_native_exp2(__clc_vec2_float16_t args_0) {
  const half2 reinterpreted = as_half2(args_0);
  return __clc_native_exp2(reinterpreted);
}

// Three-element __clc_vec3_float16_t entry point: reinterprets the argument
// with as_half3 and forwards it to the matching __clc_native_exp2 overload.
_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec3_fp16_t
__clc_native_exp2(__clc_vec3_float16_t args_0) {
  const half3 reinterpreted = as_half3(args_0);
  return __clc_native_exp2(reinterpreted);
}

// Four-element __clc_vec4_float16_t entry point: reinterprets the argument
// with as_half4 and forwards it to the matching __clc_native_exp2 overload.
_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec4_fp16_t
__clc_native_exp2(__clc_vec4_float16_t args_0) {
  const half4 reinterpreted = as_half4(args_0);
  return __clc_native_exp2(reinterpreted);
}

// Eight-element __clc_vec8_float16_t entry point: reinterprets the argument
// with as_half8 and forwards it to the matching __clc_native_exp2 overload.
_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec8_fp16_t
__clc_native_exp2(__clc_vec8_float16_t args_0) {
  const half8 reinterpreted = as_half8(args_0);
  return __clc_native_exp2(reinterpreted);
}

// Sixteen-element __clc_vec16_float16_t entry point: reinterprets the argument
// with as_half16 and forwards it to the matching __clc_native_exp2 overload.
_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec16_fp16_t
__clc_native_exp2(__clc_vec16_float16_t args_0) {
  const half16 reinterpreted = as_half16(args_0);
  return __clc_native_exp2(reinterpreted);
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_fp16_t
__spirv_ocl_expm1(__clc_float16_t args_0) {
return __spirv_ocl_expm1(as_half(args_0));
Expand Down Expand Up @@ -6613,6 +6643,36 @@ __spirv_ocl_tanh(__clc_vec16_float16_t args_0) {
return __spirv_ocl_tanh(as_half16(args_0));
}

// Scalar __clc_float16_t entry point: reinterprets the argument with as_half
// and forwards it to the __clc_native_tanh overload for that type.
_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_fp16_t
__clc_native_tanh(__clc_float16_t args_0) {
  const half reinterpreted = as_half(args_0);
  return __clc_native_tanh(reinterpreted);
}

// Two-element __clc_vec2_float16_t entry point: reinterprets the argument with
// as_half2 and forwards it to the matching __clc_native_tanh overload.
_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec2_fp16_t
__clc_native_tanh(__clc_vec2_float16_t args_0) {
  const half2 reinterpreted = as_half2(args_0);
  return __clc_native_tanh(reinterpreted);
}

// Three-element __clc_vec3_float16_t entry point: reinterprets the argument
// with as_half3 and forwards it to the matching __clc_native_tanh overload.
_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec3_fp16_t
__clc_native_tanh(__clc_vec3_float16_t args_0) {
  const half3 reinterpreted = as_half3(args_0);
  return __clc_native_tanh(reinterpreted);
}

// Four-element __clc_vec4_float16_t entry point: reinterprets the argument
// with as_half4 and forwards it to the matching __clc_native_tanh overload.
_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec4_fp16_t
__clc_native_tanh(__clc_vec4_float16_t args_0) {
  const half4 reinterpreted = as_half4(args_0);
  return __clc_native_tanh(reinterpreted);
}

// Eight-element __clc_vec8_float16_t entry point: reinterprets the argument
// with as_half8 and forwards it to the matching __clc_native_tanh overload.
_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec8_fp16_t
__clc_native_tanh(__clc_vec8_float16_t args_0) {
  const half8 reinterpreted = as_half8(args_0);
  return __clc_native_tanh(reinterpreted);
}

// Sixteen-element __clc_vec16_float16_t entry point: reinterprets the argument
// with as_half16 and forwards it to the matching __clc_native_tanh overload.
_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec16_fp16_t
__clc_native_tanh(__clc_vec16_float16_t args_0) {
  const half16 reinterpreted = as_half16(args_0);
  return __clc_native_tanh(reinterpreted);
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_fp16_t
__spirv_ocl_tanpi(__clc_float16_t args_0) {
return __spirv_ocl_tanpi(as_half(args_0));
Expand Down
1 change: 1 addition & 0 deletions libclc/ptx-nvidiacl/libspirv/SOURCES
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ math/native_rsqrt.cl
math/native_sin.cl
math/native_sqrt.cl
math/native_tan.cl
math/native_tanh.cl
math/nextafter.cl
math/pow.cl
math/remainder.cl
Expand Down
30 changes: 30 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/math/native_exp2.cl
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,34 @@
#define __CLC_FUNCTION __spirv_ocl_native_exp2
#define __CLC_BUILTIN __nv_exp2
#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, f)

#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// Declared here only; the definition lives outside this file.
int __clc_nvvm_reflect_arch();
// Non-zero when __clc_nvvm_reflect_arch() reports a value of at least 750.
// Used below to choose between the f16 ex2.approx builtins and the float
// fallback.
// NOTE(review): 750 presumably encodes sm_75 — confirm against the definition
// of __clc_nvvm_reflect_arch.
#define __USE_HALF_EXP2_APPROX (__clc_nvvm_reflect_arch() >= 750)

// Scalar half native exp2. When __USE_HALF_EXP2_APPROX holds the f16 approx
// builtin is used directly; otherwise the argument is widened to float and
// handed to __spirv_ocl_native_exp2, with the result converted back to half on
// return.
_CLC_DEF _CLC_OVERLOAD half __clc_native_exp2(half x) {
  if (__USE_HALF_EXP2_APPROX) {
    return __nvvm_ex2_approx_f16(x);
  }
  return __spirv_ocl_native_exp2((float)x);
}

// Two-element half native exp2. When __USE_HALF_EXP2_APPROX holds the packed
// f16x2 approx builtin handles both lanes at once; otherwise each lane is
// widened to float, evaluated with __spirv_ocl_native_exp2 and repacked into a
// half2.
_CLC_DEF _CLC_OVERLOAD half2 __clc_native_exp2(half2 x) {
  if (__USE_HALF_EXP2_APPROX) {
    return __nvvm_ex2_approx_f16x2(x);
  }
  return (half2)(__spirv_ocl_native_exp2((float)x.x),
                 __spirv_ocl_native_exp2((float)x.y));
}

// The scalar and half2 overloads are hand-written above, so use the _HAVE2
// variant of the vectorize macro to generate only the wider half vector
// overloads of __clc_native_exp2.
_CLC_UNARY_VECTORIZE_HAVE2(_CLC_OVERLOAD _CLC_DEF, half, __clc_native_exp2,
half)

#undef __USE_HALF_EXP2_APPROX

#endif // cl_khr_fp16

// Undef halfs before including unary builtins, as they are handled above.
#ifdef cl_khr_fp16
#undef cl_khr_fp16
#endif // cl_khr_fp16
#include <math/unary_builtin.inc>
41 changes: 41 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/math/native_tanh.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <spirv/spirv.h>

#include "../../include/libdevice.h"
#include <clcmacro.h>

// Declared here only; the definition lives outside this file.
extern int __clc_nvvm_reflect_arch();

// Non-zero when __clc_nvvm_reflect_arch() reports a value of at least 750.
// Used by every overload below to choose between the tanh.approx builtins and
// the __nv_tanhf fallback.
// NOTE(review): 750 presumably encodes sm_75 — confirm against the definition
// of __clc_nvvm_reflect_arch.
#define __USE_TANH_APPROX (__clc_nvvm_reflect_arch() >= 750)

// Scalar float native tanh: uses the f32 approx builtin when
// __USE_TANH_APPROX holds, and __nv_tanhf otherwise.
_CLC_DEF _CLC_OVERLOAD float __clc_native_tanh(float x) {
  if (__USE_TANH_APPROX) {
    return __nvvm_tanh_approx_f(x);
  }
  return __nv_tanhf(x);
}

// Generate the float vector overloads of __clc_native_tanh from the scalar
// definition above.
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_native_tanh, float)

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// Scalar half native tanh: uses the f16 approx builtin when
// __USE_TANH_APPROX holds; otherwise falls back to __nv_tanhf, whose result is
// converted to half on return.
_CLC_DEF _CLC_OVERLOAD half __clc_native_tanh(half x) {
  if (__USE_TANH_APPROX) {
    return __nvvm_tanh_approx_f16(x);
  }
  return __nv_tanhf(x);
}

// Two-element half native tanh: the packed f16x2 approx builtin handles both
// lanes when __USE_TANH_APPROX holds; otherwise each lane goes through
// __nv_tanhf and the two results are repacked into a half2.
_CLC_DEF _CLC_OVERLOAD half2 __clc_native_tanh(half2 x) {
  if (__USE_TANH_APPROX) {
    return __nvvm_tanh_approx_f16x2(x);
  }
  return (half2)(__nv_tanhf(x.x), __nv_tanhf(x.y));
}

// The scalar and half2 overloads are hand-written above, so use the _HAVE2
// variant of the vectorize macro to generate only the wider half vector
// overloads of __clc_native_tanh.
_CLC_UNARY_VECTORIZE_HAVE2(_CLC_OVERLOAD _CLC_DEF, half, __clc_native_tanh, half)

#endif

#undef __USE_TANH_APPROX

11 changes: 11 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsNVVM.td
Original file line number Diff line number Diff line change
Expand Up @@ -854,6 +854,17 @@ let TargetPrefix = "nvvm" in {
def int_nvvm_cos_approx_f : GCCBuiltin<"__nvvm_cos_approx_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;

//
// Tanh
//
// Approximate tanh intrinsics for float, half and <2 x half> operands. Each
// one is bound to the same-named __nvvm_* builtin, takes and returns a single
// value of the same type, and is marked IntrNoMem.

def int_nvvm_tanh_approx_f : GCCBuiltin<"__nvvm_tanh_approx_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_tanh_approx_f16 : GCCBuiltin<"__nvvm_tanh_approx_f16">,
DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty], [IntrNoMem]>;
def int_nvvm_tanh_approx_f16x2 : GCCBuiltin<"__nvvm_tanh_approx_f16x2">,
DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty], [IntrNoMem]>;

//
// Fma
//
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -933,6 +933,17 @@ def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;

//
// Tanh
//
// Maps each tanh.approx intrinsic to the PTX instruction of the matching
// width (f32, f16, f16x2), using the register class for that width as both
// source and destination.
// NOTE(review): no subtarget predicate is visible on these definitions —
// confirm whether F_MATH_1 should restrict them to targets that provide
// tanh.approx.

def INT_NVVM_TANH_APPROX_F : F_MATH_1<"tanh.approx.f32 \t$dst, $src0;",
Float32Regs, Float32Regs, int_nvvm_tanh_approx_f>;
def INT_NVVM_TANH_APPROX_F16 : F_MATH_1<"tanh.approx.f16 \t$dst, $src0;",
Float16Regs, Float16Regs, int_nvvm_tanh_approx_f16>;
def INT_NVVM_TANH_APPROX_F16X2 : F_MATH_1<"tanh.approx.f16x2 \t$dst, $src0;",
Float16x2Regs, Float16x2Regs, int_nvvm_tanh_approx_f16x2>;

//
// Fma
//
Expand Down
Loading