diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def
index 449e4d1256944..cabfb4aa2090b 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -205,6 +205,8 @@ BUILTIN(__nvvm_saturate_d, "dd", "")
 BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "")
 BUILTIN(__nvvm_ex2_approx_f, "ff", "")
 BUILTIN(__nvvm_ex2_approx_d, "dd", "")
+TARGET_BUILTIN(__nvvm_ex2_approx_f16, "hh", "", AND(SM_75, PTX70))
+TARGET_BUILTIN(__nvvm_ex2_approx_f16x2, "V2hV2h", "", AND(SM_75, PTX70))
 
 BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "")
 BUILTIN(__nvvm_lg2_approx_f, "ff", "")
@@ -218,6 +220,12 @@ BUILTIN(__nvvm_sin_approx_f, "ff", "")
 BUILTIN(__nvvm_cos_approx_ftz_f, "ff", "")
 BUILTIN(__nvvm_cos_approx_f, "ff", "")
 
+// Tanh
+
+TARGET_BUILTIN(__nvvm_tanh_approx_f, "ff", "", AND(SM_75,PTX70))
+TARGET_BUILTIN(__nvvm_tanh_approx_f16, "hh", "", AND(SM_75, PTX70))
+TARGET_BUILTIN(__nvvm_tanh_approx_f16x2, "V2hV2h", "", AND(SM_75, PTX70))
+
 // Fma
 
 BUILTIN(__nvvm_fma_rn_ftz_f, "ffff", "")
diff --git a/libclc/generic/include/clcmacro.h b/libclc/generic/include/clcmacro.h
index d4167a8e4529e..addb461aa047d 100644
--- a/libclc/generic/include/clcmacro.h
+++ b/libclc/generic/include/clcmacro.h
@@ -9,11 +9,7 @@
 #ifndef __CLC_MACRO_H
 #define __CLC_MACRO_H
 
-#define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE)          \
-  DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) {                              \
-    return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y));                        \
-  }                                                                            \
-                                                                               \
+#define _CLC_UNARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE)    \
   DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) {                              \
     return (RET_TYPE##3)(FUNCTION(x.x), FUNCTION(x.y), FUNCTION(x.z));         \
   }                                                                            \
@@ -30,12 +26,14 @@
     return (RET_TYPE##16)(FUNCTION(x.lo), FUNCTION(x.hi));                     \
   }
 
-#define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,         \
-                              ARG2_TYPE)                                       \
-  DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) {              \
-    return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y));              \
+#define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE)          \
+  DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) {                              \
+    return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y));                        \
   }                                                                            \
-                                                                               \
+  _CLC_UNARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE)
+
+#define _CLC_BINARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,   \
+                                    ARG2_TYPE)                                 \
   DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) {              \
     return (RET_TYPE##3)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y),               \
                          FUNCTION(x.z, y.z));                                  \
@@ -53,6 +51,14 @@
     return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi));         \
   }
 
+#define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,         \
+                              ARG2_TYPE)                                       \
+  DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) {              \
+    return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y));              \
+  }                                                                            \
+  _CLC_BINARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,         \
+                              ARG2_TYPE)
+
 #define _CLC_V_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,          \
                              ARG2_TYPE)                                        \
   DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE##2 y) {                 \
@@ -76,13 +82,8 @@
     return (RET_TYPE##16)(FUNCTION(x, y.lo), FUNCTION(x, y.hi));               \
   }
 
-#define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,        \
-                               ARG2_TYPE, ARG3_TYPE)                           \
-  DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y,                \
-                                ARG3_TYPE##2 z) {                              \
-    return (RET_TYPE##2)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y));    \
-  }                                                                            \
-                                                                               \
+#define _CLC_TERNARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,  \
+                                     ARG2_TYPE, ARG3_TYPE)                     \
   DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y,                \
                                 ARG3_TYPE##3 z) {                              \
     return (RET_TYPE##3)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y),     \
@@ -107,6 +108,15 @@
                           FUNCTION(x.hi, y.hi, z.hi));                         \
   }
 
+#define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,        \
+                               ARG2_TYPE, ARG3_TYPE)                           \
+  DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y,                \
+                                ARG3_TYPE##2 z) {                              \
+    return (RET_TYPE##2)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y));    \
+  }                                                                            \
+  _CLC_TERNARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,        \
+                               ARG2_TYPE, ARG3_TYPE)
+
 #define _CLC_V_S_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,        \
                                ARG2_TYPE, ARG3_TYPE)                           \
   DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##2 z) {    \
diff --git a/libclc/generic/include/spirv/spirv_builtins.h b/libclc/generic/include/spirv/spirv_builtins.h
index adeb3a63460d6..ca15fdf3c6547 100644
--- a/libclc/generic/include/spirv/spirv_builtins.h
+++ b/libclc/generic/include/spirv/spirv_builtins.h
@@ -15776,6 +15776,21 @@ _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_fp32_t
 _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp32_t
     __spirv_ocl_native_exp2(__clc_vec16_fp32_t);
 
+#ifdef cl_khr_fp16
+_CLC_OVERLOAD
+_CLC_DECL _CLC_CONSTFN __clc_fp16_t __clc_native_exp2(__clc_fp16_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp16_t
+    __clc_native_exp2(__clc_vec2_fp16_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_fp16_t
+    __clc_native_exp2(__clc_vec3_fp16_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_fp16_t
+    __clc_native_exp2(__clc_vec4_fp16_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_fp16_t
+    __clc_native_exp2(__clc_vec8_fp16_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t
+    __clc_native_exp2(__clc_vec16_fp16_t);
+#endif
+
 _CLC_OVERLOAD
 _CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_native_log(__clc_fp32_t);
 _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t
@@ -19077,6 +19092,34 @@ _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t
     __spirv_ocl_tanh(__clc_vec16_fp16_t);
 #endif
 
+_CLC_OVERLOAD
+_CLC_DECL _CLC_CONSTFN __clc_fp32_t __clc_native_tanh(__clc_fp32_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t
+    __clc_native_tanh(__clc_vec2_fp32_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_fp32_t
+    __clc_native_tanh(__clc_vec3_fp32_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_fp32_t
+    __clc_native_tanh(__clc_vec4_fp32_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_fp32_t
+    __clc_native_tanh(__clc_vec8_fp32_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp32_t
+    __clc_native_tanh(__clc_vec16_fp32_t);
+
+#ifdef cl_khr_fp16
+_CLC_OVERLOAD
+_CLC_DECL _CLC_CONSTFN __clc_fp16_t __clc_native_tanh(__clc_fp16_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp16_t
+    __clc_native_tanh(__clc_vec2_fp16_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_fp16_t
+    __clc_native_tanh(__clc_vec3_fp16_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_fp16_t
+    __clc_native_tanh(__clc_vec4_fp16_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_fp16_t
+    __clc_native_tanh(__clc_vec8_fp16_t);
+_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t
+    __clc_native_tanh(__clc_vec16_fp16_t);
+#endif
+
 _CLC_OVERLOAD
 _CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_tanpi(__clc_fp32_t);
 _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t
diff --git a/libclc/generic/libspirv/float16.cl b/libclc/generic/libspirv/float16.cl
index b2cd14e8c63f4..28f5b65ac80e2 100644
--- a/libclc/generic/libspirv/float16.cl
+++ b/libclc/generic/libspirv/float16.cl
@@ -4344,6 +4344,36 @@ __spirv_ocl_exp2(__clc_vec16_float16_t args_0) {
   return __spirv_ocl_exp2(as_half16(args_0));
 }
 
+_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_fp16_t
+__clc_native_exp2(__clc_float16_t args_0) {
+  return __clc_native_exp2(as_half(args_0));
+}
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec2_fp16_t
+__clc_native_exp2(__clc_vec2_float16_t args_0) {
+  return __clc_native_exp2(as_half2(args_0));
+}
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec3_fp16_t
+__clc_native_exp2(__clc_vec3_float16_t args_0) {
+  return __clc_native_exp2(as_half3(args_0));
+}
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec4_fp16_t
+__clc_native_exp2(__clc_vec4_float16_t args_0) {
+  return __clc_native_exp2(as_half4(args_0));
+}
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec8_fp16_t
+__clc_native_exp2(__clc_vec8_float16_t args_0) {
+  return __clc_native_exp2(as_half8(args_0));
+}
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec16_fp16_t
+__clc_native_exp2(__clc_vec16_float16_t args_0) {
+  return __clc_native_exp2(as_half16(args_0));
+}
+
 _CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_fp16_t
 __spirv_ocl_expm1(__clc_float16_t args_0) {
   return __spirv_ocl_expm1(as_half(args_0));
@@ -6613,6 +6643,36 @@ __spirv_ocl_tanh(__clc_vec16_float16_t args_0) {
   return __spirv_ocl_tanh(as_half16(args_0));
 }
 
+_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_fp16_t
+__clc_native_tanh(__clc_float16_t args_0) {
+  return __clc_native_tanh(as_half(args_0));
+}
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec2_fp16_t
+__clc_native_tanh(__clc_vec2_float16_t args_0) {
+  return __clc_native_tanh(as_half2(args_0));
+}
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec3_fp16_t
+__clc_native_tanh(__clc_vec3_float16_t args_0) {
+  return __clc_native_tanh(as_half3(args_0));
+}
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec4_fp16_t
+__clc_native_tanh(__clc_vec4_float16_t args_0) {
+  return __clc_native_tanh(as_half4(args_0));
+}
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec8_fp16_t
+__clc_native_tanh(__clc_vec8_float16_t args_0) {
+  return __clc_native_tanh(as_half8(args_0));
+}
+
+_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec16_fp16_t
+__clc_native_tanh(__clc_vec16_float16_t args_0) {
+  return __clc_native_tanh(as_half16(args_0));
+}
+
 _CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_fp16_t
 __spirv_ocl_tanpi(__clc_float16_t args_0) {
   return __spirv_ocl_tanpi(as_half(args_0));
diff --git a/libclc/ptx-nvidiacl/libspirv/SOURCES b/libclc/ptx-nvidiacl/libspirv/SOURCES
index 5ec1dea1afc30..9f105b1556ed9 100644
--- a/libclc/ptx-nvidiacl/libspirv/SOURCES
+++ b/libclc/ptx-nvidiacl/libspirv/SOURCES
@@ -53,6 +53,7 @@ math/native_rsqrt.cl
 math/native_sin.cl
 math/native_sqrt.cl
 math/native_tan.cl
+math/native_tanh.cl
 math/nextafter.cl
 math/pow.cl
 math/remainder.cl
diff --git a/libclc/ptx-nvidiacl/libspirv/math/native_exp2.cl b/libclc/ptx-nvidiacl/libspirv/math/native_exp2.cl
index 93c12c0aeb067..8c06a7ed9ea9c 100644
--- a/libclc/ptx-nvidiacl/libspirv/math/native_exp2.cl
+++ b/libclc/ptx-nvidiacl/libspirv/math/native_exp2.cl
@@ -14,4 +14,34 @@
 #define __CLC_FUNCTION __spirv_ocl_native_exp2
 #define __CLC_BUILTIN __nv_exp2
 #define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, f)
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+int __clc_nvvm_reflect_arch();
+#define __USE_HALF_EXP2_APPROX (__clc_nvvm_reflect_arch() >= 750)
+
+_CLC_DEF _CLC_OVERLOAD half __clc_native_exp2(half x) {
+  return (__USE_HALF_EXP2_APPROX) ? __nvvm_ex2_approx_f16(x)
+                                  : __spirv_ocl_native_exp2((float)x);
+}
+
+_CLC_DEF _CLC_OVERLOAD half2 __clc_native_exp2(half2 x) {
+  return (__USE_HALF_EXP2_APPROX)
+             ? __nvvm_ex2_approx_f16x2(x)
+             : (half2)(__spirv_ocl_native_exp2((float)x.x),
+                       __spirv_ocl_native_exp2((float)x.y));
+}
+
+_CLC_UNARY_VECTORIZE_HAVE2(_CLC_OVERLOAD _CLC_DEF, half, __clc_native_exp2,
+                           half)
+
+#undef __USE_HALF_EXP2_APPROX
+
+#endif // cl_khr_fp16
+
+// Undef halfs before uncluding unary builtins, as they are handled above.
+#ifdef cl_khr_fp16
+#undef cl_khr_fp16
+#endif // cl_khr_fp16
 #include <math/unary_builtin.inc>
diff --git a/libclc/ptx-nvidiacl/libspirv/math/native_tanh.cl b/libclc/ptx-nvidiacl/libspirv/math/native_tanh.cl
new file mode 100644
index 0000000000000..3216059c5c7ce
--- /dev/null
+++ b/libclc/ptx-nvidiacl/libspirv/math/native_tanh.cl
@@ -0,0 +1,41 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include <spirv/spirv.h>
+
+#include "../../include/libdevice.h"
+#include <clcmacro.h>
+
+extern int __clc_nvvm_reflect_arch();
+
+#define __USE_TANH_APPROX (__clc_nvvm_reflect_arch() >= 750)
+
+_CLC_DEF _CLC_OVERLOAD float __clc_native_tanh(float x) {
+  return (__USE_TANH_APPROX) ? __nvvm_tanh_approx_f(x) : __nv_tanhf(x);
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_native_tanh, float)
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_DEF _CLC_OVERLOAD half __clc_native_tanh(half x) {
+  return (__USE_TANH_APPROX) ? __nvvm_tanh_approx_f16(x) : __nv_tanhf(x);
+}
+
+_CLC_DEF _CLC_OVERLOAD half2 __clc_native_tanh(half2 x) {
+  return (__USE_TANH_APPROX) ? __nvvm_tanh_approx_f16x2(x)
+                             : (half2)(__nv_tanhf(x.x), __nv_tanhf(x.y));
+}
+
+_CLC_UNARY_VECTORIZE_HAVE2(_CLC_OVERLOAD _CLC_DEF, half, __clc_native_tanh, half)
+
+#endif
+
+#undef __USE_TANH_APPROX
+
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index f026033782184..3321534a3218c 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -854,6 +854,17 @@ let TargetPrefix = "nvvm" in {
   def int_nvvm_cos_approx_f : GCCBuiltin<"__nvvm_cos_approx_f">,
       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
 
+//
+// Tanh
+//
+
+   def int_nvvm_tanh_approx_f : GCCBuiltin<"__nvvm_tanh_approx_f">,
+        DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+   def int_nvvm_tanh_approx_f16 : GCCBuiltin<"__nvvm_tanh_approx_f16">,
+       DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty], [IntrNoMem]>;
+   def int_nvvm_tanh_approx_f16x2 : GCCBuiltin<"__nvvm_tanh_approx_f16x2">,
+       DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty], [IntrNoMem]>;
+
 //
 // Fma
 //
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index c980f6ed4bdc2..377dce99578cc 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -933,6 +933,17 @@ def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
 def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
   Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
 
+//
+// Tanh
+//
+
+ def INT_NVVM_TANH_APPROX_F : F_MATH_1<"tanh.approx.f32 \t$dst, $src0;",
+    Float32Regs, Float32Regs, int_nvvm_tanh_approx_f>;
+ def INT_NVVM_TANH_APPROX_F16 : F_MATH_1<"tanh.approx.f16 \t$dst, $src0;",
+   Float16Regs, Float16Regs, int_nvvm_tanh_approx_f16>;
+ def INT_NVVM_TANH_APPROX_F16X2 : F_MATH_1<"tanh.approx.f16x2 \t$dst, $src0;",
+   Float16x2Regs, Float16x2Regs, int_nvvm_tanh_approx_f16x2>;
+
 //
 // Fma
 //
diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_native_math.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_native_math.asciidoc
new file mode 100644
index 0000000000000..9b8daaf0ab6d6
--- /dev/null
+++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_native_math.asciidoc
@@ -0,0 +1,112 @@
+= sycl_ext_oneapi_native_math
+
+:source-highlighter: coderay
+:coderay-linenums-mode: table
+
+// This section needs to be after the document title.
+:doctype: book
+:toc2:
+:toc: left
+:encoding: utf-8
+:lang: en
+:dpcpp: pass:[DPC++]
+
+// Set the default source code type in this document to C++,
+// for syntax highlighting purposes.  This is needed because
+// docbook uses c++ and html5 uses cpp.
+:language: {basebackend@docbook:c++:cpp}
+
+
+== Notice
+
+[%hardbreaks]
+Copyright (C) 2022-2022 Intel Corporation.  All rights reserved.
+
+Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks
+of The Khronos Group Inc.  OpenCL(TM) is a trademark of Apple Inc. used by
+permission by Khronos.
+
+
+== Contact
+
+To report problems with this extension, please open a new issue at:
+
+https://github.com/intel/llvm/issues
+
+
+== Dependencies
+
+This extension is written against the SYCL 2020 revision 4 specification.  All
+references below to the "core SYCL specification" or to section numbers in the
+SYCL specification refer to that revision.
+
+== Status
+
+This is an experimental extension specification, intended to provide early
+access to features and gather community feedback.  Interfaces defined in this
+specification are implemented in {dpcpp}, but they are not finalized and may
+change incompatibly in future versions of {dpcpp} without prior notice.
+*Shipping software products should not rely on APIs defined in this
+specification.*
+
+== Overview
+
+The CUDA backend has introduced fast math "approx" versions of the `exp2` and
+`tanh` functions for `half` and `float` types. It is considered likely that
+other backends will eventually introduce similar functionality, motivated
+particularly by deep learning use cases of these functions. We propose that the
+appropriate place to call such functionality in SYCL applications would be from
+the newly proposed native functions.
+
+== Specification
+
+=== Feature test macro
+
+This extension provides a feature-test macro as described in the core SYCL
+specification.  An implementation supporting this extension must predefine the
+macro `SYCL_EXT_ONEAPI_NATIVE_MATH` to one of the values defined in the table
+below.  Applications can test for the existence of this macro to determine if
+the implementation supports this feature, or applications can test the macro's
+value to determine which of the extension's features the implementation
+supports.
+
+[%header,cols="1,5"]
+|===
+|Value
+|Description
+
+|1
+|The APIs of this experimental extension are not versioned, so the
+ feature-test macro always has this value.
+|===
+
+=== New functions
+
+This extension allows the `sycl::native::exp2` function (that originally
+supports `genfloatf` types) to support `genfloath` types in the
+`sycl::ext::oneapi::experimental::native` namespace. It also introduces a new
+native `tanh` function supporting `genfloath` and `genfloatf` types in the
+`sycl::ext::oneapi::experimental::native` namespace.
+
+NOTE: This document does not propose `genfloatd` support for either
+`sycl::ext::oneapi::experimental::native::exp2` or
+`sycl::ext::oneapi::experimental::native::tanh`. 
+
+
+> This extension adds the following new native builtin functions to SYCL:
+>
+> ```
+> namespace sycl::ext::oneapi::experimental::native {
+> 
+> // Available only when "T" is one of the genfloath types.
+> template <typename T>
+> T exp2(T x);
+> 
+> // Available only when "T" is one of the genfloatf or genfloath types.
+> template <typename T>
+> T tanh(T x);
+> 
+> } // namespace sycl::ext::oneapi::experimental::native
+> ```
+
+
diff --git a/sycl/include/CL/__spirv/spirv_ops.hpp b/sycl/include/CL/__spirv/spirv_ops.hpp
index 4878cc4dd5db8..e7b660f9d29e7 100644
--- a/sycl/include/CL/__spirv/spirv_ops.hpp
+++ b/sycl/include/CL/__spirv/spirv_ops.hpp
@@ -755,6 +755,44 @@ __spirv_ocl_printf(const __attribute__((opencl_constant)) char *Format, ...);
 extern SYCL_EXTERNAL int __spirv_ocl_printf(const char *Format, ...);
 #endif
 
+// Native builtin extension
+
+extern SYCL_EXTERNAL float __clc_native_tanh(float);
+extern SYCL_EXTERNAL __ocl_vec_t<float, 2>
+    __clc_native_tanh(__ocl_vec_t<float, 2>);
+extern SYCL_EXTERNAL __ocl_vec_t<float, 3>
+    __clc_native_tanh(__ocl_vec_t<float, 3>);
+extern SYCL_EXTERNAL __ocl_vec_t<float, 4>
+    __clc_native_tanh(__ocl_vec_t<float, 4>);
+extern SYCL_EXTERNAL __ocl_vec_t<float, 8>
+    __clc_native_tanh(__ocl_vec_t<float, 8>);
+extern SYCL_EXTERNAL __ocl_vec_t<float, 16>
+    __clc_native_tanh(__ocl_vec_t<float, 16>);
+
+extern SYCL_EXTERNAL _Float16 __clc_native_tanh(_Float16);
+extern SYCL_EXTERNAL __ocl_vec_t<_Float16, 2>
+    __clc_native_tanh(__ocl_vec_t<_Float16, 2>);
+extern SYCL_EXTERNAL __ocl_vec_t<_Float16, 3>
+    __clc_native_tanh(__ocl_vec_t<_Float16, 3>);
+extern SYCL_EXTERNAL __ocl_vec_t<_Float16, 4>
+    __clc_native_tanh(__ocl_vec_t<_Float16, 4>);
+extern SYCL_EXTERNAL __ocl_vec_t<_Float16, 8>
+    __clc_native_tanh(__ocl_vec_t<_Float16, 8>);
+extern SYCL_EXTERNAL __ocl_vec_t<_Float16, 16>
+    __clc_native_tanh(__ocl_vec_t<_Float16, 16>);
+
+extern SYCL_EXTERNAL _Float16 __clc_native_exp2(_Float16);
+extern SYCL_EXTERNAL __ocl_vec_t<_Float16, 2>
+    __clc_native_exp2(__ocl_vec_t<_Float16, 2>);
+extern SYCL_EXTERNAL __ocl_vec_t<_Float16, 3>
+    __clc_native_exp2(__ocl_vec_t<_Float16, 3>);
+extern SYCL_EXTERNAL __ocl_vec_t<_Float16, 4>
+    __clc_native_exp2(__ocl_vec_t<_Float16, 4>);
+extern SYCL_EXTERNAL __ocl_vec_t<_Float16, 8>
+    __clc_native_exp2(__ocl_vec_t<_Float16, 8>);
+extern SYCL_EXTERNAL __ocl_vec_t<_Float16, 16>
+    __clc_native_exp2(__ocl_vec_t<_Float16, 16>);
+
 #else // if !__SYCL_DEVICE_ONLY__
 
 template <typename dataT>
diff --git a/sycl/include/CL/sycl/feature_test.hpp.in b/sycl/include/CL/sycl/feature_test.hpp.in
index a7f0ca071ee46..75986f5b3f9ff 100644
--- a/sycl/include/CL/sycl/feature_test.hpp.in
+++ b/sycl/include/CL/sycl/feature_test.hpp.in
@@ -54,6 +54,7 @@ namespace sycl {
 #define SYCL_EXT_ONEAPI_SRGB 1
 #define SYCL_EXT_ONEAPI_SUB_GROUP 1
 #define SYCL_EXT_ONEAPI_PROPERTIES 1
+#define SYCL_EXT_ONEAPI_NATIVE_MATH 1
 #define SYCL_EXT_INTEL_BF16_CONVERSION 1
 #define SYCL_EXT_INTEL_DATAFLOW_PIPES 1
 #ifdef __clang__
diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp
index e32e1c70a5a97..c8fa033d8c79e 100644
--- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp
+++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp
@@ -8,8 +8,17 @@
 
 #pragma once
 
+#include <CL/sycl/builtins.hpp>
+#include <CL/sycl/detail/builtins.hpp>
+#include <CL/sycl/detail/generic_type_lists.hpp>
+#include <CL/sycl/detail/generic_type_traits.hpp>
+#include <CL/sycl/detail/type_traits.hpp>
+
 #include <CL/__spirv/spirv_ops.hpp>
 
+// TODO Decide whether to mark functions with this attribute.
+#define __NOEXC /*noexcept*/
+
 #ifdef __SYCL_DEVICE_ONLY__
 #define __SYCL_CONSTANT_AS __attribute__((opencl_constant))
 #else
@@ -72,6 +81,42 @@ int printf(const FormatT *__format, Args... args) {
 #endif // defined(__SYCL_DEVICE_ONLY__) && defined(__SPIR__)
 }
 
+namespace native {
+
+// genfloatfh tanh (genfloatfh x)
+template <typename T>
+inline __SYCL_ALWAYS_INLINE
+    sycl::detail::enable_if_t<sycl::detail::is_genfloatf<T>::value ||
+                                  sycl::detail::is_genfloath<T>::value,
+                              T>
+    tanh(T x) __NOEXC {
+#if defined(__NVPTX__)
+  using _ocl_T = cl::sycl::detail::ConvertToOpenCLType_t<T>;
+  _ocl_T arg1 = cl::sycl::detail::convertDataToType<T, _ocl_T>(x);
+  return cl::sycl::detail::convertDataToType<_ocl_T, T>(
+      __clc_native_tanh(arg1));
+#else
+  return __sycl_std::__invoke_tanh<T>(x);
+#endif
+}
+
+// genfloath exp2 (genfloath x)
+template <typename T>
+inline __SYCL_ALWAYS_INLINE
+    sycl::detail::enable_if_t<sycl::detail::is_genfloath<T>::value, T>
+    exp2(T x) __NOEXC {
+#if defined(__NVPTX__)
+  using _ocl_T = cl::sycl::detail::ConvertToOpenCLType_t<T>;
+  _ocl_T arg1 = cl::sycl::detail::convertDataToType<T, _ocl_T>(x);
+  return cl::sycl::detail::convertDataToType<_ocl_T, T>(
+      __clc_native_exp2(arg1));
+#else
+  return __sycl_std::__invoke_exp2<T>(x);
+#endif
+}
+
+} // namespace native
+
 } // namespace experimental
 } // namespace oneapi
 } // namespace ext