diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3a598c5fad..6fa1b5830e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -602,7 +602,9 @@ target_link_libraries(raft::raft INTERFACE # Use `rapids_export` for 22.04 as it will have COMPONENT support rapids_export( INSTALL raft - EXPORT_SET raft-exports COMPONENTS ${raft_components} COMPONENTS_EXPORT_SET ${raft_export_sets} + EXPORT_SET raft-exports + COMPONENTS ${raft_components} + COMPONENTS_EXPORT_SET ${raft_export_sets} GLOBAL_TARGETS raft compiled distributed NAMESPACE raft:: DOCUMENTATION doc_string @@ -613,7 +615,9 @@ rapids_export( # * build export ------------------------------------------------------------- rapids_export( BUILD raft - EXPORT_SET raft-exports COMPONENTS ${raft_components} COMPONENTS_EXPORT_SET ${raft_export_sets} + EXPORT_SET raft-exports + COMPONENTS ${raft_components} + COMPONENTS_EXPORT_SET ${raft_export_sets} GLOBAL_TARGETS raft compiled distributed DOCUMENTATION doc_string NAMESPACE raft:: diff --git a/cpp/include/raft/core/detail/macros.hpp b/cpp/include/raft/core/detail/macros.hpp index 390acea697..bb4207938b 100644 --- a/cpp/include/raft/core/detail/macros.hpp +++ b/cpp/include/raft/core/detail/macros.hpp @@ -22,6 +22,14 @@ #endif #endif +#if defined(_RAFT_HAS_CUDA) +#define CUDA_CONDITION_ELSE_TRUE(condition) condition +#define CUDA_CONDITION_ELSE_FALSE(condition) condition +#else +#define CUDA_CONDITION_ELSE_TRUE(condition) true +#define CUDA_CONDITION_ELSE_FALSE(condition) false +#endif + #ifndef _RAFT_HOST_DEVICE #if defined(_RAFT_HAS_CUDA) #define _RAFT_DEVICE __device__ @@ -40,6 +48,10 @@ #define RAFT_INLINE_FUNCTION _RAFT_HOST_DEVICE _RAFT_FORCEINLINE #endif +#ifndef RAFT_DEVICE_INLINE_FUNCTION +#define RAFT_DEVICE_INLINE_FUNCTION _RAFT_DEVICE _RAFT_FORCEINLINE +#endif + // The RAFT_INLINE_CONDITIONAL is a conditional inline specifier that removes // the inline specification when RAFT_COMPILED is defined. // diff --git a/cpp/include/raft/core/math.hpp b/cpp/include/raft/core/math.hpp index c5f08b84b7..f251c14e4a 100644 --- a/cpp/include/raft/core/math.hpp +++ b/cpp/include/raft/core/math.hpp @@ -22,6 +22,11 @@ #include +#if _RAFT_HAS_CUDA +#include +#include +#endif + namespace raft { /** @@ -90,7 +95,10 @@ RAFT_INLINE_FUNCTION auto atanh(T x) } /** Cosine */ -template +template && + (!std::is_same_v)))), + int> = 0> RAFT_INLINE_FUNCTION auto cos(T x) { #ifdef __CUDA_ARCH__ @@ -100,8 +108,38 @@ RAFT_INLINE_FUNCTION auto cos(T x) #endif } -/** Sine */ +#if _RAFT_HAS_CUDA template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, __half> cos(T x) +{ +#if (__CUDA_ARCH__ >= 530) + return ::hcos(x); +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "__half is only supported on __CUDA_ARCH__ >= 530"); + return T{}; +#endif +} + +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, nv_bfloat16> +cos(T x) +{ +#if (__CUDA_ARCH__ >= 800) + return ::hcos(x); +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "nv_bfloat16 is only supported on __CUDA_ARCH__ >= 800"); + return T{}; +#endif +} +#endif + +/** Sine */ +template && + (!std::is_same_v)))), + int> = 0> RAFT_INLINE_FUNCTION auto sin(T x) { #ifdef __CUDA_ARCH__ @@ -111,6 +149,33 @@ RAFT_INLINE_FUNCTION auto sin(T x) #endif } +#if _RAFT_HAS_CUDA +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, __half> sin(T x) +{ +#if (__CUDA_ARCH__ >= 530) + return ::hsin(x); +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "__half is only supported on __CUDA_ARCH__ >= 530"); + return T{}; +#endif +} + +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, nv_bfloat16> +sin(T x) +{ +#if (__CUDA_ARCH__ >= 800) + return ::hsin(x); +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "nv_bfloat16 is only supported on __CUDA_ARCH__ >= 800"); + return T{}; +#endif +} +#endif + /** Sine and cosine */ template RAFT_INLINE_FUNCTION std::enable_if_t || std::is_same_v> sincos( @@ -141,7 +206,10 @@ RAFT_INLINE_FUNCTION auto tanh(T x) * @{ */ /** Exponential function */ -template +template && + (!std::is_same_v)))), + int> = 0> RAFT_INLINE_FUNCTION auto exp(T x) { #ifdef __CUDA_ARCH__ @@ -151,8 +219,38 @@ RAFT_INLINE_FUNCTION auto exp(T x) #endif } -/** Natural logarithm */ +#if _RAFT_HAS_CUDA +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, __half> exp(T x) +{ +#if (__CUDA_ARCH__ >= 530) + return ::hexp(x); +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "__half is only supported on __CUDA_ARCH__ >= 530"); + return T{}; +#endif +} + template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, nv_bfloat16> +exp(T x) +{ +#if (__CUDA_ARCH__ >= 800) + return ::hexp(x); +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "nv_bfloat16 is only supported on __CUDA_ARCH__ >= 800"); + return T{}; +#endif +} +#endif + +/** Natural logarithm */ +template && + (!std::is_same_v)))), + int> = 0> RAFT_INLINE_FUNCTION auto log(T x) { #ifdef __CUDA_ARCH__ @@ -161,6 +259,33 @@ RAFT_INLINE_FUNCTION auto log(T x) return std::log(x); #endif } + +#if _RAFT_HAS_CUDA +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, __half> log(T x) +{ +#if (__CUDA_ARCH__ >= 530) + return ::hlog(x); +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "__half is only supported on __CUDA_ARCH__ >= 530"); + return T{}; +#endif +} + +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, nv_bfloat16> +log(T x) +{ +#if (__CUDA_ARCH__ >= 800) + return ::hlog(x); +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "nv_bfloat16 is only supported on __CUDA_ARCH__ >= 800"); + return T{}; +#endif +} +#endif /** @} */ /** @@ -176,7 +301,13 @@ RAFT_INLINE_FUNCTION auto log(T x) * same (and that the less-than operator be defined). * @{ */ -template +template < + typename T1, + typename T2, + std::enable_if_t && !std::is_same_v) || + (!std::is_same_v && !std::is_same_v)))), + int> = 0> RAFT_INLINE_FUNCTION auto max(const T1& x, const T2& y) { #ifdef __CUDA_ARCH__ @@ -208,6 +339,34 @@ RAFT_INLINE_FUNCTION auto max(const T1& x, const T2& y) #endif } +#if _RAFT_HAS_CUDA +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, __half> max(T x, + T y) +{ +#if (__CUDA_ARCH__ >= 530) + return ::__hmax(x, y); +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "__half is only supported on __CUDA_ARCH__ >= 530"); + return T{}; +#endif +} + +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, nv_bfloat16> +max(T x, T y) +{ +#if (__CUDA_ARCH__ >= 800) + return ::__hmax(x, y); +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "nv_bfloat16 is only supported on __CUDA_ARCH__ >= 800"); + return T{}; +#endif +} +#endif + /** Many-argument overload to avoid verbose nested calls or use with variadic arguments */ template RAFT_INLINE_FUNCTION auto max(const T1& x, const T2& y, Args&&... args) @@ -221,6 +380,34 @@ constexpr RAFT_INLINE_FUNCTION auto max(const T& x) { return x; } + +#if _RAFT_HAS_CUDA +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, __half> max(T x) +{ +#if (__CUDA_ARCH__ >= 530) + return x; +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "__half is only supported on __CUDA_ARCH__ >= 530"); + return T{}; +#endif +} + +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, nv_bfloat16> +max(T x) +{ +#if (__CUDA_ARCH__ >= 800) + return x; +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "nv_bfloat16 is only supported on __CUDA_ARCH__ >= 800"); + return T{}; +#endif +} +#endif + /** @} */ /** @@ -236,7 +423,13 @@ constexpr RAFT_INLINE_FUNCTION auto max(const T& x) * same (and that the less-than operator be defined). * @{ */ -template +template < + typename T1, + typename T2, + std::enable_if_t && !std::is_same_v) || + (!std::is_same_v && !std::is_same_v)))), + int> = 0> RAFT_INLINE_FUNCTION auto min(const T1& x, const T2& y) { #ifdef __CUDA_ARCH__ @@ -268,6 +461,34 @@ RAFT_INLINE_FUNCTION auto min(const T1& x, const T2& y) #endif } +#if _RAFT_HAS_CUDA +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, __half> min(T x, + T y) +{ +#if (__CUDA_ARCH__ >= 530) + return ::__hmin(x, y); +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "__half is only supported on __CUDA_ARCH__ >= 530"); + return T{}; +#endif +} + +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, nv_bfloat16> +min(T x, T y) +{ +#if (__CUDA_ARCH__ >= 800) + return ::__hmin(x, y); +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "nv_bfloat16 is only supported on __CUDA_ARCH__ >= 800"); + return T{}; +#endif +} +#endif + /** Many-argument overload to avoid verbose nested calls or use with variadic arguments */ template RAFT_INLINE_FUNCTION auto min(const T1& x, const T2& y, Args&&... args) @@ -281,6 +502,34 @@ constexpr RAFT_INLINE_FUNCTION auto min(const T& x) { return x; } + +#if _RAFT_HAS_CUDA +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, __half> min( + T x) +{ +#if (__CUDA_ARCH__ >= 530) + return x; +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "__half is only supported on __CUDA_ARCH__ >= 530"); + return T{}; +#endif +} + +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, nv_bfloat16> +min(T x) +{ +#if (__CUDA_ARCH__ >= 800) + return x; +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "nv_bfloat16 is only supported on __CUDA_ARCH__ >= 800"); + return T{}; +#endif +} +#endif /** @} */ /** @@ -299,7 +548,10 @@ RAFT_INLINE_FUNCTION auto pow(T1 x, T2 y) } /** Square root */ -template +template && + (!std::is_same_v)))), + int> = 0> RAFT_INLINE_FUNCTION auto sqrt(T x) { #ifdef __CUDA_ARCH__ @@ -308,6 +560,33 @@ RAFT_INLINE_FUNCTION auto sqrt(T x) return std::sqrt(x); #endif } + +#if _RAFT_HAS_CUDA +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, __half> sqrt(T x) +{ +#if (__CUDA_ARCH__ >= 530) + return ::hsqrt(x); +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "__half is only supported on __CUDA_ARCH__ >= 530"); + return T{}; +#endif +} + +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, nv_bfloat16> +sqrt(T x) +{ +#if (__CUDA_ARCH__ >= 800) + return ::hsqrt(x); +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "nv_bfloat16 is only supported on __CUDA_ARCH__ >= 800"); + return T{}; +#endif +} +#endif /** @} */ /** Sign */ diff --git a/cpp/test/core/math_device.cu b/cpp/test/core/math_device.cu index ff4b343d9e..15c7b2b33a 100644 --- a/cpp/test/core/math_device.cu +++ b/cpp/test/core/math_device.cu @@ -21,6 +21,11 @@ #include #include +#if _RAFT_HAS_CUDA +#include +#include +#endif + template __global__ void math_eval_kernel(OutT* out, OpT op, Args... args) { @@ -118,8 +123,32 @@ struct cos_test_op { } }; +struct cos_test_op_device { + template + constexpr RAFT_DEVICE_INLINE_FUNCTION auto operator()(const Type& in) const + { +#if (__CUDA_ARCH__ < 530) + if constexpr (std::is_same_v) { + return __float2half(raft::cos(__half2float(in))); + } +#elif (__CUDA_ARCH__ < 800) + if constexpr (std::is_same_v) { + return __float2bfloat16(raft::cos(__bfloat162float(in))); + } else // else is there to make sure raft::cos(in) is not compiled with __half / nv_bfloat16 +#endif + return raft::cos(in); + } +}; + TEST(MathDevice, Cos) { + ASSERT_TRUE(raft::match(std::cos(12.34f), + __half2float(math_eval(cos_test_op_device{}, __float2half(12.34f))), + raft::CompareApprox(0.001f))); + ASSERT_TRUE( + raft::match(std::cos(12.34f), + __bfloat162float(math_eval(cos_test_op_device{}, __float2bfloat16(12.34f))), + raft::CompareApprox(0.01f))); ASSERT_TRUE(raft::match( std::cos(12.34f), math_eval(cos_test_op{}, 12.34f), raft::CompareApprox(0.0001f))); ASSERT_TRUE(raft::match( @@ -134,14 +163,54 @@ struct exp_test_op { } }; +struct exp_test_op_device { + template + constexpr RAFT_DEVICE_INLINE_FUNCTION auto operator()(const Type& in) const + { +#if (__CUDA_ARCH__ < 530) + if constexpr (std::is_same_v) { + return __float2half(raft::exp(__half2float(in))); + } +#elif (__CUDA_ARCH__ < 800) + if constexpr (std::is_same_v) { + return __float2bfloat16(raft::exp(__bfloat162float(in))); + } else // else is there to make sure raft::exp(in) is not compiled with __half / nv_bfloat16 +#endif + return raft::exp(in); + } +}; + TEST(MathDevice, Exp) { + ASSERT_TRUE(raft::match(std::exp(3.4f), + __half2float(math_eval(exp_test_op_device{}, __float2half(3.4f))), + raft::CompareApprox(0.001f))); + ASSERT_TRUE(raft::match(std::exp(3.4f), + __bfloat162float(math_eval(exp_test_op_device{}, __float2bfloat16(3.4f))), + raft::CompareApprox(0.01f))); ASSERT_TRUE(raft::match( - std::exp(12.34f), math_eval(exp_test_op{}, 12.34f), raft::CompareApprox(0.0001f))); + std::exp(3.4f), math_eval(exp_test_op{}, 3.4f), raft::CompareApprox(0.0001f))); ASSERT_TRUE(raft::match( - std::exp(12.34), math_eval(exp_test_op{}, 12.34), raft::CompareApprox(0.000001))); + std::exp(3.4), math_eval(exp_test_op{}, 3.4), raft::CompareApprox(0.000001))); } +struct log_test_op_device { + template + constexpr RAFT_DEVICE_INLINE_FUNCTION auto operator()(const Type& in) const + { +#if (__CUDA_ARCH__ < 530) + if constexpr (std::is_same_v) { + return __float2half(raft::log(__half2float(in))); + } +#elif (__CUDA_ARCH__ < 800) + if constexpr (std::is_same_v) { + return __float2bfloat16(raft::log(__bfloat162float(in))); + } else // else is there to make sure raft::log(in) is not compiled with __half / nv_bfloat16 +#endif + return raft::log(in); + } +}; + struct log_test_op { template constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in) const @@ -152,6 +221,13 @@ struct log_test_op { TEST(MathDevice, Log) { + ASSERT_TRUE(raft::match(std::log(12.34f), + __half2float(math_eval(log_test_op_device{}, __float2half(12.34f))), + raft::CompareApprox(0.001f))); + ASSERT_TRUE( + raft::match(std::log(12.34f), + __bfloat162float(math_eval(log_test_op_device{}, __float2bfloat16(12.34f))), + raft::CompareApprox(0.01f))); ASSERT_TRUE(raft::match( std::log(12.34f), math_eval(log_test_op{}, 12.34f), raft::CompareApprox(0.0001f))); ASSERT_TRUE(raft::match( @@ -277,6 +353,23 @@ TEST(MathDevice, Sgn) ASSERT_TRUE(raft::match(1, math_eval(sgn_test_op{}, 12.34f), raft::Compare())); } +struct sin_test_op_device { + template + constexpr RAFT_DEVICE_INLINE_FUNCTION auto operator()(const Type& in) const + { +#if (__CUDA_ARCH__ < 530) + if constexpr (std::is_same_v) { + return __float2half(raft::sin(__half2float(in))); + } +#elif (__CUDA_ARCH__ < 800) + if constexpr (std::is_same_v) { + return __float2bfloat16(raft::sin(__bfloat162float(in))); + } else // else is there to make sure raft::sin(in) is not compiled with __half / nv_bfloat16 +#endif + return raft::sin(in); + } +}; + struct sin_test_op { template constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in) const @@ -287,6 +380,13 @@ struct sin_test_op { TEST(MathDevice, Sin) { + ASSERT_TRUE(raft::match(std::sin(12.34f), + __half2float(math_eval(sin_test_op_device{}, __float2half(12.34f))), + raft::CompareApprox(0.01f))); + ASSERT_TRUE( + raft::match(std::sin(12.34f), + __bfloat162float(math_eval(sin_test_op_device{}, __float2bfloat16(12.34f))), + raft::CompareApprox(0.1f))); ASSERT_TRUE(raft::match( std::sin(12.34f), math_eval(sin_test_op{}, 12.34f), raft::CompareApprox(0.0001f))); ASSERT_TRUE(raft::match( @@ -319,6 +419,23 @@ TEST(MathDevice, SinCos) ASSERT_TRUE(raft::match(std::cos(12.34), cd.value(stream), raft::CompareApprox(0.0001f))); } +struct sqrt_test_op_device { + template + constexpr RAFT_DEVICE_INLINE_FUNCTION auto operator()(const Type& in) const + { +#if (__CUDA_ARCH__ < 530) + if constexpr (std::is_same_v) { + return __float2half(raft::sqrt(__half2float(in))); + } +#elif (__CUDA_ARCH__ < 800) + if constexpr (std::is_same_v) { + return __float2bfloat16(raft::sqrt(__bfloat162float(in))); + } else // else is there to make sure raft::sqrt(in) is not compiled with __half / nv_bfloat16 +#endif + return raft::sqrt(in); + } +}; + struct sqrt_test_op { template constexpr RAFT_INLINE_FUNCTION auto operator()(const Type& in) const @@ -329,6 +446,13 @@ struct sqrt_test_op { TEST(MathDevice, Sqrt) { + ASSERT_TRUE(raft::match(std::sqrt(12.34f), + __half2float(math_eval(sqrt_test_op_device{}, __float2half(12.34f))), + raft::CompareApprox(0.001f))); + ASSERT_TRUE( + raft::match(std::sqrt(12.34f), + __bfloat162float(math_eval(sqrt_test_op_device{}, __float2bfloat16(12.34f))), + raft::CompareApprox(0.01f))); ASSERT_TRUE(raft::match( std::sqrt(12.34f), math_eval(sqrt_test_op{}, 12.34f), raft::CompareApprox(0.0001f))); ASSERT_TRUE(raft::match(