@@ -125,6 +125,18 @@ auto end(TensorView<T, kDim>& v) { // NOLINT
125125 return begin (v) + v.Size ();
126126}
127127
/**
 * @brief Tag type used to work around the one definition rule (ODR).
 *
 * The compilation mode (CUDA / SYCL / plain host) is encoded in the template
 * arguments, so a function taking a defaulted `SysTag` parameter gets a
 * distinct signature in each mode.  NOTE(review): presumably this header is
 * included from translation units built by different compilers — confirm
 * against the build setup.
 */
template <bool kWithCuda, bool kWithSycl>
struct SysTagImpl {};

#if defined(__CUDACC__)
// Compiled by the CUDA compiler (nvcc / clang-cuda).
using SysTag = SysTagImpl<true, false>;
#elif defined(SYCL_LANGUAGE_VERSION)
// Compiled by a SYCL-aware compiler.
using SysTag = SysTagImpl<false, true>;
#else
// Plain host compilation.
using SysTag = SysTagImpl<false, false>;
#endif
/**
129141 * @brief Elementwise kernel without a return type.
130142 *
@@ -136,8 +148,8 @@ auto end(TensorView<T, kDim>& v) { // NOLINT
136148 * @param t Input array.
137149 * @param fn Transformation function.
138150 */
139- template <typename T, std::int32_t D, typename Fn, bool CompiledWithCuda = WITH_CUDA() >
140- void ElementWiseKernel (Context const * ctx, TensorView<T, D> t, Fn&& fn) {
151+ template <typename T, std::int32_t D, typename Fn>
152+ void ElementWiseKernel (Context const * ctx, TensorView<T, D> t, Fn&& fn, SysTag = SysTag{} ) {
141153 ctx->DispatchDevice ([&] { cpu_impl::ElementWiseKernel (t, ctx->Threads (), std::forward<Fn>(fn)); },
142154 [&] {
143155#if defined(__CUDACC__)
@@ -167,8 +179,8 @@ void ElementWiseKernel(Context const* ctx, TensorView<T, D> t, Fn&& fn) {
167179 * @param t Input array.
168180 * @param fn Transformation function, must return type T.
169181 */
170- template <typename T, std::int32_t D, typename Fn, bool CompiledWithCuda = WITH_CUDA() >
171- void TransformIdxKernel (Context const * ctx, TensorView<T, D> t, Fn&& fn) {
182+ template <typename T, std::int32_t D, typename Fn>
183+ void TransformIdxKernel (Context const * ctx, TensorView<T, D> t, Fn&& fn, SysTag = SysTag{} ) {
172184 ctx->DispatchDevice ([&] { cpu_impl::TransformIdxKernel (t, ctx->Threads (), fn); },
173185 [&] {
174186#if defined(__CUDACC__)
@@ -192,8 +204,8 @@ void TransformIdxKernel(Context const* ctx, TensorView<T, D> t, Fn&& fn) {
192204 * @brief Elementwise transform, with the element itself as input. Rest is the same as @ref
193205 * TransformIdxKernel
194206 */
195- template <typename T, std::int32_t D, typename Fn, bool CompiledWithCuda = WITH_CUDA() >
196- void TransformKernel (Context const * ctx, TensorView<T, D> t, Fn&& fn) {
207+ template <typename T, std::int32_t D, typename Fn>
208+ void TransformKernel (Context const * ctx, TensorView<T, D> t, Fn&& fn, SysTag = SysTag{} ) {
197209 ctx->DispatchDevice ([&] { cpu_impl::TransformKernel (t, ctx->Threads (), fn); },
198210 [&] {
199211#if defined(__CUDACC__)
@@ -214,17 +226,18 @@ void TransformKernel(Context const* ctx, TensorView<T, D> t, Fn&& fn) {
214226}
215227
216228// vector-scalar multiplication
217- inline void VecScaMul (Context const * ctx, linalg::VectorView<float > x, double mul) {
229+ inline void VecScaMul (Context const * ctx, linalg::VectorView<float > x, double mul, SysTag = SysTag{} ) {
218230 CHECK_EQ (x.Device ().ordinal , ctx->Device ().ordinal );
219231 TransformKernel (ctx, x, [=] XGBOOST_DEVICE (float v) { return v * mul; });
220232}
221233
222234// vector-scalar division
223- inline void VecScaDiv (Context const * ctx, linalg::VectorView<float > x, double div) {
235+ inline void VecScaDiv (Context const * ctx, linalg::VectorView<float > x, double div,
236+ SysTag = SysTag{}) {
224237 return VecScaMul (ctx, x, 1.0 / div);
225238}
226239
227- inline void LogE (Context const * ctx, linalg::VectorView<float > x) {
240+ inline void LogE (Context const * ctx, linalg::VectorView<float > x, SysTag = SysTag{} ) {
228241 CHECK_EQ (x.Device ().ordinal , ctx->Device ().ordinal );
229242 TransformKernel (ctx, x, [=] XGBOOST_DEVICE (float v) { return log (v); });
230243}
0 commit comments