From b456e69e8516055a26dc5e0ec8de78039b4c3277 Mon Sep 17 00:00:00 2001 From: Gitty Burstein Date: Wed, 1 Oct 2025 01:17:56 +0300 Subject: [PATCH 1/3] SYCL: update element-wise ops and presets --- ggml/src/ggml-sycl/element_wise.cpp | 37 +++++++++++++++++++++++++++++ ggml/src/ggml-sycl/element_wise.hpp | 2 ++ ggml/src/ggml-sycl/ggml-sycl.cpp | 5 ++++ ggml/src/ggml-sycl/presets.hpp | 1 + 4 files changed, 45 insertions(+) diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp index c2da2fb48ad28..0443bdad055d2 100644 --- a/ggml/src/ggml-sycl/element_wise.cpp +++ b/ggml/src/ggml-sycl/element_wise.cpp @@ -417,6 +417,14 @@ static void acc_f32_sycl(const float *x, const float *y, float *dst, }); } +template +static void arange_kernel(T * dst, const int k, T start, T step, + const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = start + static_cast(i) * step; + } +} + template static void upscale_sycl(const T *x, T *dst, const int nb00, const int nb01, const int nb02, const int nb03, const int ne10, const int ne11, @@ -631,6 +639,30 @@ static inline void dispatch_ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, gg } } +// ב-namespace ggml_sycl_detail: +static inline void ggml_sycl_op_arange(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + GGML_ASSERT(dst->type == GGML_TYPE_F32); + + float start, stop, step; + memcpy(&start, dst->op_params, sizeof(float)); + memcpy(&stop, (float *) dst->op_params + 1, sizeof(float)); + memcpy(&step, (float *) dst->op_params + 2, sizeof(float)); + + dpct::queue_ptr stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + + float * dst_ptr = (float *)dst->data; + const int k = (int)ggml_nelements(dst); // הוספה חשובה! + + const int num_blocks = ceil_div(k, SYCL_ARANGE_BLOCK_SIZE); + stream->parallel_for( + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_ARANGE_BLOCK_SIZE), + sycl::range<1>(SYCL_ARANGE_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + arange_kernel(dst_ptr, k, start, step, item_ct1); + }); +} + } // namespace ggml_sycl_detail @@ -1168,3 +1200,8 @@ void ggml_sycl_geglu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1); ggml_sycl_op_geglu_quick(ctx, dst); } + +void ggml_sycl_arange(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/0); + ggml_sycl_detail::ggml_sycl_op_arange(ctx, dst); +} \ No newline at end of file diff --git a/ggml/src/ggml-sycl/element_wise.hpp b/ggml/src/ggml-sycl/element_wise.hpp index 50749e87d783e..92d7731cda285 100644 --- a/ggml/src/ggml-sycl/element_wise.hpp +++ b/ggml/src/ggml-sycl/element_wise.hpp @@ -83,4 +83,6 @@ void ggml_sycl_swiglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst); void ggml_sycl_geglu_erf(ggml_backend_sycl_context & ctx, ggml_tensor * dst); void ggml_sycl_geglu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst); +void ggml_sycl_arange(ggml_backend_sycl_context & ctx, ggml_tensor * dst); + #endif // GGML_SYCL_ELEMENTWISE_HPP diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index e06ec613fc81f..231b9e0676b09 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -3768,6 +3768,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg case GGML_OP_GATED_LINEAR_ATTN: ggml_sycl_op_gated_linear_attn(ctx, dst); break; + case GGML_OP_ARANGE: + ggml_sycl_arange(ctx, dst); + break; default: return false; } @@ -4416,6 +4419,8 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g case GGML_OP_RWKV_WKV7: case GGML_OP_GATED_LINEAR_ATTN: return true; + case GGML_OP_ARANGE: + return op->type == GGML_TYPE_F32; default: return false; } diff --git a/ggml/src/ggml-sycl/presets.hpp b/ggml/src/ggml-sycl/presets.hpp index af1890727df8f..0814bd79a6504 100644 --- a/ggml/src/ggml-sycl/presets.hpp +++ b/ggml/src/ggml-sycl/presets.hpp @@ -49,6 +49,7 @@ #define SYCL_ARGMAX_BLOCK_SIZE 256 #define SYCL_CONV_TRANPOSE_1D_BLOCK_SIZE 256 #define SYCL_TIMESTEP_EMBEDDING_BLOCK_SIZE 256 +#define SYCL_ARANGE_BLOCK_SIZE 256 // dmmv = dequantize_mul_mat_vec #ifndef GGML_SYCL_DMMV_X From 1c65a240f7d8632230c863c8b1f9d40ceea44869 Mon Sep 17 00:00:00 2001 From: Gitty Burstein Date: Wed, 1 Oct 2025 02:43:22 +0300 Subject: [PATCH 2/3] clean arange --- ggml/src/ggml-sycl/element_wise.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp index 0443bdad055d2..bd1f13240a4ed 100644 --- a/ggml/src/ggml-sycl/element_wise.cpp +++ b/ggml/src/ggml-sycl/element_wise.cpp @@ -418,7 +418,7 @@ static void acc_f32_sycl(const float *x, const float *y, float *dst, } template -static void arange_kernel(T * dst, const int k, T start, T step, +static void arange_kernel(T * dst, const int k, T start, T step, const sycl::nd_item<1> &item_ct1) { SYCL_GLOBAL_ID_LOOP(k, item_ct1) { dst[i] = start + static_cast(i) * step; @@ -639,21 +639,16 @@ static inline void dispatch_ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, gg } } -// ב-namespace ggml_sycl_detail: static inline void ggml_sycl_op_arange(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_ASSERT(dst->type == GGML_TYPE_F32); - float start, stop, step; memcpy(&start, dst->op_params, sizeof(float)); memcpy(&stop, (float *) dst->op_params + 1, sizeof(float)); memcpy(&step, (float *) dst->op_params + 2, sizeof(float)); - dpct::queue_ptr stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - float * dst_ptr = (float *)dst->data; - const int k = (int)ggml_nelements(dst); // הוספה חשובה! - + const int k = (int)ggml_nelements(dst); const int num_blocks = ceil_div(k, SYCL_ARANGE_BLOCK_SIZE); stream->parallel_for( sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_ARANGE_BLOCK_SIZE), @@ -1204,4 +1199,4 @@ void ggml_sycl_geglu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { void ggml_sycl_arange(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/0); ggml_sycl_detail::ggml_sycl_op_arange(ctx, dst); -} \ No newline at end of file +} From 322af87619e2bb3ab10963634d3c1aecb91bda5b Mon Sep 17 00:00:00 2001 From: Gitty Burstein Date: Thu, 16 Oct 2025 12:35:44 +0300 Subject: [PATCH 3/3] Re-trigger CI