@@ -3768,37 +3768,13 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
37683768 stream->memcpy (ids_host.data (), ids_dev, ggml_nbytes (ids))));
37693769 SYCL_CHECK (CHECK_TRY_ERROR (stream->wait ()));
37703770
3771- const ggml_tensor_extra_gpu *src0_extra =
3772- (const ggml_tensor_extra_gpu *)src0->extra ;
3773- const ggml_tensor_extra_gpu *src1_extra =
3774- (const ggml_tensor_extra_gpu *)src1->extra ;
3775- const ggml_tensor_extra_gpu *dst_extra =
3776- (const ggml_tensor_extra_gpu *)dst->extra ;
3777-
3778- ggml_tensor_extra_gpu src0_row_extra;
3779- ggml_tensor_extra_gpu src1_row_extra;
3780- ggml_tensor_extra_gpu dst_row_extra;
3781-
37823771 ggml_tensor src0_row = *src0;
37833772 ggml_tensor src1_row = *src1;
37843773 ggml_tensor dst_row = *dst;
37853774
3786- src1_row.backend = GGML_BACKEND_TYPE_GPU;
3787- dst_row.backend = GGML_BACKEND_TYPE_GPU;
3788-
3789- src0_row.extra = &src0_row_extra;
3790- src1_row.extra = &src1_row_extra;
3791- dst_row.extra = &dst_row_extra;
3792-
3793- char *src0_original = src1->backend == GGML_BACKEND_TYPE_CPU
3794- ? (char *)src0->data
3795- : (char *)src0_extra->data_device [ctx.device ];
3796- char *src1_original = src1->backend == GGML_BACKEND_TYPE_CPU
3797- ? (char *)src1->data
3798- : (char *)src1_extra->data_device [ctx.device ];
3799- char *dst_original = dst->backend == GGML_BACKEND_TYPE_CPU
3800- ? (char *)dst->data
3801- : (char *)dst_extra->data_device [ctx.device ];
3775+ char *src0_original = (char *)src0->data ;
3776+ char *src1_original = (char *)src1->data ;
3777+ char *dst_original = (char *)dst->data ;
38023778
38033779 src0_row.ne [2 ] = 1 ;
38043780 src0_row.ne [3 ] = 1 ;
@@ -3827,12 +3803,9 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
38273803 const int64_t i1 = id;
38283804 const int64_t i2 = i12;
38293805
3830- src0_row_extra.data_device [ctx.device ] =
3831- src0_original + i02*nb02;
3832- src1_row_extra.data_device [ctx.device ] =
3833- src1_original + + i11*nb11 + i12*nb12;
3834- dst_row_extra.data_device [ctx.device ] =
3835- dst_original + i1*nb1 + i2*nb2;
3806+ src0_row.data = src0_original + i02*nb02;
3807+ src1_row.data = src1_original + + i11*nb11 + i12*nb12;
3808+ dst_row.data = dst_original + i1*nb1 + i2*nb2;
38363809
38373810 ggml_sycl_mul_mat (ctx, &src0_row, &src1_row, &dst_row);
38383811 }
@@ -3841,8 +3814,8 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
38413814 ggml_sycl_pool_alloc<char > src1_contiguous (ctx.pool (), sizeof (float )*ggml_nelements (src1));
38423815 ggml_sycl_pool_alloc<char > dst_contiguous (ctx.pool (), sizeof (float )*ggml_nelements (dst));
38433816
3844- src1_row_extra. data_device [ctx. device ] = src1_contiguous.get ();
3845- dst_row_extra. data_device [ctx. device ] = dst_contiguous.get ();
3817+ src1_row. data = src1_contiguous.get ();
3818+ dst_row. data = dst_contiguous.get ();
38463819
38473820 for (int64_t i02 = 0 ; i02 < n_as; i02++) {
38483821 int64_t num_src1_rows = 0 ;
@@ -3898,7 +3871,7 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
38983871 });
38993872 }
39003873
3901- src0_row_extra. data_device [ctx. device ] = src0_original + i02*nb02;
3874+ src0_row. data = src0_original + i02*nb02;
39023875
39033876 GGML_ASSERT (nb11 == sizeof (float )*ne10);
39043877 GGML_ASSERT (nb1 == sizeof (float )*ne0);
@@ -5221,6 +5194,10 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
52215194 return false ;
52225195 }
52235196 }
5197+ ggml_type src0_type = op->src [0 ]->type ;
5198+ if (src0_type == GGML_TYPE_BF16) {
5199+ return false ;
5200+ }
52245201 return true ;
52255202 } break ;
52265203 case GGML_OP_GET_ROWS:
0 commit comments