@@ -37,25 +37,25 @@ template <> struct group_scope<::cl::sycl::intel::sub_group> {
3737// intrinsics, and should use the fewest broadcasts possible
3838// - Loop over 64-bit chunks until remaining bytes < 64-bit
3939// - At most one 32-bit, 16-bit and 8-bit chunk left over
// Walks the bytes of an object of type T in chunks and invokes
// ApplyToBytes(Offset, Size) once per chunk, where Offset is the chunk's byte
// offset inside T and Size is the chunk width in bytes (8, 4, 2 or 1).
//
// All whole 64-bit chunks are visited first; the remaining sizeof(T) % 8 bytes
// are then covered by at most one 32-bit, one 16-bit and one 8-bit chunk, so
// the chunks tile [0, sizeof(T)) exactly once with no overlap.
//
// T            - type whose size determines the chunking (no value is needed).
// ApplyToBytes - callable taking (size_t Offset, size_t Size).
template <typename T, typename Functor>
void GenericCall(const Functor &ApplyToBytes) {
  if (sizeof(T) >= sizeof(uint64_t)) {
    // Only visit 64-bit chunks that fit entirely inside T. Bounding the loop
    // by `Offset < sizeof(T)` would issue one extra 64-bit chunk that runs
    // past the end of the object whenever sizeof(T) is not a multiple of 8.
#pragma unroll
    for (size_t Offset = 0; Offset + sizeof(uint64_t) <= sizeof(T);
         Offset += sizeof(uint64_t)) {
      ApplyToBytes(Offset, sizeof(uint64_t));
    }
  }
  // Tail handling: each Offset below is sizeof(T) rounded down to the next
  // larger chunk width, i.e. the first byte not yet covered by wider chunks.
  if (sizeof(T) % sizeof(uint64_t) >= sizeof(uint32_t)) {
    size_t Offset = sizeof(T) / sizeof(uint64_t) * sizeof(uint64_t);
    ApplyToBytes(Offset, sizeof(uint32_t));
  }
  if (sizeof(T) % sizeof(uint32_t) >= sizeof(uint16_t)) {
    size_t Offset = sizeof(T) / sizeof(uint32_t) * sizeof(uint32_t);
    ApplyToBytes(Offset, sizeof(uint16_t));
  }
  if (sizeof(T) % sizeof(uint16_t) >= sizeof(uint8_t)) {
    size_t Offset = sizeof(T) / sizeof(uint16_t) * sizeof(uint16_t);
    ApplyToBytes(Offset, sizeof(uint8_t));
  }
}
6161
@@ -143,7 +143,7 @@ EnableIfGenericBroadcast<T, IdT> GroupBroadcast(T x, IdT local_id) {
143143 BroadcastResult = GroupBroadcast<Group>(BroadcastX, local_id);
144144 detail::memcpy (ResultBytes + Offset, &BroadcastResult, Size);
145145 };
146- GenericShuffle <T>(BroadcastBytes);
146+ GenericCall <T>(BroadcastBytes);
147147 return Result;
148148}
149149
@@ -196,7 +196,7 @@ EnableIfGenericBroadcast<T> GroupBroadcast(T x, id<Dimensions> local_id) {
196196 BroadcastResult = GroupBroadcast<Group>(BroadcastX, local_id);
197197 detail::memcpy (ResultBytes + Offset, &BroadcastResult, Size);
198198 };
199- GenericShuffle <T>(BroadcastBytes);
199+ GenericCall <T>(BroadcastBytes);
200200 return Result;
201201}
202202
@@ -527,7 +527,7 @@ EnableIfGenericShuffle<T> SubgroupShuffle(T x, id<1> local_id) {
527527 ShuffleResult = SubgroupShuffle (ShuffleX, local_id);
528528 detail::memcpy (ResultBytes + Offset, &ShuffleResult, Size);
529529 };
530- GenericShuffle <T>(ShuffleBytes);
530+ GenericCall <T>(ShuffleBytes);
531531 return Result;
532532}
533533
@@ -542,7 +542,7 @@ EnableIfGenericShuffle<T> SubgroupShuffleXor(T x, id<1> local_id) {
542542 ShuffleResult = SubgroupShuffleXor (ShuffleX, local_id);
543543 detail::memcpy (ResultBytes + Offset, &ShuffleResult, Size);
544544 };
545- GenericShuffle <T>(ShuffleBytes);
545+ GenericCall <T>(ShuffleBytes);
546546 return Result;
547547}
548548
@@ -559,7 +559,7 @@ EnableIfGenericShuffle<T> SubgroupShuffleDown(T x, T y, id<1> local_id) {
559559 ShuffleResult = SubgroupShuffleDown (ShuffleX, ShuffleY, local_id);
560560 detail::memcpy (ResultBytes + Offset, &ShuffleResult, Size);
561561 };
562- GenericShuffle <T>(ShuffleBytes);
562+ GenericCall <T>(ShuffleBytes);
563563 return Result;
564564}
565565
@@ -576,7 +576,7 @@ EnableIfGenericShuffle<T> SubgroupShuffleUp(T x, T y, id<1> local_id) {
576576 ShuffleResult = SubgroupShuffleUp (ShuffleX, ShuffleY, local_id);
577577 detail::memcpy (ResultBytes + Offset, &ShuffleResult, Size);
578578 };
579- GenericShuffle <T>(ShuffleBytes);
579+ GenericCall <T>(ShuffleBytes);
580580 return Result;
581581}
582582
0 commit comments