@@ -81,6 +81,92 @@ SYCL_EXTERNAL void __esimd_flat_write4(
8181 sycl::INTEL::gpu::vector_type_t <Ty, N * NumChannels (Mask)> vals,
8282 sycl::INTEL::gpu::vector_type_t<uint16_t, N> pred = 1);
8383
84+ // Low-level surface-based gather. Collects elements located at given offsets in
85+ // a surface and returns them as a single \ref simd object. Element can be
86+ // 1, 2 or 4-byte value, but is always returned as a 4-byte value within the
87+ // resulting simd object, with upper 2 or 3 bytes undefined.
88+ // Template (compile-time constant) parameters:
89+ // @tparam Ty - element type; can only be a 4-byte integer or \c float,
90+ // @tparam N - the number of elements
91+ // @tparam SurfIndAliasTy - "surface index alias" type - internal type in the
92+ // accessor used to denote the surface
93+ // @tparam TySizeLog2 - Log2 of the number of bytes read per element:
94+ // 0 - 1 byte, 1 - 2 bytes, 2 - 4 bytes
95+ // @tparam L1H - L1 cache hint
96+ // @tparam L3H - L3 cache hint
97+ //
98+ // Formal parameters:
99+ // @param scale - the scale; must be 0
100+ // @param surf_ind - the surface index, taken from the SYCL memory object
101+ // @param global_offset - offset added to each individual element's offset to
102+ // compute actual memory access offset for that element
103+ // @param elem_offsets - per-element offsets
104+ //
105+ template <typename Ty, int N, typename SurfIndAliasTy, int TySizeLog2,
106+ sycl::INTEL::gpu::CacheHint L1H = sycl::INTEL::gpu::CacheHint::None,
107+ sycl::INTEL::gpu::CacheHint L3H = sycl::INTEL::gpu::CacheHint::None>
108+ SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t <Ty, N>
109+ __esimd_surf_read (int16_t scale, SurfIndAliasTy surf_ind,
110+ uint32_t global_offset,
111+ sycl::INTEL::gpu::vector_type_t <uint32_t , N> elem_offsets)
112+ #ifdef __SYCL_DEVICE_ONLY__
113+ ;
114+ #else
115+ {
116+ static_assert (N == 1 || N == 8 || N == 16 );
117+ static_assert (TySizeLog2 <= 2 );
118+ static_assert (std::is_integral<Ty>::value || TySizeLog2 == 2 );
119+ throw cl::sycl::feature_not_supported ();
120+ }
121+ #endif // __SYCL_DEVICE_ONLY__
122+
123+ // Low-level surface-based scatter. Writes elements of a \ref simd object into a
124+ // surface at given offsets. Element can be a 1, 2 or 4-byte value, but it is
125+ // always represented as a 4-byte value within the input simd object,
126+ // unused (not written) upper bytes are ignored.
127+ // Template (compile-time constant) parameters:
128+ // @tparam Ty - element type; can only be a 4-byte integer or \c float,
129+ // @tparam N - the number of elements to write
130+ // @tparam SurfIndAliasTy - "surface index alias" type - internal type in the
131+ // accessor used to denote the surface
132+ // @tparam TySizeLog2 - Log2 of the number of bytes written per element:
133+ // 0 - 1 byte, 1 - 2 bytes, 2 - 4 bytes
134+ // @tparam L1H - L1 cache hint
135+ // @tparam L3H - L3 cache hint
136+ //
137+ // Formal parameters:
138+ // @param pred - per-element predicates; elements with zero corresponding
139+ // predicates are not written
140+ // @param scale - the scale; must be 0
141+ // @param surf_ind - the surface index, taken from the SYCL memory object
142+ // @param global_offset - offset added to each individual element's offset to
143+ // compute actual memory access offset for that element
144+ // @param elem_offsets - per-element offsets
145+ // @param vals - values to write
146+ //
147+ template <typename Ty, int N, typename SurfIndAliasTy, int TySizeLog2,
148+ sycl::INTEL::gpu::CacheHint L1H = sycl::INTEL::gpu::CacheHint::None,
149+ sycl::INTEL::gpu::CacheHint L3H = sycl::INTEL::gpu::CacheHint::None>
150+ SYCL_EXTERNAL void
151+ __esimd_surf_write (sycl::INTEL::gpu::vector_type_t <uint16_t , N> pred,
152+ int16_t scale, SurfIndAliasTy surf_ind,
153+ uint32_t global_offset,
154+ sycl::INTEL::gpu::vector_type_t <uint32_t , N> elem_offsets,
155+ sycl::INTEL::gpu::vector_type_t <Ty, N> vals)
156+ #ifdef __SYCL_DEVICE_ONLY__
157+ ;
158+ #else
159+ {
160+ static_assert (N == 1 || N == 8 || N == 16 );
161+ static_assert (TySizeLog2 <= 2 );
162+ static_assert (std::is_integral<Ty>::value || TySizeLog2 == 2 );
163+ throw cl::sycl::feature_not_supported ();
164+ }
165+ #endif // __SYCL_DEVICE_ONLY__
166+
167+ // TODO bring the parameter order of __esimd* intrinsics in accordance with the
168+ // correponsing BE intrinsicics parameter order.
169+
84170// flat_atomic: flat-address atomic
85171template <sycl::INTEL::gpu::EsimdAtomicOpType Op, typename Ty, int N,
86172 sycl::INTEL::gpu::CacheHint L1H = sycl::INTEL::gpu::CacheHint::None,
0 commit comments