Skip to content

Commit 38d0536

Browse files
authored
[SYCL][libdevice] Fix rounding issue in __imf_vavgs (intel#8777)
__imf_vavgs2/4 correspond to the CUDA math functions __vavgs2/4. The CUDA __vavgs2/4 functions round to nearest, with ties away from zero, so __imf_vavgs2/4 must apply the same rounding mode. Signed-off-by: jinge90 <ge.jin@intel.com>
1 parent 25211c4 commit 38d0536

File tree

2 files changed

+8
-1
lines changed

2 files changed

+8
-1
lines changed

libdevice/imf_utils/simd_emulate.cpp

+7-1
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,13 @@ template <typename Tp> class __avgs_op {
310310

311311
public:
312312
UTp operator()(const Tp &x, const Tp &y) {
313-
return static_cast<UTp>(__srhadd(x, y));
313+
int32_t z = static_cast<int32_t>(x) + static_cast<int32_t>(y);
314+
if ((z & 1) == 0)
315+
return static_cast<UTp>(z / 2);
316+
else if (z > 0)
317+
return static_cast<UTp>(z / 2 + 1);
318+
else
319+
return static_cast<UTp>(z / 2 - 1);
314320
}
315321
};
316322

sycl/test-e2e/DeviceLib/imf_simd_emulate_test.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -1837,4 +1837,5 @@ int main(int, char **) {
18371837
run_veqne_2_4_test(device_queue);
18381838
run_vgelt_2_4_test(device_queue);
18391839
run_vgtle_2_4_test(device_queue);
1840+
run_vavgs_2_4_test(device_queue);
18401841
}

0 commit comments

Comments
 (0)