[SYCL][libdevice] Fix rounding issue in __imf_vavgs (intel#8777)

jinge90 · web-flow · commit 38d05361f06e · 2023-05-08T09:25:35.000-07:00
__imf_vavgs2/4 correspond to the CUDA math functions __vavgs2/4. CUDA's
__vavgs2/4 round to nearest, with ties away from zero, so we need to apply
the same rounding mode.
Signed-off-by: jinge90 <ge.jin@intel.com>
diff --git a/libdevice/imf_utils/simd_emulate.cpp b/libdevice/imf_utils/simd_emulate.cpp
@@ -310,7 +310,13 @@ template <typename Tp> class __avgs_op {
 
 public:
   UTp operator()(const Tp &x, const Tp &y) {
-    return static_cast<UTp>(__srhadd(x, y));
+    int32_t z = static_cast<int32_t>(x) + static_cast<int32_t>(y);
+    if ((z & 1) == 0)
+      return static_cast<UTp>(z / 2);
+    else if (z > 0)
+      return static_cast<UTp>(z / 2 + 1);
+    else
+      return static_cast<UTp>(z / 2 - 1);
   }
 };
 
diff --git a/sycl/test-e2e/DeviceLib/imf_simd_emulate_test.cpp b/sycl/test-e2e/DeviceLib/imf_simd_emulate_test.cpp
@@ -1837,4 +1837,5 @@ int main(int, char **) {
   run_veqne_2_4_test(device_queue);
   run_vgelt_2_4_test(device_queue);
   run_vgtle_2_4_test(device_queue);
+  run_vavgs_2_4_test(device_queue);
 }

Original file line number	Diff line number	Diff line change
`@@ -1837,4 +1837,5 @@ int main(int, char **) {`
`1837`	`1837`	`run_veqne_2_4_test(device_queue);`
`1838`	`1838`	`run_vgelt_2_4_test(device_queue);`
`1839`	`1839`	`run_vgtle_2_4_test(device_queue);`
	`1840`	`+ run_vavgs_2_4_test(device_queue);`
`1840`	`1841`	`}`