Merge pull request #599 from Cuda-Chen/gemerate-more-random-test-inputs

Align result to SSE when input is 0.0f/-0.0f in _mm_rsqrt_{ps, ss}
DLTcollab · Jun 9, 2023 · a057ce8 · a057ce8
2 parents 6b61652 + 098a786
commit a057ce8
Show file tree

Hide file tree

Showing 4 changed files with 69 additions and 4 deletions.
diff --git a/README.md b/README.md
@@ -36,6 +36,30 @@ their equivalents are built utilizing a number of NEON intrinsics.
 For example, SSE intrinsic `_mm_loadu_si128` has a direct NEON mapping (`vld1q_s32`),
 but SSE intrinsic `_mm_maddubs_epi16` has to be implemented with 13+ NEON instructions.
 
+### Floating-point compatibility
+
+Some conversions require several NEON intrinsics, which may produce inconsistent results
+compared to their SSE counterparts due to differences in the arithmetic rules of IEEE-754.
+
+Taking a possible conversion of `_mm_rsqrt_ps` as example:
+
+```c
+__m128 _mm_rsqrt_ps(__m128 in)
+{
+    float32x4_t out = vrsqrteq_f32(vreinterpretq_f32_m128(in));
+
+    out = vmulq_f32(
+        out, vrsqrtsq_f32(vmulq_f32(vreinterpretq_f32_m128(in), out), out));
+
+    return vreinterpretq_m128_f32(out);
+}
+```
+
+The `_mm_rsqrt_ps` conversion will produce NaN if a source value is `0.0` (first INF for the
+reciprocal square root of `0.0`, then INF * `0.0` using `vmulq_f32`). In contrast,
+the SSE counterpart produces INF if a source value is `0.0`.
+As a result, additional treatments should be applied to ensure consistency between the conversion and its SSE counterpart.
+
 ## Usage
 
 - Put the file `sse2neon.h` in to your source code directory.

diff --git a/sse2neon.h b/sse2neon.h
@@ -2313,8 +2313,24 @@ FORCE_INLINE __m128 _mm_rcp_ss(__m128 a)
 FORCE_INLINE __m128 _mm_rsqrt_ps(__m128 in)
 {
     float32x4_t out = vrsqrteq_f32(vreinterpretq_f32_m128(in));
+
+    // Generate masks for detecting whether input has any 0.0f/-0.0f
+    // (which becomes positive/negative infinity by IEEE-754 arthimetic rules).
+    const uint32x4_t pos_inf = vdupq_n_u32(0x7F800000);
+    const uint32x4_t neg_inf = vdupq_n_u32(0xFF800000);
+    const uint32x4_t has_pos_zero =
+        vceqq_u32(pos_inf, vreinterpretq_u32_f32(out));
+    const uint32x4_t has_neg_zero =
+        vceqq_u32(neg_inf, vreinterpretq_u32_f32(out));
+
     out = vmulq_f32(
         out, vrsqrtsq_f32(vmulq_f32(vreinterpretq_f32_m128(in), out), out));
+
+    // Set output vector element to infinity/negative-infinity if
+    // the corresponding input vector element is 0.0f/-0.0f.
+    out = vbslq_f32(has_pos_zero, (float32x4_t) pos_inf, out);
+    out = vbslq_f32(has_neg_zero, (float32x4_t) neg_inf, out);
+
     return vreinterpretq_m128_f32(out);
 }
 

diff --git a/tests/common.cpp b/tests/common.cpp
@@ -357,19 +357,23 @@ result_t validateFloatError(__m128 a,
     float df2 = fabsf((t[2] - f2) / f2);
     float df3 = fabsf((t[3] - f3) / f3);
 
-    if ((std::isnan(t[0]) && std::isnan(f0)) || (t[0] == 0 && f0 == 0)) {
+    if ((std::isnan(t[0]) && std::isnan(f0)) || (t[0] == 0 && f0 == 0) ||
+        (std::isinf(t[0]) && std::isinf(f0))) {
         df0 = 0;
     }
 
-    if ((std::isnan(t[1]) && std::isnan(f1)) || (t[1] == 0 && f1 == 0)) {
+    if ((std::isnan(t[1]) && std::isnan(f1)) || (t[1] == 0 && f1 == 0) ||
+        (std::isinf(t[1]) && std::isinf(f1))) {
         df1 = 0;
     }
 
-    if ((std::isnan(t[2]) && std::isnan(f2)) || (t[2] == 0 && f2 == 0)) {
+    if ((std::isnan(t[2]) && std::isnan(f2)) || (t[2] == 0 && f2 == 0) ||
+        (std::isinf(t[2]) && std::isinf(f2))) {
         df2 = 0;
     }
 
-    if ((std::isnan(t[3]) && std::isnan(f3)) || (t[3] == 0 && f3 == 0)) {
+    if ((std::isnan(t[3]) && std::isnan(f3)) || (t[3] == 0 && f3 == 0) ||
+        (std::isinf(t[3]) && std::isinf(f3))) {
         df3 = 0;
     }
 

diff --git a/tests/impl.cpp b/tests/impl.cpp
@@ -78,6 +78,27 @@ class SSE2NEONTestImpl : public SSE2NEONTest
                 mTestFloatPointer1[2] = 1.0f / mTestFloatPointer1[2];
                 mTestFloatPointer1[3] = 1.0f / mTestFloatPointer1[3];
             }
+            if (test == it_mm_rcp_ps || test == it_mm_rcp_ss ||
+                test == it_mm_rsqrt_ps || test == it_mm_rsqrt_ss) {
+                if ((rand() & 3) == 0) {
+                    uint32_t r1 = rand() & 3;
+                    uint32_t r2 = rand() & 3;
+                    uint32_t r3 = rand() & 3;
+                    uint32_t r4 = rand() & 3;
+                    uint32_t r5 = rand() & 3;
+                    uint32_t r6 = rand() & 3;
+                    uint32_t r7 = rand() & 3;
+                    uint32_t r8 = rand() & 3;
+                    mTestFloatPointer1[r1] = 0.0f;
+                    mTestFloatPointer1[r2] = 0.0f;
+                    mTestFloatPointer1[r3] = 0.0f;
+                    mTestFloatPointer1[r4] = 0.0f;
+                    mTestFloatPointer1[r5] = -0.0f;
+                    mTestFloatPointer1[r6] = -0.0f;
+                    mTestFloatPointer1[r7] = -0.0f;
+                    mTestFloatPointer1[r8] = -0.0f;
+                }
+            }
             if (test == it_mm_cmpge_ps || test == it_mm_cmpge_ss ||
                 test == it_mm_cmple_ps || test == it_mm_cmple_ss ||
                 test == it_mm_cmpeq_ps || test == it_mm_cmpeq_ss) {