WIP: replace pointer-cast type punning in tests with cast intrinsics and unions

DLTcollab · Jun 28, 2024 · 8766f26
1 parent fc79268
commit 8766f26
Showing 1 changed file with 32 additions and 29 deletions.
diff --git a/tests/impl.cpp b/tests/impl.cpp
@@ -431,7 +431,7 @@ template <class T>
 __m128i load_m128i(const T *p)
 {
     __m128 a = _mm_loadu_ps((const float *) p);
-    __m128i ia = *(const __m128i *) &a;
+    __m128i ia = _mm_castps_si128(a);
     return ia;
 }
 
@@ -850,7 +850,7 @@ result_t test_mm_and_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
     r[2] = ia[2] & ib[2];
     r[3] = ia[3] & ib[3];
     __m128i ret = do_mm_set_epi32(r[3], r[2], r[1], r[0]);
-    result_t res = VALIDATE_INT32_M128(*(const __m128i *) &c, r);
+    result_t res = VALIDATE_INT32_M128(_mm_castps_si128(c), r);
     if (res) {
         res = VALIDATE_INT32_M128(ret, r);
     }
@@ -879,7 +879,7 @@ result_t test_mm_andnot_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
     r[3] = ~ia[3] & ib[3];
     __m128i ret = do_mm_set_epi32(r[3], r[2], r[1], r[0]);
     result_t res = TEST_FAIL;
-    res = VALIDATE_INT32_M128(*(const __m128i *) &c, r);
+    res = VALIDATE_INT32_M128(_mm_castps_si128(c), r);
     if (res) {
         res = VALIDATE_INT32_M128(ret, r);
     }
@@ -938,7 +938,7 @@ result_t test_mm_cmpeq_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
     result[3] = _a[3] == _b[3] ? -1 : 0;
 
     __m128 ret = _mm_cmpeq_ps(a, b);
-    __m128i iret = *(const __m128i *) &ret;
+    __m128i iret = _mm_castps_si128(ret);
     return VALIDATE_INT32_M128(iret, result);
 }
 
@@ -973,7 +973,7 @@ result_t test_mm_cmpge_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
     result[3] = _a[3] >= _b[3] ? -1 : 0;
 
     __m128 ret = _mm_cmpge_ps(a, b);
-    __m128i iret = *(const __m128i *) &ret;
+    __m128i iret = _mm_castps_si128(ret);
     return VALIDATE_INT32_M128(iret, result);
 }
 
@@ -1008,7 +1008,7 @@ result_t test_mm_cmpgt_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
     result[3] = _a[3] > _b[3] ? -1 : 0;
 
     __m128 ret = _mm_cmpgt_ps(a, b);
-    __m128i iret = *(const __m128i *) &ret;
+    __m128i iret = _mm_castps_si128(ret);
     return VALIDATE_INT32_M128(iret, result);
 }
 
@@ -1043,7 +1043,7 @@ result_t test_mm_cmple_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
     result[3] = _a[3] <= _b[3] ? -1 : 0;
 
     __m128 ret = _mm_cmple_ps(a, b);
-    __m128i iret = *(const __m128i *) &ret;
+    __m128i iret = _mm_castps_si128(ret);
     return VALIDATE_INT32_M128(iret, result);
 }
 
@@ -1078,7 +1078,7 @@ result_t test_mm_cmplt_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
     result[3] = _a[3] < _b[3] ? -1 : 0;
 
     __m128 ret = _mm_cmplt_ps(a, b);
-    __m128i iret = *(const __m128i *) &ret;
+    __m128i iret = _mm_castps_si128(ret);
     return VALIDATE_INT32_M128(iret, result);
 }
 
@@ -1114,7 +1114,7 @@ result_t test_mm_cmpneq_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
     result[3] = _a[3] != _b[3] ? -1 : 0;
 
     __m128 ret = _mm_cmpneq_ps(a, b);
-    __m128i iret = *(const __m128i *) &ret;
+    __m128i iret = _mm_castps_si128(ret);
     return VALIDATE_INT32_M128(iret, result);
 }
 
@@ -3256,17 +3256,17 @@ result_t test_mm_xor_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
     const int32_t *_a = (const int32_t *) impl.mTestFloatPointer1;
     const int32_t *_b = (const int32_t *) impl.mTestFloatPointer2;
 
-    int32_t d0 = _a[0] ^ _b[0];
-    int32_t d1 = _a[1] ^ _b[1];
-    int32_t d2 = _a[2] ^ _b[2];
-    int32_t d3 = _a[3] ^ _b[3];
+    bit32_union_t d0, d1, d2, d3;
+    d0.i32 = _a[0] ^ _b[0];
+    d1.i32 = _a[1] ^ _b[1];
+    d2.i32 = _a[2] ^ _b[2];
+    d3.i32 = _a[3] ^ _b[3];
 
     __m128 a = load_m128(_a);
     __m128 b = load_m128(_b);
     __m128 c = _mm_xor_ps(a, b);
 
-    return validateFloat(c, *((float *) &d0), *((float *) &d1),
-                         *((float *) &d2), *((float *) &d3));
+    return validateFloat(c, d0.f32, d1.f32, d2.f32, d3.f32);
 }
 
 /* SSE2 */
@@ -3552,15 +3552,16 @@ result_t test_mm_and_pd(const SSE2NEONTestImpl &impl, uint32_t iter)
 {
     const int64_t *_a = (const int64_t *) impl.mTestFloatPointer1;
     const int64_t *_b = (const int64_t *) impl.mTestFloatPointer2;
-
-    int64_t d0 = _a[0] & _b[0];
-    int64_t d1 = _a[1] & _b[1];
+
+    bit64_union_t d0, d1;
+    d0.i64 = _a[0] & _b[0];
+    d1.i64 = _a[1] & _b[1];
 
     __m128d a = load_m128d(_a);
     __m128d b = load_m128d(_b);
     __m128d c = _mm_and_pd(a, b);
 
-    return validateDouble(c, *((double *) &d0), *((double *) &d1));
+    return validateDouble(c, d0.f64, d1.f64);
 }
 
 result_t test_mm_and_si128(const SSE2NEONTestImpl &impl, uint32_t iter)
@@ -3570,7 +3571,7 @@ result_t test_mm_and_si128(const SSE2NEONTestImpl &impl, uint32_t iter)
     __m128i a = load_m128i(_a);
     __m128i b = load_m128i(_b);
     __m128 fc = _mm_and_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b));
-    __m128i c = *(const __m128i *) &fc;
+    __m128i c = _mm_castps_si128(fc);
     // now for the assertion...
     const uint32_t *ia = (const uint32_t *) &a;
     const uint32_t *ib = (const uint32_t *) &b;
@@ -3603,7 +3604,7 @@ result_t test_mm_andnot_pd(const SSE2NEONTestImpl &impl, uint32_t iter)
     const uint64_t *ib = (const uint64_t *) &b;
     uint64_t r0 = ~ia[0] & ib[0];
     uint64_t r1 = ~ia[1] & ib[1];
-    return validateUInt64(*(const __m128i *) &c, r0, r1);
+    return validateUInt64(_mm_castpd_si128(c), r0, r1);
 }
 
 result_t test_mm_andnot_si128(const SSE2NEONTestImpl &impl, uint32_t iter)
@@ -3613,7 +3614,7 @@ result_t test_mm_andnot_si128(const SSE2NEONTestImpl &impl, uint32_t iter)
     __m128i a = load_m128i(_a);
     __m128i b = load_m128i(_b);
     __m128 fc = _mm_andnot_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b));
-    __m128i c = *(const __m128i *) &fc;
+    __m128i c = _mm_castps_si128(fc);
     // now for the assertion...
     const uint32_t *ia = (const uint32_t *) &a;
     const uint32_t *ib = (const uint32_t *) &b;
@@ -5522,14 +5523,15 @@ result_t test_mm_or_pd(const SSE2NEONTestImpl &impl, uint32_t iter)
     const int64_t *_a = (const int64_t *) impl.mTestFloatPointer1;
     const int64_t *_b = (const int64_t *) impl.mTestFloatPointer2;
 
-    int64_t d0 = _a[0] | _b[0];
-    int64_t d1 = _a[1] | _b[1];
+    bit64_union_t d0, d1;
+    d0.i64 = _a[0] | _b[0];
+    d1.i64 = _a[1] | _b[1];
 
     __m128d a = load_m128d(_a);
     __m128d b = load_m128d(_b);
     __m128d c = _mm_or_pd(a, b);
 
-    return validateDouble(c, *((double *) &d0), *((double *) &d1));
+    return validateDouble(c, d0.f64, d1.f64);
 }
 
 result_t test_mm_or_si128(const SSE2NEONTestImpl &impl, uint32_t iter)
@@ -5539,7 +5541,7 @@ result_t test_mm_or_si128(const SSE2NEONTestImpl &impl, uint32_t iter)
     __m128i a = load_m128i(_a);
     __m128i b = load_m128i(_b);
     __m128 fc = _mm_or_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b));
-    __m128i c = *(const __m128i *) &fc;
+    __m128i c = _mm_castps_si128(fc);
     // now for the assertion...
     const uint32_t *ia = (const uint32_t *) &a;
     const uint32_t *ib = (const uint32_t *) &b;
@@ -7110,14 +7112,15 @@ result_t test_mm_xor_pd(const SSE2NEONTestImpl &impl, uint32_t iter)
     const int64_t *_a = (const int64_t *) impl.mTestFloatPointer1;
     const int64_t *_b = (const int64_t *) impl.mTestFloatPointer2;
 
-    int64_t d0 = _a[0] ^ _b[0];
-    int64_t d1 = _a[1] ^ _b[1];
+    bit64_union_t d0, d1;
+    d0.i64 = _a[0] ^ _b[0];
+    d1.i64 = _a[1] ^ _b[1];
 
     __m128d a = load_m128d(_a);
     __m128d b = load_m128d(_b);
     __m128d c = _mm_xor_pd(a, b);
 
-    return validateDouble(c, *((double *) &d0), *((double *) &d1));
+    return validateDouble(c, d0.f64, d1.f64);
 }
 
 result_t test_mm_xor_si128(const SSE2NEONTestImpl &impl, uint32_t iter)