From 890d05aeceb7b1afd209a636fa188f57085bbff9 Mon Sep 17 00:00:00 2001 From: Yang Hau Date: Fri, 28 Jun 2024 19:06:26 +0800 Subject: [PATCH] wip --- tests/impl.cpp | 51 +++++++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/tests/impl.cpp b/tests/impl.cpp index 11e8fa05..6f77b5d9 100644 --- a/tests/impl.cpp +++ b/tests/impl.cpp @@ -409,6 +409,17 @@ __m128i do_mm_set_epi32(int32_t x, int32_t y, int32_t z, int32_t w) return a; } +typedef union bit64_union_t { + double f64; + int64_t i64; + uint64_t u64; +} bit64_union_t; +typedef union bit32_union_t { + float f32; + int32_t i32; + uint32_t u32; +} bit32_union_t; + // This function is not called from "runSingleTest", but for other intrinsic // tests that might need to load __m64 data. template @@ -419,10 +430,10 @@ __m64 load_m64(const T *p) // This function is not called from "runSingleTest", but for other intrinsic // tests that might need to call "_mm_load_ps". -template -__m128 load_m128(const T *p) +// template +__m128 load_m128(const float *p) { - return _mm_loadu_ps((const float *) p); + return _mm_loadu_ps(p); } // This function is not called from "runSingleTest", but for other intrinsic @@ -430,17 +441,17 @@ __m128 load_m128(const T *p) template __m128i load_m128i(const T *p) { - __m128 a = _mm_loadu_ps((const float *) p); - __m128i ia = *(const __m128i *) &a; + // Unaligned load of all 128 bits directly as an integer vector. + __m128i ia = _mm_loadu_si128((const __m128i *) p); return ia; } // This function is not called from "runSingleTest", but for other intrinsic // tests that might need to call "_mm_load_pd". 
-template -__m128d load_m128d(const T *p) +// template +__m128d load_m128d(const double *p) { - return _mm_loadu_pd((const double *) p); + return _mm_loadu_pd(p); } // This function is not called from "runSingleTest", but for other intrinsic @@ -3261,8 +3272,8 @@ result_t test_mm_xor_ps(const SSE2NEONTestImpl &impl, uint32_t iter) int32_t d2 = _a[2] ^ _b[2]; int32_t d3 = _a[3] ^ _b[3]; - __m128 a = load_m128(_a); - __m128 b = load_m128(_b); + __m128 a = load_m128((const float *)_a); + __m128 b = load_m128((const float *)_b); __m128 c = _mm_xor_ps(a, b); return validateFloat(c, *((float *) &d0), *((float *) &d1), @@ -3556,8 +3567,8 @@ result_t test_mm_and_pd(const SSE2NEONTestImpl &impl, uint32_t iter) int64_t d0 = _a[0] & _b[0]; int64_t d1 = _a[1] & _b[1]; - __m128d a = load_m128d(_a); - __m128d b = load_m128d(_b); + __m128d a = load_m128d((const double *)_a); + __m128d b = load_m128d((const double *)_b); __m128d c = _mm_and_pd(a, b); return validateDouble(c, *((double *) &d0), *((double *) &d1)); @@ -3690,7 +3701,7 @@ result_t test_mm_bsrli_si128(const SSE2NEONTestImpl &impl, uint32_t iter) result_t test_mm_castpd_ps(const SSE2NEONTestImpl &impl, uint32_t iter) { const float *_a = impl.mTestFloatPointer1; - const __m128d a = load_m128d(_a); + const __m128d a = load_m128d((const double *)_a); const __m128 _c = load_m128(_a); __m128 r = _mm_castpd_ps(a); @@ -3701,7 +3712,7 @@ result_t test_mm_castpd_ps(const SSE2NEONTestImpl &impl, uint32_t iter) result_t test_mm_castpd_si128(const SSE2NEONTestImpl &impl, uint32_t iter) { const float *_a = impl.mTestFloatPointer1; - const __m128d a = load_m128d(_a); + const __m128d a = load_m128d((const double *)_a); const __m128i *_c = (const __m128i *) _a; __m128i r = _mm_castpd_si128(a); @@ -5508,8 +5519,8 @@ result_t test_mm_or_pd(const SSE2NEONTestImpl &impl, uint32_t iter) int64_t d0 = _a[0] | _b[0]; int64_t d1 = _a[1] | _b[1]; - __m128d a = load_m128d(_a); - __m128d b = load_m128d(_b); + __m128d a = load_m128d((const double 
*)_a); + __m128d b = load_m128d((const double *)_b); __m128d c = _mm_or_pd(a, b); return validateDouble(c, *((double *) &d0), *((double *) &d1)); @@ -7096,8 +7107,8 @@ result_t test_mm_xor_pd(const SSE2NEONTestImpl &impl, uint32_t iter) int64_t d0 = _a[0] ^ _b[0]; int64_t d1 = _a[1] ^ _b[1]; - __m128d a = load_m128d(_a); - __m128d b = load_m128d(_b); + __m128d a = load_m128d((const double *)_a); + __m128d b = load_m128d((const double *)_b); __m128d c = _mm_xor_pd(a, b); return validateDouble(c, *((double *) &d0), *((double *) &d1)); @@ -8095,7 +8106,9 @@ result_t test_mm_blendv_pd(const SSE2NEONTestImpl &impl, uint32_t iter) for (int i = 0; i < 2; i++) { // signed shift right would return a result which is either all 1's from // negative numbers or all 0's from positive numbers - if ((*(const int64_t *) (_mask + i)) >> 63) { + bit64_union_t m; + m.f64 = _mask[i]; + if (m.i64 >> 63) { _c[i] = _b[i]; } else { _c[i] = _a[i];