Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
howjmay committed Jun 28, 2024
1 parent 14690a6 commit 890d05a
Showing 1 changed file with 32 additions and 19 deletions.
51 changes: 32 additions & 19 deletions tests/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,17 @@ __m128i do_mm_set_epi32(int32_t x, int32_t y, int32_t z, int32_t w)
return a;
}

// Overlays the 64-bit scalar views (double, signed integer, unsigned integer)
// on the same storage, so a value written through one member can be examined
// through another, e.g. to test the sign bit of a double.
union bit64_union_t {
    double f64;    // floating-point view
    int64_t i64;   // signed integer view
    uint64_t u64;  // unsigned integer view
};
// Overlays the 32-bit scalar views (float, signed integer, unsigned integer)
// on the same storage, so a value written through one member can be examined
// through another.
union bit32_union_t {
    float f32;     // floating-point view
    int32_t i32;   // signed integer view
    uint32_t u32;  // unsigned integer view
};

// This function is not called from "runSingleTest"; it is a helper for other
// intrinsic tests that need to load __m64 data.
template <class T>
Expand All @@ -419,28 +430,28 @@ __m64 load_m64(const T *p)

// This function is not called from "runSingleTest"; it is a helper for other
// intrinsic tests that need to load four packed floats into an __m128.
//
// p: pointer to at least 16 readable bytes. It is reinterpreted as float data
//    and read with an unaligned load (_mm_loadu_ps), so no particular
//    alignment is required. Callers may pass a pointer of any element type,
//    with or without an explicit cast to const float *.
// Returns the 128 bits at p as an __m128.
template <class T>
__m128 load_m128(const T *p)
{
    return _mm_loadu_ps(reinterpret_cast<const float *>(p));
}

// This function is not called from "runSingleTest"; it is a helper for other
// intrinsic tests that need to load __m128i data.
//
// p: pointer to at least 16 readable bytes of any element type; no particular
//    alignment is required.
// Returns the full 128 bits at p as an __m128i.
//
// The load is done directly in the integer domain with _mm_loadu_si128.
// _mm_loadu_si32 would only read the low 32 bits, and going through an __m128
// temporary needs either a pointer pun or a cast in the opposite direction
// (_mm_castsi128_ps yields __m128, not __m128i).
template <class T>
__m128i load_m128i(const T *p)
{
    return _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
}

// This function is not called from "runSingleTest"; it is a helper for other
// intrinsic tests that need to load two packed doubles into an __m128d.
//
// p: pointer to at least 16 readable bytes. It is reinterpreted as double data
//    and read with an unaligned load (_mm_loadu_pd), so no particular
//    alignment is required. Callers may pass a pointer of any element type,
//    with or without an explicit cast to const double *.
// Returns the 128 bits at p as an __m128d.
template <class T>
__m128d load_m128d(const T *p)
{
    return _mm_loadu_pd(reinterpret_cast<const double *>(p));
}

// This function is not called from "runSingleTest", but for other intrinsic
Expand Down Expand Up @@ -3261,8 +3272,8 @@ result_t test_mm_xor_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
int32_t d2 = _a[2] ^ _b[2];
int32_t d3 = _a[3] ^ _b[3];

__m128 a = load_m128(_a);
__m128 b = load_m128(_b);
__m128 a = load_m128((const float *)_a);
__m128 b = load_m128((const float *)_b);
__m128 c = _mm_xor_ps(a, b);

return validateFloat(c, *((float *) &d0), *((float *) &d1),
Expand Down Expand Up @@ -3556,8 +3567,8 @@ result_t test_mm_and_pd(const SSE2NEONTestImpl &impl, uint32_t iter)
int64_t d0 = _a[0] & _b[0];
int64_t d1 = _a[1] & _b[1];

__m128d a = load_m128d(_a);
__m128d b = load_m128d(_b);
__m128d a = load_m128d((const double *)_a);
__m128d b = load_m128d((const double *)_b);
__m128d c = _mm_and_pd(a, b);

return validateDouble(c, *((double *) &d0), *((double *) &d1));
Expand Down Expand Up @@ -3690,7 +3701,7 @@ result_t test_mm_bsrli_si128(const SSE2NEONTestImpl &impl, uint32_t iter)
result_t test_mm_castpd_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
{
const float *_a = impl.mTestFloatPointer1;
const __m128d a = load_m128d(_a);
const __m128d a = load_m128d((const double *)_a);
const __m128 _c = load_m128(_a);

__m128 r = _mm_castpd_ps(a);
Expand All @@ -3701,7 +3712,7 @@ result_t test_mm_castpd_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
result_t test_mm_castpd_si128(const SSE2NEONTestImpl &impl, uint32_t iter)
{
const float *_a = impl.mTestFloatPointer1;
const __m128d a = load_m128d(_a);
const __m128d a = load_m128d((const double *)_a);
const __m128i *_c = (const __m128i *) _a;

__m128i r = _mm_castpd_si128(a);
Expand Down Expand Up @@ -5508,8 +5519,8 @@ result_t test_mm_or_pd(const SSE2NEONTestImpl &impl, uint32_t iter)
int64_t d0 = _a[0] | _b[0];
int64_t d1 = _a[1] | _b[1];

__m128d a = load_m128d(_a);
__m128d b = load_m128d(_b);
__m128d a = load_m128d((const double *)_a);
__m128d b = load_m128d((const double *)_b);
__m128d c = _mm_or_pd(a, b);

return validateDouble(c, *((double *) &d0), *((double *) &d1));
Expand Down Expand Up @@ -7096,8 +7107,8 @@ result_t test_mm_xor_pd(const SSE2NEONTestImpl &impl, uint32_t iter)
int64_t d0 = _a[0] ^ _b[0];
int64_t d1 = _a[1] ^ _b[1];

__m128d a = load_m128d(_a);
__m128d b = load_m128d(_b);
__m128d a = load_m128d((const double *)_a);
__m128d b = load_m128d((const double *)_b);
__m128d c = _mm_xor_pd(a, b);

return validateDouble(c, *((double *) &d0), *((double *) &d1));
Expand Down Expand Up @@ -8095,7 +8106,9 @@ result_t test_mm_blendv_pd(const SSE2NEONTestImpl &impl, uint32_t iter)
for (int i = 0; i < 2; i++) {
// signed shift right would return a result which is either all 1's from
// negative numbers or all 0's from positive numbers
if ((*(const int64_t *) (_mask + i)) >> 63) {
bit64_union_t m;
m.f64 = _mask[i];
if (m.i64 >> 63) {
_c[i] = _b[i];
} else {
_c[i] = _a[i];
Expand Down

0 comments on commit 890d05a

Please sign in to comment.