Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
howjmay committed Jun 28, 2024
1 parent fc79268 commit 8766f26
Showing 1 changed file with 32 additions and 29 deletions.
61 changes: 32 additions & 29 deletions tests/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ template <class T>
// Test helper: loads four floats' worth of data from p with _mm_loadu_ps and
// returns the same 128 bits viewed as an integer vector (__m128i).
// This span is a rendered diff hunk, so it shows two declarations of `ia`.
__m128i load_m128i(const T *p)
{
__m128 a = _mm_loadu_ps((const float *) p);
// NOTE(review): presumably the line removed by this commit -- it reinterprets
// the __m128 object through a pointer cast; confirm against the raw diff.
__m128i ia = *(const __m128i *) &a;
// NOTE(review): presumably the replacement line -- same reinterpretation done
// with the _mm_castps_si128 intrinsic instead of a pointer cast.
__m128i ia = _mm_castps_si128(a);
return ia;
}

Expand Down Expand Up @@ -850,7 +850,7 @@ result_t test_mm_and_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
r[2] = ia[2] & ib[2];
r[3] = ia[3] & ib[3];
__m128i ret = do_mm_set_epi32(r[3], r[2], r[1], r[0]);
result_t res = VALIDATE_INT32_M128(*(const __m128i *) &c, r);
result_t res = VALIDATE_INT32_M128(_mm_castps_si128(c), r);
if (res) {
res = VALIDATE_INT32_M128(ret, r);
}
Expand Down Expand Up @@ -879,7 +879,7 @@ result_t test_mm_andnot_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
r[3] = ~ia[3] & ib[3];
__m128i ret = do_mm_set_epi32(r[3], r[2], r[1], r[0]);
result_t res = TEST_FAIL;
res = VALIDATE_INT32_M128(*(const __m128i *) &c, r);
res = VALIDATE_INT32_M128(_mm_castps_si128(c), r);
if (res) {
res = VALIDATE_INT32_M128(ret, r);
}
Expand Down Expand Up @@ -938,7 +938,7 @@ result_t test_mm_cmpeq_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
result[3] = _a[3] == _b[3] ? -1 : 0;

__m128 ret = _mm_cmpeq_ps(a, b);
__m128i iret = *(const __m128i *) &ret;
__m128i iret = _mm_castps_si128(ret);
return VALIDATE_INT32_M128(iret, result);
}

Expand Down Expand Up @@ -973,7 +973,7 @@ result_t test_mm_cmpge_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
result[3] = _a[3] >= _b[3] ? -1 : 0;

__m128 ret = _mm_cmpge_ps(a, b);
__m128i iret = *(const __m128i *) &ret;
__m128i iret = _mm_castps_si128(ret);
return VALIDATE_INT32_M128(iret, result);
}

Expand Down Expand Up @@ -1008,7 +1008,7 @@ result_t test_mm_cmpgt_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
result[3] = _a[3] > _b[3] ? -1 : 0;

__m128 ret = _mm_cmpgt_ps(a, b);
__m128i iret = *(const __m128i *) &ret;
__m128i iret = _mm_castps_si128(ret);
return VALIDATE_INT32_M128(iret, result);
}

Expand Down Expand Up @@ -1043,7 +1043,7 @@ result_t test_mm_cmple_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
result[3] = _a[3] <= _b[3] ? -1 : 0;

__m128 ret = _mm_cmple_ps(a, b);
__m128i iret = *(const __m128i *) &ret;
__m128i iret = _mm_castps_si128(ret);
return VALIDATE_INT32_M128(iret, result);
}

Expand Down Expand Up @@ -1078,7 +1078,7 @@ result_t test_mm_cmplt_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
result[3] = _a[3] < _b[3] ? -1 : 0;

__m128 ret = _mm_cmplt_ps(a, b);
__m128i iret = *(const __m128i *) &ret;
__m128i iret = _mm_castps_si128(ret);
return VALIDATE_INT32_M128(iret, result);
}

Expand Down Expand Up @@ -1114,7 +1114,7 @@ result_t test_mm_cmpneq_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
result[3] = _a[3] != _b[3] ? -1 : 0;

__m128 ret = _mm_cmpneq_ps(a, b);
__m128i iret = *(const __m128i *) &ret;
__m128i iret = _mm_castps_si128(ret);
return VALIDATE_INT32_M128(iret, result);
}

Expand Down Expand Up @@ -3256,17 +3256,17 @@ result_t test_mm_xor_ps(const SSE2NEONTestImpl &impl, uint32_t iter)
const int32_t *_a = (const int32_t *) impl.mTestFloatPointer1;
const int32_t *_b = (const int32_t *) impl.mTestFloatPointer2;

int32_t d0 = _a[0] ^ _b[0];
int32_t d1 = _a[1] ^ _b[1];
int32_t d2 = _a[2] ^ _b[2];
int32_t d3 = _a[3] ^ _b[3];
bit32_union_t d0, d1, d2, d3;
d0.i32 = _a[0] ^ _b[0];
d1.i32 = _a[1] ^ _b[1];
d2.i32 = _a[2] ^ _b[2];
d3.i32 = _a[3] ^ _b[3];

__m128 a = load_m128(_a);
__m128 b = load_m128(_b);
__m128 c = _mm_xor_ps(a, b);

return validateFloat(c, *((float *) &d0), *((float *) &d1),
*((float *) &d2), *((float *) &d3));
return validateFloat(c, d0.f32, d1.f32, d2.f32, d3.f32);
}

/* SSE2 */
Expand Down Expand Up @@ -3552,15 +3552,16 @@ result_t test_mm_and_pd(const SSE2NEONTestImpl &impl, uint32_t iter)
{
const int64_t *_a = (const int64_t *) impl.mTestFloatPointer1;
const int64_t *_b = (const int64_t *) impl.mTestFloatPointer2;

int64_t d0 = _a[0] & _b[0];
int64_t d1 = _a[1] & _b[1];

bit64_union_t d0, d1;
d0.i64 = _a[0] & _b[0];
d1.i64 = _a[1] & _b[1];

__m128d a = load_m128d(_a);
__m128d b = load_m128d(_b);
__m128d c = _mm_and_pd(a, b);

return validateDouble(c, *((double *) &d0), *((double *) &d1));
return validateDouble(c, d0.f64, d1.f64);
}

result_t test_mm_and_si128(const SSE2NEONTestImpl &impl, uint32_t iter)
Expand All @@ -3570,7 +3571,7 @@ result_t test_mm_and_si128(const SSE2NEONTestImpl &impl, uint32_t iter)
__m128i a = load_m128i(_a);
__m128i b = load_m128i(_b);
__m128 fc = _mm_and_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b));
__m128i c = *(const __m128i *) &fc;
__m128i c = _mm_castps_si128(fc);
// now for the assertion...
const uint32_t *ia = (const uint32_t *) &a;
const uint32_t *ib = (const uint32_t *) &b;
Expand Down Expand Up @@ -3603,7 +3604,7 @@ result_t test_mm_andnot_pd(const SSE2NEONTestImpl &impl, uint32_t iter)
const uint64_t *ib = (const uint64_t *) &b;
uint64_t r0 = ~ia[0] & ib[0];
uint64_t r1 = ~ia[1] & ib[1];
return validateUInt64(*(const __m128i *) &c, r0, r1);
return validateUInt64(_mm_castpd_si128(c), r0, r1);
}

result_t test_mm_andnot_si128(const SSE2NEONTestImpl &impl, uint32_t iter)
Expand All @@ -3613,7 +3614,7 @@ result_t test_mm_andnot_si128(const SSE2NEONTestImpl &impl, uint32_t iter)
__m128i a = load_m128i(_a);
__m128i b = load_m128i(_b);
__m128 fc = _mm_andnot_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b));
__m128i c = *(const __m128i *) &fc;
__m128i c = _mm_castps_si128(fc);
// now for the assertion...
const uint32_t *ia = (const uint32_t *) &a;
const uint32_t *ib = (const uint32_t *) &b;
Expand Down Expand Up @@ -5522,14 +5523,15 @@ result_t test_mm_or_pd(const SSE2NEONTestImpl &impl, uint32_t iter)
const int64_t *_a = (const int64_t *) impl.mTestFloatPointer1;
const int64_t *_b = (const int64_t *) impl.mTestFloatPointer2;

int64_t d0 = _a[0] | _b[0];
int64_t d1 = _a[1] | _b[1];
bit64_union_t d0, d1;
d0.i64 = _a[0] | _b[0];
d1.i64 = _a[1] | _b[1];

__m128d a = load_m128d(_a);
__m128d b = load_m128d(_b);
__m128d c = _mm_or_pd(a, b);

return validateDouble(c, *((double *) &d0), *((double *) &d1));
return validateDouble(c, d0.f64, d1.f64);
}

result_t test_mm_or_si128(const SSE2NEONTestImpl &impl, uint32_t iter)
Expand All @@ -5539,7 +5541,7 @@ result_t test_mm_or_si128(const SSE2NEONTestImpl &impl, uint32_t iter)
__m128i a = load_m128i(_a);
__m128i b = load_m128i(_b);
__m128 fc = _mm_or_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b));
__m128i c = *(const __m128i *) &fc;
__m128i c = _mm_castps_si128(fc);
// now for the assertion...
const uint32_t *ia = (const uint32_t *) &a;
const uint32_t *ib = (const uint32_t *) &b;
Expand Down Expand Up @@ -7110,14 +7112,15 @@ result_t test_mm_xor_pd(const SSE2NEONTestImpl &impl, uint32_t iter)
const int64_t *_a = (const int64_t *) impl.mTestFloatPointer1;
const int64_t *_b = (const int64_t *) impl.mTestFloatPointer2;

int64_t d0 = _a[0] ^ _b[0];
int64_t d1 = _a[1] ^ _b[1];
bit64_union_t d0, d1;
d0.i64 = _a[0] ^ _b[0];
d1.i64 = _a[1] ^ _b[1];

__m128d a = load_m128d(_a);
__m128d b = load_m128d(_b);
__m128d c = _mm_xor_pd(a, b);

return validateDouble(c, *((double *) &d0), *((double *) &d1));
return validateDouble(c, d0.f64, d1.f64);
}

result_t test_mm_xor_si128(const SSE2NEONTestImpl &impl, uint32_t iter)
Expand Down

0 comments on commit 8766f26

Please sign in to comment.