Add an i586 builder (rust-lang#101)

The i586 targets on x86 are defined to be 32-bit and to lack sse/sse2, unlike the i686 target, which has sse2 turned on by default. I was mostly curious what would happen when turning on this target, and it turns out quite a few tests failed! Most of the failing tests had to do with calling functions with ABI mismatches where the callee wasn't `#[inline(always)]`. Various pieces have been updated now and we should be passing all tests. Only one instruction assertion ended up changing: `_mm_movelh_ps` generates a different instruction depending on whether sse2 is ambiently enabled (`unpcklpd`) or not (`movlhps`).
kazcw · Oct 6, 2017 · b4098a7 · b4098a7
1 parent cab8a5d
commit b4098a7
Show file tree

Hide file tree

Showing 6 changed files with 23 additions and 12 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -3,7 +3,9 @@ sudo: false
 rust: nightly
 
 matrix:
+ fast_finish: true
  include:
+ - env: TARGET=i586-unknown-linux-gnu
  - env: TARGET=i686-unknown-linux-gnu
  - env: TARGET=x86_64-unknown-linux-gnu NO_ADD=1
  - env: TARGET=arm-unknown-linux-gnueabihf

diff --git a/ci/docker/i586-unknown-linux-gnu/Dockerfile b/ci/docker/i586-unknown-linux-gnu/Dockerfile
@@ -0,0 +1,7 @@
+FROM ubuntu:17.04
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ gcc-multilib \
+ libc6-dev \
+ file \
+ make \
+ ca-certificates
diff --git a/src/x86/avx.rs b/src/x86/avx.rs
@@ -135,7 +135,7 @@ pub unsafe fn _mm256_andnot_ps(a: f32x8, b: f32x8) -> f32x8 {
  mem::transmute((!a) & b)
 }
 
-/// Compare packed double-precision (64-bit) floating-point elements 
+/// Compare packed double-precision (64-bit) floating-point elements
 /// in `a` and `b`, and return packed maximum values
 #[inline(always)]
 #[target_feature = "+avx"]
@@ -144,7 +144,7 @@ pub unsafe fn _mm256_max_pd(a: f64x4, b: f64x4) -> f64x4 {
  maxpd256(a, b)
 }
 
-/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`, 
+/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`,
 /// and return packed maximum values
 #[inline(always)]
 #[target_feature = "+avx"]
@@ -153,7 +153,7 @@ pub unsafe fn _mm256_max_ps(a: f32x8, b: f32x8) -> f32x8 {
  maxps256(a, b)
 }
 
-/// Compare packed double-precision (64-bit) floating-point elements 
+/// Compare packed double-precision (64-bit) floating-point elements
 /// in `a` and `b`, and return packed minimum values
 #[inline(always)]
 #[target_feature = "+avx"]
@@ -162,7 +162,7 @@ pub unsafe fn _mm256_min_pd(a: f64x4, b: f64x4) -> f64x4 {
  minpd256(a, b)
 }
 
-/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`, 
+/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`,
 /// and return packed minimum values
 #[inline(always)]
 #[target_feature = "+avx"]
@@ -711,21 +711,21 @@ pub unsafe fn _mm256_permute_ps(a: f32x8, imm8: i32) -> f32x8 {
 #[inline(always)]
 #[target_feature = "+avx"]
 pub unsafe fn _mm256_undefined_ps() -> f32x8 {
- mem::uninitialized()
+ f32x8::splat(mem::uninitialized())
 }
 
 /// Return vector of type `f64x4` with undefined elements.
 #[inline(always)]
 #[target_feature = "+avx"]
 pub unsafe fn _mm256_undefined_pd() -> f64x4 {
- mem::uninitialized()
+ f64x4::splat(mem::uninitialized())
 }
 
 /// Return vector of type `i64x4` with undefined elements.
 #[inline(always)]
 #[target_feature = "+avx"]
 pub unsafe fn _mm256_undefined_si256() -> i64x4 {
- mem::uninitialized()
+ i64x4::splat(mem::uninitialized())
 }
 
 /// LLVM intrinsics used in the above functions

diff --git a/src/x86/sse.rs b/src/x86/sse.rs
@@ -252,7 +252,8 @@ pub unsafe fn _mm_movehl_ps(a: f32x4, b: f32x4) -> f32x4 {
 /// half of result.
 #[inline(always)]
 #[target_feature = "+sse"]
-#[cfg_attr(test, assert_instr(unpcklpd))]
+#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(unpcklpd))]
+#[cfg_attr(all(test, not(target_feature = "sse2")), assert_instr(movlhps))]
 pub unsafe fn _mm_movelh_ps(a: f32x4, b: f32x4) -> f32x4 {
  simd_shuffle4(a, b, [0, 1, 4, 5])
 }
@@ -851,7 +852,7 @@ mod tests {
  let b = f32x4::new(0.001, 0.0, 0.0, 1.0);
 
  sse::_MM_SET_FLUSH_ZERO_MODE(sse::_MM_FLUSH_ZERO_ON);
- let r = sse::_mm_mul_ps(black_box(a), black_box(b));
+ let r = sse::_mm_mul_ps(*black_box(&a), *black_box(&b));
 
  sse::_mm_setcsr(saved_csr);
 
@@ -869,7 +870,7 @@ mod tests {
  let b = f32x4::new(0.001, 0.0, 0.0, 1.0);
 
  sse::_MM_SET_FLUSH_ZERO_MODE(sse::_MM_FLUSH_ZERO_OFF);
- let r = sse::_mm_mul_ps(black_box(a), black_box(b));
+ let r = sse::_mm_mul_ps(*black_box(&a), *black_box(&b));
 
  sse::_mm_setcsr(saved_csr);
 
@@ -886,7 +887,7 @@ mod tests {
 
  assert_eq!(sse::_MM_GET_EXCEPTION_STATE(), 0); // just to be sure
 
- let r = sse::_mm_mul_ps(black_box(a), black_box(b));
+ let r = sse::_mm_mul_ps(*black_box(&a), *black_box(&b));
 
  let exp = f32x4::new(1.1e-41, 0.0, 0.0, 1.0);
  assert_eq!(r, exp);

diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs
@@ -891,7 +891,7 @@ pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
 #[target_feature = "+sse2"]
 #[cfg_attr(test, assert_instr(movups))]
 pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
- let mut dst = mem::uninitialized();
+ let mut dst = __m128i::splat(mem::uninitialized());
  ptr::copy_nonoverlapping(
  mem_addr as *const u8,
  &mut dst as *mut __m128i as *mut u8,

diff --git a/src/x86/sse42.rs b/src/x86/sse42.rs
@@ -638,6 +638,7 @@ mod tests {
  // a bit difficult. Rather than `load` and mutate the __m128i,
  // it is easier to memcpy the given string to a local slice with
  // length 16 and `load` the local slice.
+ #[target_feature = "+sse4.2"]
  unsafe fn str_to_m128i(s: &[u8]) -> __m128i {
  assert!(s.len() <= 16);
  let slice = &mut [0u8; 16];