Skip to content

Commit

Permalink
Add an i586 builder (rust-lang#101)
Browse files Browse the repository at this point in the history
The i586 targets on x86 are defined to be 32-bit and to lack sse/sse2, unlike
the i686 target, which has sse2 turned on by default. I was mostly curious what
would happen when turning on this target, and it turns out quite a few tests
failed!

Most of the failures here had to do with calling functions with ABI mismatches
where the callee wasn't `#[inline(always)]`. Various pieces have now been
updated, and we should be passing all tests.

Only one instruction assertion ended up changing: `_mm_movelh_ps` generates a
different instruction depending on whether sse2 is ambiently enabled
(`unpcklpd`) or not (`movlhps`).
  • Loading branch information
alexcrichton authored Oct 6, 2017
1 parent cab8a5d commit b4098a7
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 12 deletions.
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ sudo: false
rust: nightly

matrix:
fast_finish: true
include:
- env: TARGET=i586-unknown-linux-gnu
- env: TARGET=i686-unknown-linux-gnu
- env: TARGET=x86_64-unknown-linux-gnu NO_ADD=1
- env: TARGET=arm-unknown-linux-gnueabihf
Expand Down
7 changes: 7 additions & 0 deletions ci/docker/i586-unknown-linux-gnu/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
FROM ubuntu:17.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc-multilib \
libc6-dev \
file \
make \
ca-certificates
14 changes: 7 additions & 7 deletions src/x86/avx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ pub unsafe fn _mm256_andnot_ps(a: f32x8, b: f32x8) -> f32x8 {
mem::transmute((!a) & b)
}

/// Compare packed double-precision (64-bit) floating-point elements
/// Compare packed double-precision (64-bit) floating-point elements
/// in `a` and `b`, and return packed maximum values
#[inline(always)]
#[target_feature = "+avx"]
Expand All @@ -144,7 +144,7 @@ pub unsafe fn _mm256_max_pd(a: f64x4, b: f64x4) -> f64x4 {
maxpd256(a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`,
/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`,
/// and return packed maximum values
#[inline(always)]
#[target_feature = "+avx"]
Expand All @@ -153,7 +153,7 @@ pub unsafe fn _mm256_max_ps(a: f32x8, b: f32x8) -> f32x8 {
maxps256(a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements
/// Compare packed double-precision (64-bit) floating-point elements
/// in `a` and `b`, and return packed minimum values
#[inline(always)]
#[target_feature = "+avx"]
Expand All @@ -162,7 +162,7 @@ pub unsafe fn _mm256_min_pd(a: f64x4, b: f64x4) -> f64x4 {
minpd256(a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`,
/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b`,
/// and return packed minimum values
#[inline(always)]
#[target_feature = "+avx"]
Expand Down Expand Up @@ -711,21 +711,21 @@ pub unsafe fn _mm256_permute_ps(a: f32x8, imm8: i32) -> f32x8 {
#[inline(always)]
#[target_feature = "+avx"]
pub unsafe fn _mm256_undefined_ps() -> f32x8 {
mem::uninitialized()
f32x8::splat(mem::uninitialized())
}

/// Return vector of type `f64x4` with undefined elements.
#[inline(always)]
#[target_feature = "+avx"]
pub unsafe fn _mm256_undefined_pd() -> f64x4 {
mem::uninitialized()
f64x4::splat(mem::uninitialized())
}

/// Return vector of type `i64x4` with undefined elements.
#[inline(always)]
#[target_feature = "+avx"]
pub unsafe fn _mm256_undefined_si256() -> i64x4 {
mem::uninitialized()
i64x4::splat(mem::uninitialized())
}

/// LLVM intrinsics used in the above functions
Expand Down
9 changes: 5 additions & 4 deletions src/x86/sse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,8 @@ pub unsafe fn _mm_movehl_ps(a: f32x4, b: f32x4) -> f32x4 {
/// half of result.
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(unpcklpd))]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(unpcklpd))]
#[cfg_attr(all(test, not(target_feature = "sse2")), assert_instr(movlhps))]
pub unsafe fn _mm_movelh_ps(a: f32x4, b: f32x4) -> f32x4 {
simd_shuffle4(a, b, [0, 1, 4, 5])
}
Expand Down Expand Up @@ -851,7 +852,7 @@ mod tests {
let b = f32x4::new(0.001, 0.0, 0.0, 1.0);

sse::_MM_SET_FLUSH_ZERO_MODE(sse::_MM_FLUSH_ZERO_ON);
let r = sse::_mm_mul_ps(black_box(a), black_box(b));
let r = sse::_mm_mul_ps(*black_box(&a), *black_box(&b));

sse::_mm_setcsr(saved_csr);

Expand All @@ -869,7 +870,7 @@ mod tests {
let b = f32x4::new(0.001, 0.0, 0.0, 1.0);

sse::_MM_SET_FLUSH_ZERO_MODE(sse::_MM_FLUSH_ZERO_OFF);
let r = sse::_mm_mul_ps(black_box(a), black_box(b));
let r = sse::_mm_mul_ps(*black_box(&a), *black_box(&b));

sse::_mm_setcsr(saved_csr);

Expand All @@ -886,7 +887,7 @@ mod tests {

assert_eq!(sse::_MM_GET_EXCEPTION_STATE(), 0); // just to be sure

let r = sse::_mm_mul_ps(black_box(a), black_box(b));
let r = sse::_mm_mul_ps(*black_box(&a), *black_box(&b));

let exp = f32x4::new(1.1e-41, 0.0, 0.0, 1.0);
assert_eq!(r, exp);
Expand Down
2 changes: 1 addition & 1 deletion src/x86/sse2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -891,7 +891,7 @@ pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(movups))]
pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
let mut dst = mem::uninitialized();
let mut dst = __m128i::splat(mem::uninitialized());
ptr::copy_nonoverlapping(
mem_addr as *const u8,
&mut dst as *mut __m128i as *mut u8,
Expand Down
1 change: 1 addition & 0 deletions src/x86/sse42.rs
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,7 @@ mod tests {
// a bit difficult. Rather than `load` and mutate the __m128i,
// it is easier to memcpy the given string to a local slice with
// length 16 and `load` the local slice.
#[target_feature = "+sse4.2"]
unsafe fn str_to_m128i(s: &[u8]) -> __m128i {
assert!(s.len() <= 16);
let slice = &mut [0u8; 16];
Expand Down

0 comments on commit b4098a7

Please sign in to comment.