diff --git a/.travis.yml b/.travis.yml index 84aebf317..274f6ed5c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -169,10 +169,10 @@ matrix: # BENCHMARKS: - name: "Benchmarks - x86_64-unknown-linux-gnu" install: TARGET=x86_64-unknown-linux-gnu ./ci/setup_benchmarks.sh - script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=coresimd,ispc,sleef-sys ci/benchmark.sh + script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=core_arch,ispc,sleef-sys ci/benchmark.sh - name: "Benchmarks - x86_64-apple-darwin" install: TARGET=x86_64-apple-darwin ./ci/setup_benchmarks.sh - script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=coresimd,ispc,sleef-sys ci/benchmark.sh + script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=core_arch,ispc,sleef-sys ci/benchmark.sh os: osx osx_image: xcode9.4 # TOOLS: diff --git a/Cargo.toml b/Cargo.toml index aae8eb17a..df076edd7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ maintenance = { status = "experimental" } [dependencies] cfg-if = "^0.1" -coresimd = { version = "^0.1.0", optional = true } +core_arch = { version = "^0.1.3", optional = true } [features] default = [] @@ -32,7 +32,7 @@ paste = "^0.1.3" arrayvec = { version = "^0.4", default-features = false } [target.'cfg(target_arch = "x86_64")'.dependencies.sleef-sys] -version = "^0.1" +version = "^0.1.2" optional = true [target.wasm32-unknown-unknown.dev-dependencies] diff --git a/ci/android-install-ndk.sh b/ci/android-install-ndk.sh index 873f6c52c..818e78446 100644 --- a/ci/android-install-ndk.sh +++ b/ci/android-install-ndk.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env sh # Copyright 2016 The Rust Project Developers. See the COPYRIGHT # file at the top-level directory of this distribution and at # http://rust-lang.org/COPYRIGHT. @@ -11,27 +11,27 @@ set -ex -curl -O https://dl.google.com/android/repository/android-ndk-r15b-linux-x86_64.zip +curl --retry 5 -O https://dl.google.com/android/repository/android-ndk-r15b-linux-x86_64.zip unzip -q android-ndk-r15b-linux-x86_64.zip case "$1" in - aarch64) - arch=arm64 - ;; + aarch64) + arch=arm64 + ;; - i686) - arch=x86 - ;; + i686) + arch=x86 + ;; - *) - arch=$1 - ;; + *) + arch=$1 + ;; esac; android-ndk-r15b/build/tools/make_standalone_toolchain.py \ - --unified-headers \ - --install-dir /android/ndk-$1 \ - --arch $arch \ - --api 24 + --unified-headers \ + --install-dir "/android/ndk-${1}" \ + --arch "${arch}" \ + --api 24 rm -rf ./android-ndk-r15b-linux-x86_64.zip ./android-ndk-r15b diff --git a/ci/android-install-sdk.sh b/ci/android-install-sdk.sh index ab7e14d95..6b5ac09ab 100644 --- a/ci/android-install-sdk.sh +++ b/ci/android-install-sdk.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/usr/bin/env sh # Copyright 2016 The Rust Project Developers. See the COPYRIGHT # file at the top-level directory of this distribution and at # http://rust-lang.org/COPYRIGHT. @@ -19,7 +19,7 @@ set -ex # which apparently magically accepts the licenses. mkdir sdk -curl https://dl.google.com/android/repository/sdk-tools-linux-3859397.zip -O +curl --retry 5 https://dl.google.com/android/repository/sdk-tools-linux-3859397.zip -O unzip -d sdk sdk-tools-linux-3859397.zip case "$1" in @@ -46,9 +46,9 @@ case "$1" in esac; # --no_https avoids -# javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException: No trusted certificate found -echo "yes" | \ - ./sdk/tools/bin/sdkmanager --no_https \ + # javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException: No trusted certificate found +yes | ./sdk/tools/bin/sdkmanager --licenses --no_https +yes | ./sdk/tools/bin/sdkmanager --no_https \ "emulator" \ "platform-tools" \ "platforms;android-24" \ @@ -56,5 +56,5 @@ echo "yes" | \ echo "no" | ./sdk/tools/bin/avdmanager create avd \ - --name $1 \ + --name "${1}" \ --package "system-images;android-24;default;$abi" diff --git a/ci/android-sysimage.sh b/ci/android-sysimage.sh index 9611dfeb0..9eabd7c8d 100644 --- a/ci/android-sysimage.sh +++ b/ci/android-sysimage.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env bash + # Copyright 2017 The Rust Project Developers. See the COPYRIGHT # file at the top-level directory of this distribution and at # http://rust-lang.org/COPYRIGHT. @@ -13,32 +15,34 @@ set -ex URL=https://dl.google.com/android/repository/sys-img/android main() { - local arch=$1 - local name=$2 + local arch="${1}" + local name="${2}" local dest=/system - local td=$(mktemp -d) + local td + td="$(mktemp -d)" apt-get install --no-install-recommends e2tools - pushd $td - curl -O $URL/$name - unzip -q $name + pushd "${td}" + curl --retry 5 -O "${URL}/${name}" + unzip -q "${name}" - local system=$(find . -name system.img) - mkdir -p $dest/{bin,lib,lib64} + local system + system="$(find . -name system.img)" + mkdir -p ${dest}/{bin,lib,lib64} # Extract android linker and libraries to /system # This allows android executables to be run directly (or with qemu) - if [ $arch = "x86_64" -o $arch = "arm64" ]; then - e2cp -p $system:/bin/linker64 $dest/bin/ - e2cp -p $system:/lib64/libdl.so $dest/lib64/ - e2cp -p $system:/lib64/libc.so $dest/lib64/ - e2cp -p $system:/lib64/libm.so $dest/lib64/ + if [ "${arch}" = "x86_64" ] || [ "${arch}" = "arm64" ]; then + e2cp -p "${system}:/bin/linker64" "${dest}/bin/" + e2cp -p "${system}:/lib64/libdl.so" "${dest}/lib64/" + e2cp -p "${system}:/lib64/libc.so" "${dest}/lib64/" + e2cp -p "${system}:/lib64/libm.so" "${dest}/lib64/" else - e2cp -p $system:/bin/linker $dest/bin/ - e2cp -p $system:/lib/libdl.so $dest/lib/ - e2cp -p $system:/lib/libc.so $dest/lib/ - e2cp -p $system:/lib/libm.so $dest/lib/ + e2cp -p "${system}:/bin/linker" "${dest}/bin/" + e2cp -p "${system}:/lib/libdl.so" "${dest}/lib/" + e2cp -p "${system}:/lib/libc.so" "${dest}/lib/" + e2cp -p "${system}:/lib/libm.so" "${dest}/lib/" fi # clean up @@ -46,7 +50,7 @@ main() { popd - rm -rf $td + rm -rf "${td}" } main "${@}" diff --git a/ci/benchmark.sh b/ci/benchmark.sh index d4b821a4d..3635b9e37 100755 --- a/ci/benchmark.sh +++ b/ci/benchmark.sh @@ -2,7 +2,7 @@ # # Runs all benchmarks. Controlled by the following environment variables: # -# FEATURES={} - cargo features to pass to all benchmarks (e.g. coresimd,sleef-sys,ispc) +# FEATURES={} - cargo features to pass to all benchmarks (e.g. core_arch,sleef-sys,ispc) # NORUN={1} - only builds the benchmarks set -ex diff --git a/ci/run.sh b/ci/run.sh index fefad3409..7bb825883 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -6,7 +6,7 @@ set -ex # Tests are all super fast anyway, and they fault often enough on travis that # having only one thread increases debuggability to be worth it. -export RUST_TEST_THREADS=1 +#export RUST_TEST_THREADS=1 #export RUST_BACKTRACE=full #export RUST_TEST_NOCAPTURE=1 @@ -47,6 +47,13 @@ echo "RUST_TEST_NOCAPTURE=${RUST_TEST_NOCAPTURE}" cargo_test() { cmd="cargo ${CARGO_SUBCMD} --verbose --target=${TARGET} ${@}" + if [ "${NORUN}" != "1" ] + then + if [ "$TARGET" != "wasm32-unknown-unknown" ] + then + cmd="$cmd -- --quiet" + fi + fi mkdir target || true ${cmd} 2>&1 | tee > target/output if [[ ${PIPESTATUS[0]} != 0 ]]; then @@ -71,9 +78,9 @@ fi if [[ "${TARGET}" == "x86_64-unknown-linux-gnu" ]] || [[ "${TARGET}" == "x86_64-pc-windows-msvc" ]]; then # use sleef on linux and windows x86_64 builds - cargo_test_impl --release --features=into_bits,coresimd,sleef-sys + cargo_test_impl --release --features=into_bits,core_arch,sleef-sys else - cargo_test_impl --release --features=into_bits,coresimd + cargo_test_impl --release --features=into_bits,core_arch fi # Verify code generation diff --git a/ci/runtest-android.rs b/ci/runtest-android.rs index d8968f99f..ed1cd80c8 100644 --- a/ci/runtest-android.rs +++ b/ci/runtest-android.rs @@ -3,8 +3,12 @@ use std::process::Command; use std::path::{Path, PathBuf}; fn main() { - assert_eq!(env::args_os().len(), 2); - let test = PathBuf::from(env::args_os().nth(1).unwrap()); + let args = env::args_os() + .skip(1) + .filter(|arg| arg != "--quiet") + .collect::<Vec<_>>(); + assert_eq!(args.len(), 1); + let test = PathBuf::from(&args[0]); let dst = Path::new("/data/local/tmp").join(test.file_name().unwrap()); let status = Command::new("adb") diff --git a/examples/aobench/Cargo.toml b/examples/aobench/Cargo.toml index e3b34d831..b990a8afb 100644 --- a/examples/aobench/Cargo.toml +++ b/examples/aobench/Cargo.toml @@ -33,7 +33,7 @@ criterion = { version = '^0.2', features=['real_blackbox'] } default = [ "256bit" ] 256bit = [] sleef-sys = [ "packed_simd/sleef-sys" ] -coresimd = [ "packed_simd/coresimd" ] +core_arch = [ "packed_simd/core_arch" ] [[bench]] name = "isec_sphere" diff --git a/examples/mandelbrot/Cargo.toml b/examples/mandelbrot/Cargo.toml index e9f85a69b..ab9b17f5c 100644 --- a/examples/mandelbrot/Cargo.toml +++ b/examples/mandelbrot/Cargo.toml @@ -25,4 +25,4 @@ path = "src/lib.rs" [features] default = [] sleef-sys = ["packed_simd/sleef-sys"] -coresimd = ["packed_simd/coresimd"] +core_arch = ["packed_simd/core_arch"] diff --git a/examples/nbody/Cargo.toml b/examples/nbody/Cargo.toml index 5913e0d11..81c284d06 100644 --- a/examples/nbody/Cargo.toml +++ b/examples/nbody/Cargo.toml @@ -14,3 +14,8 @@ path = "src/main.rs" [lib] name = "nbody_lib" path = "src/lib.rs" + +[features] +default = [ ] +sleef-sys = [ "packed_simd/sleef-sys" ] +core_arch = [ "packed_simd/core_arch" ] diff --git a/examples/options_pricing/Cargo.toml b/examples/options_pricing/Cargo.toml index 7d9e98ad6..1bc0a7d11 100644 --- a/examples/options_pricing/Cargo.toml +++ b/examples/options_pricing/Cargo.toml @@ -23,6 +23,6 @@ path = "src/lib.rs" [features] default = [] -coresimd = [ "packed_simd/coresimd" ] +core_arch = [ "packed_simd/core_arch" ] sleef-sys = [ "packed_simd/sleef-sys" ] ispc_libm = [ "ispc" ] diff --git a/examples/stencil/Cargo.toml b/examples/stencil/Cargo.toml index f44a41e9c..c1bf8c89d 100644 --- a/examples/stencil/Cargo.toml +++ b/examples/stencil/Cargo.toml @@ -23,5 +23,5 @@ path = "src/lib.rs" [features] default = [] -coresimd = ["packed_simd/coresimd"] +core_arch = ["packed_simd/core_arch"] sleef-sys = ["packed_simd/sleef-sys"] diff --git a/src/api/bit_manip.rs b/src/api/bit_manip.rs index b87e1808c..3d3c4eb88 100644 --- a/src/api/bit_manip.rs +++ b/src/api/bit_manip.rs @@ -17,15 +17,15 @@ macro_rules! impl_bit_manip { super::codegen::bit_manip::BitManip::ctpop(!self) } - /// Returns the number of leading zeros in the binary representation - /// of the lanes of `self`. + /// Returns the number of leading zeros in the binary + /// representation of the lanes of `self`. #[inline] pub fn leading_zeros(self) -> Self { super::codegen::bit_manip::BitManip::ctlz(self) } - /// Returns the number of trailing zeros in the binary representation - /// of the lanes of `self`. + /// Returns the number of trailing zeros in the binary + /// representation of the lanes of `self`. #[inline] pub fn trailing_zeros(self) -> Self { super::codegen::bit_manip::BitManip::cttz(self) @@ -45,7 +45,10 @@ macro_rules! impl_bit_manip { ($x:expr, $func:ident) => {{ let mut actual = $x; for i in 0..$id::lanes() { - actual = actual.replace(i, $x.extract(i).$func() as $elem_ty); + actual = actual.replace( + i, + $x.extract(i).$func() as $elem_ty + ); } let expected = $x.$func(); assert_eq!(actual, expected); @@ -73,21 +76,24 @@ macro_rules! impl_bit_manip { $id::from_slice_unaligned(elems) } - #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn count_ones() { test_func!($id::splat(0), count_ones); test_func!($id::splat(!0), count_ones); test_func!(load_bytes(), count_ones); } - #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn count_zeros() { test_func!($id::splat(0), count_zeros); test_func!($id::splat(!0), count_zeros); test_func!(load_bytes(), count_zeros); } - #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn leading_zeros() { test_func!($id::splat(0), leading_zeros); test_func!($id::splat(1), leading_zeros); @@ -95,17 +101,24 @@ macro_rules! impl_bit_manip { // behavior when the 8th bit is set. test_func!($id::splat(0b1000_0010), leading_zeros); test_func!($id::splat(!0), leading_zeros); - test_func!($id::splat(1 << (LANE_WIDTH - 1)), leading_zeros); + test_func!( + $id::splat(1 << (LANE_WIDTH - 1)), + leading_zeros + ); test_func!(load_bytes(), leading_zeros); } - #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn trailing_zeros() { test_func!($id::splat(0), trailing_zeros); test_func!($id::splat(1), trailing_zeros); test_func!($id::splat(0b1000_0010), trailing_zeros); test_func!($id::splat(!0), trailing_zeros); - test_func!($id::splat(1 << (LANE_WIDTH - 1)), trailing_zeros); + test_func!( + $id::splat(1 << (LANE_WIDTH - 1)), + trailing_zeros + ); test_func!(load_bytes(), trailing_zeros); } } diff --git a/src/api/from/from_array.rs b/src/api/from/from_array.rs index eb5f53c45..964d1501d 100644 --- a/src/api/from/from_array.rs +++ b/src/api/from/from_array.rs @@ -53,7 +53,7 @@ macro_rules! impl_from_array { } */ - test_if!{ + test_if! { $test_tt: paste::item! { mod [<$id _from>] { diff --git a/src/api/from/from_vector.rs b/src/api/from/from_vector.rs index 2cda3f330..55f70016d 100644 --- a/src/api/from/from_vector.rs +++ b/src/api/from/from_vector.rs @@ -31,7 +31,7 @@ macro_rules! impl_from_vector { } */ - test_if!{ + test_if! { $test_tt: paste::item! { pub mod [<$id _from_ $source>] { diff --git a/src/api/into_bits/arch_specific.rs b/src/api/into_bits/arch_specific.rs index e4c412162..a93cdcd30 100644 --- a/src/api/into_bits/arch_specific.rs +++ b/src/api/into_bits/arch_specific.rs @@ -38,13 +38,13 @@ macro_rules! impl_arch { #[cfg(any( not(target_arch = "arm"), all(target_feature = "v7", target_feature = "neon", - feature = "coresimd")) + feature = "core_arch")) )] // note: if target is "powerpc", "altivec" must be enabled // and the std library must be recompiled with it #[cfg(any( not(target_arch = "powerpc"), - all(target_feature = "altivec", feature = "coresimd"), + all(target_feature = "altivec", feature = "core_arch"), ))] #[cfg(target_arch = $arch_tt)] use crate::arch::$arch::{ @@ -54,11 +54,11 @@ macro_rules! impl_arch { #[cfg(any( not(target_arch = "arm"), all(target_feature = "v7", target_feature = "neon", - feature = "coresimd")) + feature = "core_arch")) )] #[cfg(any( not(target_arch = "powerpc"), - all(target_feature = "altivec", feature = "coresimd"), + all(target_feature = "altivec", feature = "core_arch"), ))] #[cfg(target_arch = $arch_tt)] impl_arch!($($arch_ty),* | $($from_ty),* | $($into_ty),* | diff --git a/src/api/into_bits/macros.rs b/src/api/into_bits/macros.rs index 48d6b6e2f..8cec5b004 100644 --- a/src/api/into_bits/macros.rs +++ b/src/api/into_bits/macros.rs @@ -9,7 +9,7 @@ macro_rules! impl_from_bits_ { } } - test_if!{ + test_if! { $test_tt: paste::item! { pub mod [<$id _from_bits_ $from_ty>] { diff --git a/src/api/reductions/float_arithmetic.rs b/src/api/reductions/float_arithmetic.rs index 95fb3606c..dd722ae25 100644 --- a/src/api/reductions/float_arithmetic.rs +++ b/src/api/reductions/float_arithmetic.rs @@ -90,7 +90,7 @@ macro_rules! impl_reduction_float_arithmetic { } } - test_if!{ + test_if! { $test_tt: paste::item! { pub mod [<$id _reduction_float_arith>] { diff --git a/src/api/reductions/integer_arithmetic.rs b/src/api/reductions/integer_arithmetic.rs index a26608318..91dffad31 100644 --- a/src/api/reductions/integer_arithmetic.rs +++ b/src/api/reductions/integer_arithmetic.rs @@ -95,7 +95,7 @@ macro_rules! impl_reduction_integer_arithmetic { } } - test_if!{ + test_if! { $test_tt: paste::item! { pub mod [<$id _reduction_int_arith>] { diff --git a/src/api/reductions/min_max.rs b/src/api/reductions/min_max.rs index bee9e1e2b..c4d3aa10f 100644 --- a/src/api/reductions/min_max.rs +++ b/src/api/reductions/min_max.rs @@ -74,7 +74,7 @@ macro_rules! impl_reduction_min_max { } } } - test_if!{$test_tt: + test_if! {$test_tt: paste::item! { pub mod [<$id _reduction_min_max>] { use super::*; diff --git a/src/api/slice/from_slice.rs b/src/api/slice/from_slice.rs index 0208cf318..ca83c7df7 100644 --- a/src/api/slice/from_slice.rs +++ b/src/api/slice/from_slice.rs @@ -84,7 +84,7 @@ macro_rules! impl_slice_from_slice { } } - test_if!{ + test_if! { $test_tt: paste::item! { pub mod [<$id _slice_from_slice>] { diff --git a/src/api/slice/write_to_slice.rs b/src/api/slice/write_to_slice.rs index bf79849a4..becb564d4 100644 --- a/src/api/slice/write_to_slice.rs +++ b/src/api/slice/write_to_slice.rs @@ -55,8 +55,16 @@ macro_rules! impl_slice_write_to_slice { 0 ); - #[cfg_attr(feature = "cargo-clippy", - allow(clippy::cast_ptr_alignment))] + #[cfg_attr(feature = "cargo-clippy", + allow(clippy::cast_ptr_alignment))] + #[cfg_attr( + feature = "cargo-clippy", + allow(clippy::cast_ptr_alignment) + )] + #[cfg_attr( + feature = "cargo-clippy", + allow(clippy::cast_ptr_alignment) + )] #[cfg_attr( feature = "cargo-clippy", allow(clippy::cast_ptr_alignment) @@ -85,7 +93,7 @@ macro_rules! impl_slice_write_to_slice { } } - test_if!{ + test_if! { $test_tt: paste::item! { pub mod [<$id _slice_write_to_slice>] { diff --git a/src/codegen.rs b/src/codegen.rs index 72be7f1b2..b7ccd8386 100644 --- a/src/codegen.rs +++ b/src/codegen.rs @@ -1,12 +1,12 @@ //! Code-generation utilities +crate mod bit_manip; crate mod llvm; crate mod math; crate mod reductions; crate mod shuffle; crate mod shuffle1_dyn; crate mod swap_bytes; -crate mod bit_manip; macro_rules! impl_simd_array { ([$elem_ty:ident; $elem_count:expr]: diff --git a/src/codegen/bit_manip.rs b/src/codegen/bit_manip.rs index 9700a5a1e..947266f5b 100644 --- a/src/codegen/bit_manip.rs +++ b/src/codegen/bit_manip.rs @@ -1,4 +1,5 @@ //! LLVM bit manipulation intrinsics. +#![rustfmt::skip] use crate::*; @@ -51,7 +52,6 @@ extern "C" { #[link_name = "llvm.ctlz.v4i128"] fn ctlz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4; - #[link_name = "llvm.cttz.v2i8"] fn cttz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2; #[link_name = "llvm.cttz.v4i8"] @@ -99,7 +99,6 @@ extern "C" { #[link_name = "llvm.cttz.v4i128"] fn cttz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4; - #[link_name = "llvm.ctpop.v2i8"] fn ctpop_u8x2(x: u8x2) -> u8x2; #[link_name = "llvm.ctpop.v4i8"] @@ -155,7 +154,8 @@ crate trait BitManip { } macro_rules! impl_bit_manip { - (inner: $ty:ident, $scalar:ty, $uty:ident, $ctpop:ident, $ctlz:ident, $cttz:ident) => { + (inner: $ty:ident, $scalar:ty, $uty:ident, + $ctpop:ident, $ctlz:ident, $cttz:ident) => { // FIXME: several LLVM intrinsics break on s390x https://github.com/rust-lang-nursery/packed_simd/issues/192 #[cfg(target_arch = "s390x")] impl_bit_manip! { scalar: $ty, $scalar } @@ -170,7 +170,8 @@ macro_rules! impl_bit_manip { #[inline] fn ctlz(self) -> Self { let y: $uty = self.cast(); - // the ctxx intrinsics need compile-time constant `is_zero_undef` + // the ctxx intrinsics need compile-time constant + // `is_zero_undef` unsafe { $ctlz(y, false).cast() } } @@ -211,7 +212,8 @@ macro_rules! impl_bit_manip { fn ctpop(self) -> Self { let mut ones = self; for i in 0..Self::lanes() { - ones = ones.replace(i, self.extract(i).count_ones() as $scalar); + ones = ones + .replace(i, self.extract(i).count_ones() as $scalar); } ones } @@ -220,7 +222,10 @@ macro_rules! impl_bit_manip { fn ctlz(self) -> Self { let mut lz = self; for i in 0..Self::lanes() { - lz = lz.replace(i, self.extract(i).leading_zeros() as $scalar); + lz = lz.replace( + i, + self.extract(i).leading_zeros() as $scalar, + ); } lz } @@ -229,44 +234,49 @@ macro_rules! impl_bit_manip { fn cttz(self) -> Self { let mut tz = self; for i in 0..Self::lanes() { - tz = tz.replace(i, self.extract(i).trailing_zeros() as $scalar); + tz = tz.replace( + i, + self.extract(i).trailing_zeros() as $scalar, + ); } tz } } }; - ($uty:ident, $uscalar:ty, $ity:ident, $iscalar:ty, $ctpop:ident, $ctlz:ident, $cttz:ident) => { + ($uty:ident, $uscalar:ty, $ity:ident, $iscalar:ty, + $ctpop:ident, $ctlz:ident, $cttz:ident) => { impl_bit_manip! { inner: $uty, $uscalar, $uty, $ctpop, $ctlz, $cttz } impl_bit_manip! { inner: $ity, $iscalar, $uty, $ctpop, $ctlz, $cttz } }; - (sized: $usize:ident, $uscalar:ty, $isize:ident, $iscalar:ty, $ty:ident) => { + (sized: $usize:ident, $uscalar:ty, $isize:ident, + $iscalar:ty, $ty:ident) => { impl_bit_manip! { sized_inner: $usize, $uscalar, $ty } impl_bit_manip! { sized_inner: $isize, $iscalar, $ty } }; } -impl_bit_manip! { u8x2, u8, i8x2, i8, ctpop_u8x2, ctlz_u8x2, cttz_u8x2 } -impl_bit_manip! { u8x4, u8, i8x4, i8, ctpop_u8x4, ctlz_u8x4, cttz_u8x4 } +impl_bit_manip! { u8x2 , u8, i8x2, i8, ctpop_u8x2, ctlz_u8x2, cttz_u8x2 } +impl_bit_manip! { u8x4 , u8, i8x4, i8, ctpop_u8x4, ctlz_u8x4, cttz_u8x4 } #[cfg(not(target_arch = "aarch64"))] // see below -impl_bit_manip! { u8x8, u8, i8x8, i8, ctpop_u8x8, ctlz_u8x8, cttz_u8x8 } -impl_bit_manip! { u8x16, u8, i8x16, i8, ctpop_u8x16, ctlz_u8x16, cttz_u8x16 } -impl_bit_manip! { u8x32, u8, i8x32, i8, ctpop_u8x32, ctlz_u8x32, cttz_u8x32 } -impl_bit_manip! { u8x64, u8, i8x64, i8, ctpop_u8x64, ctlz_u8x64, cttz_u8x64 } -impl_bit_manip! { u16x2, u16, i16x2, i16, ctpop_u16x2, ctlz_u16x2, cttz_u16x2 } -impl_bit_manip! { u16x4, u16, i16x4, i16, ctpop_u16x4, ctlz_u16x4, cttz_u16x4 } -impl_bit_manip! { u16x8, u16, i16x8, i16, ctpop_u16x8, ctlz_u16x8, cttz_u16x8 } -impl_bit_manip! { u16x16, u16, i16x16, i16, ctpop_u16x16, ctlz_u16x16, cttz_u16x16 } -impl_bit_manip! { u16x32, u16, i16x32, i16, ctpop_u16x32, ctlz_u16x32, cttz_u16x32 } -impl_bit_manip! { u32x2, u32, i32x2, i32, ctpop_u32x2, ctlz_u32x2, cttz_u32x2 } -impl_bit_manip! { u32x4, u32, i32x4, i32, ctpop_u32x4, ctlz_u32x4, cttz_u32x4 } -impl_bit_manip! { u32x8, u32, i32x8, i32, ctpop_u32x8, ctlz_u32x8, cttz_u32x8 } -impl_bit_manip! { u32x16, u32, i32x16, i32, ctpop_u32x16, ctlz_u32x16, cttz_u32x16 } -impl_bit_manip! { u64x2, u64, i64x2, i64, ctpop_u64x2, ctlz_u64x2, cttz_u64x2 } -impl_bit_manip! { u64x4, u64, i64x4, i64, ctpop_u64x4, ctlz_u64x4, cttz_u64x4 } -impl_bit_manip! { u64x8, u64, i64x8, i64, ctpop_u64x8, ctlz_u64x8, cttz_u64x8 } -impl_bit_manip! { u128x1, u128, i128x1, i128, ctpop_u128x1, ctlz_u128x1, cttz_u128x1 } -impl_bit_manip! { u128x2, u128, i128x2, i128, ctpop_u128x2, ctlz_u128x2, cttz_u128x2 } -impl_bit_manip! { u128x4, u128, i128x4, i128, ctpop_u128x4, ctlz_u128x4, cttz_u128x4 } +impl_bit_manip! { u8x8 , u8, i8x8, i8, ctpop_u8x8, ctlz_u8x8, cttz_u8x8 } +impl_bit_manip! { u8x16 , u8, i8x16, i8, ctpop_u8x16, ctlz_u8x16, cttz_u8x16 } +impl_bit_manip! { u8x32 , u8, i8x32, i8, ctpop_u8x32, ctlz_u8x32, cttz_u8x32 } +impl_bit_manip! { u8x64 , u8, i8x64, i8, ctpop_u8x64, ctlz_u8x64, cttz_u8x64 } +impl_bit_manip! { u16x2 , u16, i16x2, i16, ctpop_u16x2, ctlz_u16x2, cttz_u16x2 } +impl_bit_manip! { u16x4 , u16, i16x4, i16, ctpop_u16x4, ctlz_u16x4, cttz_u16x4 } +impl_bit_manip! { u16x8 , u16, i16x8, i16, ctpop_u16x8, ctlz_u16x8, cttz_u16x8 } +impl_bit_manip! { u16x16 , u16, i16x16, i16, ctpop_u16x16, ctlz_u16x16, cttz_u16x16 } +impl_bit_manip! { u16x32 , u16, i16x32, i16, ctpop_u16x32, ctlz_u16x32, cttz_u16x32 } +impl_bit_manip! { u32x2 , u32, i32x2, i32, ctpop_u32x2, ctlz_u32x2, cttz_u32x2 } +impl_bit_manip! { u32x4 , u32, i32x4, i32, ctpop_u32x4, ctlz_u32x4, cttz_u32x4 } +impl_bit_manip! { u32x8 , u32, i32x8, i32, ctpop_u32x8, ctlz_u32x8, cttz_u32x8 } +impl_bit_manip! { u32x16 , u32, i32x16, i32, ctpop_u32x16, ctlz_u32x16, cttz_u32x16 } +impl_bit_manip! { u64x2 , u64, i64x2, i64, ctpop_u64x2, ctlz_u64x2, cttz_u64x2 } +impl_bit_manip! { u64x4 , u64, i64x4, i64, ctpop_u64x4, ctlz_u64x4, cttz_u64x4 } +impl_bit_manip! { u64x8 , u64, i64x8, i64, ctpop_u64x8, ctlz_u64x8, cttz_u64x8 } +impl_bit_manip! { u128x1 , u128, i128x1, i128, ctpop_u128x1, ctlz_u128x1, cttz_u128x1 } +impl_bit_manip! { u128x2 , u128, i128x2, i128, ctpop_u128x2, ctlz_u128x2, cttz_u128x2 } +impl_bit_manip! { u128x4 , u128, i128x4, i128, ctpop_u128x4, ctlz_u128x4, cttz_u128x4 } #[cfg(target_arch = "aarch64")] impl BitManip for u8x8 { @@ -285,7 +295,8 @@ impl BitManip for u8x8 { #[inline] fn cttz(self) -> Self { // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191 - // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64 intrinsics + // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64 + // intrinsics let mut tz = self; for i in 0..Self::lanes() { tz = tz.replace(i, self.extract(i).trailing_zeros() as u8); @@ -310,7 +321,8 @@ impl BitManip for i8x8 { #[inline] fn cttz(self) -> Self { // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191 - // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64 intrinsics + // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64 + // intrinsics let mut tz = self; for i in 0..Self::lanes() { tz = tz.replace(i, self.extract(i).trailing_zeros() as i8); diff --git a/src/codegen/reductions/mask.rs b/src/codegen/reductions/mask.rs index 9e5e6e418..498817ad8 100644 --- a/src/codegen/reductions/mask.rs +++ b/src/codegen/reductions/mask.rs @@ -23,7 +23,7 @@ cfg_if! { #[macro_use] mod x86; } else if #[cfg(all(target_arch = "arm", target_feature = "v7", - target_feature = "neon", feature = "coresimd"))] { + target_feature = "neon", feature = "core_arch"))] { #[macro_use] mod arm; } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { diff --git a/src/codegen/reductions/mask/x86.rs b/src/codegen/reductions/mask/x86.rs index 89f500b03..2ae4ed81c 100644 --- a/src/codegen/reductions/mask/x86.rs +++ b/src/codegen/reductions/mask/x86.rs @@ -1,9 +1,5 @@ //! Mask reductions implementation for `x86` and `x86_64` targets -#[cfg(target_feature = "mmx")] -#[macro_use] -mod mmx; - #[cfg(target_feature = "sse")] #[macro_use] mod sse; @@ -24,8 +20,8 @@ mod avx2; macro_rules! x86_m8x8_impl { ($id:ident) => { cfg_if! { - if #[cfg(all(target_arch = "x86_64", target_feature = "mmx"))] { - x86_m8x8_mmx_impl!($id); + if #[cfg(all(target_arch = "x86_64", target_feature = "sse"))] { + x86_m8x8_sse_impl!($id); } else { fallback_impl!($id); } diff --git a/src/codegen/reductions/mask/x86/mmx.rs b/src/codegen/reductions/mask/x86/mmx.rs deleted file mode 100644 index 3109f6f7b..000000000 --- a/src/codegen/reductions/mask/x86/mmx.rs +++ /dev/null @@ -1,34 +0,0 @@ -//! Mask reductions implementation for `x86` and `x86_64` targets with `MMX`. -#![allow(unused)] - -macro_rules! x86_m8x8_mmx_impl { - ($id:ident) => { - impl All for $id { - #[inline] - #[target_feature(enable = "mmx")] - unsafe fn all(self) -> bool { - #[cfg(target_arch = "x86")] - use crate::arch::x86::_mm_movemask_pi8; - #[cfg(target_arch = "x86_64")] - use crate::arch::x86_64::_mm_movemask_pi8; - // _mm_movemask_pi8(a) creates an 8bit mask containing the most - // significant bit of each byte of `a`. If all bits are set, - // then all 8 lanes of the mask are true. - _mm_movemask_pi8(crate::mem::transmute(self)) - == u8::max_value() as i32 - } - } - impl Any for $id { - #[inline] - #[target_feature(enable = "mmx")] - unsafe fn any(self) -> bool { - #[cfg(target_arch = "x86")] - use crate::arch::x86::_mm_movemask_pi8; - #[cfg(target_arch = "x86_64")] - use crate::arch::x86_64::_mm_movemask_pi8; - - _mm_movemask_pi8(crate::mem::transmute(self)) != 0 - } - } - }; -} diff --git a/src/codegen/reductions/mask/x86/sse.rs b/src/codegen/reductions/mask/x86/sse.rs index eb1ef7fac..7482f9430 100644 --- a/src/codegen/reductions/mask/x86/sse.rs +++ b/src/codegen/reductions/mask/x86/sse.rs @@ -34,3 +34,35 @@ macro_rules! x86_m32x4_sse_impl { } }; } + +macro_rules! x86_m8x8_sse_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_pi8; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_pi8; + // _mm_movemask_pi8(a) creates an 8bit mask containing the most + // significant bit of each byte of `a`. If all bits are set, + // then all 8 lanes of the mask are true. + _mm_movemask_pi8(crate::mem::transmute(self)) + == u8::max_value() as i32 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_pi8; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_pi8; + + _mm_movemask_pi8(crate::mem::transmute(self)) != 0 + } + } + }; +} diff --git a/src/codegen/shuffle1_dyn.rs b/src/codegen/shuffle1_dyn.rs index d87fbdb85..adc856175 100644 --- a/src/codegen/shuffle1_dyn.rs +++ b/src/codegen/shuffle1_dyn.rs @@ -53,7 +53,7 @@ macro_rules! impl_shuffle1_dyn { all(target_aarch = "aarch64", target_feature = "neon"), all(target_aarch = "arm", target_feature = "v7", target_feature = "neon")), - feature = "coresimd") + feature = "core_arch") )] { impl Shuffle1Dyn for u8x8 { type Indices = Self; @@ -104,7 +104,7 @@ macro_rules! impl_shuffle1_dyn { } } } else if #[cfg(all(target_aarch = "aarch64", target_feature = "neon", - feature = "coresimd"))] { + feature = "core_arch"))] { impl Shuffle1Dyn for u8x16 { type Indices = Self; #[inline] @@ -123,7 +123,7 @@ macro_rules! impl_shuffle1_dyn { } } } else if #[cfg(all(target_aarch = "arm", target_feature = "v7", - target_feature = "neon", feature = "coresimd"))] { + target_feature = "neon", feature = "core_arch"))] { impl Shuffle1Dyn for u8x16 { type Indices = Self; #[inline] diff --git a/src/lib.rs b/src/lib.rs index 3842d29c2..60867db6c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -125,10 +125,10 @@ //! resulting vector contains the elements of `a` for those lanes for which the //! mask is `true`, and the elements of `b` otherwise. //! -//! The example constructs a mask with the first two lanes set to `true` and the -//! last two lanes set to `false`. This selects the first two lanes of `a + 1` -//! and the last two lanes of `a`, producing a vector where the first two lanes -//! have been incremented by `1`. +//! The example constructs a mask with the first two lanes set to `true` and +//! the last two lanes set to `false`. This selects the first two lanes of `a + +//! 1` and the last two lanes of `a`, producing a vector where the first two +//! lanes have been incremented by `1`. //! //! > note: mask `select` can be used on vector types that have the same number //! > of lanes as the mask. The example shows this by using [`m16x4`] instead @@ -239,9 +239,9 @@ use cfg_if::cfg_if; cfg_if! { if #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon", - feature = "coresimd"))] { + feature = "core_arch"))] { #[allow(unused_imports)] - use coresimd::arch; + use core_arch as arch; } else { #[allow(unused_imports)] use core::arch; @@ -260,7 +260,6 @@ use core::{ #[macro_use] mod testing; - #[macro_use] mod api; mod codegen; diff --git a/tests/endianness.rs b/tests/endianness.rs index 2afd56173..1e6b4f354 100644 --- a/tests/endianness.rs +++ b/tests/endianness.rs @@ -224,9 +224,6 @@ fn endian_tuple_access() { assert_eq!(x.6, e[6]); assert_eq!(x.7, e[7]); - // Without repr(C) this produces total garbage. - // FIXME: investigate more, this is maybe due to - // to tuple field reordering to minimize padding. #[cfg_attr(rustfmt, rustfmt_skip)] #[repr(C)] #[derive(Copy ,Clone)]