diff --git a/ci/docker/wasm32-wasi/Dockerfile b/ci/docker/wasm32-wasi/Dockerfile index 7017d374de..f8e090f1c8 100644 --- a/ci/docker/wasm32-wasi/Dockerfile +++ b/ci/docker/wasm32-wasi/Dockerfile @@ -1,16 +1,26 @@ +FROM rust:1.50.0 + +# Install wasmtime from source for now while the `experimental_x64` feature is +# not yet the default. (it's not actually that experimental at the time of this +# writing, wasmtime should switch defaults soon and the backend this enables has +# better support for simd instructions) +RUN \ + CARGO_INCREMENTAL=0 \ + CARGO_PROFILE_DEV_DEBUGINFO=0 \ + cargo install wasmtime-cli --features experimental_x64 --debug --vers 0.25.0 --locked + FROM ubuntu:20.04 ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update -y && apt-get install -y --no-install-recommends \ ca-certificates \ - curl \ - xz-utils \ clang -RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/v0.24.0/wasmtime-v0.24.0-x86_64-linux.tar.xz | tar xJf - -ENV PATH=$PATH:/wasmtime-v0.24.0-x86_64-linux +COPY --from=0 /usr/local/cargo/bin/wasmtime /usr/local/bin/wasmtime ENV CARGO_TARGET_WASM32_WASI_RUNNER="wasmtime \ --enable-simd \ + --enable-threads \ + --opt-level 0 \ --mapdir .::/checkout/target/wasm32-wasi/release/deps \ --" diff --git a/ci/run.sh b/ci/run.sh index af78f6a5f8..1766a37ca3 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -87,14 +87,6 @@ case ${TARGET} in export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx" cargo_test "--release" ;; - wasm32*) - # TODO: need to re-enable simd testing for wasm32 - # TODO: should enable atomics testing for wasm32 - # prev="$RUSTFLAGS" - # export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+simd128,+unimplemented-simd128" - # cargo_test "--release" - # export RUSTFLAGS="$prev" - ;; # FIXME: don't build anymore #mips-*gnu* | mipsel-*gnu*) # export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+msa,+fp64,+mips32r5" diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs index d7735c76d9..acf30024b1 100644 --- a/crates/core_arch/src/macros.rs +++ b/crates/core_arch/src/macros.rs @@ -67,7 +67,7 @@ macro_rules! static_assert_imm16 { #[allow(unused)] macro_rules! static_assert { - ($imm:ident : $ty:ty where $e:expr) => { + ($imm:ident : $ty:ty where $e:expr) => {{ struct Validate(); impl Validate<$imm> { const VALID: () = { @@ -75,7 +75,7 @@ macro_rules! static_assert { }; } let _ = Validate::<$imm>::VALID; - }; + }}; } #[allow(unused)] diff --git a/crates/core_arch/src/mod.rs b/crates/core_arch/src/mod.rs index a5b0e30cad..5b25687c3d 100644 --- a/crates/core_arch/src/mod.rs +++ b/crates/core_arch/src/mod.rs @@ -59,11 +59,23 @@ pub mod arch { /// Platform-specific intrinsics for the `wasm32` platform. /// /// This module provides intrinsics specific to the WebAssembly - /// architecture. Here you'll find intrinsics necessary for leveraging - /// WebAssembly proposals such as [atomics] and [simd]. These proposals are - /// evolving over time and as such the support here is unstable and requires - /// the nightly channel. As WebAssembly proposals stabilize these functions - /// will also become stable. + /// architecture. Here you'll find intrinsics specific to WebAssembly that + /// aren't otherwise surfaced somewhere in a cross-platform abstraction of + /// `std`, and you'll also find functions for leveraging WebAssembly + /// proposals such as [atomics] and [simd]. + /// + /// Intrinsics in the `wasm32` module are modeled after the WebAssembly + /// instructions that they represent. 
All functions are named after the + /// instruction they intend to correspond to, and the arguments/results + /// correspond to the type signature of the instruction itself. Stable + /// WebAssembly instructions are [documented online][instrdoc]. + /// + /// [instrdoc]: https://webassembly.github.io/spec/core/valid/instructions.html + /// + /// If a proposal is not yet stable in WebAssembly itself then the functions + /// within this module may be unstable and require the nightly channel of + /// Rust to use. As the proposal itself stabilizes the intrinsics in this + /// module should stabilize as well. /// /// [atomics]: https://github.com/webassembly/threads /// [simd]: https://github.com/webassembly/simd @@ -74,35 +86,35 @@ pub mod arch { /// ## Atomics /// /// The [threads proposal][atomics] for WebAssembly adds a number of - /// instructions for dealing with multithreaded programs. Atomic - /// instructions can all be generated through `std::sync::atomic` types, but - /// some instructions have no equivalent in Rust such as - /// `memory.atomic.notify` so this module will provide these intrinsics. + /// instructions for dealing with multithreaded programs. Most instructions + /// added in the [atomics] proposal are exposed in Rust through the + /// `std::sync::atomic` module. Some instructions, however, don't have + /// direct equivalents in Rust so they're exposed here instead. + /// + /// Note that the instructions added in the [atomics] proposal can work in + /// either a context with a shared wasm memory or without. These intrinsics + /// are always available in the standard library, but you likely won't be + /// able to use them too productively unless you recompile the standard + /// library (and all your code) with `-Ctarget-feature=+atomics`. /// - /// At this time, however, these intrinsics are only available **when the - /// standard library itself is compiled with atomics**. Compiling with - /// atomics is not enabled by default and requires passing - /// `-Ctarget-feature=+atomics` to rustc. The standard library shipped via - /// `rustup` is not compiled with atomics. To get access to these intrinsics - /// you'll need to compile the standard library from source with the - /// requisite compiler flags. + /// It's also worth pointing out that multi-threaded WebAssembly and its + /// story in Rust is still in a somewhat "early days" phase as of the time + /// of this writing. Pieces should mostly work but it generally requires a + /// good deal of manual setup. At this time it's not as simple as "just call + /// `std::thread::spawn`", but it will hopefully get there one day! /// /// ## SIMD /// /// The [simd proposal][simd] for WebAssembly adds a new `v128` type for a /// 128-bit SIMD register. It also adds a large array of instructions to /// operate on the `v128` type to perform data processing. The SIMD proposal - /// has been in progress for quite some time and many instructions have come - /// and gone. This module attempts to keep up with the proposal, but if you - /// notice anything awry please feel free to [open an + /// at the time of this writing is in [phase 4] which means that it's in the + /// standardization phase. It's expected that once some testing on nightly + /// has happened a stabilization proposal will be made for the Rust + /// intrinsics. If you notice anything awry please feel free to [open an /// issue](https://github.com/rust-lang/stdarch/issues/new).
/// - /// It's important to be aware that the current state of development of SIMD - /// in WebAssembly is still somewhat early days. There's lots of pieces to - /// demo and prototype with, but discussions and support are still in - /// progress. There's a number of pitfalls and gotchas in various places, - /// which will attempt to be documented here, but there may be others - /// lurking! + /// [phase 4]: https://github.com/webassembly/proposals /// /// Using SIMD is intended to be similar to as you would on `x86_64`, for /// example. You'd write a function such as: @@ -118,15 +130,17 @@ pub mod arch { /// /// Unlike `x86_64`, however, WebAssembly does not currently have dynamic /// detection at runtime as to whether SIMD is supported (this is one of the - /// motivators for the [conditional sections proposal][condsections], but - /// that is still pretty early days). This means that your binary will - /// either have SIMD and can only run on engines which support SIMD, or it - /// will not have SIMD at all. For compatibility the standard library itself - /// does not use any SIMD internally. Determining how best to ship your - /// WebAssembly binary with SIMD is largely left up to you as it can can be - /// pretty nuanced depending on your situation. + /// motivators for the [conditional sections][condsections] and [feature + /// detection] proposals, but that is still pretty early days). This means + /// that your binary will either have SIMD and can only run on engines + /// which support SIMD, or it will not have SIMD at all. For compatibility + /// the standard library itself does not use any SIMD internally. + /// Determining how best to ship your WebAssembly binary with SIMD is + /// largely left up to you as it can be pretty nuanced depending on + /// your situation. /// /// [condsections]: https://github.com/webassembly/conditional-sections + /// [feature detection]: https://github.com/WebAssembly/feature-detection /// /// To enable SIMD support at compile time you need to do one of two things: /// @@ -138,7 +152,9 @@ /// * Second you can compile your program with `-Ctarget-feature=+simd128`. /// This compilation flag blanket enables SIMD support for your entire /// compilation. Note that this does not include the standard library - /// unless you recompile the standard library. + /// unless you [recompile the standard library][buildstd]. + /// + /// [buildstd]: https://doc.rust-lang.org/nightly/cargo/reference/unstable.html#build-std /// /// If you enable SIMD via either of these routes then you'll have a /// WebAssembly binary that uses SIMD instructions, and you'll need to ship @@ -147,21 +163,6 @@ /// generated in your program. This means to generate a binary without SIMD /// you'll need to avoid both options above plus calling into any intrinsics /// in this module. - /// - /// > **Note**: Due to - /// > [rust-lang/rust#74320](https://github.com/rust-lang/rust/issues/74320) - /// > it's recommended to compile your entire program with SIMD support - /// > (using `RUSTFLAGS`) or otherwise functions may not be inlined - /// > correctly. - /// - /// > **Note**: LLVM's SIMD support is actually split into two features: - /// > `simd128` and `unimplemented-simd128`. Rust code can enable `simd128` - /// > with `#[target_feature]` (and test for it with `#[cfg(target_feature = - /// > "simd128")]`, but it cannot enable `unimplemented-simd128`.
The only - /// > way to enable this feature is to compile with - /// > `-Ctarget-feature=+simd128,+unimplemented-simd128`. This second - /// > feature enables more recent instructions implemented in LLVM which - /// > haven't always had enough time to make their way to runtimes. #[cfg(any(target_arch = "wasm32", doc))] #[doc(cfg(target_arch = "wasm32"))] #[stable(feature = "simd_wasm32", since = "1.33.0")] diff --git a/crates/core_arch/src/wasm32/atomic.rs b/crates/core_arch/src/wasm32/atomic.rs index fa6a31406f..febfa7a4d2 100644 --- a/crates/core_arch/src/wasm32/atomic.rs +++ b/crates/core_arch/src/wasm32/atomic.rs @@ -1,13 +1,3 @@ -//! Intrinsics associated with WebAssembly's upcoming threads proposal. -//! -//! These intrinsics are all unstable because they're not actually stable in -//! WebAssembly itself yet. The signatures may change as [the -//! specification][spec] is updated. -//! -//! [spec]: https://github.com/WebAssembly/threads - -#![cfg(any(target_feature = "atomics", doc))] - #[cfg(test)] use stdarch_test::assert_instr; @@ -41,16 +31,10 @@ extern "C" { /// didn't block /// * 2 - the thread blocked, but the timeout expired. /// -/// # Availability -/// -/// This intrinsic is only available **when the standard library itself is -/// compiled with the `atomics` target feature**. This version of the standard -/// library is not obtainable via `rustup`, but rather will require the -/// standard library to be compiled from source. -/// /// [instr]: https://webassembly.github.io/threads/syntax/instructions.html#syntax-instr-atomic-memory #[inline] -#[cfg_attr(test, assert_instr("i32.atomic.wait"))] +#[cfg_attr(test, assert_instr(memory.atomic.wait32))] +#[target_feature(enable = "atomics")] pub unsafe fn memory_atomic_wait32(ptr: *mut i32, expression: i32, timeout_ns: i64) -> i32 { llvm_atomic_wait_i32(ptr, expression, timeout_ns) } @@ -76,16 +60,10 @@ pub unsafe fn memory_atomic_wait32(ptr: *mut i32, expression: i32, timeout_ns: i /// didn't block /// * 2 - the thread blocked, but the timeout expired. /// -/// # Availability -/// -/// This intrinsic is only available **when the standard library itself is -/// compiled with the `atomics` target feature**. This version of the standard -/// library is not obtainable via `rustup`, but rather will require the -/// standard library to be compiled from source. -/// /// [instr]: https://webassembly.github.io/threads/syntax/instructions.html#syntax-instr-atomic-memory #[inline] -#[cfg_attr(test, assert_instr("i64.atomic.wait"))] +#[cfg_attr(test, assert_instr(memory.atomic.wait64))] +#[target_feature(enable = "atomics")] pub unsafe fn memory_atomic_wait64(ptr: *mut i64, expression: i64, timeout_ns: i64) -> i32 { llvm_atomic_wait_i64(ptr, expression, timeout_ns) } @@ -103,16 +81,10 @@ pub unsafe fn memory_atomic_wait64(ptr: *mut i64, expression: i64, timeout_ns: i /// /// Returns the number of waiters which were actually notified. /// -/// # Availability -/// -/// This intrinsic is only available **when the standard library itself is -/// compiled with the `atomics` target feature**. This version of the standard -/// library is not obtainable via `rustup`, but rather will require the -/// standard library to be compiled from source. 
-/// /// [instr]: https://webassembly.github.io/threads/syntax/instructions.html#syntax-instr-atomic-memory #[inline] -#[cfg_attr(test, assert_instr("atomic.wake"))] +#[cfg_attr(test, assert_instr(memory.atomic.notify))] +#[target_feature(enable = "atomics")] pub unsafe fn memory_atomic_notify(ptr: *mut i32, waiters: u32) -> u32 { llvm_atomic_notify(ptr, waiters as i32) as u32 } diff --git a/crates/core_arch/src/wasm32/memory.rs b/crates/core_arch/src/wasm32/memory.rs index 71a3e6805b..19fbd48d0c 100644 --- a/crates/core_arch/src/wasm32/memory.rs +++ b/crates/core_arch/src/wasm32/memory.rs @@ -3,9 +3,9 @@ use stdarch_test::assert_instr; extern "C" { #[link_name = "llvm.wasm.memory.grow.i32"] - fn llvm_memory_grow(mem: i32, pages: i32) -> i32; + fn llvm_memory_grow(mem: u32, pages: i32) -> i32; #[link_name = "llvm.wasm.memory.size.i32"] - fn llvm_memory_size(mem: i32) -> i32; + fn llvm_memory_size(mem: u32) -> i32; } /// Corresponding intrinsic to wasm's [`memory.size` instruction][instr] @@ -25,13 +25,8 @@ extern "C" { #[rustc_legacy_const_generics(0)] #[stable(feature = "simd_wasm32", since = "1.33.0")] pub fn memory_size<const MEM: u32>() -> usize { - unsafe { - // FIXME: Consider replacing with a static_assert! - if MEM != 0 { - crate::intrinsics::abort(); - } - llvm_memory_size(0) as usize - } + static_assert!(MEM: u32 where MEM == 0); + unsafe { llvm_memory_size(MEM) as usize } } /// Corresponding intrinsic to wasm's [`memory.grow` instruction][instr] @@ -55,10 +50,7 @@ pub fn memory_size<const MEM: u32>() -> usize { #[stable(feature = "simd_wasm32", since = "1.33.0")] pub fn memory_grow<const MEM: u32>(delta: usize) -> usize { unsafe { - // FIXME: Consider replacing with a static_assert! - if MEM != 0 { - crate::intrinsics::abort(); - } - llvm_memory_grow(0, delta as i32) as isize as usize + static_assert!(MEM: u32 where MEM == 0); + llvm_memory_grow(MEM, delta as i32) as isize as usize } } diff --git a/crates/core_arch/src/wasm32/mod.rs b/crates/core_arch/src/wasm32/mod.rs index cead8c36c4..9052772894 100644 --- a/crates/core_arch/src/wasm32/mod.rs +++ b/crates/core_arch/src/wasm32/mod.rs @@ -3,9 +3,7 @@ #[cfg(test)] use stdarch_test::assert_instr; -#[cfg(any(target_feature = "atomics", doc))] mod atomic; -#[cfg(any(target_feature = "atomics", doc))] pub use self::atomic::*; mod simd128; diff --git a/crates/core_arch/src/wasm32/simd128.rs b/crates/core_arch/src/wasm32/simd128.rs index 23d74a299a..6bd80abcb4 100644 --- a/crates/core_arch/src/wasm32/simd128.rs +++ b/crates/core_arch/src/wasm32/simd128.rs @@ -19,71 +19,79 @@ use stdarch_test::assert_instr; types! { /// WASM-specific 128-bit wide SIMD vector type. + /// + /// This type corresponds to the `v128` type in the [WebAssembly SIMD + /// proposal](https://github.com/webassembly/simd). This type is 128-bits + /// large and the meaning of all the bits is defined within the context of + /// how this value is used. + /// + /// This same type is used simultaneously for all 128-bit-wide SIMD types, + /// for example: + /// + /// * sixteen 8-bit integers (both `i8` and `u8`) + /// * eight 16-bit integers (both `i16` and `u16`) + /// * four 32-bit integers (both `i32` and `u32`) + /// * two 64-bit integers (both `i64` and `u64`) + /// * four 32-bit floats (`f32`) + /// * two 64-bit floats (`f64`) + /// + /// The `v128` type in Rust is intended to be quite analogous to the `v128` + /// type in WebAssembly. Operations on `v128` can only be performed with the + /// functions in this module. // N.B., internals here are arbitrary.
pub struct v128(i32, i32, i32, i32); } #[allow(non_camel_case_types)] -#[unstable(feature = "stdsimd_internal", issue = "none")] -pub(crate) trait v128Ext: Sized { +trait v128Ext: Sized { unsafe fn as_v128(self) -> v128; #[inline] - #[target_feature(enable = "simd128")] unsafe fn as_u8x16(self) -> u8x16 { transmute(self.as_v128()) } #[inline] - #[target_feature(enable = "simd128")] unsafe fn as_u16x8(self) -> u16x8 { transmute(self.as_v128()) } #[inline] - #[target_feature(enable = "simd128")] unsafe fn as_u32x4(self) -> u32x4 { transmute(self.as_v128()) } #[inline] - #[target_feature(enable = "simd128")] unsafe fn as_u64x2(self) -> u64x2 { transmute(self.as_v128()) } #[inline] - #[target_feature(enable = "simd128")] unsafe fn as_i8x16(self) -> i8x16 { transmute(self.as_v128()) } #[inline] - #[target_feature(enable = "simd128")] unsafe fn as_i16x8(self) -> i16x8 { transmute(self.as_v128()) } #[inline] - #[target_feature(enable = "simd128")] unsafe fn as_i32x4(self) -> i32x4 { transmute(self.as_v128()) } #[inline] - #[target_feature(enable = "simd128")] unsafe fn as_i64x2(self) -> i64x2 { transmute(self.as_v128()) } #[inline] - #[target_feature(enable = "simd128")] unsafe fn as_f32x4(self) -> f32x4 { transmute(self.as_v128()) } #[inline] - #[target_feature(enable = "simd128")] unsafe fn as_f64x2(self) -> f64x2 { transmute(self.as_v128()) } @@ -91,7 +99,6 @@ pub(crate) trait v128Ext: Sized { impl v128Ext for v128 { #[inline] - #[target_feature(enable = "simd128")] unsafe fn as_v128(self) -> Self { self } @@ -122,8 +129,6 @@ extern "C" { #[link_name = "llvm.wasm.swizzle"] fn llvm_swizzle(a: i8x16, b: i8x16) -> i8x16; - #[link_name = "llvm.wasm.eq"] - fn llvm_eq(a: i64x2, b: i64x2) -> i64x2; #[link_name = "llvm.wasm.bitselect.v16i8"] fn llvm_bitselect(a: i8x16, b: i8x16, c: i8x16) -> i8x16; @@ -144,9 +149,9 @@ extern "C" { fn llvm_i8x16_add_sat_s(a: i8x16, b: i8x16) -> i8x16; #[link_name = "llvm.uadd.sat.v16i8"] fn llvm_i8x16_add_sat_u(a: i8x16, b: i8x16) -> i8x16; - #[link_name = "llvm.wasm.sub.saturate.signed.v16i8"] + #[link_name = "llvm.wasm.sub.sat.signed.v16i8"] fn llvm_i8x16_sub_sat_s(a: i8x16, b: i8x16) -> i8x16; - #[link_name = "llvm.wasm.sub.saturate.unsigned.v16i8"] + #[link_name = "llvm.wasm.sub.sat.unsigned.v16i8"] fn llvm_i8x16_sub_sat_u(a: i8x16, b: i8x16) -> i8x16; #[link_name = "llvm.wasm.avgr.unsigned.v16i8"] fn llvm_avgr_u_i8x16(a: i8x16, b: i8x16) -> i8x16; @@ -155,23 +160,23 @@ extern "C" { fn llvm_i16x8_extadd_pairwise_i8x16_s(x: i8x16) -> i16x8; #[link_name = "llvm.wasm.extadd.pairwise.unsigned.v8i16"] fn llvm_i16x8_extadd_pairwise_i8x16_u(x: i8x16) -> i16x8; - #[link_name = "llvm.wasm.q15mulr.saturate.signed"] + #[link_name = "llvm.wasm.q15mulr.sat.signed"] fn llvm_q15mulr(a: i16x8, b: i16x8) -> i16x8; #[link_name = "llvm.wasm.alltrue.v8i16"] fn llvm_i16x8_all_true(x: i16x8) -> i32; #[link_name = "llvm.wasm.bitmask.v8i16"] fn llvm_bitmask_i16x8(a: i16x8) -> i32; - #[link_name = "llvm.wasm.narrow.signed.v8i16.v8i16"] + #[link_name = "llvm.wasm.narrow.signed.v8i16.v4i32"] fn llvm_narrow_i16x8_s(a: i32x4, b: i32x4) -> i16x8; - #[link_name = "llvm.wasm.narrow.unsigned.v8i16.v8i16"] + #[link_name = "llvm.wasm.narrow.unsigned.v8i16.v4i32"] fn llvm_narrow_i16x8_u(a: i32x4, b: i32x4) -> i16x8; #[link_name = "llvm.sadd.sat.v8i16"] fn llvm_i16x8_add_sat_s(a: i16x8, b: i16x8) -> i16x8; #[link_name = "llvm.uadd.sat.v8i16"] fn llvm_i16x8_add_sat_u(a: i16x8, b: i16x8) -> i16x8; - #[link_name = "llvm.wasm.sub.saturate.signed.v8i16"] + #[link_name = "llvm.wasm.sub.sat.signed.v8i16"] fn 
llvm_i16x8_sub_sat_s(a: i16x8, b: i16x8) -> i16x8; - #[link_name = "llvm.wasm.sub.saturate.unsigned.v8i16"] + #[link_name = "llvm.wasm.sub.sat.unsigned.v8i16"] fn llvm_i16x8_sub_sat_u(a: i16x8, b: i16x8) -> i16x8; #[link_name = "llvm.wasm.avgr.unsigned.v8i16"] fn llvm_avgr_u_i16x8(a: i16x8, b: i16x8) -> i16x8; @@ -207,6 +212,14 @@ extern "C" { fn llvm_i64x2_all_true(x: i64x2) -> i32; #[link_name = "llvm.wasm.bitmask.v2i64"] fn llvm_bitmask_i64x2(a: i64x2) -> i32; + #[link_name = "llvm.wasm.extend.low.signed"] + fn llvm_i64x2_extend_low_i32x4_s(a: i32x4) -> i64x2; + #[link_name = "llvm.wasm.extend.high.signed"] + fn llvm_i64x2_extend_high_i32x4_s(a: i32x4) -> i64x2; + #[link_name = "llvm.wasm.extend.low.unsigned"] + fn llvm_i64x2_extend_low_i32x4_u(a: i32x4) -> i64x2; + #[link_name = "llvm.wasm.extend.high.unsigned"] + fn llvm_i64x2_extend_high_i32x4_u(a: i32x4) -> i64x2; #[link_name = "llvm.wasm.extmul.low.signed.v2i64"] fn llvm_i64x2_extmul_low_i32x4_s(a: i32x4, b: i32x4) -> i64x2; #[link_name = "llvm.wasm.extmul.high.signed.v2i64"] @@ -262,9 +275,9 @@ extern "C" { fn llvm_f64x2_convert_low_i32x4_s(x: i32x4) -> f64x2; #[link_name = "llvm.wasm.convert.low.unsigned"] fn llvm_f64x2_convert_low_i32x4_u(x: i32x4) -> f64x2; - #[link_name = "llvm.wasm.trunc.saturate.zero.signed"] + #[link_name = "llvm.wasm.trunc.sat.zero.signed"] fn llvm_i32x4_trunc_sat_f64x2_s_zero(x: f64x2) -> i32x4; - #[link_name = "llvm.wasm.trunc.saturate.zero.unsigned"] + #[link_name = "llvm.wasm.trunc.sat.zero.unsigned"] fn llvm_i32x4_trunc_sat_f64x2_u_zero(x: f64x2) -> i32x4; #[link_name = "llvm.wasm.demote.zero"] fn llvm_f32x4_demote_f64x2_zero(x: f64x2) -> f32x4; @@ -363,7 +376,7 @@ pub unsafe fn v128_load64_splat(m: *const u64) -> v128 { /// Load a 32-bit element into the low bits of the vector and sets all other /// bits to zero. #[inline] -// #[cfg_attr(test, assert_instr(v128.load32_zero))] // FIXME +#[cfg_attr(test, assert_instr(v128.load32_zero))] #[target_feature(enable = "simd128")] pub unsafe fn v128_load32_zero(m: *const u32) -> v128 { transmute(llvm_load32_zero(m)) @@ -372,7 +385,7 @@ pub unsafe fn v128_load32_zero(m: *const u32) -> v128 { /// Load a 64-bit element into the low bits of the vector and sets all other /// bits to zero. #[inline] -// #[cfg_attr(test, assert_instr(v128.load64_zero))] // FIXME +#[cfg_attr(test, assert_instr(v128.load64_zero))] #[target_feature(enable = "simd128")] pub unsafe fn v128_load64_zero(m: *const u64) -> v128 { transmute(llvm_load64_zero(m)) @@ -388,144 +401,104 @@ pub unsafe fn v128_store(m: *mut v128, a: v128) { /// Loads an 8-bit value from `m` and sets lane `L` of `v` to that value. #[inline] +#[cfg_attr(test, assert_instr(v128.load8_lane, L = 0))] #[target_feature(enable = "simd128")] pub unsafe fn v128_load8_lane(v: v128, m: *const u8) -> v128 { + static_assert!(L: usize where L < 16); transmute(llvm_load8_lane(m, v.as_u8x16(), L)) } -// #[cfg(test)] -// #[assert_instr(v128.load8_lane)] -// #[target_feature(enable = "simd128")] -// unsafe fn v128_load8_lane_test(v: v128, m: *const u8) -> v128 { -// v128_load8_lane::<0>(v, m) -// } - /// Loads a 16-bit value from `m` and sets lane `L` of `v` to that value. 
#[inline] +#[cfg_attr(test, assert_instr(v128.load16_lane, L = 0))] #[target_feature(enable = "simd128")] pub unsafe fn v128_load16_lane(v: v128, m: *const u16) -> v128 { + static_assert!(L: usize where L < 8); transmute(llvm_load16_lane(m, v.as_u16x8(), L)) } -// #[cfg(test)] -// #[assert_instr(v128.load16_lane)] -// #[target_feature(enable = "simd128")] -// unsafe fn v128_load16_lane_test(v: v128, m: *const u16) -> v128 { -// v128_load16_lane::<0>(v, m) -// } - /// Loads a 32-bit value from `m` and sets lane `L` of `v` to that value. #[inline] +#[cfg_attr(test, assert_instr(v128.load32_lane, L = 0))] #[target_feature(enable = "simd128")] pub unsafe fn v128_load32_lane(v: v128, m: *const u32) -> v128 { + static_assert!(L: usize where L < 4); transmute(llvm_load32_lane(m, v.as_u32x4(), L)) } -// #[cfg(test)] -// #[assert_instr(v128.load32_lane)] -// #[target_feature(enable = "simd128")] -// unsafe fn v128_load32_lane_test(v: v128, m: *const u32) -> v128 { -// v128_load32_lane::<0>(v, m) -// } - /// Loads a 64-bit value from `m` and sets lane `L` of `v` to that value. #[inline] +#[cfg_attr(test, assert_instr(v128.load64_lane, L = 0))] #[target_feature(enable = "simd128")] pub unsafe fn v128_load64_lane(v: v128, m: *const u64) -> v128 { + static_assert!(L: usize where L < 2); transmute(llvm_load64_lane(m, v.as_u64x2(), L)) } -// #[cfg(test)] -// #[assert_instr(v128.load64_lane)] -// #[target_feature(enable = "simd128")] -// unsafe fn v128_load64_lane_test(v: v128, m: *const u64) -> v128 { -// v128_load64_lane::<0>(v, m) -// } - /// Stores the 8-bit value from lane `L` of `v` into `m` #[inline] +#[cfg_attr(test, assert_instr(v128.store8_lane, L = 0))] #[target_feature(enable = "simd128")] pub unsafe fn v128_store8_lane(v: v128, m: *mut u8) { + static_assert!(L: usize where L < 16); llvm_store8_lane(m, v.as_u8x16(), L); } -// #[cfg(test)] -// #[assert_instr(v128.store8_lane)] -// #[target_feature(enable = "simd128")] -// unsafe fn v128_store8_lane_test(v: v128, m: *mut u8) { -// v128_store8_lane::<0>(v, m) -// } - /// Stores the 16-bit value from lane `L` of `v` into `m` #[inline] +#[cfg_attr(test, assert_instr(v128.store16_lane, L = 0))] #[target_feature(enable = "simd128")] pub unsafe fn v128_store16_lane(v: v128, m: *mut u16) { + static_assert!(L: usize where L < 8); llvm_store16_lane(m, v.as_u16x8(), L) } -//#[cfg(test)] -//#[assert_instr(v128.store16_lane)] -//#[target_feature(enable = "simd128")] -//unsafe fn v128_store16_lane_test(v: v128, m: *mut u16) { -// v128_store16_lane::<0>(v, m) -//} - /// Stores the 32-bit value from lane `L` of `v` into `m` #[inline] +#[cfg_attr(test, assert_instr(v128.store32_lane, L = 0))] #[target_feature(enable = "simd128")] pub unsafe fn v128_store32_lane(v: v128, m: *mut u32) { + static_assert!(L: usize where L < 4); llvm_store32_lane(m, v.as_u32x4(), L) } -// #[cfg(test)] -// #[assert_instr(v128.store32_lane)] -// #[target_feature(enable = "simd128")] -// unsafe fn v128_store32_lane_test(v: v128, m: *mut u32) { -// v128_store32_lane::<0>(v, m) -// } - /// Stores the 64-bit value from lane `L` of `v` into `m` #[inline] +#[cfg_attr(test, assert_instr(v128.store64_lane, L = 0))] #[target_feature(enable = "simd128")] pub unsafe fn v128_store64_lane(v: v128, m: *mut u64) { + static_assert!(L: usize where L < 2); llvm_store64_lane(m, v.as_u64x2(), L) } -// #[cfg(test)] -// #[assert_instr(v128.store64_lane)] -// #[target_feature(enable = "simd128")] -// unsafe fn v128_store64_lane_test(v: v128, m: *mut u64) { -// v128_store64_lane::<0>(v, m) -// } - /// 
Materializes a constant SIMD value from the immediate operands. /// /// This function generates a `v128.const` instruction as if the generated /// vector was interpreted as sixteen 8-bit integers. #[inline] #[target_feature(enable = "simd128")] -// #[cfg_attr( -// test, -// assert_instr( -// v128.const, -// a0 = 0, -// a1 = 1, -// a2 = 2, -// a3 = 3, -// a4 = 4, -// a5 = 5, -// a6 = 6, -// a7 = 7, -// a8 = 8, -// a9 = 9, -// a10 = 10, -// a11 = 11, -// a12 = 12, -// a13 = 13, -// a14 = 14, -// a15 = 15, -// ) -// )] +#[cfg_attr( + test, + assert_instr( + v128.const, + a0 = 0, + a1 = 1, + a2 = 2, + a3 = 3, + a4 = 4, + a5 = 5, + a6 = 6, + a7 = 7, + a8 = 8, + a9 = 9, + a10 = 10, + a11 = 11, + a12 = 12, + a13 = 13, + a14 = 14, + a15 = 15, + ) +)] pub const unsafe fn v128_const( a0: i8, a1: i8, @@ -555,20 +528,20 @@ pub const unsafe fn v128_const( /// vector was interpreted as eight 16-bit integers. #[inline] #[target_feature(enable = "simd128")] -// #[cfg_attr( -// test, -// assert_instr( -// v128.const, -// a0 = 0, -// a1 = 1, -// a2 = 2, -// a3 = 3, -// a4 = 4, -// a5 = 5, -// a6 = 6, -// a7 = 7, -// ) -// )] +#[cfg_attr( + test, + assert_instr( + v128.const, + a0 = 0, + a1 = 1, + a2 = 2, + a3 = 3, + a4 = 4, + a5 = 5, + a6 = 6, + a7 = 7, + ) +)] pub const unsafe fn i16x8_const( a0: i16, a1: i16, @@ -588,7 +561,7 @@ pub const unsafe fn i16x8_const( /// vector was interpreted as four 32-bit integers. #[inline] #[target_feature(enable = "simd128")] -// #[cfg_attr(test, assert_instr(v128.const, a0 = 0, a1 = 1, a2 = 2, a3 = 3))] +#[cfg_attr(test, assert_instr(v128.const, a0 = 0, a1 = 1, a2 = 2, a3 = 3))] pub const unsafe fn i32x4_const(a0: i32, a1: i32, a2: i32, a3: i32) -> v128 { transmute(i32x4(a0, a1, a2, a3)) } @@ -599,7 +572,7 @@ pub const unsafe fn i32x4_const(a0: i32, a1: i32, a2: i32, a3: i32) -> v128 { /// vector was interpreted as two 64-bit integers. #[inline] #[target_feature(enable = "simd128")] -// #[cfg_attr(test, assert_instr(v128.const, a0 = 0, a1 = 1))] +#[cfg_attr(test, assert_instr(v128.const, a0 = 0, a1 = 1))] pub const unsafe fn i64x2_const(a0: i64, a1: i64) -> v128 { transmute(i64x2(a0, a1)) } @@ -610,7 +583,7 @@ pub const unsafe fn i64x2_const(a0: i64, a1: i64) -> v128 { /// vector was interpreted as four 32-bit floats. #[inline] #[target_feature(enable = "simd128")] -// #[cfg_attr(test, assert_instr(v128.const, a0 = 0.0, a1 = 1.0, a2 = 2.0, a3 = 3.0))] +#[cfg_attr(test, assert_instr(v128.const, a0 = 0.0, a1 = 1.0, a2 = 2.0, a3 = 3.0))] pub const unsafe fn f32x4_const(a0: f32, a1: f32, a2: f32, a3: f32) -> v128 { transmute(f32x4(a0, a1, a2, a3)) } @@ -621,7 +594,7 @@ pub const unsafe fn f32x4_const(a0: f32, a1: f32, a2: f32, a3: f32) -> v128 { /// vector was interpreted as two 64-bit floats. #[inline] #[target_feature(enable = "simd128")] -// #[cfg_attr(test, assert_instr(v128.const, a0 = 0.0, a1 = 1.0))] +#[cfg_attr(test, assert_instr(v128.const, a0 = 0.0, a1 = 1.0))] pub const unsafe fn f64x2_const(a0: f64, a1: f64) -> v128 { transmute(f64x2(a0, a1)) } @@ -641,6 +614,27 @@ pub const unsafe fn f64x2_const(a0: f64, a1: f64) -> v128 { /// /// All indexes `$i*` must have the type `u32`. 
#[inline] +#[cfg_attr(test, + assert_instr( + i8x16.shuffle, + I0 = 0, + I1 = 2, + I2 = 4, + I3 = 6, + I4 = 8, + I5 = 10, + I6 = 12, + I7 = 14, + I8 = 16, + I9 = 18, + I10 = 20, + I11 = 22, + I12 = 24, + I13 = 26, + I14 = 28, + I15 = 30, + ) +)] #[target_feature(enable = "simd128")] pub unsafe fn i8x16_shuffle< const I0: usize, @@ -663,6 +657,22 @@ pub unsafe fn i8x16_shuffle< a: v128, b: v128, ) -> v128 { + static_assert!(I0: usize where I0 < 32); + static_assert!(I1: usize where I1 < 32); + static_assert!(I2: usize where I2 < 32); + static_assert!(I3: usize where I3 < 32); + static_assert!(I4: usize where I4 < 32); + static_assert!(I5: usize where I5 < 32); + static_assert!(I6: usize where I6 < 32); + static_assert!(I7: usize where I7 < 32); + static_assert!(I8: usize where I8 < 32); + static_assert!(I9: usize where I9 < 32); + static_assert!(I10: usize where I10 < 32); + static_assert!(I11: usize where I11 < 32); + static_assert!(I12: usize where I12 < 32); + static_assert!(I13: usize where I13 < 32); + static_assert!(I14: usize where I14 < 32); + static_assert!(I15: usize where I15 < 32); let shuf = simd_shuffle16::( a.as_u8x16(), b.as_u8x16(), @@ -675,13 +685,6 @@ pub unsafe fn i8x16_shuffle< transmute(shuf) } -#[cfg(test)] -#[assert_instr(i8x16.shuffle)] -#[target_feature(enable = "simd128")] -unsafe fn i8x16_shuffle_test(a: v128, b: v128) -> v128 { - i8x16_shuffle::<0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30>(a, b) -} - /// Same as [`i8x16_shuffle`], except operates as if the inputs were eight /// 16-bit integers, only taking 8 indices to shuffle. /// @@ -690,6 +693,19 @@ unsafe fn i8x16_shuffle_test(a: v128, b: v128) -> v128 { /// is no native `i16x8.shuffle` instruction (there is no need for one since /// `i8x16.shuffle` suffices). #[inline] +#[cfg_attr(test, + assert_instr( + i8x16.shuffle, + I0 = 0, + I1 = 2, + I2 = 4, + I3 = 6, + I4 = 8, + I5 = 10, + I6 = 12, + I7 = 14, + ) +)] #[target_feature(enable = "simd128")] pub unsafe fn i16x8_shuffle< const I0: usize, @@ -704,6 +720,14 @@ pub unsafe fn i16x8_shuffle< a: v128, b: v128, ) -> v128 { + static_assert!(I0: usize where I0 < 16); + static_assert!(I1: usize where I1 < 16); + static_assert!(I2: usize where I2 < 16); + static_assert!(I3: usize where I3 < 16); + static_assert!(I4: usize where I4 < 16); + static_assert!(I5: usize where I5 < 16); + static_assert!(I6: usize where I6 < 16); + static_assert!(I7: usize where I7 < 16); let shuf = simd_shuffle8::( a.as_u16x8(), b.as_u16x8(), @@ -714,13 +738,6 @@ pub unsafe fn i16x8_shuffle< transmute(shuf) } -#[cfg(test)] -#[assert_instr(i8x16.shuffle)] -#[target_feature(enable = "simd128")] -unsafe fn i16x8_shuffle_test(a: v128, b: v128) -> v128 { - i16x8_shuffle::<0, 2, 4, 6, 8, 10, 12, 14>(a, b) -} - /// Same as [`i8x16_shuffle`], except operates as if the inputs were four /// 32-bit integers, only taking 4 indices to shuffle. /// @@ -729,11 +746,16 @@ unsafe fn i16x8_shuffle_test(a: v128, b: v128) -> v128 { /// is no native `i32x4.shuffle` instruction (there is no need for one since /// `i8x16.shuffle` suffices). 
#[inline] +#[cfg_attr(test, assert_instr(i8x16.shuffle, I0 = 0, I1 = 2, I2 = 4, I3 = 6))] #[target_feature(enable = "simd128")] pub unsafe fn i32x4_shuffle( a: v128, b: v128, ) -> v128 { + static_assert!(I0: usize where I0 < 8); + static_assert!(I1: usize where I1 < 8); + static_assert!(I2: usize where I2 < 8); + static_assert!(I3: usize where I3 < 8); let shuf = simd_shuffle4::( a.as_u32x4(), b.as_u32x4(), @@ -742,13 +764,6 @@ pub unsafe fn i32x4_shuffle v128 { - i32x4_shuffle::<0, 2, 4, 6>(a, b) -} - /// Same as [`i8x16_shuffle`], except operates as if the inputs were two /// 64-bit integers, only taking 2 indices to shuffle. /// @@ -757,237 +772,159 @@ unsafe fn i32x4_shuffle_test(a: v128, b: v128) -> v128 { /// is no native `i64x2.shuffle` instruction (there is no need for one since /// `i8x16.shuffle` suffices). #[inline] +#[cfg_attr(test, assert_instr(i8x16.shuffle, I0 = 0, I1 = 2))] #[target_feature(enable = "simd128")] pub unsafe fn i64x2_shuffle(a: v128, b: v128) -> v128 { + static_assert!(I0: usize where I0 < 4); + static_assert!(I1: usize where I1 < 4); let shuf = simd_shuffle2::(a.as_u64x2(), b.as_u64x2(), [I0 as u32, I1 as u32]); transmute(shuf) } -#[cfg(test)] -#[assert_instr(i8x16.shuffle)] -#[target_feature(enable = "simd128")] -unsafe fn i64x2_shuffle_test(a: v128, b: v128) -> v128 { - i64x2_shuffle::<0, 2>(a, b) -} - /// Extracts a lane from a 128-bit vector interpreted as 16 packed i8 numbers. /// /// Extracts the scalar value of lane specified in the immediate mode operand /// `N` from `a`. If `N` is out of bounds then it is a compile time error. #[inline] +#[cfg_attr(test, assert_instr(i8x16.extract_lane_s, N = 3))] #[target_feature(enable = "simd128")] pub unsafe fn i8x16_extract_lane(a: v128) -> i8 { + static_assert!(N: usize where N < 16); simd_extract(a.as_i8x16(), N as u32) } -#[cfg(test)] -#[assert_instr(i8x16.extract_lane_s)] -#[target_feature(enable = "simd128")] -unsafe fn i8x16_extract_lane_s(a: v128) -> i32 { - i8x16_extract_lane::<0>(a) as i32 -} - -#[cfg(test)] -#[assert_instr(i8x16.extract_lane_u)] -#[target_feature(enable = "simd128")] -unsafe fn i8x16_extract_lane_u(a: v128) -> u32 { - i8x16_extract_lane::<0>(a) as u8 as u32 -} - /// Replaces a lane from a 128-bit vector interpreted as 16 packed i8 numbers. /// /// Replaces the scalar value of lane specified in the immediate mode operand /// `N` from `a`. If `N` is out of bounds then it is a compile time error. #[inline] +#[cfg_attr(test, assert_instr(i8x16.replace_lane, N = 2))] #[target_feature(enable = "simd128")] pub unsafe fn i8x16_replace_lane(a: v128, val: i8) -> v128 { + static_assert!(N: usize where N < 16); transmute(simd_insert(a.as_i8x16(), N as u32, val)) } -#[cfg(test)] -#[assert_instr(i8x16.replace_lane)] -#[target_feature(enable = "simd128")] -unsafe fn i8x16_replace_lane_test(a: v128, val: i8) -> v128 { - i8x16_replace_lane::<0>(a, val) -} - /// Extracts a lane from a 128-bit vector interpreted as 8 packed i16 numbers. /// /// Extracts a the scalar value of lane specified in the immediate mode operand /// `N` from `a`. If `N` is out of bounds then it is a compile time error. 
#[inline] +#[cfg_attr(test, assert_instr(i16x8.extract_lane_s, N = 2))] #[target_feature(enable = "simd128")] pub unsafe fn i16x8_extract_lane(a: v128) -> i16 { + static_assert!(N: usize where N < 8); simd_extract(a.as_i16x8(), N as u32) } -#[cfg(test)] -#[assert_instr(i16x8.extract_lane_s)] -#[target_feature(enable = "simd128")] -unsafe fn i16x8_extract_lane_s(a: v128) -> i32 { - i16x8_extract_lane::<0>(a) as i32 -} - -#[cfg(test)] -#[assert_instr(i16x8.extract_lane_u)] -#[target_feature(enable = "simd128")] -unsafe fn i16x8_extract_lane_u(a: v128) -> u32 { - i16x8_extract_lane::<0>(a) as u16 as u32 -} - /// Replaces a lane from a 128-bit vector interpreted as 8 packed i16 numbers. /// /// Replaces the scalar value of lane specified in the immediate mode operand /// `N` from `a`. If `N` is out of bounds then it is a compile time error. #[inline] +#[cfg_attr(test, assert_instr(i16x8.replace_lane, N = 2))] #[target_feature(enable = "simd128")] pub unsafe fn i16x8_replace_lane(a: v128, val: i16) -> v128 { + static_assert!(N: usize where N < 8); transmute(simd_insert(a.as_i16x8(), N as u32, val)) } -#[cfg(test)] -#[assert_instr(i16x8.replace_lane)] -#[target_feature(enable = "simd128")] -unsafe fn i16x8_replace_lane_test(a: v128, val: i16) -> v128 { - i16x8_replace_lane::<0>(a, val) -} - /// Extracts a lane from a 128-bit vector interpreted as 4 packed i32 numbers. /// /// Extracts the scalar value of lane specified in the immediate mode operand /// `N` from `a`. If `N` is out of bounds then it is a compile time error. #[inline] +#[cfg_attr(test, assert_instr(i32x4.extract_lane, N = 2))] #[target_feature(enable = "simd128")] pub unsafe fn i32x4_extract_lane(a: v128) -> i32 { + static_assert!(N: usize where N < 4); simd_extract(a.as_i32x4(), N as u32) } -#[cfg(test)] -#[assert_instr(i32x4.extract_lane)] -#[target_feature(enable = "simd128")] -unsafe fn i32x4_extract_lane_test(a: v128) -> i32 { - i32x4_extract_lane::<0>(a) -} - /// Replaces a lane from a 128-bit vector interpreted as 4 packed i32 numbers. /// /// Replaces the scalar value of lane specified in the immediate mode operand /// `N` from `a`. If `N` is out of bounds then it is a compile time error. #[inline] +#[cfg_attr(test, assert_instr(i32x4.replace_lane, N = 2))] #[target_feature(enable = "simd128")] pub unsafe fn i32x4_replace_lane(a: v128, val: i32) -> v128 { + static_assert!(N: usize where N < 4); transmute(simd_insert(a.as_i32x4(), N as u32, val)) } -#[cfg(test)] -#[assert_instr(i32x4.replace_lane)] -#[target_feature(enable = "simd128")] -unsafe fn i32x4_replace_lane_test(a: v128, val: i32) -> v128 { - i32x4_replace_lane::<0>(a, val) -} - /// Extracts a lane from a 128-bit vector interpreted as 2 packed i64 numbers. /// /// Extracts the scalar value of lane specified in the immediate mode operand /// `N` from `a`. If `N` is out of bounds then it is a compile time error. #[inline] +#[cfg_attr(test, assert_instr(i64x2.extract_lane, N = 1))] #[target_feature(enable = "simd128")] pub unsafe fn i64x2_extract_lane(a: v128) -> i64 { + static_assert!(N: usize where N < 2); simd_extract(a.as_i64x2(), N as u32) } -#[cfg(test)] -#[assert_instr(i64x2.extract_lane)] -#[target_feature(enable = "simd128")] -unsafe fn i64x2_extract_lane_test(a: v128) -> i64 { - i64x2_extract_lane::<0>(a) -} - /// Replaces a lane from a 128-bit vector interpreted as 2 packed i64 numbers. /// /// Replaces the scalar value of lane specified in the immediate mode operand /// `N` from `a`. If `N` is out of bounds then it is a compile time error. 
#[inline] +#[cfg_attr(test, assert_instr(i64x2.replace_lane, N = 0))] #[target_feature(enable = "simd128")] pub unsafe fn i64x2_replace_lane(a: v128, val: i64) -> v128 { + static_assert!(N: usize where N < 2); transmute(simd_insert(a.as_i64x2(), N as u32, val)) } -#[cfg(test)] -#[assert_instr(i64x2.replace_lane)] -#[target_feature(enable = "simd128")] -unsafe fn i64x2_replace_lane_test(a: v128, val: i64) -> v128 { - i64x2_replace_lane::<0>(a, val) -} - /// Extracts a lane from a 128-bit vector interpreted as 4 packed f32 numbers. /// /// Extracts the scalar value of lane specified fn the immediate mode operand /// `N` from `a`. If `N` is out of bounds then it is a compile time error. #[inline] +#[cfg_attr(test, assert_instr(f32x4.extract_lane, N = 1))] #[target_feature(enable = "simd128")] pub unsafe fn f32x4_extract_lane(a: v128) -> f32 { + static_assert!(N: usize where N < 4); simd_extract(a.as_f32x4(), N as u32) } -#[cfg(test)] -#[assert_instr(f32x4.extract_lane)] -#[target_feature(enable = "simd128")] -unsafe fn f32x4_extract_lane_test(a: v128) -> f32 { - f32x4_extract_lane::<0>(a) -} - /// Replaces a lane from a 128-bit vector interpreted as 4 packed f32 numbers. /// /// Replaces the scalar value of lane specified fn the immediate mode operand /// `N` from `a`. If `N` is out of bounds then it is a compile time error. #[inline] +#[cfg_attr(test, assert_instr(f32x4.replace_lane, N = 1))] #[target_feature(enable = "simd128")] pub unsafe fn f32x4_replace_lane(a: v128, val: f32) -> v128 { + static_assert!(N: usize where N < 4); transmute(simd_insert(a.as_f32x4(), N as u32, val)) } -#[cfg(test)] -#[assert_instr(f32x4.replace_lane)] -#[target_feature(enable = "simd128")] -unsafe fn f32x4_replace_lane_test(a: v128, val: f32) -> v128 { - f32x4_replace_lane::<0>(a, val) -} - /// Extracts a lane from a 128-bit vector interpreted as 2 packed f64 numbers. /// /// Extracts the scalar value of lane specified fn the immediate mode operand /// `N` from `a`. If `N` fs out of bounds then it is a compile time error. #[inline] +#[cfg_attr(test, assert_instr(f64x2.extract_lane, N = 1))] #[target_feature(enable = "simd128")] pub unsafe fn f64x2_extract_lane(a: v128) -> f64 { + static_assert!(N: usize where N < 2); simd_extract(a.as_f64x2(), N as u32) } -#[cfg(test)] -#[assert_instr(f64x2.extract_lane)] -#[target_feature(enable = "simd128")] -unsafe fn f64x2_extract_lane_test(a: v128) -> f64 { - f64x2_extract_lane::<0>(a) -} - /// Replaces a lane from a 128-bit vector interpreted as 2 packed f64 numbers. /// /// Replaces the scalar value of lane specified in the immediate mode operand /// `N` from `a`. If `N` is out of bounds then it is a compile time error. #[inline] +#[cfg_attr(test, assert_instr(f64x2.replace_lane, N = 1))] #[target_feature(enable = "simd128")] pub unsafe fn f64x2_replace_lane(a: v128, val: f64) -> v128 { + static_assert!(N: usize where N < 2); transmute(simd_insert(a.as_f64x2(), N as u32, val)) } -#[cfg(test)] -#[assert_instr(f64x2.replace_lane)] -#[target_feature(enable = "simd128")] -unsafe fn f64x2_replace_lane_test(a: v128, val: f64) -> v128 { - f64x2_replace_lane::<0>(a, val) -} - /// Returns a new vector with lanes selected from the lanes of the first input /// vector `a` specified in the second input vector `s`. /// @@ -1426,10 +1363,10 @@ pub unsafe fn i32x4_ge_u(a: v128, b: v128) -> v128 { /// Returns a new vector where each lane is all ones if the pairwise elements /// were equal, or all zeros if the elements were not equal. 
#[inline] -// #[cfg_attr(test, assert_instr(i64x2.eq))] // FIXME +// #[cfg_attr(test, assert_instr(i64x2.eq))] // FIXME llvm #[target_feature(enable = "simd128")] pub unsafe fn i64x2_eq(a: v128, b: v128) -> v128 { - transmute(llvm_eq(a.as_i64x2(), b.as_i64x2())) + transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) } /// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit @@ -1438,7 +1375,7 @@ pub unsafe fn i64x2_eq(a: v128, b: v128) -> v128 { /// Returns a new vector where each lane is all ones if the pairwise elements /// were not equal, or all zeros if the elements were equal. #[inline] -// #[cfg_attr(test, assert_instr(i64x2.ne))] // FIXME +// #[cfg_attr(test, assert_instr(i64x2.ne))] // FIXME llvm #[target_feature(enable = "simd128")] pub unsafe fn i64x2_ne(a: v128, b: v128) -> v128 { transmute(simd_ne::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) @@ -1450,7 +1387,7 @@ pub unsafe fn i64x2_ne(a: v128, b: v128) -> v128 { /// Returns a new vector where each lane is all ones if the pairwise left /// element is less than the pairwise right element, or all zeros otherwise. #[inline] -// #[cfg_attr(test, assert_instr(i64x2.lt_s))] // FIXME +// #[cfg_attr(test, assert_instr(i64x2.lt_s))] // FIXME llvm #[target_feature(enable = "simd128")] pub unsafe fn i64x2_lt_s(a: v128, b: v128) -> v128 { transmute(simd_lt::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) @@ -1462,7 +1399,7 @@ pub unsafe fn i64x2_lt_s(a: v128, b: v128) -> v128 { /// Returns a new vector where each lane is all ones if the pairwise left /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] -// #[cfg_attr(test, assert_instr(i64x2.gt_s))] // FIXME +// #[cfg_attr(test, assert_instr(i64x2.gt_s))] // FIXME llvm #[target_feature(enable = "simd128")] pub unsafe fn i64x2_gt_s(a: v128, b: v128) -> v128 { transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) @@ -1474,7 +1411,7 @@ pub unsafe fn i64x2_gt_s(a: v128, b: v128) -> v128 { /// Returns a new vector where each lane is all ones if the pairwise left /// element is less than the pairwise right element, or all zeros otherwise. #[inline] -// #[cfg_attr(test, assert_instr(i64x2.le_s))] // FIXME +// #[cfg_attr(test, assert_instr(i64x2.le_s))] // FIXME llvm #[target_feature(enable = "simd128")] pub unsafe fn i64x2_le_s(a: v128, b: v128) -> v128 { transmute(simd_le::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) @@ -1486,7 +1423,7 @@ pub unsafe fn i64x2_le_s(a: v128, b: v128) -> v128 { /// Returns a new vector where each lane is all ones if the pairwise left /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] -// #[cfg_attr(test, assert_instr(i64x2.ge_s))] // FIXME +// #[cfg_attr(test, assert_instr(i64x2.ge_s))] // FIXME llvm #[target_feature(enable = "simd128")] pub unsafe fn i64x2_ge_s(a: v128, b: v128) -> v128 { transmute(simd_ge::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) @@ -1694,7 +1631,7 @@ pub unsafe fn v128_bitselect(v1: v128, v2: v128, c: v128) -> v128 { /// Returns true if any lane is nonzero or false if all lanes are zero. #[inline] -// #[cfg_attr(test, assert_instr(v128.any_true))] // FIXME +// #[cfg_attr(test, assert_instr(v128.any_true))] // FIXME llvm #[target_feature(enable = "simd128")] pub unsafe fn v128_any_true(a: v128) -> bool { llvm_any_true_i8x16(a.as_i8x16()) != 0 @@ -1724,7 +1661,7 @@ pub unsafe fn i8x16_neg(a: v128) -> v128 { /// Count the number of bits set to one within each lane. 
#[inline] -// #[cfg_attr(test, assert_instr(i8x16.popcnt))] // FIXME +// #[cfg_attr(test, assert_instr(i8x16.popcnt))] // FIXME llvm & wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i8x16_popcnt(v: v128) -> v128 { transmute(llvm_popcnt(v.as_i8x16())) @@ -1741,7 +1678,8 @@ pub unsafe fn i8x16_all_true(a: v128) -> bool { /// Extracts the high bit for each lane in `a` and produce a scalar mask with /// all bits concatenated. #[inline] -// #[cfg_attr(test, assert_instr(i8x16.bitmask))] // FIXME +#[cfg_attr(test, assert_instr(i8x16.bitmask))] +#[target_feature(enable = "simd128")] pub unsafe fn i8x16_bitmask(a: v128) -> i32 { llvm_bitmask_i8x16(transmute(a)) } @@ -1912,7 +1850,7 @@ pub unsafe fn i8x16_avgr_u(a: v128, b: v128) -> v128 { /// Lane-wise integer extended pairwise addition producing extended results /// (twice wider results than the inputs). #[inline] -// #[cfg_attr(test, assert_instr(i16x8.extadd_pairwise_i8x16_s))] // FIXME +// #[cfg_attr(test, assert_instr(i16x8.extadd_pairwise_i8x16_s))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i16x8_extadd_pairwise_i8x16_s(a: v128) -> v128 { transmute(llvm_i16x8_extadd_pairwise_i8x16_s(a.as_i8x16())) @@ -1921,7 +1859,7 @@ pub unsafe fn i16x8_extadd_pairwise_i8x16_s(a: v128) -> v128 { /// Lane-wise integer extended pairwise addition producing extended results /// (twice wider results than the inputs). #[inline] -// #[cfg_attr(test, assert_instr(i16x8.extadd_pairwise_i8x16_u))] // FIXME +// #[cfg_attr(test, assert_instr(i16x8.extadd_pairwise_i8x16_u))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i16x8_extadd_pairwise_i8x16_u(a: v128) -> v128 { transmute(llvm_i16x8_extadd_pairwise_i8x16_u(a.as_i8x16())) @@ -1951,7 +1889,7 @@ pub unsafe fn i16x8_neg(a: v128) -> v128 { /// Lane-wise saturating rounding multiplication in Q15 format. #[inline] -// #[cfg_attr(test, assert_instr(i16x8.qmulr_sat_s))] // FIXME +// #[cfg_attr(test, assert_instr(i16x8.qmulr_sat_s))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i16x8_q15mulr_sat_s(a: v128, b: v128) -> v128 { transmute(llvm_q15mulr(a.as_i16x8(), b.as_i16x8())) @@ -1968,7 +1906,7 @@ pub unsafe fn i16x8_all_true(a: v128) -> bool { /// Extracts the high bit for each lane in `a` and produce a scalar mask with /// all bits concatenated. 
#[inline] -// #[cfg_attr(test, assert_instr(i16x8.bitmask))] // FIXME +#[cfg_attr(test, assert_instr(i16x8.bitmask))] #[target_feature(enable = "simd128")] pub unsafe fn i16x8_bitmask(a: v128) -> i32 { llvm_bitmask_i16x8(transmute(a)) @@ -2203,7 +2141,7 @@ pub unsafe fn i16x8_avgr_u(a: v128, b: v128) -> v128 { /// /// Equivalent of `i16x8_mul(i16x8_extend_low_i8x16_s(a), i16x8_extend_low_i8x16_s(b))` #[inline] -// #[cfg_attr(test, assert_instr(i16x8.avgr_u))] // FIXME +// #[cfg_attr(test, assert_instr(i16x8.extmul_low_i8x16_s))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i16x8_extmul_low_i8x16_s(a: v128, b: v128) -> v128 { transmute(llvm_i16x8_extmul_low_i8x16_s(a.as_i8x16(), b.as_i8x16())) @@ -2214,7 +2152,7 @@ pub unsafe fn i16x8_extmul_low_i8x16_s(a: v128, b: v128) -> v128 { /// /// Equivalent of `i16x8_mul(i16x8_extend_high_i8x16_s(a), i16x8_extend_high_i8x16_s(b))` #[inline] -// #[cfg_attr(test, assert_instr(i16x8.avgr_u))] // FIXME +// #[cfg_attr(test, assert_instr(i16x8.extmul_high_i8x16_s))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i16x8_extmul_high_i8x16_s(a: v128, b: v128) -> v128 { transmute(llvm_i16x8_extmul_high_i8x16_s(a.as_i8x16(), b.as_i8x16())) @@ -2225,7 +2163,7 @@ pub unsafe fn i16x8_extmul_high_i8x16_s(a: v128, b: v128) -> v128 { /// /// Equivalent of `i16x8_mul(i16x8_extend_low_i8x16_u(a), i16x8_extend_low_i8x16_u(b))` #[inline] -// #[cfg_attr(test, assert_instr(i16x8.avgr_u))] // FIXME +// #[cfg_attr(test, assert_instr(i16x8.extmul_low_i8x16_u))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i16x8_extmul_low_i8x16_u(a: v128, b: v128) -> v128 { transmute(llvm_i16x8_extmul_low_i8x16_u(a.as_i8x16(), b.as_i8x16())) @@ -2236,7 +2174,7 @@ pub unsafe fn i16x8_extmul_low_i8x16_u(a: v128, b: v128) -> v128 { /// /// Equivalent of `i16x8_mul(i16x8_extend_high_i8x16_u(a), i16x8_extend_high_i8x16_u(b))` #[inline] -// #[cfg_attr(test, assert_instr(i16x8.avgr_u))] // FIXME +// #[cfg_attr(test, assert_instr(i16x8.extmul_high_i8x16_u))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i16x8_extmul_high_i8x16_u(a: v128, b: v128) -> v128 { transmute(llvm_i16x8_extmul_high_i8x16_u(a.as_i8x16(), b.as_i8x16())) @@ -2245,7 +2183,7 @@ pub unsafe fn i16x8_extmul_high_i8x16_u(a: v128, b: v128) -> v128 { /// Lane-wise integer extended pairwise addition producing extended results /// (twice wider results than the inputs). #[inline] -// #[cfg_attr(test, assert_instr(i32x4.extadd_pairwise_i16x8_s))] // FIXME +// #[cfg_attr(test, assert_instr(i32x4.extadd_pairwise_i16x8_s))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i32x4_extadd_pairwise_i16x8_s(a: v128) -> v128 { transmute(llvm_i32x4_extadd_pairwise_i16x8_s(a.as_i16x8())) @@ -2254,7 +2192,7 @@ pub unsafe fn i32x4_extadd_pairwise_i16x8_s(a: v128) -> v128 { /// Lane-wise integer extended pairwise addition producing extended results /// (twice wider results than the inputs). #[inline] -// #[cfg_attr(test, assert_instr(i32x4.extadd_pairwise_i16x8_u))] // FIXME +// #[cfg_attr(test, assert_instr(i32x4.extadd_pairwise_i16x8_u))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i32x4_extadd_pairwise_i16x8_u(a: v128) -> v128 { transmute(llvm_i32x4_extadd_pairwise_i16x8_u(a.as_i16x8())) @@ -2293,7 +2231,7 @@ pub unsafe fn i32x4_all_true(a: v128) -> bool { /// Extracts the high bit for each lane in `a` and produce a scalar mask with /// all bits concatenated. 
#[inline] -// #[cfg_attr(test, assert_instr(i32x4.bitmask))] // FIXME +#[cfg_attr(test, assert_instr(i32x4.bitmask))] #[target_feature(enable = "simd128")] pub unsafe fn i32x4_bitmask(a: v128) -> i32 { llvm_bitmask_i32x4(transmute(a)) @@ -2469,7 +2407,7 @@ pub unsafe fn i32x4_dot_i16x8_s(a: v128, b: v128) -> v128 { /// /// Equivalent of `i32x4_mul(i32x4_extend_low_i16x8_s(a), i32x4_extend_low_i16x8_s(b))` #[inline] -// #[cfg_attr(test, assert_instr(i32x4.extmul_low_i16x8_s))] // FIXME +// #[cfg_attr(test, assert_instr(i32x4.extmul_low_i16x8_s))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i32x4_extmul_low_i16x8_s(a: v128, b: v128) -> v128 { transmute(llvm_i32x4_extmul_low_i16x8_s(a.as_i16x8(), b.as_i16x8())) @@ -2480,7 +2418,7 @@ pub unsafe fn i32x4_extmul_low_i16x8_s(a: v128, b: v128) -> v128 { /// /// Equivalent of `i32x4_mul(i32x4_extend_high_i16x8_s(a), i32x4_extend_high_i16x8_s(b))` #[inline] -// #[cfg_attr(test, assert_instr(i32x4.extmul_high_i16x8_s))] // FIXME +// #[cfg_attr(test, assert_instr(i32x4.extmul_high_i16x8_s))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i32x4_extmul_high_i16x8_s(a: v128, b: v128) -> v128 { transmute(llvm_i32x4_extmul_high_i16x8_s(a.as_i16x8(), b.as_i16x8())) @@ -2491,7 +2429,7 @@ pub unsafe fn i32x4_extmul_high_i16x8_s(a: v128, b: v128) -> v128 { /// /// Equivalent of `i32x4_mul(i32x4_extend_low_i16x8_u(a), i32x4_extend_low_i16x8_u(b))` #[inline] -// #[cfg_attr(test, assert_instr(i32x4.extmul_low_i16x8_u))] // FIXME +// #[cfg_attr(test, assert_instr(i32x4.extmul_low_i16x8_u))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i32x4_extmul_low_i16x8_u(a: v128, b: v128) -> v128 { transmute(llvm_i32x4_extmul_low_i16x8_u(a.as_i16x8(), b.as_i16x8())) @@ -2502,7 +2440,7 @@ pub unsafe fn i32x4_extmul_low_i16x8_u(a: v128, b: v128) -> v128 { /// /// Equivalent of `i32x4_mul(i32x4_extend_high_i16x8_u(a), i32x4_extend_high_i16x8_u(b))` #[inline] -// #[cfg_attr(test, assert_instr(i32x4.extmul_high_i16x8_u))] // FIXME +// #[cfg_attr(test, assert_instr(i32x4.extmul_high_i16x8_u))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i32x4_extmul_high_i16x8_u(a: v128, b: v128) -> v128 { transmute(llvm_i32x4_extmul_high_i16x8_u(a.as_i16x8(), b.as_i16x8())) @@ -2510,7 +2448,7 @@ pub unsafe fn i32x4_extmul_high_i16x8_u(a: v128, b: v128) -> v128 { /// Lane-wise wrapping absolute value. #[inline] -// #[cfg_attr(test, assert_instr(i64x2.abs))] // FIXME +// #[cfg_attr(test, assert_instr(i64x2.abs))] // FIXME llvm #[target_feature(enable = "simd128")] pub unsafe fn i64x2_abs(a: v128) -> v128 { let a = transmute::<_, i64x2>(a); @@ -2541,7 +2479,7 @@ pub unsafe fn i64x2_all_true(a: v128) -> bool { /// Extracts the high bit for each lane in `a` and produce a scalar mask with /// all bits concatenated. #[inline] -// #[cfg_attr(test, assert_instr(i64x2.bitmask))] // FIXME +#[cfg_attr(test, assert_instr(i64x2.bitmask))] #[target_feature(enable = "simd128")] pub unsafe fn i64x2_bitmask(a: v128) -> i32 { llvm_bitmask_i64x2(transmute(a)) @@ -2550,53 +2488,37 @@ pub unsafe fn i64x2_bitmask(a: v128) -> i32 { /// Converts low half of the smaller lane vector to a larger lane /// vector, sign extended. 
#[inline] -// #[cfg_attr(test, assert_instr(i64x2.extend_low_i32x4_s))] // FIXME +// #[cfg_attr(test, assert_instr(i64x2.extend_low_i32x4_s))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i64x2_extend_low_i32x4_s(a: v128) -> v128 { - transmute(simd_cast::<_, i64x2>(simd_shuffle2::<_, i32x2>( - a.as_i32x4(), - a.as_i32x4(), - [0, 1], - ))) + transmute(llvm_i64x2_extend_low_i32x4_s(a.as_i32x4())) } /// Converts high half of the smaller lane vector to a larger lane /// vector, sign extended. #[inline] -// #[cfg_attr(test, assert_instr(i64x2.extend_high_i32x4_s))] // FIXME +// #[cfg_attr(test, assert_instr(i64x2.extend_high_i32x4_s))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i64x2_extend_high_i32x4_s(a: v128) -> v128 { - transmute(simd_cast::<_, i64x2>(simd_shuffle2::<_, i32x2>( - a.as_i32x4(), - a.as_i32x4(), - [2, 3], - ))) + transmute(llvm_i64x2_extend_high_i32x4_s(a.as_i32x4())) } /// Converts low half of the smaller lane vector to a larger lane /// vector, zero extended. #[inline] -// #[cfg_attr(test, assert_instr(i64x2.extend_low_i32x4_u))] // FIXME +// #[cfg_attr(test, assert_instr(i64x2.extend_low_i32x4_u))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i64x2_extend_low_i32x4_u(a: v128) -> v128 { - transmute(simd_cast::<_, u64x2>(simd_shuffle2::<_, u32x2>( - a.as_u32x4(), - a.as_u32x4(), - [0, 1], - ))) + transmute(llvm_i64x2_extend_low_i32x4_u(a.as_i32x4())) } /// Converts high half of the smaller lane vector to a larger lane /// vector, zero extended. #[inline] -// #[cfg_attr(test, assert_instr(i64x2.extend_high_i32x4_u))] // FIXME +// #[cfg_attr(test, assert_instr(i64x2.extend_high_i32x4_u))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i64x2_extend_high_i32x4_u(a: v128) -> v128 { - transmute(simd_cast::<_, u64x2>(simd_shuffle2::<_, u32x2>( - a.as_u32x4(), - a.as_u32x4(), - [2, 3], - ))) + transmute(llvm_i64x2_extend_high_i32x4_u(a.as_i32x4())) } /// Shifts each lane to the left by the specified number of bits.
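// Editorial aside, not part of the patch: a minimal sketch of the low/high
// lane semantics the extension intrinsics above are documented to have
// (`low` widens lanes 0 and 1, `high` widens lanes 2 and 3). The function
// name `extend_lanes_demo` and the concrete lane values are illustrative
// assumptions only.
#[cfg(target_arch = "wasm32")]
unsafe fn extend_lanes_demo() {
    use core::arch::wasm32::*;
    // Four 32-bit lanes: [1, 2, 3, -1].
    let v = i32x4_const(1, 2, 3, -1);
    let low = i64x2_extend_low_i32x4_u(v);
    let high = i64x2_extend_high_i32x4_u(v);
    assert_eq!(i64x2_extract_lane::<0>(low), 1);
    assert_eq!(i64x2_extract_lane::<1>(low), 2);
    assert_eq!(i64x2_extract_lane::<0>(high), 3);
    // -1 is zero-extended from u32, so the top lane becomes 0xFFFF_FFFF.
    assert_eq!(i64x2_extract_lane::<1>(high), 0xFFFF_FFFF);
}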
@@ -2663,7 +2585,7 @@ pub unsafe fn i64x2_mul(a: v128, b: v128) -> v128 { /// /// Equivalent of `i64x2_mul(i64x2_extend_low_i32x4_s(a), i64x2_extend_low_i32x4_s(b))` #[inline] -// #[cfg_attr(test, assert_instr(i64x2.extmul_low_i32x4_s))] // FIXME +// #[cfg_attr(test, assert_instr(i64x2.extmul_low_i32x4_s))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i64x2_extmul_low_i32x4_s(a: v128, b: v128) -> v128 { transmute(llvm_i64x2_extmul_low_i32x4_s(a.as_i32x4(), b.as_i32x4())) @@ -2674,7 +2596,7 @@ pub unsafe fn i64x2_extmul_low_i32x4_s(a: v128, b: v128) -> v128 { /// /// Equivalent of `i64x2_mul(i64x2_extend_high_i32x4_s(a), i64x2_extend_high_i32x4_s(b))` #[inline] -// #[cfg_attr(test, assert_instr(i64x2.extmul_high_i32x4_s))] // FIXME +// #[cfg_attr(test, assert_instr(i64x2.extmul_high_i32x4_s))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i64x2_extmul_high_i32x4_s(a: v128, b: v128) -> v128 { transmute(llvm_i64x2_extmul_high_i32x4_s(a.as_i32x4(), b.as_i32x4())) @@ -2685,7 +2607,7 @@ pub unsafe fn i64x2_extmul_high_i32x4_s(a: v128, b: v128) -> v128 { /// /// Equivalent of `i64x2_mul(i64x2_extend_low_i32x4_u(a), i64x2_extend_low_i32x4_u(b))` #[inline] -// #[cfg_attr(test, assert_instr(i64x2.extmul_low_i32x4_u))] // FIXME +// #[cfg_attr(test, assert_instr(i64x2.extmul_low_i32x4_u))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i64x2_extmul_low_i32x4_u(a: v128, b: v128) -> v128 { transmute(llvm_i64x2_extmul_low_i32x4_u(a.as_i32x4(), b.as_i32x4())) @@ -2696,7 +2618,7 @@ pub unsafe fn i64x2_extmul_low_i32x4_u(a: v128, b: v128) -> v128 { /// /// Equivalent of `i64x2_mul(i64x2_extend_high_i32x4_u(a), i64x2_extend_high_i32x4_u(b))` #[inline] -// #[cfg_attr(test, assert_instr(i64x2.extmul_high_i32x4_u))] // FIXME +// #[cfg_attr(test, assert_instr(i64x2.extmul_high_i32x4_u))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i64x2_extmul_high_i32x4_u(a: v128, b: v128) -> v128 { transmute(llvm_i64x2_extmul_high_i32x4_u(a.as_i32x4(), b.as_i32x4())) @@ -2704,7 +2626,7 @@ pub unsafe fn i64x2_extmul_high_i32x4_u(a: v128, b: v128) -> v128 { /// Lane-wise rounding to the nearest integral value not smaller than the input. #[inline] -// #[cfg_attr(test, assert_instr(f32x4.ceil))] // FIXME +#[cfg_attr(test, assert_instr(f32x4.ceil))] #[target_feature(enable = "simd128")] pub unsafe fn f32x4_ceil(a: v128) -> v128 { transmute(llvm_f32x4_ceil(a.as_f32x4())) @@ -2712,7 +2634,7 @@ pub unsafe fn f32x4_ceil(a: v128) -> v128 { /// Lane-wise rounding to the nearest integral value not greater than the input. #[inline] -// #[cfg_attr(test, assert_instr(f32x4.floor))] // FIXME +#[cfg_attr(test, assert_instr(f32x4.floor))] #[target_feature(enable = "simd128")] pub unsafe fn f32x4_floor(a: v128) -> v128 { transmute(llvm_f32x4_floor(a.as_f32x4())) @@ -2721,7 +2643,7 @@ pub unsafe fn f32x4_floor(a: v128) -> v128 { /// Lane-wise rounding to the nearest integral value with the magnitude not /// larger than the input. #[inline] -// #[cfg_attr(test, assert_instr(f32x4.trunc))] // FIXME +#[cfg_attr(test, assert_instr(f32x4.trunc))] #[target_feature(enable = "simd128")] pub unsafe fn f32x4_trunc(a: v128) -> v128 { transmute(llvm_f32x4_trunc(a.as_f32x4())) @@ -2730,7 +2652,7 @@ pub unsafe fn f32x4_trunc(a: v128) -> v128 { /// Lane-wise rounding to the nearest integral value; if two values are equally /// near, rounds to the even one. 
#[inline] -// #[cfg_attr(test, assert_instr(f32x4.nearest))] // FIXME +#[cfg_attr(test, assert_instr(f32x4.nearest))] #[target_feature(enable = "simd128")] pub unsafe fn f32x4_nearest(a: v128) -> v128 { transmute(llvm_f32x4_nearest(a.as_f32x4())) @@ -2819,7 +2741,7 @@ pub unsafe fn f32x4_max(a: v128, b: v128) -> v128 { /// Lane-wise minimum value, defined as `b < a ? b : a` #[inline] -// #[cfg_attr(test, assert_instr(f32x4.pmin))] // FIXME +#[cfg_attr(test, assert_instr(f32x4.pmin))] #[target_feature(enable = "simd128")] pub unsafe fn f32x4_pmin(a: v128, b: v128) -> v128 { transmute(llvm_f32x4_pmin(a.as_f32x4(), b.as_f32x4())) @@ -2827,7 +2749,7 @@ pub unsafe fn f32x4_pmin(a: v128, b: v128) -> v128 { /// Lane-wise maximum value, defined as `a < b ? b : a` #[inline] -// #[cfg_attr(test, assert_instr(f32x4.pmax))] // FIXME +#[cfg_attr(test, assert_instr(f32x4.pmax))] #[target_feature(enable = "simd128")] pub unsafe fn f32x4_pmax(a: v128, b: v128) -> v128 { transmute(llvm_f32x4_pmax(a.as_f32x4(), b.as_f32x4())) @@ -2835,7 +2757,7 @@ pub unsafe fn f32x4_pmax(a: v128, b: v128) -> v128 { /// Lane-wise rounding to the nearest integral value not smaller than the input. #[inline] -// #[cfg_attr(test, assert_instr(f64x2.ceil))] // FIXME +#[cfg_attr(test, assert_instr(f64x2.ceil))] #[target_feature(enable = "simd128")] pub unsafe fn f64x2_ceil(a: v128) -> v128 { transmute(llvm_f64x2_ceil(a.as_f64x2())) @@ -2843,7 +2765,7 @@ pub unsafe fn f64x2_ceil(a: v128) -> v128 { /// Lane-wise rounding to the nearest integral value not greater than the input. #[inline] -// #[cfg_attr(test, assert_instr(f64x2.floor))] // FIXME +#[cfg_attr(test, assert_instr(f64x2.floor))] #[target_feature(enable = "simd128")] pub unsafe fn f64x2_floor(a: v128) -> v128 { transmute(llvm_f64x2_floor(a.as_f64x2())) @@ -2852,7 +2774,7 @@ pub unsafe fn f64x2_floor(a: v128) -> v128 { /// Lane-wise rounding to the nearest integral value with the magnitude not /// larger than the input. #[inline] -// #[cfg_attr(test, assert_instr(f64x2.trunc))] // FIXME +#[cfg_attr(test, assert_instr(f64x2.trunc))] #[target_feature(enable = "simd128")] pub unsafe fn f64x2_trunc(a: v128) -> v128 { transmute(llvm_f64x2_trunc(a.as_f64x2())) @@ -2861,7 +2783,7 @@ pub unsafe fn f64x2_trunc(a: v128) -> v128 { /// Lane-wise rounding to the nearest integral value; if two values are equally /// near, rounds to the even one. #[inline] -// #[cfg_attr(test, assert_instr(f64x2.nearest))] // FIXME +#[cfg_attr(test, assert_instr(f64x2.nearest))] #[target_feature(enable = "simd128")] pub unsafe fn f64x2_nearest(a: v128) -> v128 { transmute(llvm_f64x2_nearest(a.as_f64x2())) @@ -2950,7 +2872,7 @@ pub unsafe fn f64x2_max(a: v128, b: v128) -> v128 { /// Lane-wise minimum value, defined as `b < a ? b : a` #[inline] -// #[cfg_attr(test, assert_instr(f64x2.pmin))] // FIXME +#[cfg_attr(test, assert_instr(f64x2.pmin))] #[target_feature(enable = "simd128")] pub unsafe fn f64x2_pmin(a: v128, b: v128) -> v128 { transmute(llvm_f64x2_pmin(a.as_f64x2(), b.as_f64x2())) @@ -2958,7 +2880,7 @@ pub unsafe fn f64x2_pmin(a: v128, b: v128) -> v128 { /// Lane-wise maximum value, defined as `a < b ? 
b : a` #[inline] -// #[cfg_attr(test, assert_instr(f64x2.pmax))] // FIXME +#[cfg_attr(test, assert_instr(f64x2.pmax))] #[target_feature(enable = "simd128")] pub unsafe fn f64x2_pmax(a: v128, b: v128) -> v128 { transmute(llvm_f64x2_pmax(a.as_f64x2(), b.as_f64x2())) @@ -3015,7 +2937,7 @@ pub unsafe fn f32x4_convert_i32x4_u(a: v128) -> v128 { /// lane is outside the range of the destination type, the result is saturated /// to the nearest representable integer value. #[inline] -// #[cfg_attr(test, assert_instr(i32x4.trunc_sat_f64x2_s_zero))] // FIXME +// #[cfg_attr(test, assert_instr(i32x4.trunc_sat_f64x2_s_zero))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i32x4_trunc_sat_f64x2_s_zero(a: v128) -> v128 { transmute(llvm_i32x4_trunc_sat_f64x2_s_zero(a.as_f64x2())) @@ -3030,7 +2952,7 @@ pub unsafe fn i32x4_trunc_sat_f64x2_s_zero(a: v128) -> v128 { /// lane is outside the range of the destination type, the result is saturated /// to the nearest representable integer value. #[inline] -// #[cfg_attr(test, assert_instr(i32x4.trunc_sat_f64x2_u_zero))] // FIXME +// #[cfg_attr(test, assert_instr(i32x4.trunc_sat_f64x2_u_zero))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn i32x4_trunc_sat_f64x2_u_zero(a: v128) -> v128 { transmute(llvm_i32x4_trunc_sat_f64x2_u_zero(a.as_f64x2())) @@ -3038,7 +2960,7 @@ pub unsafe fn i32x4_trunc_sat_f64x2_u_zero(a: v128) -> v128 { /// Lane-wise conversion from integer to floating point. #[inline] -// #[cfg_attr(test, assert_instr(f64x2.convert_low_i32x4_s))] // FIXME +// #[cfg_attr(test, assert_instr(f64x2.convert_low_i32x4_s))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn f64x2_convert_low_i32x4_s(a: v128) -> v128 { transmute(llvm_f64x2_convert_low_i32x4_s(a.as_i32x4())) @@ -3046,7 +2968,7 @@ pub unsafe fn f64x2_convert_low_i32x4_s(a: v128) -> v128 { /// Lane-wise conversion from integer to floating point. #[inline] -// #[cfg_attr(test, assert_instr(f64x2.convert_low_i32x4_u))] // FIXME +// #[cfg_attr(test, assert_instr(f64x2.convert_low_i32x4_u))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn f64x2_convert_low_i32x4_u(a: v128) -> v128 { transmute(llvm_f64x2_convert_low_i32x4_u(a.as_i32x4())) @@ -3058,7 +2980,7 @@ pub unsafe fn f64x2_convert_low_i32x4_u(a: v128) -> v128 { /// single-precision floating point number, it is rounded to the nearest-even /// representable number. #[inline] -// #[cfg_attr(test, assert_instr(f32x4.demote_f64x2_zero))] // FIXME +// #[cfg_attr(test, assert_instr(f32x4.demote_f64x2_zero))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn f32x4_demote_f64x2_zero(a: v128) -> v128 { transmute(llvm_f32x4_demote_f64x2_zero(a.as_f64x2())) @@ -3067,7 +2989,7 @@ pub unsafe fn f32x4_demote_f64x2_zero(a: v128) -> v128 { /// Conversion of the two lower single-precision floating point lanes to the two /// double-precision lanes of the result. 
#[inline] -// #[cfg_attr(test, assert_instr(f64x2.promote_low_f32x4))] // FIXME +// #[cfg_attr(test, assert_instr(f64x2.promote_low_f32x4))] // FIXME wasmtime #[target_feature(enable = "simd128")] pub unsafe fn f64x2_promote_low_f32x4(a: v128) -> v128 { transmute(llvm_f64x2_promote_low_f32x4(a.as_f32x4())) @@ -3076,6 +2998,7 @@ pub unsafe fn f64x2_promote_low_f32x4(a: v128) -> v128 { #[cfg(test)] pub mod tests { use super::*; + use core::ops::{Add, Div, Mul, Neg, Sub}; use std; use std::mem; use std::num::Wrapping; @@ -3129,7 +3052,13 @@ pub mod tests { } } - // TODO: v128_load{32,64}_zero + #[test] + fn test_load_zero() { + unsafe { + compare_bytes(v128_load32_zero(&10), i32x4_const(10, 0, 0, 0)); + compare_bytes(v128_load64_zero(&11), i64x2_const(11, 0)); + } + } #[test] fn test_store() { @@ -3140,14 +3069,65 @@ pub mod tests { } } - // TODO: v128_load*_lane - // TODO: v128_store*_lane + #[test] + fn test_load_lane() { + unsafe { + let zero = i8x16_splat(0); + compare_bytes( + v128_load8_lane::<2>(zero, &1), + i8x16_replace_lane::<2>(zero, 1), + ); + + compare_bytes( + v128_load16_lane::<2>(zero, &1), + i16x8_replace_lane::<2>(zero, 1), + ); + + compare_bytes( + v128_load32_lane::<2>(zero, &1), + i32x4_replace_lane::<2>(zero, 1), + ); + + compare_bytes( + v128_load64_lane::<1>(zero, &1), + i64x2_replace_lane::<1>(zero, 1), + ); + } + } + + #[test] + fn test_store_lane() { + unsafe { + let mut spot = 0; + let zero = i8x16_splat(0); + v128_store8_lane::<5>(i8x16_replace_lane::<5>(zero, 7), &mut spot); + assert_eq!(spot, 7); + + let mut spot = 0; + v128_store16_lane::<5>(i16x8_replace_lane::<5>(zero, 7), &mut spot); + assert_eq!(spot, 7); + + let mut spot = 0; + v128_store32_lane::<3>(i32x4_replace_lane::<3>(zero, 7), &mut spot); + assert_eq!(spot, 7); + + let mut spot = 0; + v128_store64_lane::<0>(i64x2_replace_lane::<0>(zero, 7), &mut spot); + assert_eq!(spot, 7); + } + } #[test] fn test_v128_const() { const A: v128 = unsafe { super::v128_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) }; compare_bytes(A, A); + + const _: v128 = unsafe { i16x8_const(0, 1, 2, 3, 4, 5, 6, 7) }; + const _: v128 = unsafe { i32x4_const(0, 1, 2, 3) }; + const _: v128 = unsafe { i64x2_const(0, 1) }; + const _: v128 = unsafe { f32x4_const(0., 1., 2., 3.) }; + const _: v128 = unsafe { f64x2_const(0., 1.) }; } #[test] @@ -3308,10 +3288,188 @@ pub mod tests { test_splat!(f64x2_splat: 42. 
=> 0, 0, 0, 0, 0, 0, 69, 64, 0, 0, 0, 0, 0, 0, 69, 64); } + #[test] + fn test_bitmasks() { + unsafe { + let zero = i8x16_splat(0); + let ones = i8x16_splat(!0); + + assert_eq!(i8x16_bitmask(zero), 0); + assert_eq!(i8x16_bitmask(ones), (1 << 16) - 1); + assert_eq!(i8x16_bitmask(i8x16_splat(i8::MAX)), 0); + assert_eq!(i8x16_bitmask(i8x16_splat(i8::MIN)), (1 << 16) - 1); + assert_eq!(i8x16_bitmask(i8x16_replace_lane::<1>(zero, -1)), 0b10); + + assert_eq!(i16x8_bitmask(zero), 0); + assert_eq!(i16x8_bitmask(ones), (1 << 8) - 1); + assert_eq!(i16x8_bitmask(i16x8_splat(i16::MAX)), 0); + assert_eq!(i16x8_bitmask(i16x8_splat(i16::MIN)), (1 << 8) - 1); + assert_eq!(i16x8_bitmask(i16x8_replace_lane::<1>(zero, -1)), 0b10); + + assert_eq!(i32x4_bitmask(zero), 0); + assert_eq!(i32x4_bitmask(ones), (1 << 4) - 1); + assert_eq!(i32x4_bitmask(i32x4_splat(i32::MAX)), 0); + assert_eq!(i32x4_bitmask(i32x4_splat(i32::MIN)), (1 << 4) - 1); + assert_eq!(i32x4_bitmask(i32x4_replace_lane::<1>(zero, -1)), 0b10); + + assert_eq!(i64x2_bitmask(zero), 0); + assert_eq!(i64x2_bitmask(ones), (1 << 2) - 1); + assert_eq!(i64x2_bitmask(i64x2_splat(i64::MAX)), 0); + assert_eq!(i64x2_bitmask(i64x2_splat(i64::MIN)), (1 << 2) - 1); + assert_eq!(i64x2_bitmask(i64x2_replace_lane::<1>(zero, -1)), 0b10); + } + } + + #[test] + fn test_narrow() { + unsafe { + let zero = i8x16_splat(0); + let ones = i8x16_splat(!0); + + compare_bytes(i8x16_narrow_i16x8_s(zero, zero), zero); + compare_bytes(i8x16_narrow_i16x8_u(zero, zero), zero); + compare_bytes(i8x16_narrow_i16x8_s(ones, ones), ones); + compare_bytes(i8x16_narrow_i16x8_u(ones, ones), zero); + + compare_bytes( + i8x16_narrow_i16x8_s( + i16x8_const( + 0, + 1, + 2, + -1, + i8::MIN.into(), + i8::MAX.into(), + u8::MIN.into(), + u8::MAX.into(), + ), + i16x8_const( + i16::MIN.into(), + i16::MAX.into(), + u16::MIN as i16, + u16::MAX as i16, + 0, + 0, + 0, + 0, + ), + ), + v128_const(0, 1, 2, -1, -128, 127, 0, 127, -128, 127, 0, -1, 0, 0, 0, 0), + ); + + compare_bytes( + i8x16_narrow_i16x8_u( + i16x8_const( + 0, + 1, + 2, + -1, + i8::MIN.into(), + i8::MAX.into(), + u8::MIN.into(), + u8::MAX.into(), + ), + i16x8_const( + i16::MIN.into(), + i16::MAX.into(), + u16::MIN as i16, + u16::MAX as i16, + 0, + 0, + 0, + 0, + ), + ), + v128_const(0, 1, 2, 0, 0, 127, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0), + ); + + compare_bytes(i16x8_narrow_i32x4_s(zero, zero), zero); + compare_bytes(i16x8_narrow_i32x4_u(zero, zero), zero); + compare_bytes(i16x8_narrow_i32x4_s(ones, ones), ones); + compare_bytes(i16x8_narrow_i32x4_u(ones, ones), zero); + + compare_bytes( + i16x8_narrow_i32x4_s( + i32x4_const(0, -1, i16::MIN.into(), i16::MAX.into()), + i32x4_const( + i32::MIN.into(), + i32::MAX.into(), + u32::MIN as i32, + u32::MAX as i32, + ), + ), + i16x8_const(0, -1, i16::MIN, i16::MAX, i16::MIN, i16::MAX, 0, -1), + ); + + compare_bytes( + i16x8_narrow_i32x4_u( + i32x4_const(u16::MAX.into(), -1, i16::MIN.into(), i16::MAX.into()), + i32x4_const( + i32::MIN.into(), + i32::MAX.into(), + u32::MIN as i32, + u32::MAX as i32, + ), + ), + i16x8_const(-1, 0, 0, i16::MAX, 0, -1, 0, 0), + ); + } + } + + #[test] + fn test_extend() { + unsafe { + let zero = i8x16_splat(0); + let ones = i8x16_splat(!0); + + compare_bytes(i16x8_extend_low_i8x16_s(zero), zero); + compare_bytes(i16x8_extend_high_i8x16_s(zero), zero); + compare_bytes(i16x8_extend_low_i8x16_u(zero), zero); + compare_bytes(i16x8_extend_high_i8x16_u(zero), zero); + compare_bytes(i16x8_extend_low_i8x16_s(ones), ones); + compare_bytes(i16x8_extend_high_i8x16_s(ones), ones); + let 
halves = i16x8_splat(u8::MAX.into()); + compare_bytes(i16x8_extend_low_i8x16_u(ones), halves); + compare_bytes(i16x8_extend_high_i8x16_u(ones), halves); + + compare_bytes(i32x4_extend_low_i16x8_s(zero), zero); + compare_bytes(i32x4_extend_high_i16x8_s(zero), zero); + compare_bytes(i32x4_extend_low_i16x8_u(zero), zero); + compare_bytes(i32x4_extend_high_i16x8_u(zero), zero); + compare_bytes(i32x4_extend_low_i16x8_s(ones), ones); + compare_bytes(i32x4_extend_high_i16x8_s(ones), ones); + let halves = i32x4_splat(u16::MAX.into()); + compare_bytes(i32x4_extend_low_i16x8_u(ones), halves); + compare_bytes(i32x4_extend_high_i16x8_u(ones), halves); + + // FIXME wasmtime + // compare_bytes(i64x2_extend_low_i32x4_s(zero), zero); + // compare_bytes(i64x2_extend_high_i32x4_s(zero), zero); + // compare_bytes(i64x2_extend_low_i32x4_u(zero), zero); + // compare_bytes(i64x2_extend_high_i32x4_u(zero), zero); + // compare_bytes(i64x2_extend_low_i32x4_s(ones), ones); + // compare_bytes(i64x2_extend_high_i32x4_s(ones), ones); + // let halves = i64x2_splat(u32::MAX.into()); + // compare_bytes(i64x2_extend_low_i32x4_u(ones), halves); + // compare_bytes(i64x2_extend_high_i32x4_u(ones), halves); + } + } + + #[test] + fn test_dot() { + unsafe { + let zero = i8x16_splat(0); + let ones = i8x16_splat(!0); + let two = i32x4_splat(2); + compare_bytes(i32x4_dot_i16x8_s(zero, zero), zero); + compare_bytes(i32x4_dot_i16x8_s(ones, ones), two); + } + } + macro_rules! test_binop { ( $($name:ident => { - $([$($vec1:tt)*] ($op:tt | $f:ident) [$($vec2:tt)*],)* + $([$($vec1:tt)*] ($op:ident | $f:ident) [$($vec2:tt)*],)* })* ) => ($( #[test] @@ -3328,7 +3486,7 @@ pub mod tests { v3 = mem::transmute(v3_v128); for (i, actual) in v3.iter().enumerate() { - let expected = (Wrapping(v1[i]) $op Wrapping(v2[i])).0; + let expected = v1[i].$op(v2[i]); assert_eq!(*actual, expected); } )* @@ -3340,7 +3498,7 @@ pub mod tests { macro_rules! test_unop { ( $($name:ident => { - $(($op:tt | $f:ident) [$($vec1:tt)*],)* + $(($op:ident | $f:ident) [$($vec1:tt)*],)* })* ) => ($( #[test] @@ -3355,7 +3513,7 @@ pub mod tests { v2 = mem::transmute(v2_v128); for (i, actual) in v2.iter().enumerate() { - let expected = ($op Wrapping(v1[i])).0; + let expected = v1[i].$op(); assert_eq!(*actual, expected); } )* @@ -3364,112 +3522,584 @@ pub mod tests { )*) } + trait Avgr: Sized { + fn avgr(self, other: Self) -> Self; + } + + macro_rules! impl_avgr { + ($($i:ident)*) => ($(impl Avgr for $i { + fn avgr(self, other: Self) -> Self { + ((self as u64 + other as u64 + 1) / 2) as $i + } + })*) + } + + impl_avgr!(u8 u16); + test_binop! 
{ test_i8x16_add => { [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - (+ | i8x16_add) + (wrapping_add | i8x16_add) [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] - (+ | i8x16_add) + (wrapping_add | i8x16_add) [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] - (+ | i8x16_add) + (wrapping_add | i8x16_add) [127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 9, -24], } + + test_i8x16_add_sat_s => { + [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (saturating_add | i8x16_add_sat_s) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_add | i8x16_add_sat_s) + [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_add | i8x16_add_sat_s) + [127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 9, -24], + } + + test_i8x16_add_sat_u => { + [0u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (saturating_add | i8x16_add_sat_u) + [1u8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_add | i8x16_add_sat_u) + [255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_add | i8x16_add_sat_u) + [127, -44i8 as u8, 43, 126, 4, 2, 9, -3i8 as u8, -59i8 as u8, -43i8 as u8, 39, -69i8 as u8, 79, -3i8 as u8, 9, -24i8 as u8], + } + test_i8x16_sub => { [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - (- | i8x16_sub) + (wrapping_sub | i8x16_sub) [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] - (- | i8x16_sub) + (wrapping_sub | i8x16_sub) [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] - (- | i8x16_sub) + (wrapping_sub | i8x16_sub) [-127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 4, 8], } + test_i8x16_sub_sat_s => { + [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (saturating_sub | i8x16_sub_sat_s) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_sub | i8x16_sub_sat_s) + [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_sub | i8x16_sub_sat_s) + [-127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 4, 8], + } + + test_i8x16_sub_sat_u => { + [0u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (saturating_sub | i8x16_sub_sat_u) + [1u8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_sub | i8x16_sub_sat_u) + [255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (saturating_sub | i8x16_sub_sat_u) + [127, -44i8 as u8, 43, 126, 4, 2, 9, -3i8 as u8, -59i8 as u8, -43i8 as u8, 39, -69i8 as u8, 79, -3i8 as u8, 9, -24i8 as u8], + } + + test_i8x16_min_s => { + [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (min | i8x16_min_s) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (min | i8x16_min_s) + [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, 
-12, -13, -14, -15, -16, -18], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (min | i8x16_min_s) + [-127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 4, 8], + } + + test_i8x16_min_u => { + [0u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (min | i8x16_min_u) + [1u8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (min | i8x16_min_u) + [255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (min | i8x16_min_u) + [127, -44i8 as u8, 43, 126, 4, 2, 9, -3i8 as u8, -59i8 as u8, -43i8 as u8, 39, -69i8 as u8, 79, -3i8 as u8, 9, -24i8 as u8], + } + + test_i8x16_max_s => { + [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (max | i8x16_max_s) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (max | i8x16_max_s) + [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], + + [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (max | i8x16_max_s) + [-127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 4, 8], + } + + test_i8x16_max_u => { + [0u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (max | i8x16_max_u) + [1u8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (max | i8x16_max_u) + [255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (max | i8x16_max_u) + [127, -44i8 as u8, 43, 126, 4, 2, 9, -3i8 as u8, -59i8 as u8, -43i8 as u8, 39, -69i8 as u8, 79, -3i8 as u8, 9, -24i8 as u8], + } + + test_i8x16_avgr_u => { + [0u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + (avgr | i8x16_avgr_u) + [1u8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (avgr | i8x16_avgr_u) + [255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240], + + [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + (avgr | i8x16_avgr_u) + [127, -44i8 as u8, 43, 126, 4, 2, 9, -3i8 as u8, -59i8 as u8, -43i8 as u8, 39, -69i8 as u8, 79, -3i8 as u8, 9, -24i8 as u8], + } + test_i16x8_add => { [0i16, 0, 0, 0, 0, 0, 0, 0] - (+ | i16x8_add) + (wrapping_add | i16x8_add) [1i16, 1, 1, 1, 1, 1, 1, 1], [1i16, 2, 3, 4, 5, 6, 7, 8] - (+ | i16x8_add) + (wrapping_add | i16x8_add) [32767, 8, -2494,-4, 4882, -4, 848, 3830], } + test_i16x8_add_sat_s => { + [0i16, 0, 0, 0, 0, 0, 0, 0] + (saturating_add | i16x8_add_sat_s) + [1i16, 1, 1, 1, 1, 1, 1, 1], + + [1i16, 2, 3, 4, 5, 6, 7, 8] + (saturating_add | i16x8_add_sat_s) + [32767, 8, -2494,-4, 4882, -4, 848, 3830], + } + + test_i16x8_add_sat_u => { + [0u16, 0, 0, 0, 0, 0, 0, 0] + (saturating_add | i16x8_add_sat_u) + [1u16, 1, 1, 1, 1, 1, 1, 1], + + [1u16, 2, 3, 4, 5, 6, 7, 8] + (saturating_add | i16x8_add_sat_u) + [32767, 8, -2494i16 as u16,-4i16 as u16, 4882, -4i16 as u16, 848, 3830], + } + test_i16x8_sub => { [0i16, 0, 0, 0, 0, 0, 0, 0] - (- | i16x8_sub) + (wrapping_sub | i16x8_sub) + [1i16, 1, 1, 1, 1, 1, 1, 1], + + [1i16, 2, 3, 4, 5, 6, 7, 8] + (wrapping_sub | i16x8_sub) + [32767, 8, -2494,-4, 4882, -4, 848, 3830], + } + + test_i16x8_sub_sat_s => { + [0i16, 0, 0, 0, 0, 0, 0, 0] + (saturating_sub | i16x8_sub_sat_s) [1i16, 1, 1, 1, 1, 1, 1, 1], [1i16, 2, 3, 4, 5, 6, 7, 8] - (- | i16x8_sub) + (saturating_sub | i16x8_sub_sat_s) [32767, 8, -2494,-4, 4882, -4, 848, 3830], } + 
test_i16x8_sub_sat_u => { + [0u16, 0, 0, 0, 0, 0, 0, 0] + (saturating_sub | i16x8_sub_sat_u) + [1u16, 1, 1, 1, 1, 1, 1, 1], + + [1u16, 2, 3, 4, 5, 6, 7, 8] + (saturating_sub | i16x8_sub_sat_u) + [32767, 8, -2494i16 as u16,-4i16 as u16, 4882, -4i16 as u16, 848, 3830], + } + test_i16x8_mul => { [0i16, 0, 0, 0, 0, 0, 0, 0] - (* | i16x8_mul) + (wrapping_mul | i16x8_mul) + [1i16, 1, 1, 1, 1, 1, 1, 1], + + [1i16, 2, 3, 4, 5, 6, 7, 8] + (wrapping_mul | i16x8_mul) + [32767, 8, -2494,-4, 4882, -4, 848, 3830], + } + + test_i16x8_min_s => { + [0i16, 0, 0, 0, 0, 0, 0, 0] + (min | i16x8_min_s) + [1i16, 1, 1, 1, 1, 1, 1, 1], + + [1i16, 2, 3, 4, 5, 6, 7, 8] + (min | i16x8_min_s) + [32767, 8, -2494,-4, 4882, -4, 848, 3830], + } + + test_i16x8_min_u => { + [0u16, 0, 0, 0, 0, 0, 0, 0] + (min | i16x8_min_u) + [1u16, 1, 1, 1, 1, 1, 1, 1], + + [1u16, 2, 3, 4, 5, 6, 7, 8] + (min | i16x8_min_u) + [32767, 8, -2494i16 as u16,-4i16 as u16, 4882, -4i16 as u16, 848, 3830], + } + + test_i16x8_max_s => { + [0i16, 0, 0, 0, 0, 0, 0, 0] + (max | i16x8_max_s) [1i16, 1, 1, 1, 1, 1, 1, 1], [1i16, 2, 3, 4, 5, 6, 7, 8] - (* | i16x8_mul) + (max | i16x8_max_s) [32767, 8, -2494,-4, 4882, -4, 848, 3830], } + test_i16x8_max_u => { + [0u16, 0, 0, 0, 0, 0, 0, 0] + (max | i16x8_max_u) + [1u16, 1, 1, 1, 1, 1, 1, 1], + + [1u16, 2, 3, 4, 5, 6, 7, 8] + (max | i16x8_max_u) + [32767, 8, -2494i16 as u16,-4i16 as u16, 4882, -4i16 as u16, 848, 3830], + } + + test_i16x8_avgr_u => { + [0u16, 0, 0, 0, 0, 0, 0, 0] + (avgr | i16x8_avgr_u) + [1u16, 1, 1, 1, 1, 1, 1, 1], + + [1u16, 2, 3, 4, 5, 6, 7, 8] + (avgr | i16x8_avgr_u) + [32767, 8, -2494i16 as u16,-4i16 as u16, 4882, -4i16 as u16, 848, 3830], + } + test_i32x4_add => { - [0i32, 0, 0, 0] (+ | i32x4_add) [1, 2, 3, 4], + [0i32, 0, 0, 0] (wrapping_add | i32x4_add) [1, 2, 3, 4], [1i32, 1283, i32::MAX, i32::MIN] - (+ | i32x4_add) + (wrapping_add | i32x4_add) [i32::MAX; 4], } test_i32x4_sub => { - [0i32, 0, 0, 0] (- | i32x4_sub) [1, 2, 3, 4], + [0i32, 0, 0, 0] (wrapping_sub | i32x4_sub) [1, 2, 3, 4], [1i32, 1283, i32::MAX, i32::MIN] - (- | i32x4_sub) + (wrapping_sub | i32x4_sub) [i32::MAX; 4], } test_i32x4_mul => { - [0i32, 0, 0, 0] (* | i32x4_mul) [1, 2, 3, 4], + [0i32, 0, 0, 0] (wrapping_mul | i32x4_mul) [1, 2, 3, 4], + [1i32, 1283, i32::MAX, i32::MIN] + (wrapping_mul | i32x4_mul) + [i32::MAX; 4], + } + + test_i32x4_min_s => { + [0i32, 0, 0, 0] (min | i32x4_min_s) [1, 2, 3, 4], + [1i32, 1283, i32::MAX, i32::MIN] + (min | i32x4_min_s) + [i32::MAX; 4], + } + + test_i32x4_min_u => { + [0u32, 0, 0, 0] (min | i32x4_min_u) [1, 2, 3, 4], + [1u32, 1283, i32::MAX as u32, i32::MIN as u32] + (min | i32x4_min_u) + [i32::MAX as u32; 4], + } + + test_i32x4_max_s => { + [0i32, 0, 0, 0] (max | i32x4_max_s) [1, 2, 3, 4], [1i32, 1283, i32::MAX, i32::MIN] - (* | i32x4_mul) + (max | i32x4_max_s) [i32::MAX; 4], } - // TODO: test_i64x2_add - // TODO: test_i64x2_sub + test_i32x4_max_u => { + [0u32, 0, 0, 0] (max | i32x4_max_u) [1, 2, 3, 4], + [1u32, 1283, i32::MAX as u32, i32::MIN as u32] + (max | i32x4_max_u) + [i32::MAX as u32; 4], + } + + test_i64x2_add => { + [0i64, 0] (wrapping_add | i64x2_add) [1, 2], + [i64::MIN, i64::MAX] (wrapping_add | i64x2_add) [i64::MAX, i64::MIN], + [i64::MAX; 2] (wrapping_add | i64x2_add) [i64::MAX; 2], + [-4i64, -4] (wrapping_add | i64x2_add) [800, 939], + } + + test_i64x2_sub => { + [0i64, 0] (wrapping_sub | i64x2_sub) [1, 2], + [i64::MIN, i64::MAX] (wrapping_sub | i64x2_sub) [i64::MAX, i64::MIN], + [i64::MAX; 2] (wrapping_sub | i64x2_sub) [i64::MAX; 2], + [-4i64, -4] (wrapping_sub | 
i64x2_sub) [800, 939], + } + + test_i64x2_mul => { + [0i64, 0] (wrapping_mul | i64x2_mul) [1, 2], + [i64::MIN, i64::MAX] (wrapping_mul | i64x2_mul) [i64::MAX, i64::MIN], + [i64::MAX; 2] (wrapping_mul | i64x2_mul) [i64::MAX; 2], + [-4i64, -4] (wrapping_mul | i64x2_mul) [800, 939], + } + + test_f32x4_add => { + [-1.0f32, 2.0, 3.0, 4.0] (add | f32x4_add) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (add | f32x4_add) + [1., 2., 0., 0.], + } + + test_f32x4_sub => { + [-1.0f32, 2.0, 3.0, 4.0] (sub | f32x4_sub) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (sub | f32x4_sub) + [1., 2., 0., 0.], + } + + test_f32x4_mul => { + [-1.0f32, 2.0, 3.0, 4.0] (mul | f32x4_mul) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (mul | f32x4_mul) + [1., 2., 1., 0.], + } + + test_f32x4_div => { + [-1.0f32, 2.0, 3.0, 4.0] (div | f32x4_div) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (div | f32x4_div) + [1., 2., 0., 0.], + } + + test_f32x4_min => { + [-1.0f32, 2.0, 3.0, 4.0] (min | f32x4_min) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (min | f32x4_min) + [1., 2., 0., 0.], + } + + test_f32x4_max => { + [-1.0f32, 2.0, 3.0, 4.0] (max | f32x4_max) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (max | f32x4_max) + [1., 2., 0., 0.], + } + + test_f32x4_pmin => { + [-1.0f32, 2.0, 3.0, 4.0] (min | f32x4_pmin) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (min | f32x4_pmin) + [1., 2., 0., 0.], + } + + test_f32x4_pmax => { + [-1.0f32, 2.0, 3.0, 4.0] (max | f32x4_pmax) [1., 2., 0., 0.], + [f32::INFINITY, -0.0, f32::NEG_INFINITY, 3.0] + (max | f32x4_pmax) + [1., 2., 0., 0.], + } + + test_f64x2_add => { + [-1.0f64, 2.0] (add | f64x2_add) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (add | f64x2_add) [1., 2.], + } + + test_f64x2_sub => { + [-1.0f64, 2.0] (sub | f64x2_sub) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (sub | f64x2_sub) [1., 2.], + } + + test_f64x2_mul => { + [-1.0f64, 2.0] (mul | f64x2_mul) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (mul | f64x2_mul) [1., 2.], + } + + test_f64x2_div => { + [-1.0f64, 2.0] (div | f64x2_div) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (div | f64x2_div) [1., 2.], + } + + test_f64x2_min => { + [-1.0f64, 2.0] (min | f64x2_min) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (min | f64x2_min) [1., 2.], + } + + test_f64x2_max => { + [-1.0f64, 2.0] (max | f64x2_max) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (max | f64x2_max) [1., 2.], + } + + test_f64x2_pmin => { + [-1.0f64, 2.0] (min | f64x2_pmin) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (min | f64x2_pmin) [1., 2.], + } + + test_f64x2_pmax => { + [-1.0f64, 2.0] (max | f64x2_pmax) [1., 2.], + [f64::INFINITY, f64::NEG_INFINITY] (max | f64x2_pmax) [1., 2.], + } } test_unop! 
{ + test_i8x16_abs => { + (wrapping_abs | i8x16_abs) + [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + + (wrapping_abs | i8x16_abs) + [-2i8, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], + + (wrapping_abs | i8x16_abs) + [-127i8, -44, 43, 126, 4, -128, 127, -59, -43, 39, -69, 79, -3, 35, 83, 13], + } + test_i8x16_neg => { - (- | i8x16_neg) + (wrapping_neg | i8x16_neg) [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - (- | i8x16_neg) + (wrapping_neg | i8x16_neg) [-2i8, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], - (- | i8x16_neg) + (wrapping_neg | i8x16_neg) [-127i8, -44, 43, 126, 4, -128, 127, -59, -43, 39, -69, 79, -3, 35, 83, 13], } + test_i16x8_abs => { + (wrapping_abs | i16x8_abs) [1i16, 1, 1, 1, 1, 1, 1, 1], + (wrapping_abs | i16x8_abs) [2i16, 0x7fff, !0, 4, 42, -5, 33, -4847], + } + test_i16x8_neg => { - (- | i16x8_neg) [1i16, 1, 1, 1, 1, 1, 1, 1], - (- | i16x8_neg) [2i16, 0x7fff, !0, 4, 42, -5, 33, -4847], + (wrapping_neg | i16x8_neg) [1i16, 1, 1, 1, 1, 1, 1, 1], + (wrapping_neg | i16x8_neg) [2i16, 0x7fff, !0, 4, 42, -5, 33, -4847], + } + + test_i32x4_abs => { + (wrapping_abs | i32x4_abs) [1i32, 2, 3, 4], + (wrapping_abs | i32x4_abs) [i32::MIN, i32::MAX, 0, 4], } test_i32x4_neg => { - (- | i32x4_neg) [1i32, 2, 3, 4], - (- | i32x4_neg) [i32::MIN, i32::MAX, 0, 4], + (wrapping_neg | i32x4_neg) [1i32, 2, 3, 4], + (wrapping_neg | i32x4_neg) [i32::MIN, i32::MAX, 0, 4], + } + + test_i64x2_abs => { + (wrapping_abs | i64x2_abs) [1i64, 2], + (wrapping_abs | i64x2_abs) [i64::MIN, i64::MAX], + } + + test_i64x2_neg => { + (wrapping_neg | i64x2_neg) [1i64, 2], + (wrapping_neg | i64x2_neg) [i64::MIN, i64::MAX], + } + + test_f32x4_ceil => { + (ceil | f32x4_ceil) [1.0f32, 2., 2.5, 3.3], + (ceil | f32x4_ceil) [0.0, -0.3, f32::INFINITY, -0.0], + } + + test_f32x4_floor => { + (floor | f32x4_floor) [1.0f32, 2., 2.5, 3.3], + (floor | f32x4_floor) [0.0, -0.3, f32::INFINITY, -0.0], } - // TODO: test_i64x2_neg + test_f32x4_trunc => { + (trunc | f32x4_trunc) [1.0f32, 2., 2.5, 3.3], + (trunc | f32x4_trunc) [0.0, -0.3, f32::INFINITY, -0.0], + } + + test_f32x4_nearest => { + (round | f32x4_nearest) [1.0f32, 2., 2.6, 3.3], + (round | f32x4_nearest) [0.0, -0.3, f32::INFINITY, -0.0], + } + + test_f32x4_abs => { + (abs | f32x4_abs) [1.0f32, 2., 2.6, 3.3], + (abs | f32x4_abs) [0.0, -0.3, f32::INFINITY, -0.0], + } + + test_f32x4_neg => { + (neg | f32x4_neg) [1.0f32, 2., 2.6, 3.3], + (neg | f32x4_neg) [0.0, -0.3, f32::INFINITY, -0.0], + } + + test_f32x4_sqrt => { + (sqrt | f32x4_sqrt) [1.0f32, 2., 2.6, 3.3], + (sqrt | f32x4_sqrt) [0.0, 0.3, f32::INFINITY, 0.1], + } + + test_f64x2_ceil => { + (ceil | f64x2_ceil) [1.0f64, 2.3], + (ceil | f64x2_ceil) [f64::INFINITY, -0.1], + } + + test_f64x2_floor => { + (floor | f64x2_floor) [1.0f64, 2.3], + (floor | f64x2_floor) [f64::INFINITY, -0.1], + } + + test_f64x2_trunc => { + (trunc | f64x2_trunc) [1.0f64, 2.3], + (trunc | f64x2_trunc) [f64::INFINITY, -0.1], + } + + test_f64x2_nearest => { + (round | f64x2_nearest) [1.0f64, 2.3], + (round | f64x2_nearest) [f64::INFINITY, -0.1], + } + + test_f64x2_abs => { + (abs | f64x2_abs) [1.0f64, 2.3], + (abs | f64x2_abs) [f64::INFINITY, -0.1], + } + + test_f64x2_neg => { + (neg | f64x2_neg) [1.0f64, 2.3], + (neg | f64x2_neg) [f64::INFINITY, -0.1], + } + + test_f64x2_sqrt => { + (sqrt | f64x2_sqrt) [1.0f64, 2.3], + (sqrt | f64x2_sqrt) [f64::INFINITY, 0.1], + } } macro_rules! 
floating_point { @@ -3624,6 +4254,12 @@ pub mod tests { compare_bytes(r, vec_a); let r: v128 = v128_and(vec_a, vec_b); compare_bytes(r, vec_a); + let r: v128 = v128_andnot(vec_a, vec_b); + compare_bytes(r, vec_c); + let r: v128 = v128_andnot(vec_a, vec_a); + compare_bytes(r, vec_c); + let r: v128 = v128_andnot(vec_a, vec_c); + compare_bytes(r, vec_a); let r: v128 = v128_or(vec_a, vec_b); compare_bytes(r, vec_b); let r: v128 = v128_not(vec_b); @@ -3680,6 +4316,12 @@ pub mod tests { | [0_i32, 0, 0, 0] | [1_i32, 0, 1, 0] ); + test_bool_red!( + [i64x2_boolean_reductions, v128_any_true, i64x2_all_true] + | [1_i64, 1] + | [0_i64, 0] + | [1_i64, 0] + ); test_bop!(i8x16[i8; 16] | i8x16_eq[i8x16_eq_test]: ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], @@ -3690,6 +4332,8 @@ pub mod tests { [-1, 0, -1, 0 ,-1, 0, -1, -1]); test_bop!(i32x4[i32; 4] | i32x4_eq[i32x4_eq_test]: ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]); + test_bop!(i64x2[i64; 2] | i64x2_eq[i64x2_eq_test]: + ([0, 1], [0, 2]) => [-1, 0]); test_bop!(f32x4[f32; 4] => i32 | f32x4_eq[f32x4_eq_test]: ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]); test_bop!(f64x2[f64; 2] => i64 | f64x2_eq[f64x2_eq_test]: ([0., 1.], [0., 2.]) => [-1, 0]); @@ -3703,59 +4347,106 @@ pub mod tests { [0, -1, 0, -1 ,0, -1, 0, 0]); test_bop!(i32x4[i32; 4] | i32x4_ne[i32x4_ne_test]: ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]); + test_bop!(i64x2[i64; 2] | i64x2_ne[i64x2_ne_test]: + ([0, 1], [0, 2]) => [0, -1]); test_bop!(f32x4[f32; 4] => i32 | f32x4_ne[f32x4_ne_test]: ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]); test_bop!(f64x2[f64; 2] => i64 | f64x2_ne[f64x2_ne_test]: ([0., 1.], [0., 2.]) => [0, -1]); - test_bop!(i8x16[i8; 16] | i8x16_lt_s[i8x16_lt_test]: - ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + test_bop!(i8x16[i8; 16] | i8x16_lt_s[i8x16_lt_s_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1, -1, -1, 0, 0]); + test_bop!(i8x16[i8; 16] | i8x16_lt_u[i8x16_lt_u_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, -12, 13, 14, 15], [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); - test_bop!(i16x8[i16; 8] | i16x8_lt_s[i16x8_lt_test]: - ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => + test_bop!(i16x8[i16; 8] | i16x8_lt_s[i16x8_lt_s_test]: + ([0, 1, 2, 3, 4, 5, 6, -7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, -1]); + test_bop!(i16x8[i16; 8] | i16x8_lt_u[i16x8_lt_u_test]: + ([0, 1, 2, 3, 4, 5, 6, -7], [0, 2, 2, 4, 4, 6, 6, 7]) => [0, -1, 0, -1 ,0, -1, 0, 0]); - test_bop!(i32x4[i32; 4] | i32x4_lt_s[i32x4_lt_test]: - ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]); + test_bop!(i32x4[i32; 4] | i32x4_lt_s[i32x4_lt_s_test]: + ([-1, 1, 2, 3], [0, 2, 2, 4]) => [-1, -1, 0, -1]); + test_bop!(i32x4[i32; 4] | i32x4_lt_u[i32x4_lt_u_test]: + ([-1, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]); + test_bop!(i64x2[i64; 2] | i64x2_lt_s[i64x2_lt_s_test]: + ([-1, 3], [0, 2]) => [-1, 0]); test_bop!(f32x4[f32; 4] => i32 | f32x4_lt[f32x4_lt_test]: ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]); test_bop!(f64x2[f64; 2] => i64 | f64x2_lt[f64x2_lt_test]: ([0., 1.], [0., 2.]) => [0, -1]); - test_bop!(i8x16[i8; 16] | i8x16_gt_s[i8x16_gt_test]: - ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15], + test_bop!(i8x16[i8; 16] | i8x16_gt_s[i8x16_gt_s_test]: + ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, -15], [0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) => [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); - test_bop!(i16x8[i16; 8] | i16x8_gt_s[i16x8_gt_test]: - ([0, 2, 2, 4, 4, 6, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7]) => + test_bop!(i8x16[i8; 16] | i8x16_gt_u[i8x16_gt_u_test]: + ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, -15], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, -1]); + test_bop!(i16x8[i16; 8] | i16x8_gt_s[i16x8_gt_s_test]: + ([0, 2, 2, 4, 4, 6, 6, -7], [0, 1, 2, 3, 4, 5, 6, 7]) => [0, -1, 0, -1 ,0, -1, 0, 0]); - test_bop!(i32x4[i32; 4] | i32x4_gt_s[i32x4_gt_test]: - ([0, 2, 2, 4], [0, 1, 2, 3]) => [0, -1, 0, -1]); + test_bop!(i16x8[i16; 8] | i16x8_gt_u[i16x8_gt_u_test]: + ([0, 2, 2, 4, 4, 6, 6, -7], [0, 1, 2, 3, 4, 5, 6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, -1]); + test_bop!(i32x4[i32; 4] | i32x4_gt_s[i32x4_gt_s_test]: + ([0, 2, 2, -4], [0, 1, 2, 3]) => [0, -1, 0, 0]); + test_bop!(i32x4[i32; 4] | i32x4_gt_u[i32x4_gt_u_test]: + ([0, 2, 2, -4], [0, 1, 2, 3]) => [0, -1, 0, -1]); + test_bop!(i64x2[i64; 2] | i64x2_gt_s[i64x2_gt_s_test]: + ([-1, 2], [0, 1]) => [0, -1]); test_bop!(f32x4[f32; 4] => i32 | f32x4_gt[f32x4_gt_test]: ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [0, -1, 0, -1]); test_bop!(f64x2[f64; 2] => i64 | f64x2_gt[f64x2_gt_test]: ([0., 2.], [0., 1.]) => [0, -1]); - test_bop!(i8x16[i8; 16] | i8x16_ge_s[i8x16_ge_test]: - ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + test_bop!(i8x16[i8; 16] | i8x16_ge_s[i8x16_ge_s_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, 0]); + test_bop!(i8x16[i8; 16] | i8x16_ge_u[i8x16_ge_u_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -15], [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); - test_bop!(i16x8[i16; 8] | i16x8_ge_s[i16x8_ge_test]: - ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => + test_bop!(i16x8[i16; 8] | i16x8_ge_s[i16x8_ge_s_test]: + ([0, 1, 2, 3, 4, 5, 6, -7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [-1, 0, -1, 0 ,-1, 0, -1, 0]); + test_bop!(i16x8[i16; 8] | i16x8_ge_u[i16x8_ge_u_test]: + ([0, 1, 2, 3, 4, 5, 6, -7], [0, 2, 2, 4, 4, 6, 6, 7]) => [-1, 0, -1, 0 ,-1, 0, -1, -1]); - test_bop!(i32x4[i32; 4] | i32x4_ge_s[i32x4_ge_test]: - ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]); + test_bop!(i32x4[i32; 4] | i32x4_ge_s[i32x4_ge_s_test]: + ([0, 1, 2, -3], [0, 2, 2, 4]) => [-1, 0, -1, 0]); + test_bop!(i32x4[i32; 4] | i32x4_ge_u[i32x4_ge_u_test]: + ([0, 1, 2, -3], [0, 2, 2, 4]) => [-1, 0, -1, -1]); + test_bop!(i64x2[i64; 2] | i64x2_ge_s[i64x2_ge_s_test]: + ([0, 1], [-1, 2]) => [-1, 0]); test_bop!(f32x4[f32; 4] => i32 | f32x4_ge[f32x4_ge_test]: ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]); test_bop!(f64x2[f64; 2] => i64 | f64x2_ge[f64x2_ge_test]: ([0., 1.], [0., 2.]) => [-1, 0]); - test_bop!(i8x16[i8; 16] | i8x16_le_s[i8x16_le_test]: - ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15], + test_bop!(i8x16[i8; 16] | i8x16_le_s[i8x16_le_s_test]: + ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, -15], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] ) => [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); - test_bop!(i16x8[i16; 8] | i16x8_le_s[i16x8_le_test]: - ([0, 2, 2, 4, 4, 6, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7]) => + test_bop!(i8x16[i8; 16] | i8x16_le_u[i8x16_le_u_test]: + ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 
12, 12, 14, 14, -15], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + ) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, 0]); + test_bop!(i16x8[i16; 8] | i16x8_le_s[i16x8_le_s_test]: + ([0, 2, 2, 4, 4, 6, 6, -7], [0, 1, 2, 3, 4, 5, 6, 7]) => [-1, 0, -1, 0 ,-1, 0, -1, -1]); - test_bop!(i32x4[i32; 4] | i32x4_le_s[i32x4_le_test]: - ([0, 2, 2, 4], [0, 1, 2, 3]) => [-1, 0, -1, 0]); + test_bop!(i16x8[i16; 8] | i16x8_le_u[i16x8_le_u_test]: + ([0, 2, 2, 4, 4, 6, 6, -7], [0, 1, 2, 3, 4, 5, 6, 7]) => + [-1, 0, -1, 0 ,-1, 0, -1, 0]); + test_bop!(i32x4[i32; 4] | i32x4_le_s[i32x4_le_s_test]: + ([0, 2, 2, -4], [0, 1, 2, 3]) => [-1, 0, -1, -1]); + test_bop!(i32x4[i32; 4] | i32x4_le_u[i32x4_le_u_test]: + ([0, 2, 2, -4], [0, 1, 2, 3]) => [-1, 0, -1, 0]); + test_bop!(i64x2[i64; 2] | i64x2_le_s[i64x2_le_s_test]: + ([0, 2], [0, 1]) => [-1, 0]); test_bop!(f32x4[f32; 4] => i32 | f32x4_le[f32x4_le_test]: ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [-1, 0, -1, -0]); test_bop!(f64x2[f64; 2] => i64 | f64x2_le[f64x2_le_test]: ([0., 2.], [0., 1.]) => [-1, 0]); @@ -3827,13 +4518,27 @@ pub mod tests { [u32::MAX as f32, 2., 3., 4.] ); - // FIXME: this fails, and produces 0 instead of saturating at i32::MAX - // test_conv!( - // i32x4_trunc_s_f32x4_sat - // | i32x4_trunc_sat_f32x4_s - // | i32x4 - // | [f32::NAN, 2., (i32::MAX as f32 + 1.), 4.], - // [0, 2, i32::MAX, 4] - // ); - // FIXME: add other saturating tests + #[test] + fn test_conversions() { + unsafe { + compare_bytes( + i32x4_trunc_sat_f32x4_s(f32x4_const( + 1., + f32::NEG_INFINITY, + f32::INFINITY, + f32::NAN, + )), + i32x4_const(1, i32::MIN, i32::MAX, 0), + ); + compare_bytes( + i32x4_trunc_sat_f32x4_u(f32x4_const( + 1., + f32::NEG_INFINITY, + f32::INFINITY, + f32::NAN, + )), + i32x4_const(1, 0, u32::MAX as i32, 0), + ); + } + } }
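The `trunc_sat` conversions exercised in `test_conversions` follow WebAssembly's saturating rules: a NaN lane becomes 0 and out-of-range lanes clamp to the bounds of the destination type. A scalar sketch of the signed rule, where `trunc_sat_f32_s` is a hypothetical helper for illustration rather than an intrinsic from this module:

// Illustrative model of i32x4.trunc_sat_f32x4_s applied to a single lane.
fn trunc_sat_f32_s(x: f32) -> i32 {
    if x.is_nan() {
        0 // NaN lanes produce zero
    } else if x <= i32::MIN as f32 {
        i32::MIN // saturate downwards
    } else if x >= i32::MAX as f32 {
        i32::MAX // saturate upwards
    } else {
        x as i32 // in-range values truncate toward zero
    }
}

// Matches the expectations in test_conversions above:
// trunc_sat_f32_s(f32::NEG_INFINITY) == i32::MIN
// trunc_sat_f32_s(f32::INFINITY) == i32::MAX
// trunc_sat_f32_s(f32::NAN) == 0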