diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c5ef15004c..cadfc38300 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -77,7 +77,7 @@ jobs: - mips64-unknown-linux-gnuabi64 - mips64el-unknown-linux-gnuabi64 - s390x-unknown-linux-gnu - - wasm32-unknown-unknown + - wasm32-wasi - i586-unknown-linux-gnu - x86_64-linux-android - arm-linux-androideabi @@ -129,7 +129,7 @@ jobs: disable_assert_instr: true - target: s390x-unknown-linux-gnu os: ubuntu-latest - - target: wasm32-unknown-unknown + - target: wasm32-wasi os: ubuntu-latest - target: aarch64-unknown-linux-gnu os: ubuntu-latest diff --git a/ci/docker/wasm32-unknown-unknown/Dockerfile b/ci/docker/wasm32-unknown-unknown/Dockerfile deleted file mode 100644 index 7b2567bcc7..0000000000 --- a/ci/docker/wasm32-unknown-unknown/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -FROM ubuntu:18.04 - -RUN apt-get update -y && apt-get install -y --no-install-recommends \ - ca-certificates \ - clang \ - cmake \ - curl \ - git \ - libc6-dev \ - make \ - python \ - python3 \ - xz-utils - -# Install `wasm2wat` -RUN git clone --recursive https://github.com/WebAssembly/wabt -RUN make -C wabt -j$(nproc) -ENV PATH=$PATH:/wabt/bin - -# Install `node` -RUN curl https://nodejs.org/dist/v12.0.0/node-v12.0.0-linux-x64.tar.xz | tar xJf - -ENV PATH=$PATH:/node-v12.0.0-linux-x64/bin - -COPY docker/wasm32-unknown-unknown/wasm-entrypoint.sh /wasm-entrypoint.sh -ENTRYPOINT ["/wasm-entrypoint.sh"] diff --git a/ci/docker/wasm32-unknown-unknown/wasm-entrypoint.sh b/ci/docker/wasm32-unknown-unknown/wasm-entrypoint.sh deleted file mode 100755 index 9916d1cb22..0000000000 --- a/ci/docker/wasm32-unknown-unknown/wasm-entrypoint.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -set -e - -# Download an appropriate version of wasm-bindgen based off of what's being used -# in the lock file. Ideally we'd use `wasm-pack` at some point for this! -version=$(grep -A 1 'name = "wasm-bindgen"' Cargo.lock | grep version) -version=$(echo $version | awk '{print $3}' | sed 's/"//g') -curl -L https://github.com/rustwasm/wasm-bindgen/releases/download/$version/wasm-bindgen-$version-x86_64-unknown-linux-musl.tar.gz \ - | tar xzf - -C target -export PATH=$PATH:`pwd`/target/wasm-bindgen-$version-x86_64-unknown-linux-musl -export CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=wasm-bindgen-test-runner -export NODE_ARGS=--experimental-wasm-simd - -exec "$@" diff --git a/ci/docker/wasm32-wasi/Dockerfile b/ci/docker/wasm32-wasi/Dockerfile new file mode 100644 index 0000000000..9bb4dc6f1d --- /dev/null +++ b/ci/docker/wasm32-wasi/Dockerfile @@ -0,0 +1,16 @@ +FROM ubuntu:20.04 + +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update -y && apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + xz-utils \ + clang + +RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/v0.19.0/wasmtime-v0.19.0-x86_64-linux.tar.xz | tar xJf - +ENV PATH=$PATH:/wasmtime-v0.19.0-x86_64-linux + +ENV CARGO_TARGET_WASM32_WASI_RUNNER="wasmtime \ + --enable-simd \ + --mapdir .::/checkout/target/wasm32-wasi/release/deps \ + --" diff --git a/ci/run.sh b/ci/run.sh index 682a38636c..2b7e51be3d 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -44,6 +44,16 @@ cargo_test() { fi cmd="$cmd ${subcmd} --target=$TARGET $1" cmd="$cmd -- $2" + + # wasm targets can't catch panics so if a test failures make sure the test + # harness isn't trying to capture output, otherwise we won't get any useful + # output. 
+ case ${TARGET} in + wasm32*) + cmd="$cmd --nocapture" + ;; + esac + $cmd } @@ -72,20 +82,11 @@ case ${TARGET} in export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx" cargo_test "--release" ;; - wasm32-unknown-unknown*) - # Attempt to actually run some SIMD tests in node.js. Unfortunately - # though node.js (transitively through v8) doesn't have support for the - # full SIMD spec yet, only some functions. As a result only pass in - # some target features and a special `--cfg` - # FIXME: broken - #export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+simd128 --cfg only_node_compatible_functions" - #cargo_test "--release" - - # After that passes make sure that all intrinsics compile, passing in - # the extra feature to compile in non-node-compatible SIMD. - # FIXME: broken - #export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+simd128,+unimplemented-simd128" - #cargo_test "--release --no-run" + wasm32*) + prev="$RUSTFLAGS" + export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+simd128,+unimplemented-simd128" + cargo_test "--release" + export RUSTFLAGS="$prev" ;; # FIXME: don't build anymore #mips-*gnu* | mipsel-*gnu*) @@ -111,7 +112,7 @@ case ${TARGET} in esac -if [ "$NORUN" != "1" ] && [ "$NOSTD" != 1 ] && [ "$TARGET" != "wasm32-unknown-unknown" ]; then +if [ "$NORUN" != "1" ] && [ "$NOSTD" != 1 ]; then # Test examples ( cd examples diff --git a/crates/assert-instr-macro/src/lib.rs b/crates/assert-instr-macro/src/lib.rs index 75fe9851ca..0c03e80653 100644 --- a/crates/assert-instr-macro/src/lib.rs +++ b/crates/assert-instr-macro/src/lib.rs @@ -122,6 +122,13 @@ pub fn assert_instr( // generate some code that's hopefully very tight in terms of // codegen but is otherwise unique to prevent code from being // folded. + // + // This is avoided on Wasm32 right now since these functions aren't + // inlined which breaks our tests since each intrinsic looks like it + // calls functions. Turns out functions aren't similar enough to get + // merged on wasm32 anyway. This bug is tracked at + // rust-lang/rust#74320. + #[cfg(not(target_arch = "wasm32"))] ::stdarch_test::_DONT_DEDUP.store( std::mem::transmute(#shim_name_str.as_bytes().as_ptr()), std::sync::atomic::Ordering::Relaxed, @@ -131,8 +138,7 @@ pub fn assert_instr( }; let tokens: TokenStream = quote! { - #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] - #[cfg_attr(not(target_arch = "wasm32"), test)] + #[test] #[allow(non_snake_case)] fn #assert_name() { #to_test @@ -146,11 +152,6 @@ pub fn assert_instr( #instr); } }; - // why? necessary now to get tests to work? - let tokens: TokenStream = tokens - .to_string() - .parse() - .expect("cannot parse tokenstream"); let tokens: TokenStream = quote! 
{ #item diff --git a/crates/core_arch/Cargo.toml b/crates/core_arch/Cargo.toml index 72d89b0168..a25b20bf0c 100644 --- a/crates/core_arch/Cargo.toml +++ b/crates/core_arch/Cargo.toml @@ -26,8 +26,5 @@ maintenance = { status = "experimental" } stdarch-test = { version = "0.*", path = "../stdarch-test" } std_detect = { version = "0.*", path = "../std_detect" } -[target.wasm32-unknown-unknown.dev-dependencies] -wasm-bindgen-test = "0.2.47" - [package.metadata.docs.rs] rustdoc-args = [ "--cfg", "dox" ] diff --git a/crates/core_arch/build.rs b/crates/core_arch/build.rs index 4d65e9ddc3..8a347e3f62 100644 --- a/crates/core_arch/build.rs +++ b/crates/core_arch/build.rs @@ -1,3 +1,17 @@ +use std::env; + fn main() { println!("cargo:rustc-cfg=core_arch_docs"); + + // Used to tell our `#[assert_instr]` annotations that all simd intrinsics + // are available to test their codegen, since some are gated behind an extra + // `-Ctarget-feature=+unimplemented-simd128` that doesn't have any + // equivalent in `#[target_feature]` right now. + println!("cargo:rerun-if-env-changed=RUSTFLAGS"); + if env::var("RUSTFLAGS") + .unwrap_or_default() + .contains("unimplemented-simd128") + { + println!("cargo:rustc-cfg=all_simd"); + } } diff --git a/crates/core_arch/src/lib.rs b/crates/core_arch/src/lib.rs index fae4519a0e..aa8d4c9820 100644 --- a/crates/core_arch/src/lib.rs +++ b/crates/core_arch/src/lib.rs @@ -1,9 +1,12 @@ #![doc(include = "core_arch_docs.md")] +#![allow(improper_ctypes_definitions)] #![allow(dead_code)] #![allow(unused_features)] +#![allow(incomplete_features)] #![feature( const_fn, const_fn_union, + const_generics, custom_inner_attributes, link_llvm_intrinsics, platform_intrinsics, @@ -32,9 +35,12 @@ adx_target_feature, rtm_target_feature, f16c_target_feature, - external_doc + external_doc, + allow_internal_unstable, + decl_macro )] #![cfg_attr(test, feature(test, abi_vectorcall, untagged_unions))] +#![cfg_attr(all(test, target_arch = "wasm32"), feature(wasm_simd))] #![deny(clippy::missing_inline_in_public_items)] #![allow( clippy::inline_always, @@ -66,13 +72,10 @@ extern crate std_detect; #[cfg(test)] extern crate stdarch_test; -#[cfg(all(test, target_arch = "wasm32"))] -extern crate wasm_bindgen_test; - #[path = "mod.rs"] mod core_arch; -pub use self::core_arch::arch::*; +pub use self::core_arch::arch; #[allow(unused_imports)] use core::{ffi, hint, intrinsics, marker, mem, ops, ptr, sync}; diff --git a/crates/core_arch/src/mod.rs b/crates/core_arch/src/mod.rs index 4ed18d7648..d66bbede9d 100644 --- a/crates/core_arch/src/mod.rs +++ b/crates/core_arch/src/mod.rs @@ -57,14 +57,110 @@ pub mod arch { /// Platform-specific intrinsics for the `wasm32` platform. /// - - /// # Availability + /// This module provides intrinsics specific to the WebAssembly + /// architecture. Here you'll find intrinsics necessary for leveraging + /// WebAssembly proposals such as [atomics] and [simd]. These proposals are + /// evolving over time and as such the support here is unstable and requires + /// the nightly channel. As WebAssembly proposals stabilize these functions + /// will also become stable. /// - /// Note that intrinsics gated by `target_feature = "atomics"` or `target_feature = "simd128"` - /// are only available **when the standard library itself is compiled with the the respective - /// target feature**. This version of the standard library is not obtainable via `rustup`, - /// but rather will require the standard library to be compiled from source. 
- /// See the [module documentation](../index.html) for more details. + /// [atomics]: https://github.com/webassembly/threads + /// [simd]: https://github.com/webassembly/simd + /// + /// See the [module documentation](../index.html) for general information + /// about the `arch` module and platform intrinsics. + /// + /// ## Atomics + /// + /// The [threads proposal][atomics] for WebAssembly adds a number of + /// instructions for dealing with multithreaded programs. Atomic + /// instructions can all be generated through `std::sync::atomic` types, but + /// some instructions have no equivalent in Rust, such as + /// `memory.atomic.notify`, so this module provides these intrinsics. + /// + /// At this time, however, these intrinsics are only available **when the + /// standard library itself is compiled with atomics**. Compiling with + /// atomics is not enabled by default and requires passing + /// `-Ctarget-feature=+atomics` to rustc. The standard library shipped via + /// `rustup` is not compiled with atomics. To get access to these intrinsics + /// you'll need to compile the standard library from source with the + /// requisite compiler flags. + /// + /// ## SIMD + /// + /// The [simd proposal][simd] for WebAssembly adds a new `v128` type for a + /// 128-bit SIMD register. It also adds a large array of instructions to + /// operate on the `v128` type to perform data processing. The SIMD proposal + /// has been in progress for quite some time and many instructions have come + /// and gone. This module attempts to keep up with the proposal, but if you + /// notice anything awry please feel free to [open an + /// issue](https://github.com/rust-lang/stdarch/issues/new). + /// + /// It's important to be aware that SIMD support + /// in WebAssembly is still in its early days. There are lots of pieces to + /// demo and prototype with, but discussions and support are still in + /// progress. There are a number of pitfalls and gotchas in various places, + /// which we'll attempt to document here, but there may be others + /// lurking! + /// + /// Using SIMD is intended to be similar to how you would use it on `x86_64`, for + /// example. You'd write a function such as: + /// + /// ```rust,ignore + /// #[cfg(target_arch = "wasm32")] + /// #[target_feature(enable = "simd128")] + /// unsafe fn uses_simd() { + /// use std::arch::wasm32::*; + /// // ... + /// } + /// ``` + /// + /// Unlike `x86_64`, however, WebAssembly does not currently have dynamic + /// detection at runtime as to whether SIMD is supported (this is one of the + /// motivators for the [conditional sections proposal][condsections], but + /// that is still pretty early days). This means that your binary will + /// either have SIMD and can only run on engines which support SIMD, or it + /// will not have SIMD at all. For compatibility the standard library itself + /// does not use any SIMD internally. Determining how best to ship your + /// WebAssembly binary with SIMD is largely left up to you as it can be + /// pretty nuanced depending on your situation. + /// + /// [condsections]: https://github.com/webassembly/conditional-sections + /// + /// To enable SIMD support at compile time you need to do one of two things: + /// + /// * First you can annotate functions with `#[target_feature(enable = + /// "simd128")]`. This causes just that one function to have SIMD support + /// available to it, and intrinsics will get inlined as usual in this + /// situation.
+ /// + /// * Second you can compile your program with `-Ctarget-feature=+simd128`. + /// This compilation flag blanket enables SIMD support for your entire + /// compilation. Note that this does not include the standard library + /// unless you recompile the standard library. + /// + /// If you enable SIMD via either of these routes then you'll have a + /// WebAssembly binary that uses SIMD instructions, and you'll need to ship + /// that accordingly. Also note that if you call SIMD intrinsics but don't + /// enable SIMD via either of these mechanisms, you'll still have SIMD + /// generated in your program. This means to generate a binary without SIMD + /// you'll need to avoid both options above plus calling into any intrinsics + /// in this module. + /// + /// > **Note**: Due to + /// > [rust-lang/rust#74320](https://github.com/rust-lang/rust/issues/74320) + /// > it's recommended to compile your entire program with SIMD support + /// > (using `RUSTFLAGS`) or otherwise functions may not be inlined + /// > correctly. + /// + /// > **Note**: LLVM's SIMD support is actually split into two features: + /// > `simd128` and `unimplemented-simd128`. Rust code can enable `simd128` + /// > with `#[target_feature]` (and test for it with `#[cfg(target_feature = + /// > "simd128")]`, but it cannot enable `unimplemented-simd128`. The only + /// > way to enable this feature is to compile with + /// > `-Ctarget-feature=+simd128,+unimplemented-simd128`. This second + /// > feature enables more recent instructions implemented in LLVM which + /// > haven't always had enough time to make their way to runtimes. #[cfg(any(target_arch = "wasm32", dox))] #[doc(cfg(target_arch = "wasm32"))] #[stable(feature = "simd_wasm32", since = "1.33.0")] diff --git a/crates/core_arch/src/wasm32/atomic.rs b/crates/core_arch/src/wasm32/atomic.rs index b8ffaeac0e..950f565f92 100644 --- a/crates/core_arch/src/wasm32/atomic.rs +++ b/crates/core_arch/src/wasm32/atomic.rs @@ -10,8 +10,6 @@ #[cfg(test)] use stdarch_test::assert_instr; -#[cfg(test)] -use wasm_bindgen_test::wasm_bindgen_test; extern "C" { #[link_name = "llvm.wasm.atomic.wait.i32"] @@ -22,7 +20,7 @@ extern "C" { fn llvm_atomic_notify(ptr: *mut i32, cnt: i32) -> i32; } -/// Corresponding intrinsic to wasm's [`i32.atomic.wait` instruction][instr] +/// Corresponding intrinsic to wasm's [`memory.atomic.wait32` instruction][instr] /// /// This function, when called, will block the current thread if the memory /// pointed to by `ptr` is equal to `expression` (performing this action @@ -50,14 +48,14 @@ extern "C" { /// library is not obtainable via `rustup`, but rather will require the /// standard library to be compiled from source. 
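As a concrete illustration of the `#[target_feature]` route described in the module documentation above, here is a minimal sketch (not part of the patch) that assumes the `unsafe`, const-generic lane intrinsics introduced later in this diff are available on nightly:

```rust
// Minimal sketch, assuming nightly Rust and the `i32x4_splat` /
// `i32x4_extract_lane` intrinsics added by this patch.
#[cfg(target_arch = "wasm32")]
#[target_feature(enable = "simd128")]
unsafe fn first_lane_of_splat(x: i32) -> i32 {
    use std::arch::wasm32::*;
    let v = i32x4_splat(x); // broadcast `x` into all four lanes
    i32x4_extract_lane::<0>(v) // read lane 0 back out
}
```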
/// -/// [instr]: https://github.com/WebAssembly/threads/blob/master/proposals/threads/Overview.md#wait +/// [instr]: https://webassembly.github.io/threads/syntax/instructions.html#syntax-instr-atomic-memory #[inline] #[cfg_attr(test, assert_instr("i32.atomic.wait"))] -pub unsafe fn i32_atomic_wait(ptr: *mut i32, expression: i32, timeout_ns: i64) -> i32 { +pub unsafe fn memory_atomic_wait32(ptr: *mut i32, expression: i32, timeout_ns: i64) -> i32 { llvm_atomic_wait_i32(ptr, expression, timeout_ns) } -/// Corresponding intrinsic to wasm's [`i64.atomic.wait` instruction][instr] +/// Corresponding intrinsic to wasm's [`memory.atomic.wait64` instruction][instr] /// /// This function, when called, will block the current thread if the memory /// pointed to by `ptr` is equal to `expression` (performing this action @@ -85,14 +83,14 @@ pub unsafe fn i32_atomic_wait(ptr: *mut i32, expression: i32, timeout_ns: i64) - /// library is not obtainable via `rustup`, but rather will require the /// standard library to be compiled from source. /// -/// [instr]: https://github.com/WebAssembly/threads/blob/master/proposals/threads/Overview.md#wait +/// [instr]: https://webassembly.github.io/threads/syntax/instructions.html#syntax-instr-atomic-memory #[inline] #[cfg_attr(test, assert_instr("i64.atomic.wait"))] -pub unsafe fn i64_atomic_wait(ptr: *mut i64, expression: i64, timeout_ns: i64) -> i32 { +pub unsafe fn memory_atomic_wait64(ptr: *mut i64, expression: i64, timeout_ns: i64) -> i32 { llvm_atomic_wait_i64(ptr, expression, timeout_ns) } -/// Corresponding intrinsic to wasm's [`atomic.notify` instruction][instr] +/// Corresponding intrinsic to wasm's [`memory.atomic.notify` instruction][instr] /// /// This function will notify a number of threads blocked on the address /// indicated by `ptr`. Threads previously blocked with the `i32_atomic_wait` @@ -112,9 +110,9 @@ pub unsafe fn i64_atomic_wait(ptr: *mut i64, expression: i64, timeout_ns: i64) - /// library is not obtainable via `rustup`, but rather will require the /// standard library to be compiled from source. 
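A hypothetical sketch of the wait/notify pairing described above, assuming a standard library built with `-Ctarget-feature=+atomics` and the renamed `memory_atomic_wait32` / `memory_atomic_notify` intrinsics from this patch; the return-code comments follow the threads proposal:

```rust
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
unsafe fn wait_then_wake(flag: *mut i32) {
    use std::arch::wasm32::*;
    // Block only if `*flag` currently equals 0; a negative timeout waits
    // forever. The result is 0 ("woken"), 1 ("value mismatch") or
    // 2 ("timed out") per the threads proposal.
    let _ = memory_atomic_wait32(flag, 0, -1);
    // Wake at most one other thread parked on this same address.
    let _ = memory_atomic_notify(flag, 1);
}
```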
/// -/// [instr]: https://github.com/WebAssembly/threads/blob/master/proposals/threads/Overview.md#wake +/// [instr]: https://webassembly.github.io/threads/syntax/instructions.html#syntax-instr-atomic-memory #[inline] #[cfg_attr(test, assert_instr("atomic.wake"))] -pub unsafe fn atomic_notify(ptr: *mut i32, waiters: u32) -> u32 { +pub unsafe fn memory_atomic_notify(ptr: *mut i32, waiters: u32) -> u32 { llvm_atomic_notify(ptr, waiters as i32) as u32 } diff --git a/crates/core_arch/src/wasm32/memory.rs b/crates/core_arch/src/wasm32/memory.rs index 3df8abdee2..c4e801b738 100644 --- a/crates/core_arch/src/wasm32/memory.rs +++ b/crates/core_arch/src/wasm32/memory.rs @@ -1,7 +1,5 @@ #[cfg(test)] use stdarch_test::assert_instr; -#[cfg(test)] -use wasm_bindgen_test::wasm_bindgen_test; extern "C" { #[link_name = "llvm.wasm.memory.grow.i32"] diff --git a/crates/core_arch/src/wasm32/mod.rs b/crates/core_arch/src/wasm32/mod.rs index 5e7a9d85f4..10f07ce610 100644 --- a/crates/core_arch/src/wasm32/mod.rs +++ b/crates/core_arch/src/wasm32/mod.rs @@ -2,17 +2,13 @@ #[cfg(test)] use stdarch_test::assert_instr; -#[cfg(test)] -use wasm_bindgen_test::wasm_bindgen_test; #[cfg(any(target_feature = "atomics", dox))] mod atomic; #[cfg(any(target_feature = "atomics", dox))] pub use self::atomic::*; -#[cfg(any(target_feature = "simd128", dox))] mod simd128; -#[cfg(any(target_feature = "simd128", dox))] pub use self::simd128::*; mod memory; diff --git a/crates/core_arch/src/wasm32/simd128.rs b/crates/core_arch/src/wasm32/simd128.rs index 5ac01a4fae..798035d76a 100644 --- a/crates/core_arch/src/wasm32/simd128.rs +++ b/crates/core_arch/src/wasm32/simd128.rs @@ -3,7 +3,9 @@ //! [WebAssembly `SIMD128` ISA]: //! https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md +#![unstable(feature = "wasm_simd", issue = "74372")] #![allow(non_camel_case_types)] +#![allow(unused_imports)] use crate::{ core_arch::{simd::*, simd_llvm::*}, @@ -14,8 +16,6 @@ use crate::{ #[cfg(test)] use stdarch_test::assert_instr; -#[cfg(test)] -use wasm_bindgen_test::wasm_bindgen_test; types! { /// WASM-specific 128-bit wide SIMD vector type. @@ -26,62 +26,73 @@ types! 
{ #[allow(non_camel_case_types)] #[unstable(feature = "stdimd_internal", issue = "none")] pub(crate) trait v128Ext: Sized { - fn as_v128(self) -> v128; + unsafe fn as_v128(self) -> v128; #[inline] - fn as_u8x16(self) -> u8x16 { - unsafe { transmute(self.as_v128()) } + #[target_feature(enable = "simd128")] + unsafe fn as_u8x16(self) -> u8x16 { + transmute(self.as_v128()) } #[inline] - fn as_u16x8(self) -> u16x8 { - unsafe { transmute(self.as_v128()) } + #[target_feature(enable = "simd128")] + unsafe fn as_u16x8(self) -> u16x8 { + transmute(self.as_v128()) } #[inline] - fn as_u32x4(self) -> u32x4 { - unsafe { transmute(self.as_v128()) } + #[target_feature(enable = "simd128")] + unsafe fn as_u32x4(self) -> u32x4 { + transmute(self.as_v128()) } #[inline] - fn as_u64x2(self) -> u64x2 { - unsafe { transmute(self.as_v128()) } + #[target_feature(enable = "simd128")] + unsafe fn as_u64x2(self) -> u64x2 { + transmute(self.as_v128()) } #[inline] - fn as_i8x16(self) -> i8x16 { - unsafe { transmute(self.as_v128()) } + #[target_feature(enable = "simd128")] + unsafe fn as_i8x16(self) -> i8x16 { + transmute(self.as_v128()) } #[inline] - fn as_i16x8(self) -> i16x8 { - unsafe { transmute(self.as_v128()) } + #[target_feature(enable = "simd128")] + unsafe fn as_i16x8(self) -> i16x8 { + transmute(self.as_v128()) } #[inline] - fn as_i32x4(self) -> i32x4 { - unsafe { transmute(self.as_v128()) } + #[target_feature(enable = "simd128")] + unsafe fn as_i32x4(self) -> i32x4 { + transmute(self.as_v128()) } #[inline] - fn as_i64x2(self) -> i64x2 { - unsafe { transmute(self.as_v128()) } + #[target_feature(enable = "simd128")] + unsafe fn as_i64x2(self) -> i64x2 { + transmute(self.as_v128()) } #[inline] - fn as_f32x4(self) -> f32x4 { - unsafe { transmute(self.as_v128()) } + #[target_feature(enable = "simd128")] + unsafe fn as_f32x4(self) -> f32x4 { + transmute(self.as_v128()) } #[inline] - fn as_f64x2(self) -> f64x2 { - unsafe { transmute(self.as_v128()) } + #[target_feature(enable = "simd128")] + unsafe fn as_f64x2(self) -> f64x2 { + transmute(self.as_v128()) } } impl v128Ext for v128 { #[inline] - fn as_v128(self) -> Self { + #[target_feature(enable = "simd128")] + unsafe fn as_v128(self) -> Self { self } } @@ -119,11 +130,6 @@ extern "C" { #[link_name = "llvm.wasm.alltrue.v4i32"] fn llvm_i32x4_all_true(x: i32x4) -> i32; - #[link_name = "llvm.wasm.anytrue.v2i64"] - fn llvm_i64x2_any_true(x: i64x2) -> i32; - #[link_name = "llvm.wasm.alltrue.v2i64"] - fn llvm_i64x2_all_true(x: i64x2) -> i32; - #[link_name = "llvm.fabs.v4f32"] fn llvm_f32x4_abs(x: f32x4) -> f32x4; #[link_name = "llvm.sqrt.v4f32"] @@ -143,129 +149,441 @@ extern "C" { #[link_name = "llvm.wasm.bitselect.v16i8"] fn llvm_bitselect(a: i8x16, b: i8x16, c: i8x16) -> i8x16; + #[link_name = "llvm.wasm.swizzle"] + fn llvm_swizzle(a: i8x16, b: i8x16) -> i8x16; + + #[link_name = "llvm.wasm.bitmask.v16i8"] + fn llvm_bitmask_i8x16(a: i8x16) -> i32; + #[link_name = "llvm.wasm.narrow.signed.v16i8.v8i16"] + fn llvm_narrow_i8x16_s(a: i16x8, b: i16x8) -> i8x16; + #[link_name = "llvm.wasm.narrow.unsigned.v16i8.v8i16"] + fn llvm_narrow_i8x16_u(a: i16x8, b: i16x8) -> i8x16; + #[link_name = "llvm.wasm.avgr.unsigned.v16i8"] + fn llvm_avgr_u_i8x16(a: i8x16, b: i8x16) -> i8x16; + + #[link_name = "llvm.wasm.bitmask.v8i16"] + fn llvm_bitmask_i16x8(a: i16x8) -> i32; + #[link_name = "llvm.wasm.narrow.signed.v8i16.v8i16"] + fn llvm_narrow_i16x8_s(a: i32x4, b: i32x4) -> i16x8; + #[link_name = "llvm.wasm.narrow.unsigned.v8i16.v8i16"] + fn llvm_narrow_i16x8_u(a: i32x4, b: i32x4) -> i16x8; 
+ #[link_name = "llvm.wasm.avgr.unsigned.v8i16"] + fn llvm_avgr_u_i16x8(a: i16x8, b: i16x8) -> i16x8; + #[link_name = "llvm.wasm.widen.low.signed.v8i16.v16i8"] + fn llvm_widen_low_i16x8_s(a: i8x16) -> i16x8; + #[link_name = "llvm.wasm.widen.high.signed.v8i16.v16i8"] + fn llvm_widen_high_i16x8_s(a: i8x16) -> i16x8; + #[link_name = "llvm.wasm.widen.low.unsigned.v8i16.v16i8"] + fn llvm_widen_low_i16x8_u(a: i8x16) -> i16x8; + #[link_name = "llvm.wasm.widen.high.unsigned.v8i16.v16i8"] + fn llvm_widen_high_i16x8_u(a: i8x16) -> i16x8; + + #[link_name = "llvm.wasm.bitmask.v4i32"] + fn llvm_bitmask_i32x4(a: i32x4) -> i32; + #[link_name = "llvm.wasm.avgr.unsigned.v4i32"] + fn llvm_avgr_u_i32x4(a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.wasm.widen.low.signed.v4i32.v8i16"] + fn llvm_widen_low_i32x4_s(a: i16x8) -> i32x4; + #[link_name = "llvm.wasm.widen.high.signed.v4i32.v8i16"] + fn llvm_widen_high_i32x4_s(a: i16x8) -> i32x4; + #[link_name = "llvm.wasm.widen.low.unsigned.v4i32.v8i16"] + fn llvm_widen_low_i32x4_u(a: i16x8) -> i32x4; + #[link_name = "llvm.wasm.widen.high.unsigned.v4i32.v8i16"] + fn llvm_widen_high_i32x4_u(a: i16x8) -> i32x4; } /// Loads a `v128` vector from the given heap address. #[inline] #[cfg_attr(test, assert_instr(v128.load))] +#[target_feature(enable = "simd128")] pub unsafe fn v128_load(m: *const v128) -> v128 { - ptr::read(m) + *m +} + +/// Load eight 8-bit integers and sign extend each one to a 16-bit lane +#[inline] +#[cfg_attr(all(test, all_simd), assert_instr(i16x8.load8x8_s))] +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_load8x8_s(m: *const i8) -> v128 { + transmute(simd_cast::<_, i16x8>(*(m as *const i8x8))) +} + +/// Load eight 8-bit integers and zero extend each one to a 16-bit lane +#[inline] +#[cfg_attr(all(test, all_simd), assert_instr(i16x8.load8x8_u))] +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_load8x8_u(m: *const u8) -> v128 { + transmute(simd_cast::<_, u16x8>(*(m as *const u8x8))) +} + +/// Load four 16-bit integers and sign extend each one to a 32-bit lane +#[inline] +#[cfg_attr(all(test, all_simd), assert_instr(i32x4.load16x4_s))] +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_load16x4_s(m: *const i16) -> v128 { + transmute(simd_cast::<_, i32x4>(*(m as *const i16x4))) +} + +/// Load four 16-bit integers and zero extend each one to a 32-bit lane +#[inline] +#[cfg_attr(all(test, all_simd), assert_instr(i32x4.load16x4_u))] +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_load16x4_u(m: *const u16) -> v128 { + transmute(simd_cast::<_, u32x4>(*(m as *const u16x4))) +} + +/// Load two 32-bit integers and sign extend each one to a 64-bit lane +#[inline] +#[cfg_attr(all(test, all_simd), assert_instr(i64x2.load32x2_s))] +#[target_feature(enable = "simd128")] +pub unsafe fn i64x2_load32x2_s(m: *const i32) -> v128 { + transmute(simd_cast::<_, i64x2>(*(m as *const i32x2))) +} + +/// Load two 32-bit integers and zero extend each one to a 64-bit lane +#[inline] +#[cfg_attr(all(test, all_simd), assert_instr(i64x2.load32x2_u))] +#[target_feature(enable = "simd128")] +pub unsafe fn i64x2_load32x2_u(m: *const u32) -> v128 { + transmute(simd_cast::<_, u64x2>(*(m as *const u32x2))) +} + +/// Load a single element and splat to all lanes of a v128 vector. 
+#[inline] +#[cfg_attr(all(test, all_simd), assert_instr(v8x16.load_splat))] +#[target_feature(enable = "simd128")] +pub unsafe fn v8x16_load_splat(m: *const u8) -> v128 { + let v = *m; + transmute(u8x16(v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v)) +} + +/// Load a single element and splat to all lanes of a v128 vector. +#[inline] +#[cfg_attr(all(test, all_simd), assert_instr(v16x8.load_splat))] +#[target_feature(enable = "simd128")] +pub unsafe fn v16x8_load_splat(m: *const u16) -> v128 { + let v = *m; + transmute(u16x8(v, v, v, v, v, v, v, v)) +} + +/// Load a single element and splat to all lanes of a v128 vector. +#[inline] +#[cfg_attr(all(test, all_simd), assert_instr(v32x4.load_splat))] +#[target_feature(enable = "simd128")] +pub unsafe fn v32x4_load_splat(m: *const u32) -> v128 { + let v = *m; + transmute(u32x4(v, v, v, v)) +} + +/// Load a single element and splat to all lanes of a v128 vector. +#[inline] +#[cfg_attr(all(test, all_simd), assert_instr(v64x2.load_splat))] +#[target_feature(enable = "simd128")] +pub unsafe fn v64x2_load_splat(m: *const u64) -> v128 { + let v = *m; + transmute(u64x2(v, v)) } /// Stores a `v128` vector to the given heap address. #[inline] #[cfg_attr(test, assert_instr(v128.store))] +#[target_feature(enable = "simd128")] pub unsafe fn v128_store(m: *mut v128, a: v128) { - ptr::write(m, a) + *m = a; } /// Materializes a constant SIMD value from the immediate operands. /// -/// The `v128.const` instruction is encoded with 16 immediate bytes -/// `imm` which provide the bits of the vector directly. -#[inline] -#[cfg(not(only_node_compatible_functions))] -#[rustc_args_required_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)] -#[cfg_attr(test, assert_instr( - v128.const, - a0 = 0, - a1 = 1, - a2 = 2, - a3 = 3, - a4 = 4, - a5 = 5, - a6 = 6, - a7 = 7, - a8 = 8, - a9 = 9, - a10 = 10, - a11 = 11, - a12 = 12, - a13 = 13, - a14 = 14, - a15 = 15, -))] -pub const fn v128_const( - a0: u8, - a1: u8, - a2: u8, - a3: u8, - a4: u8, - a5: u8, - a6: u8, - a7: u8, - a8: u8, - a9: u8, - a10: u8, - a11: u8, - a12: u8, - a13: u8, - a14: u8, - a15: u8, +/// This function generates a `v128.const` instruction as if the generated +/// vector was interpreted as sixteen 8-bit integers. +#[inline] +#[target_feature(enable = "simd128")] +#[cfg_attr( + all(test, all_simd), + assert_instr( + v128.const, + a0 = 0, + a1 = 1, + a2 = 2, + a3 = 3, + a4 = 4, + a5 = 5, + a6 = 6, + a7 = 7, + a8 = 8, + a9 = 9, + a10 = 10, + a11 = 11, + a12 = 12, + a13 = 13, + a14 = 14, + a15 = 15, + ) +)] +pub const unsafe fn i8x16_const( + a0: i8, + a1: i8, + a2: i8, + a3: i8, + a4: i8, + a5: i8, + a6: i8, + a7: i8, + a8: i8, + a9: i8, + a10: i8, + a11: i8, + a12: i8, + a13: i8, + a14: i8, + a15: i8, ) -> v128 { - union U { - imm: [u8; 16], - vec: v128, - } - unsafe { - U { - imm: [ - a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, - ], - } - .vec - } + transmute(i8x16( + a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, + )) } -/// Creates a vector with identical lanes. +/// Materializes a constant SIMD value from the immediate operands. /// -/// Constructs a vector with `x` replicated to all 16 lanes. +/// This function generates a `v128.const` instruction as if the generated +/// vector was interpreted as eight 16-bit integers. 
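A short sketch (assumed usage, not from the patch) of the constant constructor defined above:

```rust
#[cfg(target_arch = "wasm32")]
use std::arch::wasm32::*;

// Build a compile-time constant vector of the bytes 0..=15 with `i8x16_const`.
#[cfg(target_arch = "wasm32")]
#[target_feature(enable = "simd128")]
unsafe fn ascending_bytes() -> v128 {
    i8x16_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
}
```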
+#[inline] +#[target_feature(enable = "simd128")] +#[cfg_attr( + all(test, all_simd), + assert_instr( + v128.const, + a0 = 0, + a1 = 1, + a2 = 2, + a3 = 3, + a4 = 4, + a5 = 5, + a6 = 6, + a7 = 7, + ) +)] +pub const unsafe fn i16x8_const( + a0: i16, + a1: i16, + a2: i16, + a3: i16, + a4: i16, + a5: i16, + a6: i16, + a7: i16, +) -> v128 { + transmute(i16x8(a0, a1, a2, a3, a4, a5, a6, a7)) +} + +/// Materializes a constant SIMD value from the immediate operands. +/// +/// This function generates a `v128.const` instruction as if the generated +/// vector was interpreted as four 32-bit integers. #[inline] -#[cfg_attr(test, assert_instr(i8x16.splat))] -pub fn i8x16_splat(a: i8) -> v128 { - unsafe { transmute(i8x16::splat(a)) } +#[target_feature(enable = "simd128")] +#[cfg_attr(all(test, all_simd), assert_instr(v128.const, a0 = 0, a1 = 1, a2 = 2, a3 = 3))] +pub const unsafe fn i32x4_const(a0: i32, a1: i32, a2: i32, a3: i32) -> v128 { + transmute(i32x4(a0, a1, a2, a3)) } -/// Extracts a lane from a 128-bit vector interpreted as 16 packed i8 numbers. +/// Materializes a constant SIMD value from the immediate operands. /// -/// Extracts the scalar value of lane specified in the immediate mode operand -/// `imm` from `a`. +/// This function generates a `v128.const` instruction as if the generated +/// vector was interpreted as two 64-bit integers. +#[inline] +#[target_feature(enable = "simd128")] +#[cfg_attr(all(test, all_simd), assert_instr(v128.const, a0 = 0, a1 = 1))] +pub const unsafe fn i64x2_const(a0: i64, a1: i64) -> v128 { + transmute(i64x2(a0, a1)) +} + +/// Materializes a constant SIMD value from the immediate operands. /// -/// # Unsafety +/// This function generates a `v128.const` instruction as if the generated +/// vector was interpreted as four 32-bit floats. +#[inline] +#[target_feature(enable = "simd128")] +#[cfg_attr(all(test, all_simd), assert_instr(v128.const, a0 = 0.0, a1 = 1.0, a2 = 2.0, a3 = 3.0))] +pub const unsafe fn f32x4_const(a0: f32, a1: f32, a2: f32, a3: f32) -> v128 { + transmute(f32x4(a0, a1, a2, a3)) +} + +/// Materializes a constant SIMD value from the immediate operands. /// -/// This function has undefined behavior if `imm` is greater than or equal to -/// 16. +/// This function generates a `v128.const` instruction as if the generated +/// vector was interpreted as two 64-bit floats. +#[inline] +#[target_feature(enable = "simd128")] +#[cfg_attr(all(test, all_simd), assert_instr(v128.const, a0 = 0.0, a1 = 1.0))] +pub const unsafe fn f64x2_const(a0: f64, a1: f64) -> v128 { + transmute(f64x2(a0, a1)) +} + +/// Returns a new vector with lanes selected from the lanes of the two input +/// vectors `$a` and `$b` specified in the 16 immediate operands. +/// +/// The `$a` and `$b` expressions must have type `v128`, and this macro +/// generates a wasm instruction that is encoded with 16 bytes providing the +/// indices of the elements to return. The indices `i` in range [0, 15] select +/// the `i`-th element of `a`. The indices in range [16, 31] select the `i - +/// 16`-th element of `b`. +/// +/// Note that this is a macro due to the codegen requirements of all of the +/// index expressions `$i*` must be constant. A compiler error will be +/// generated if any of the expressions are not constant. +/// +/// All indexes `$i*` must have the type `u32`. 
+#[inline] +#[target_feature(enable = "simd128")] +pub unsafe fn v8x16_shuffle< + const I0: usize, + const I1: usize, + const I2: usize, + const I3: usize, + const I4: usize, + const I5: usize, + const I6: usize, + const I7: usize, + const I8: usize, + const I9: usize, + const I10: usize, + const I11: usize, + const I12: usize, + const I13: usize, + const I14: usize, + const I15: usize, +>( + a: v128, + b: v128, +) -> v128 { + let shuf = simd_shuffle16::( + a.as_u8x16(), + b.as_u8x16(), + [ + I0 as u32, I1 as u32, I2 as u32, I3 as u32, I4 as u32, I5 as u32, I6 as u32, I7 as u32, + I8 as u32, I9 as u32, I10 as u32, I11 as u32, I12 as u32, I13 as u32, I14 as u32, + I15 as u32, + ], + ); + transmute(shuf) +} + +#[cfg(test)] +#[assert_instr(v8x16.shuffle)] +#[target_feature(enable = "simd128")] +unsafe fn v8x16_shuffle_test(a: v128, b: v128) -> v128 { + v8x16_shuffle::<0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30>(a, b) +} + +/// Same as [`v8x16_shuffle`], except operates as if the inputs were eight +/// 16-bit integers, only taking 8 indices to shuffle. +/// +/// Indices in the range [0, 7] select from `a` while [8, 15] select from `b`. +/// Note that this will generate the `v8x16.shuffle` instruction, since there +/// is no native `v16x8.shuffle` instruction (there is no need for one since +/// `v8x16.shuffle` suffices). +#[inline] +#[target_feature(enable = "simd128")] +pub unsafe fn v16x8_shuffle< + const I0: usize, + const I1: usize, + const I2: usize, + const I3: usize, + const I4: usize, + const I5: usize, + const I6: usize, + const I7: usize, +>( + a: v128, + b: v128, +) -> v128 { + let shuf = simd_shuffle8::( + a.as_u16x8(), + b.as_u16x8(), + [ + I0 as u32, I1 as u32, I2 as u32, I3 as u32, I4 as u32, I5 as u32, I6 as u32, I7 as u32, + ], + ); + transmute(shuf) +} + +#[cfg(test)] +#[assert_instr(v8x16.shuffle)] +#[target_feature(enable = "simd128")] +unsafe fn v16x8_shuffle_test(a: v128, b: v128) -> v128 { + v16x8_shuffle::<0, 2, 4, 6, 8, 10, 12, 14>(a, b) +} + +/// Same as [`v8x16_shuffle`], except operates as if the inputs were four +/// 32-bit integers, only taking 4 indices to shuffle. +/// +/// Indices in the range [0, 3] select from `a` while [4, 7] select from `b`. +/// Note that this will generate the `v8x16.shuffle` instruction, since there +/// is no native `v32x4.shuffle` instruction (there is no need for one since +/// `v8x16.shuffle` suffices). #[inline] -#[rustc_args_required_const(1)] -pub unsafe fn i8x16_extract_lane(a: v128, imm: usize) -> i8 { - #[cfg(test)] - #[assert_instr(i8x16.extract_lane_s)] - fn extract_lane_s(a: v128) -> i32 { - unsafe { i8x16_extract_lane(a, 0) as i32 } - } - #[cfg(test)] - #[cfg(not(only_node_compatible_functions))] - #[assert_instr(i8x16.extract_lane_u)] - fn extract_lane_u(a: v128) -> u32 { - unsafe { i8x16_extract_lane(a, 0) as u32 } - } - simd_extract(a.as_i8x16(), imm as u32) +#[target_feature(enable = "simd128")] +pub unsafe fn v32x4_shuffle( + a: v128, + b: v128, +) -> v128 { + let shuf = simd_shuffle4::( + a.as_u32x4(), + b.as_u32x4(), + [I0 as u32, I1 as u32, I2 as u32, I3 as u32], + ); + transmute(shuf) } -/// Replaces a lane from a 128-bit vector interpreted as 16 packed i8 numbers. +#[cfg(test)] +#[assert_instr(v8x16.shuffle)] +#[target_feature(enable = "simd128")] +unsafe fn v32x4_shuffle_test(a: v128, b: v128) -> v128 { + v32x4_shuffle::<0, 2, 4, 6>(a, b) +} + +/// Same as [`v8x16_shuffle`], except operates as if the inputs were two +/// 64-bit integers, only taking 2 indices to shuffle. 
/// -/// Replaces the scalar value of lane specified in the immediate mode operand -/// `imm` with `a`. +/// Indices in the range [0, 1] select from `a` while [2, 3] select from `b`. +/// Note that this will generate the `v8x16.shuffle` instruction, since there +/// is no native `v64x2.shuffle` instruction (there is no need for one since +/// `v8x16.shuffle` suffices). +#[inline] +#[target_feature(enable = "simd128")] +pub unsafe fn v64x2_shuffle(a: v128, b: v128) -> v128 { + let shuf = simd_shuffle2::(a.as_u64x2(), b.as_u64x2(), [I0 as u32, I1 as u32]); + transmute(shuf) +} + +#[cfg(test)] +#[assert_instr(v8x16.shuffle)] +#[target_feature(enable = "simd128")] +unsafe fn v64x2_shuffle_test(a: v128, b: v128) -> v128 { + v64x2_shuffle::<0, 2>(a, b) +} + +/// Returns a new vector with lanes selected from the lanes of the first input +/// vector `a` specified in the second input vector `s`. /// -/// # Unsafety +/// The indices `i` in range [0, 15] select the `i`-th element of `a`. For +/// indices outside of the range the resulting lane is 0. +#[inline] +#[cfg_attr(test, assert_instr(v8x16.swizzle))] +#[target_feature(enable = "simd128")] +pub unsafe fn v8x16_swizzle(a: v128, s: v128) -> v128 { + transmute(llvm_swizzle(transmute(a), transmute(s))) +} + +/// Creates a vector with identical lanes. /// -/// This function has undefined behavior if `imm` is greater than or equal to -/// 16. +/// Constructs a vector with `x` replicated to all 16 lanes. #[inline] -#[cfg_attr(test, assert_instr(i8x16.replace_lane, imm = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn i8x16_replace_lane(a: v128, imm: usize, val: i8) -> v128 { - transmute(simd_insert(a.as_i8x16(), imm as u32, val)) +#[cfg_attr(test, assert_instr(i8x16.splat))] +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_splat(a: i8) -> v128 { + transmute(i8x16::splat(a)) } /// Creates a vector with identical lanes. @@ -273,220 +591,267 @@ pub unsafe fn i8x16_replace_lane(a: v128, imm: usize, val: i8) -> v128 { /// Construct a vector with `x` replicated to all 8 lanes. #[inline] #[cfg_attr(test, assert_instr(i16x8.splat))] -pub fn i16x8_splat(a: i16) -> v128 { - unsafe { transmute(i16x8::splat(a)) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_splat(a: i16) -> v128 { + transmute(i16x8::splat(a)) } -/// Extracts a lane from a 128-bit vector interpreted as 8 packed i16 numbers. -/// -/// Extracts a the scalar value of lane specified in the immediate mode operand -/// `imm` from `a`. -/// -/// # Unsafety +/// Creates a vector with identical lanes. /// -/// This function has undefined behavior if `imm` is greater than or equal to -/// 8. +/// Constructs a vector with `x` replicated to all 4 lanes. #[inline] -#[rustc_args_required_const(1)] -pub unsafe fn i16x8_extract_lane(a: v128, imm: usize) -> i16 { - #[cfg(test)] - #[assert_instr(i16x8.extract_lane_s)] - fn extract_lane_s(a: v128) -> i32 { - unsafe { i16x8_extract_lane(a, 0) as i32 } - } - #[cfg(test)] - #[cfg(not(only_node_compatible_functions))] - #[assert_instr(i16x8.extract_lane_u)] - fn extract_lane_u(a: v128) -> u32 { - unsafe { i16x8_extract_lane(a, 0) as u32 } - } - simd_extract(a.as_i16x8(), imm as u32) +#[cfg_attr(test, assert_instr(i32x4.splat))] +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_splat(a: i32) -> v128 { + transmute(i32x4::splat(a)) } -/// Replaces a lane from a 128-bit vector interpreted as 8 packed i16 numbers. -/// -/// Replaces the scalar value of lane specified in the immediate mode operand -/// `imm` with `a`. 
-/// -/// # Unsafety +/// Creates a vector with identical lanes. /// -/// This function has undefined behavior if `imm` is greater than or equal to -/// 8. +/// Construct a vector with `x` replicated to all 2 lanes. #[inline] -#[cfg_attr(test, assert_instr(i16x8.replace_lane, imm = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn i16x8_replace_lane(a: v128, imm: usize, val: i16) -> v128 { - transmute(simd_insert(a.as_i16x8(), imm as u32, val)) +#[cfg_attr(test, assert_instr(i64x2.splat))] +#[target_feature(enable = "simd128")] +pub unsafe fn i64x2_splat(a: i64) -> v128 { + transmute(i64x2::splat(a)) } /// Creates a vector with identical lanes. /// /// Constructs a vector with `x` replicated to all 4 lanes. #[inline] -#[cfg_attr(test, assert_instr(i32x4.splat))] -pub fn i32x4_splat(a: i32) -> v128 { - unsafe { transmute(i32x4::splat(a)) } +#[cfg_attr(test, assert_instr(f32x4.splat))] +#[target_feature(enable = "simd128")] +pub unsafe fn f32x4_splat(a: f32) -> v128 { + transmute(f32x4::splat(a)) } -/// Extracts a lane from a 128-bit vector interpreted as 4 packed i32 numbers. +/// Creates a vector with identical lanes. +/// +/// Constructs a vector with `x` replicated to all 2 lanes. +#[inline] +#[cfg_attr(test, assert_instr(f64x2.splat))] +#[target_feature(enable = "simd128")] +pub unsafe fn f64x2_splat(a: f64) -> v128 { + transmute(f64x2::splat(a)) +} + +/// Extracts a lane from a 128-bit vector interpreted as 16 packed i8 numbers. /// /// Extracts the scalar value of lane specified in the immediate mode operand -/// `imm` from `a`. +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. +#[inline] +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_extract_lane(a: v128) -> i8 { + simd_extract(a.as_i8x16(), N as u32) +} + +#[cfg(test)] +#[assert_instr(i8x16.extract_lane_s)] +#[target_feature(enable = "simd128")] +unsafe fn i8x16_extract_lane_s(a: v128) -> i32 { + i8x16_extract_lane::<0>(a) as i32 +} + +#[cfg(test)] +#[assert_instr(i8x16.extract_lane_u)] +#[target_feature(enable = "simd128")] +unsafe fn i8x16_extract_lane_u(a: v128) -> u32 { + i8x16_extract_lane::<0>(a) as u8 as u32 +} + +/// Replaces a lane from a 128-bit vector interpreted as 16 packed i8 numbers. /// -/// # Unsafety +/// Replaces the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. +#[inline] +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_replace_lane(a: v128, val: i8) -> v128 { + transmute(simd_insert(a.as_i8x16(), N as u32, val)) +} + +#[cfg(test)] +#[assert_instr(i8x16.replace_lane)] +#[target_feature(enable = "simd128")] +unsafe fn i8x16_replace_lane_test(a: v128, val: i8) -> v128 { + i8x16_replace_lane::<0>(a, val) +} + +/// Extracts a lane from a 128-bit vector interpreted as 8 packed i16 numbers. /// -/// This function has undefined behavior if `imm` is greater than or equal to -/// 4. +/// Extracts a the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. #[inline] -#[cfg_attr(test, assert_instr(i32x4.extract_lane, imm = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn i32x4_extract_lane(a: v128, imm: usize) -> i32 { - simd_extract(a.as_i32x4(), imm as u32) +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_extract_lane(a: v128) -> i16 { + simd_extract(a.as_i16x8(), N as u32) } -/// Replaces a lane from a 128-bit vector interpreted as 4 packed i32 numbers. 
+#[cfg(test)] +#[assert_instr(i16x8.extract_lane_s)] +#[target_feature(enable = "simd128")] +unsafe fn i16x8_extract_lane_s(a: v128) -> i32 { + i16x8_extract_lane::<0>(a) as i32 +} + +#[cfg(test)] +#[assert_instr(i16x8.extract_lane_u)] +#[target_feature(enable = "simd128")] +unsafe fn i16x8_extract_lane_u(a: v128) -> u32 { + i16x8_extract_lane::<0>(a) as u16 as u32 +} + +/// Replaces a lane from a 128-bit vector interpreted as 8 packed i16 numbers. /// /// Replaces the scalar value of lane specified in the immediate mode operand -/// `imm` with `a`. -/// -/// # Unsafety +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. +#[inline] +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_replace_lane(a: v128, val: i16) -> v128 { + transmute(simd_insert(a.as_i16x8(), N as u32, val)) +} + +#[cfg(test)] +#[assert_instr(i16x8.replace_lane)] +#[target_feature(enable = "simd128")] +unsafe fn i16x8_replace_lane_test(a: v128, val: i16) -> v128 { + i16x8_replace_lane::<0>(a, val) +} + +/// Extracts a lane from a 128-bit vector interpreted as 4 packed i32 numbers. /// -/// This function has undefined behavior if `imm` is greater than or equal to -/// 4. +/// Extracts the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. #[inline] -#[cfg_attr(test, assert_instr(i32x4.replace_lane, imm = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn i32x4_replace_lane(a: v128, imm: usize, val: i32) -> v128 { - transmute(simd_insert(a.as_i32x4(), imm as u32, val)) +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_extract_lane(a: v128) -> i32 { + simd_extract(a.as_i32x4(), N as u32) } -/// Creates a vector with identical lanes. +#[cfg(test)] +#[assert_instr(i32x4.extract_lane)] +#[target_feature(enable = "simd128")] +unsafe fn i32x4_extract_lane_test(a: v128) -> i32 { + i32x4_extract_lane::<0>(a) +} + +/// Replaces a lane from a 128-bit vector interpreted as 4 packed i32 numbers. /// -/// Construct a vector with `x` replicated to all 2 lanes. +/// Replaces the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. #[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr(i8x16.splat))] -pub fn i64x2_splat(a: i64) -> v128 { - unsafe { transmute(i64x2::splat(a)) } +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_replace_lane(a: v128, val: i32) -> v128 { + transmute(simd_insert(a.as_i32x4(), N as u32, val)) +} + +#[cfg(test)] +#[assert_instr(i32x4.replace_lane)] +#[target_feature(enable = "simd128")] +unsafe fn i32x4_replace_lane_test(a: v128, val: i32) -> v128 { + i32x4_replace_lane::<0>(a, val) } /// Extracts a lane from a 128-bit vector interpreted as 2 packed i64 numbers. /// /// Extracts the scalar value of lane specified in the immediate mode operand -/// `imm` from `a`. -/// -/// # Unsafety -/// -/// This function has undefined behavior if `imm` is greater than or equal to -/// 2. +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. 
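A small sketch (illustrative only) of the const-generic lane accessors above:

```rust
#[cfg(target_arch = "wasm32")]
use std::arch::wasm32::*;

// Read lane 2 of a vector of four i32s, add one, and write it back; an
// out-of-range lane index is rejected at compile time.
#[cfg(target_arch = "wasm32")]
#[target_feature(enable = "simd128")]
unsafe fn bump_lane_2(v: v128) -> v128 {
    let old = i32x4_extract_lane::<2>(v);
    i32x4_replace_lane::<2>(v, old + 1)
}
```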
#[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr(i64x2.extract_lane_s, imm = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn i64x2_extract_lane(a: v128, imm: usize) -> i64 { - simd_extract(a.as_i64x2(), imm as u32) +#[target_feature(enable = "simd128")] +pub unsafe fn i64x2_extract_lane<const N: usize>(a: v128) -> i64 { + simd_extract(a.as_i64x2(), N as u32) +} + +#[cfg(test)] +#[assert_instr(i64x2.extract_lane)] +#[target_feature(enable = "simd128")] +unsafe fn i64x2_extract_lane_test(a: v128) -> i64 { + i64x2_extract_lane::<0>(a) } /// Replaces a lane from a 128-bit vector interpreted as 2 packed i64 numbers. /// /// Replaces the scalar value of lane specified in the immediate mode operand -/// `imm` with `a`. -/// -/// # Unsafety -/// -/// This function has undefined behavior if `imm` is greater than or equal to -/// 2. +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. #[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr(i64x2.replace_lane, imm = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn i64x2_replace_lane(a: v128, imm: usize, val: i64) -> v128 { - transmute(simd_insert(a.as_i64x2(), imm as u32, val)) +#[target_feature(enable = "simd128")] +pub unsafe fn i64x2_replace_lane<const N: usize>(a: v128, val: i64) -> v128 { + transmute(simd_insert(a.as_i64x2(), N as u32, val)) } -/// Creates a vector with identical lanes. -/// -/// Constructs a vector with `x` replicated to all 4 lanes. -#[inline] -#[cfg_attr(test, assert_instr(f32x4.splat))] -pub fn f32x4_splat(a: f32) -> v128 { - unsafe { transmute(f32x4::splat(a)) } +#[cfg(test)] +#[assert_instr(i64x2.replace_lane)] +#[target_feature(enable = "simd128")] +unsafe fn i64x2_replace_lane_test(a: v128, val: i64) -> v128 { + i64x2_replace_lane::<0>(a, val) } /// Extracts a lane from a 128-bit vector interpreted as 4 packed f32 numbers. /// -/// Extracts the scalar value of lane specified in the immediate mode operand -/// `imm` from `a`. -/// -/// # Unsafety -/// -/// This function has undefined behavior if `imm` is greater than or equal to -/// 4. +/// Extracts the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. #[inline] -#[cfg_attr(test, assert_instr(f32x4.extract_lane, imm = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn f32x4_extract_lane(a: v128, imm: usize) -> f32 { - simd_extract(a.as_f32x4(), imm as u32) +#[target_feature(enable = "simd128")] +pub unsafe fn f32x4_extract_lane<const N: usize>(a: v128) -> f32 { + simd_extract(a.as_f32x4(), N as u32) +} + +#[cfg(test)] +#[assert_instr(f32x4.extract_lane)] +#[target_feature(enable = "simd128")] +unsafe fn f32x4_extract_lane_test(a: v128) -> f32 { + f32x4_extract_lane::<0>(a) } /// Replaces a lane from a 128-bit vector interpreted as 4 packed f32 numbers. /// -/// Replaces the scalar value of lane specified in the immediate mode operand -/// `imm` with `a`. -/// -/// # Unsafety -/// -/// This function has undefined behavior if `imm` is greater than or equal to -/// 4. +/// Replaces the scalar value of lane specified in the immediate mode operand +/// `N` from `a`. If `N` is out of bounds then it is a compile time error.
#[inline] -#[cfg_attr(test, assert_instr(f32x4.replace_lane, imm = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn f32x4_replace_lane(a: v128, imm: usize, val: f32) -> v128 { - transmute(simd_insert(a.as_f32x4(), imm as u32, val)) +#[target_feature(enable = "simd128")] +pub unsafe fn f32x4_replace_lane(a: v128, val: f32) -> v128 { + transmute(simd_insert(a.as_f32x4(), N as u32, val)) } -/// Creates a vector with identical lanes. -/// -/// Constructs a vector with `x` replicated to all 2 lanes. -#[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr(f64x2.splat))] -pub fn f64x2_splat(a: f64) -> v128 { - unsafe { transmute(f64x2::splat(a)) } +#[cfg(test)] +#[assert_instr(f32x4.replace_lane)] +#[target_feature(enable = "simd128")] +unsafe fn f32x4_replace_lane_test(a: v128, val: f32) -> v128 { + f32x4_replace_lane::<0>(a, val) } -/// Extracts lane from a 128-bit vector interpreted as 2 packed f64 numbers. -/// -/// Extracts the scalar value of lane specified in the immediate mode operand -/// `imm` from `a`. -/// -/// # Unsafety +/// Extracts a lane from a 128-bit vector interpreted as 2 packed f64 numbers. /// -/// This function has undefined behavior if `imm` is greater than or equal to -/// 2. +/// Extracts the scalar value of lane specified fn the immediate mode operand +/// `N` from `a`. If `N` fs out of bounds then it is a compile time error. #[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr(f64x2.extract_lane_s, imm = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn f64x2_extract_lane(a: v128, imm: usize) -> f64 { - simd_extract(a.as_f64x2(), imm as u32) +#[target_feature(enable = "simd128")] +pub unsafe fn f64x2_extract_lane(a: v128) -> f64 { + simd_extract(a.as_f64x2(), N as u32) +} + +#[cfg(test)] +#[assert_instr(f64x2.extract_lane)] +#[target_feature(enable = "simd128")] +unsafe fn f64x2_extract_lane_test(a: v128) -> f64 { + f64x2_extract_lane::<0>(a) } /// Replaces a lane from a 128-bit vector interpreted as 2 packed f64 numbers. /// /// Replaces the scalar value of lane specified in the immediate mode operand -/// `imm` with `a`. -/// -/// # Unsafety -/// -/// This function has undefined behavior if `imm` is greater than or equal to -/// 2. +/// `N` from `a`. If `N` is out of bounds then it is a compile time error. #[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr(f64x2.replace_lane, imm = 0))] -#[rustc_args_required_const(1)] -pub unsafe fn f64x2_replace_lane(a: v128, imm: usize, val: f64) -> v128 { - transmute(simd_insert(a.as_f64x2(), imm as u32, val)) +#[target_feature(enable = "simd128")] +pub unsafe fn f64x2_replace_lane(a: v128, val: f64) -> v128 { + transmute(simd_insert(a.as_f64x2(), N as u32, val)) +} + +#[cfg(test)] +#[assert_instr(f64x2.replace_lane)] +#[target_feature(enable = "simd128")] +unsafe fn f64x2_replace_lane_test(a: v128, val: f64) -> v128 { + f64x2_replace_lane::<0>(a, val) } /// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit @@ -496,8 +861,9 @@ pub unsafe fn f64x2_replace_lane(a: v128, imm: usize, val: f64) -> v128 { /// were equal, or all zeros if the elements were not equal. 
#[inline] #[cfg_attr(test, assert_instr(i8x16.eq))] -pub fn i8x16_eq(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_eq::<_, i8x16>(a.as_i8x16(), b.as_i8x16())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_eq(a: v128, b: v128) -> v128 { + transmute(simd_eq::<_, i8x16>(a.as_i8x16(), b.as_i8x16())) } /// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit @@ -507,8 +873,9 @@ pub fn i8x16_eq(a: v128, b: v128) -> v128 { /// were not equal, or all zeros if the elements were equal. #[inline] #[cfg_attr(test, assert_instr(i8x16.ne))] -pub fn i8x16_ne(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_ne::<_, i8x16>(a.as_i8x16(), b.as_i8x16())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_ne(a: v128, b: v128) -> v128 { + transmute(simd_ne::<_, i8x16>(a.as_i8x16(), b.as_i8x16())) } /// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit @@ -518,8 +885,9 @@ pub fn i8x16_ne(a: v128, b: v128) -> v128 { /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i8x16.lt_s))] -pub fn i8x16_lt_s(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_lt::<_, i8x16>(a.as_i8x16(), b.as_i8x16())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_lt_s(a: v128, b: v128) -> v128 { + transmute(simd_lt::<_, i8x16>(a.as_i8x16(), b.as_i8x16())) } /// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit @@ -529,8 +897,9 @@ pub fn i8x16_lt_s(a: v128, b: v128) -> v128 { /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i8x16.lt_u))] -pub fn i8x16_lt_u(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_lt::<_, i8x16>(a.as_u8x16(), b.as_u8x16())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_lt_u(a: v128, b: v128) -> v128 { + transmute(simd_lt::<_, i8x16>(a.as_u8x16(), b.as_u8x16())) } /// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit @@ -540,8 +909,9 @@ pub fn i8x16_lt_u(a: v128, b: v128) -> v128 { /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i8x16.gt_s))] -pub fn i8x16_gt_s(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_gt::<_, i8x16>(a.as_i8x16(), b.as_i8x16())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_gt_s(a: v128, b: v128) -> v128 { + transmute(simd_gt::<_, i8x16>(a.as_i8x16(), b.as_i8x16())) } /// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit @@ -551,8 +921,9 @@ pub fn i8x16_gt_s(a: v128, b: v128) -> v128 { /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i8x16.gt_u))] -pub fn i8x16_gt_u(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_gt::<_, i8x16>(a.as_u8x16(), b.as_u8x16())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_gt_u(a: v128, b: v128) -> v128 { + transmute(simd_gt::<_, i8x16>(a.as_u8x16(), b.as_u8x16())) } /// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit @@ -562,8 +933,9 @@ pub fn i8x16_gt_u(a: v128, b: v128) -> v128 { /// element is less than the pairwise right element, or all zeros otherwise. 
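A short sketch (assumed usage) combining the splat and comparison intrinsics above:

```rust
#[cfg(target_arch = "wasm32")]
use std::arch::wasm32::*;

// Produce a per-lane mask: all ones where a byte of `v` equals `byte`,
// all zeros elsewhere.
#[cfg(target_arch = "wasm32")]
#[target_feature(enable = "simd128")]
unsafe fn mask_equal_to(v: v128, byte: i8) -> v128 {
    i8x16_eq(v, i8x16_splat(byte))
}
```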
#[inline] #[cfg_attr(test, assert_instr(i8x16.le_s))] -pub fn i8x16_le_s(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_le::<_, i8x16>(a.as_i8x16(), b.as_i8x16())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_le_s(a: v128, b: v128) -> v128 { + transmute(simd_le::<_, i8x16>(a.as_i8x16(), b.as_i8x16())) } /// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit @@ -573,8 +945,9 @@ pub fn i8x16_le_s(a: v128, b: v128) -> v128 { /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i8x16.le_u))] -pub fn i8x16_le_u(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_le::<_, i8x16>(a.as_u8x16(), b.as_u8x16())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_le_u(a: v128, b: v128) -> v128 { + transmute(simd_le::<_, i8x16>(a.as_u8x16(), b.as_u8x16())) } /// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit @@ -584,8 +957,9 @@ pub fn i8x16_le_u(a: v128, b: v128) -> v128 { /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i8x16.ge_s))] -pub fn i8x16_ge_s(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_ge::<_, i8x16>(a.as_i8x16(), b.as_i8x16())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_ge_s(a: v128, b: v128) -> v128 { + transmute(simd_ge::<_, i8x16>(a.as_i8x16(), b.as_i8x16())) } /// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit @@ -595,8 +969,9 @@ pub fn i8x16_ge_s(a: v128, b: v128) -> v128 { /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i8x16.ge_u))] -pub fn i8x16_ge_u(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_ge::<_, i8x16>(a.as_u8x16(), b.as_u8x16())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_ge_u(a: v128, b: v128) -> v128 { + transmute(simd_ge::<_, i8x16>(a.as_u8x16(), b.as_u8x16())) } /// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit @@ -606,8 +981,9 @@ pub fn i8x16_ge_u(a: v128, b: v128) -> v128 { /// were equal, or all zeros if the elements were not equal. #[inline] #[cfg_attr(test, assert_instr(i16x8.eq))] -pub fn i16x8_eq(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_eq::<_, i16x8>(a.as_i16x8(), b.as_i16x8())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_eq(a: v128, b: v128) -> v128 { + transmute(simd_eq::<_, i16x8>(a.as_i16x8(), b.as_i16x8())) } /// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit @@ -617,8 +993,9 @@ pub fn i16x8_eq(a: v128, b: v128) -> v128 { /// were not equal, or all zeros if the elements were equal. #[inline] #[cfg_attr(test, assert_instr(i16x8.ne))] -pub fn i16x8_ne(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_ne::<_, i16x8>(a.as_i16x8(), b.as_i16x8())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_ne(a: v128, b: v128) -> v128 { + transmute(simd_ne::<_, i16x8>(a.as_i16x8(), b.as_i16x8())) } /// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit @@ -628,8 +1005,9 @@ pub fn i16x8_ne(a: v128, b: v128) -> v128 { /// element is less than the pairwise right element, or all zeros otherwise. 
#[inline] #[cfg_attr(test, assert_instr(i16x8.lt_s))] -pub fn i16x8_lt_s(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_lt::<_, i16x8>(a.as_i16x8(), b.as_i16x8())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_lt_s(a: v128, b: v128) -> v128 { + transmute(simd_lt::<_, i16x8>(a.as_i16x8(), b.as_i16x8())) } /// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit @@ -639,8 +1017,9 @@ pub fn i16x8_lt_s(a: v128, b: v128) -> v128 { /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i16x8.lt_u))] -pub fn i16x8_lt_u(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_lt::<_, i16x8>(a.as_u16x8(), b.as_u16x8())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_lt_u(a: v128, b: v128) -> v128 { + transmute(simd_lt::<_, i16x8>(a.as_u16x8(), b.as_u16x8())) } /// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit @@ -650,8 +1029,9 @@ pub fn i16x8_lt_u(a: v128, b: v128) -> v128 { /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i16x8.gt_s))] -pub fn i16x8_gt_s(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_gt::<_, i16x8>(a.as_i16x8(), b.as_i16x8())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_gt_s(a: v128, b: v128) -> v128 { + transmute(simd_gt::<_, i16x8>(a.as_i16x8(), b.as_i16x8())) } /// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit @@ -661,8 +1041,9 @@ pub fn i16x8_gt_s(a: v128, b: v128) -> v128 { /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i16x8.gt_u))] -pub fn i16x8_gt_u(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_gt::<_, i16x8>(a.as_u16x8(), b.as_u16x8())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_gt_u(a: v128, b: v128) -> v128 { + transmute(simd_gt::<_, i16x8>(a.as_u16x8(), b.as_u16x8())) } /// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit @@ -672,8 +1053,9 @@ pub fn i16x8_gt_u(a: v128, b: v128) -> v128 { /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i16x8.le_s))] -pub fn i16x8_le_s(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_le::<_, i16x8>(a.as_i16x8(), b.as_i16x8())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_le_s(a: v128, b: v128) -> v128 { + transmute(simd_le::<_, i16x8>(a.as_i16x8(), b.as_i16x8())) } /// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit @@ -683,8 +1065,9 @@ pub fn i16x8_le_s(a: v128, b: v128) -> v128 { /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i16x8.le_u))] -pub fn i16x8_le_u(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_le::<_, i16x8>(a.as_u16x8(), b.as_u16x8())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_le_u(a: v128, b: v128) -> v128 { + transmute(simd_le::<_, i16x8>(a.as_u16x8(), b.as_u16x8())) } /// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit @@ -694,8 +1077,9 @@ pub fn i16x8_le_u(a: v128, b: v128) -> v128 { /// element is greater than the pairwise right element, or all zeros otherwise. 
#[inline] #[cfg_attr(test, assert_instr(i16x8.ge_s))] -pub fn i16x8_ge_s(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_ge::<_, i16x8>(a.as_i16x8(), b.as_i16x8())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_ge_s(a: v128, b: v128) -> v128 { + transmute(simd_ge::<_, i16x8>(a.as_i16x8(), b.as_i16x8())) } /// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit @@ -705,8 +1089,9 @@ pub fn i16x8_ge_s(a: v128, b: v128) -> v128 { /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i16x8.ge_u))] -pub fn i16x8_ge_u(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_ge::<_, i16x8>(a.as_u16x8(), b.as_u16x8())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_ge_u(a: v128, b: v128) -> v128 { + transmute(simd_ge::<_, i16x8>(a.as_u16x8(), b.as_u16x8())) } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit @@ -716,8 +1101,9 @@ pub fn i16x8_ge_u(a: v128, b: v128) -> v128 { /// were equal, or all zeros if the elements were not equal. #[inline] #[cfg_attr(test, assert_instr(i32x4.eq))] -pub fn i32x4_eq(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_eq::<_, i32x4>(a.as_i32x4(), b.as_i32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_eq(a: v128, b: v128) -> v128 { + transmute(simd_eq::<_, i32x4>(a.as_i32x4(), b.as_i32x4())) } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit @@ -727,8 +1113,9 @@ pub fn i32x4_eq(a: v128, b: v128) -> v128 { /// were not equal, or all zeros if the elements were equal. #[inline] #[cfg_attr(test, assert_instr(i32x4.ne))] -pub fn i32x4_ne(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_ne::<_, i32x4>(a.as_i32x4(), b.as_i32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_ne(a: v128, b: v128) -> v128 { + transmute(simd_ne::<_, i32x4>(a.as_i32x4(), b.as_i32x4())) } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit @@ -738,8 +1125,9 @@ pub fn i32x4_ne(a: v128, b: v128) -> v128 { /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i32x4.lt_s))] -pub fn i32x4_lt_s(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_lt::<_, i32x4>(a.as_i32x4(), b.as_i32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_lt_s(a: v128, b: v128) -> v128 { + transmute(simd_lt::<_, i32x4>(a.as_i32x4(), b.as_i32x4())) } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit @@ -749,8 +1137,9 @@ pub fn i32x4_lt_s(a: v128, b: v128) -> v128 { /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i32x4.lt_u))] -pub fn i32x4_lt_u(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_lt::<_, i32x4>(a.as_u32x4(), b.as_u32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_lt_u(a: v128, b: v128) -> v128 { + transmute(simd_lt::<_, i32x4>(a.as_u32x4(), b.as_u32x4())) } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit @@ -760,8 +1149,9 @@ pub fn i32x4_lt_u(a: v128, b: v128) -> v128 { /// element is greater than the pairwise right element, or all zeros otherwise. 
#[inline] #[cfg_attr(test, assert_instr(i32x4.gt_s))] -pub fn i32x4_gt_s(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_gt::<_, i32x4>(a.as_i32x4(), b.as_i32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_gt_s(a: v128, b: v128) -> v128 { + transmute(simd_gt::<_, i32x4>(a.as_i32x4(), b.as_i32x4())) } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit @@ -771,8 +1161,9 @@ pub fn i32x4_gt_s(a: v128, b: v128) -> v128 { /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i32x4.gt_u))] -pub fn i32x4_gt_u(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_gt::<_, i32x4>(a.as_u32x4(), b.as_u32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_gt_u(a: v128, b: v128) -> v128 { + transmute(simd_gt::<_, i32x4>(a.as_u32x4(), b.as_u32x4())) } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit @@ -782,8 +1173,9 @@ pub fn i32x4_gt_u(a: v128, b: v128) -> v128 { /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i32x4.le_s))] -pub fn i32x4_le_s(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_le::<_, i32x4>(a.as_i32x4(), b.as_i32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_le_s(a: v128, b: v128) -> v128 { + transmute(simd_le::<_, i32x4>(a.as_i32x4(), b.as_i32x4())) } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit @@ -793,8 +1185,9 @@ pub fn i32x4_le_s(a: v128, b: v128) -> v128 { /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i32x4.le_u))] -pub fn i32x4_le_u(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_le::<_, i32x4>(a.as_u32x4(), b.as_u32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_le_u(a: v128, b: v128) -> v128 { + transmute(simd_le::<_, i32x4>(a.as_u32x4(), b.as_u32x4())) } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit @@ -804,8 +1197,9 @@ pub fn i32x4_le_u(a: v128, b: v128) -> v128 { /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i32x4.ge_s))] -pub fn i32x4_ge_s(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_ge::<_, i32x4>(a.as_i32x4(), b.as_i32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_ge_s(a: v128, b: v128) -> v128 { + transmute(simd_ge::<_, i32x4>(a.as_i32x4(), b.as_i32x4())) } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit @@ -815,8 +1209,9 @@ pub fn i32x4_ge_s(a: v128, b: v128) -> v128 { /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i32x4.ge_u))] -pub fn i32x4_ge_u(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_ge::<_, i32x4>(a.as_u32x4(), b.as_u32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_ge_u(a: v128, b: v128) -> v128 { + transmute(simd_ge::<_, i32x4>(a.as_u32x4(), b.as_u32x4())) } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit @@ -826,8 +1221,9 @@ pub fn i32x4_ge_u(a: v128, b: v128) -> v128 { /// were equal, or all zeros if the elements were not equal. 
#[inline] #[cfg_attr(test, assert_instr(f32x4.eq))] -pub fn f32x4_eq(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_eq::<_, i32x4>(a.as_f32x4(), b.as_f32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f32x4_eq(a: v128, b: v128) -> v128 { + transmute(simd_eq::<_, i32x4>(a.as_f32x4(), b.as_f32x4())) } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit @@ -837,8 +1233,9 @@ pub fn f32x4_eq(a: v128, b: v128) -> v128 { /// were not equal, or all zeros if the elements were equal. #[inline] #[cfg_attr(test, assert_instr(f32x4.ne))] -pub fn f32x4_ne(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_ne::<_, i32x4>(a.as_f32x4(), b.as_f32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f32x4_ne(a: v128, b: v128) -> v128 { + transmute(simd_ne::<_, i32x4>(a.as_f32x4(), b.as_f32x4())) } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit @@ -848,8 +1245,9 @@ pub fn f32x4_ne(a: v128, b: v128) -> v128 { /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(f32x4.lt))] -pub fn f32x4_lt(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_lt::<_, i32x4>(a.as_f32x4(), b.as_f32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f32x4_lt(a: v128, b: v128) -> v128 { + transmute(simd_lt::<_, i32x4>(a.as_f32x4(), b.as_f32x4())) } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit @@ -859,8 +1257,9 @@ pub fn f32x4_lt(a: v128, b: v128) -> v128 { /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(f32x4.gt))] -pub fn f32x4_gt(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_gt::<_, i32x4>(a.as_f32x4(), b.as_f32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f32x4_gt(a: v128, b: v128) -> v128 { + transmute(simd_gt::<_, i32x4>(a.as_f32x4(), b.as_f32x4())) } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit @@ -870,8 +1269,9 @@ pub fn f32x4_gt(a: v128, b: v128) -> v128 { /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(f32x4.le))] -pub fn f32x4_le(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_le::<_, i32x4>(a.as_f32x4(), b.as_f32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f32x4_le(a: v128, b: v128) -> v128 { + transmute(simd_le::<_, i32x4>(a.as_f32x4(), b.as_f32x4())) } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit @@ -881,8 +1281,9 @@ pub fn f32x4_le(a: v128, b: v128) -> v128 { /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(f32x4.ge))] -pub fn f32x4_ge(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_ge::<_, i32x4>(a.as_f32x4(), b.as_f32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f32x4_ge(a: v128, b: v128) -> v128 { + transmute(simd_ge::<_, i32x4>(a.as_f32x4(), b.as_f32x4())) } /// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit @@ -891,10 +1292,10 @@ pub fn f32x4_ge(a: v128, b: v128) -> v128 { /// Returns a new vector where each lane is all ones if the pairwise elements /// were equal, or all zeros if the elements were not equal. 
#[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(f64x2.eq))] -pub fn f64x2_eq(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_eq::<_, i64x2>(a.as_f64x2(), b.as_f64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f64x2_eq(a: v128, b: v128) -> v128 { + transmute(simd_eq::<_, i64x2>(a.as_f64x2(), b.as_f64x2())) } /// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit @@ -903,10 +1304,10 @@ pub fn f64x2_eq(a: v128, b: v128) -> v128 { /// Returns a new vector where each lane is all ones if the pairwise elements /// were not equal, or all zeros if the elements were equal. #[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(f64x2.ne))] -pub fn f64x2_ne(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_ne::<_, i64x2>(a.as_f64x2(), b.as_f64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f64x2_ne(a: v128, b: v128) -> v128 { + transmute(simd_ne::<_, i64x2>(a.as_f64x2(), b.as_f64x2())) } /// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit @@ -915,10 +1316,10 @@ pub fn f64x2_ne(a: v128, b: v128) -> v128 { /// Returns a new vector where each lane is all ones if the pairwise left /// element is less than the pairwise right element, or all zeros otherwise. #[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(f64x2.lt))] -pub fn f64x2_lt(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_lt::<_, i64x2>(a.as_f64x2(), b.as_f64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f64x2_lt(a: v128, b: v128) -> v128 { + transmute(simd_lt::<_, i64x2>(a.as_f64x2(), b.as_f64x2())) } /// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit @@ -927,10 +1328,10 @@ pub fn f64x2_lt(a: v128, b: v128) -> v128 { /// Returns a new vector where each lane is all ones if the pairwise left /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(f64x2.gt))] -pub fn f64x2_gt(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_gt::<_, i64x2>(a.as_f64x2(), b.as_f64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f64x2_gt(a: v128, b: v128) -> v128 { + transmute(simd_gt::<_, i64x2>(a.as_f64x2(), b.as_f64x2())) } /// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit @@ -939,10 +1340,10 @@ pub fn f64x2_gt(a: v128, b: v128) -> v128 { /// Returns a new vector where each lane is all ones if the pairwise left /// element is less than the pairwise right element, or all zeros otherwise. #[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(f64x2.le))] -pub fn f64x2_le(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_le::<_, i64x2>(a.as_f64x2(), b.as_f64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f64x2_le(a: v128, b: v128) -> v128 { + transmute(simd_le::<_, i64x2>(a.as_f64x2(), b.as_f64x2())) } /// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit @@ -951,69 +1352,137 @@ pub fn f64x2_le(a: v128, b: v128) -> v128 { /// Returns a new vector where each lane is all ones if the pairwise left /// element is greater than the pairwise right element, or all zeros otherwise. 
#[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(f64x2.ge))] -pub fn f64x2_ge(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_ge::<_, i64x2>(a.as_f64x2(), b.as_f64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f64x2_ge(a: v128, b: v128) -> v128 { + transmute(simd_ge::<_, i64x2>(a.as_f64x2(), b.as_f64x2())) } /// Flips each bit of the 128-bit input vector. #[inline] #[cfg_attr(test, assert_instr(v128.not))] -pub fn v128_not(a: v128) -> v128 { - unsafe { transmute(simd_xor(a.as_i64x2(), i64x2(!0, !0))) } +#[target_feature(enable = "simd128")] +pub unsafe fn v128_not(a: v128) -> v128 { + transmute(simd_xor(a.as_i64x2(), i64x2(!0, !0))) } /// Performs a bitwise and of the two input 128-bit vectors, returning the /// resulting vector. #[inline] #[cfg_attr(test, assert_instr(v128.and))] -pub fn v128_and(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_and(a.as_i64x2(), b.as_i64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn v128_and(a: v128, b: v128) -> v128 { + transmute(simd_and(a.as_i64x2(), b.as_i64x2())) +} + +/// Bitwise AND of bits of `a` and the logical inverse of bits of `b`. +/// +/// This operation is equivalent to `v128.and(a, v128.not(b))` +#[inline] +#[cfg_attr(all(test, all_simd), assert_instr(v128.andnot))] +#[target_feature(enable = "simd128")] +pub unsafe fn v128_andnot(a: v128, b: v128) -> v128 { + transmute(simd_and( + a.as_i64x2(), + simd_xor(b.as_i64x2(), i64x2(-1, -1)), + )) } /// Performs a bitwise or of the two input 128-bit vectors, returning the /// resulting vector. #[inline] #[cfg_attr(test, assert_instr(v128.or))] -pub fn v128_or(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_or(a.as_i64x2(), b.as_i64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn v128_or(a: v128, b: v128) -> v128 { + transmute(simd_or(a.as_i64x2(), b.as_i64x2())) } /// Performs a bitwise xor of the two input 128-bit vectors, returning the /// resulting vector. #[inline] #[cfg_attr(test, assert_instr(v128.xor))] -pub fn v128_xor(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_xor(a.as_i64x2(), b.as_i64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn v128_xor(a: v128, b: v128) -> v128 { + transmute(simd_xor(a.as_i64x2(), b.as_i64x2())) } /// Use the bitmask in `c` to select bits from `v1` when 1 and `v2` when 0. #[inline] #[cfg_attr(test, assert_instr(v128.bitselect))] -pub fn v128_bitselect(v1: v128, v2: v128, c: v128) -> v128 { - unsafe { transmute(llvm_bitselect(c.as_i8x16(), v1.as_i8x16(), v2.as_i8x16())) } +#[target_feature(enable = "simd128")] +pub unsafe fn v128_bitselect(v1: v128, v2: v128, c: v128) -> v128 { + transmute(llvm_bitselect(v1.as_i8x16(), v2.as_i8x16(), c.as_i8x16())) +} + +/// Lane-wise wrapping absolute value. +#[inline] +// #[cfg_attr(test, assert_instr(i8x16.abs))] // FIXME support not in our LLVM yet +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_abs(a: v128) -> v128 { + let a = transmute::<_, i8x16>(a); + let zero = i8x16::splat(0); + transmute(simd_select::( + simd_lt(a, zero), + simd_sub(zero, a), + a, + )) } /// Negates a 128-bit vectors intepreted as sixteen 8-bit signed integers #[inline] #[cfg_attr(test, assert_instr(i8x16.neg))] -pub fn i8x16_neg(a: v128) -> v128 { - unsafe { transmute(simd_mul(a.as_i8x16(), i8x16::splat(-1))) } +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_neg(a: v128) -> v128 { + transmute(simd_mul(a.as_i8x16(), i8x16::splat(-1))) } /// Returns 1 if any lane is nonzero or 0 if all lanes are zero. 
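One way the comparison masks combine with `v128_bitselect` above is a branchless per-lane select. This sketch is illustrative only (the name is invented, not part of the diff): it picks the larger f32 lane, falling back to `b` whenever the comparison is false, including for NaN inputs.

#[cfg(target_arch = "wasm32")]
#[target_feature(enable = "simd128")]
unsafe fn f32x4_pick_larger(a: v128, b: v128) -> v128 {
    // All-ones lanes where a > b, all-zeros lanes otherwise.
    let mask = f32x4_gt(a, b);
    // Bits of `a` where the mask is 1, bits of `b` where it is 0.
    v128_bitselect(a, b, mask)
}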
#[inline] #[cfg_attr(test, assert_instr(i8x16.any_true))] -pub fn i8x16_any_true(a: v128) -> i32 { - unsafe { llvm_i8x16_any_true(a.as_i8x16()) } +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_any_true(a: v128) -> i32 { + llvm_i8x16_any_true(a.as_i8x16()) } /// Returns 1 if all lanes are nonzero or 0 if any lane is nonzero. #[inline] #[cfg_attr(test, assert_instr(i8x16.all_true))] -pub fn i8x16_all_true(a: v128) -> i32 { - unsafe { llvm_i8x16_all_true(a.as_i8x16()) } +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_all_true(a: v128) -> i32 { + llvm_i8x16_all_true(a.as_i8x16()) +} + +// FIXME: not available in our LLVM yet +// /// Extracts the high bit for each lane in `a` and produce a scalar mask with +// /// all bits concatenated. +// #[inline] +// #[cfg_attr(test, assert_instr(i8x16.all_true))] +// pub unsafe fn i8x16_bitmask(a: v128) -> i32 { +// llvm_bitmask_i8x16(transmute(a)) +// } + +/// Converts two input vectors into a smaller lane vector by narrowing each +/// lane. +/// +/// Signed saturation to 0x7f or 0x80 is used and the input lanes are always +/// interpreted as signed integers. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.narrow_i16x8_s))] +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_narrow_i16x8_s(a: v128, b: v128) -> v128 { + transmute(llvm_narrow_i8x16_s(transmute(a), transmute(b))) +} + +/// Converts two input vectors into a smaller lane vector by narrowing each +/// lane. +/// +/// Signed saturation to 0x00 or 0xff is used and the input lanes are always +/// interpreted as signed integers. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.narrow_i16x8_u))] +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_narrow_i16x8_u(a: v128, b: v128) -> v128 { + transmute(llvm_narrow_i8x16_u(transmute(a), transmute(b))) } /// Shifts each lane to the left by the specified number of bits. @@ -1021,10 +1490,10 @@ pub fn i8x16_all_true(a: v128) -> i32 { /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(i8x16.shl))] -pub fn i8x16_shl(a: v128, amt: u32) -> v128 { - unsafe { transmute(simd_shl(a.as_i8x16(), i8x16::splat(amt as i8))) } +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_shl(a: v128, amt: u32) -> v128 { + transmute(simd_shl(a.as_i8x16(), i8x16::splat(amt as i8))) } /// Shifts each lane to the right by the specified number of bits, sign @@ -1033,10 +1502,10 @@ pub fn i8x16_shl(a: v128, amt: u32) -> v128 { /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr(i8x16.shl))] -pub fn i8x16_shr_s(a: v128, amt: u32) -> v128 { - unsafe { transmute(simd_shr(a.as_i8x16(), i8x16::splat(amt as i8))) } +#[cfg_attr(test, assert_instr(i8x16.shr_s))] +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_shr_s(a: v128, amt: u32) -> v128 { + transmute(simd_shr(a.as_i8x16(), i8x16::splat(amt as i8))) } /// Shifts each lane to the right by the specified number of bits, shifting in @@ -1045,85 +1514,217 @@ pub fn i8x16_shr_s(a: v128, amt: u32) -> v128 { /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. 
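The `any_true`/`all_true` reductions pair naturally with the comparison intrinsics; a sketch (not in the patch, name invented) of a whole-vector byte equality test:

#[cfg(target_arch = "wasm32")]
#[target_feature(enable = "simd128")]
unsafe fn v128_bytes_equal(a: v128, b: v128) -> bool {
    // i8x16_eq sets a lane to all ones for every matching byte; all_true then
    // collapses that mask into a single scalar.
    i8x16_all_true(i8x16_eq(a, b)) != 0
}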
#[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr(i8x16.shl))] -pub fn i8x16_shr_u(a: v128, amt: u32) -> v128 { - unsafe { transmute(simd_shr(a.as_u8x16(), u8x16::splat(amt as u8))) } +#[cfg_attr(test, assert_instr(i8x16.shr_u))] +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_shr_u(a: v128, amt: u32) -> v128 { + transmute(simd_shr(a.as_u8x16(), u8x16::splat(amt as u8))) } /// Adds two 128-bit vectors as if they were two packed sixteen 8-bit integers. #[inline] #[cfg_attr(test, assert_instr(i8x16.add))] -pub fn i8x16_add(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_add(a: v128, b: v128) -> v128 { + transmute(simd_add(a.as_i8x16(), b.as_i8x16())) } /// Adds two 128-bit vectors as if they were two packed sixteen 8-bit signed /// integers, saturating on overflow to `i8::MAX`. #[inline] #[cfg_attr(test, assert_instr(i8x16.add_saturate_s))] -pub fn i8x16_add_saturate_s(a: v128, b: v128) -> v128 { - unsafe { transmute(llvm_i8x16_add_saturate_s(a.as_i8x16(), b.as_i8x16())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_add_saturate_s(a: v128, b: v128) -> v128 { + transmute(llvm_i8x16_add_saturate_s(a.as_i8x16(), b.as_i8x16())) } /// Adds two 128-bit vectors as if they were two packed sixteen 8-bit unsigned /// integers, saturating on overflow to `u8::MAX`. #[inline] #[cfg_attr(test, assert_instr(i8x16.add_saturate_u))] -pub fn i8x16_add_saturate_u(a: v128, b: v128) -> v128 { - unsafe { transmute(llvm_i8x16_add_saturate_u(a.as_i8x16(), b.as_i8x16())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_add_saturate_u(a: v128, b: v128) -> v128 { + transmute(llvm_i8x16_add_saturate_u(a.as_i8x16(), b.as_i8x16())) } /// Subtracts two 128-bit vectors as if they were two packed sixteen 8-bit integers. #[inline] #[cfg_attr(test, assert_instr(i8x16.sub))] -pub fn i8x16_sub(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_sub(a: v128, b: v128) -> v128 { + transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) } /// Subtracts two 128-bit vectors as if they were two packed sixteen 8-bit /// signed integers, saturating on overflow to `i8::MIN`. #[inline] #[cfg_attr(test, assert_instr(i8x16.sub_saturate_s))] -pub fn i8x16_sub_saturate_s(a: v128, b: v128) -> v128 { - unsafe { transmute(llvm_i8x16_sub_saturate_s(a.as_i8x16(), b.as_i8x16())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_sub_saturate_s(a: v128, b: v128) -> v128 { + transmute(llvm_i8x16_sub_saturate_s(a.as_i8x16(), b.as_i8x16())) } /// Subtracts two 128-bit vectors as if they were two packed sixteen 8-bit /// unsigned integers, saturating on overflow to 0. #[inline] #[cfg_attr(test, assert_instr(i8x16.sub_saturate_u))] -pub fn i8x16_sub_saturate_u(a: v128, b: v128) -> v128 { - unsafe { transmute(llvm_i8x16_sub_saturate_u(a.as_i8x16(), b.as_i8x16())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_sub_saturate_u(a: v128, b: v128) -> v128 { + transmute(llvm_i8x16_sub_saturate_u(a.as_i8x16(), b.as_i8x16())) } -/// Multiplies two 128-bit vectors as if they were two packed sixteen 8-bit -/// signed integers. +/// Compares lane-wise signed integers, and returns the minimum of +/// each pair. 
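A hedged example of where the saturating forms above matter (illustrative only, not part of the diff): with wrapping `i8x16_add`, unsigned lanes 200 + 100 would wrap around to 44, while the saturating form clamps them to 255.

#[cfg(target_arch = "wasm32")]
#[target_feature(enable = "simd128")]
unsafe fn brighten_bytes(pixels: v128, delta: v128) -> v128 {
    // Per-lane u8 addition that clamps at u8::MAX instead of wrapping.
    i8x16_add_saturate_u(pixels, delta)
}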
+#[inline] +#[cfg_attr(test, assert_instr(i8x16.min_s))] +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_min_s(a: v128, b: v128) -> v128 { + let a = a.as_i8x16(); + let b = b.as_i8x16(); + transmute(simd_select::(simd_lt(a, b), a, b)) +} + +/// Compares lane-wise unsigned integers, and returns the minimum of +/// each pair. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.min_u))] +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_min_u(a: v128, b: v128) -> v128 { + let a = transmute::<_, u8x16>(a); + let b = transmute::<_, u8x16>(b); + transmute(simd_select::(simd_lt(a, b), a, b)) +} + +/// Compares lane-wise signed integers, and returns the maximum of +/// each pair. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.max_s))] +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_max_s(a: v128, b: v128) -> v128 { + let a = transmute::<_, i8x16>(a); + let b = transmute::<_, i8x16>(b); + transmute(simd_select::(simd_gt(a, b), a, b)) +} + +/// Compares lane-wise unsigned integers, and returns the maximum of +/// each pair. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.max_u))] +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_max_u(a: v128, b: v128) -> v128 { + let a = transmute::<_, u8x16>(a); + let b = transmute::<_, u8x16>(b); + transmute(simd_select::(simd_gt(a, b), a, b)) +} + +/// Lane-wise rounding average. +#[inline] +#[cfg_attr(test, assert_instr(i8x16.avgr_u))] +#[target_feature(enable = "simd128")] +pub unsafe fn i8x16_avgr_u(a: v128, b: v128) -> v128 { + transmute(llvm_avgr_u_i8x16(transmute(a), transmute(b))) +} + +/// Lane-wise wrapping absolute value. #[inline] -#[cfg_attr(test, assert_instr(i8x16.mul))] -pub fn i8x16_mul(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_mul(a.as_i8x16(), b.as_i8x16())) } +// #[cfg_attr(test, assert_instr(i16x8.abs))] // FIXME support not in our LLVM yet +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_abs(a: v128) -> v128 { + let a = transmute::<_, i16x8>(a); + let zero = i16x8::splat(0); + transmute(simd_select::( + simd_lt(a, zero), + simd_sub(zero, a), + a, + )) } /// Negates a 128-bit vectors intepreted as eight 16-bit signed integers #[inline] #[cfg_attr(test, assert_instr(i16x8.neg))] -pub fn i16x8_neg(a: v128) -> v128 { - unsafe { transmute(simd_mul(a.as_i16x8(), i16x8::splat(-1))) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_neg(a: v128) -> v128 { + transmute(simd_mul(a.as_i16x8(), i16x8::splat(-1))) } /// Returns 1 if any lane is nonzero or 0 if all lanes are zero. #[inline] #[cfg_attr(test, assert_instr(i16x8.any_true))] -pub fn i16x8_any_true(a: v128) -> i32 { - unsafe { llvm_i16x8_any_true(a.as_i16x8()) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_any_true(a: v128) -> i32 { + llvm_i16x8_any_true(a.as_i16x8()) } /// Returns 1 if all lanes are nonzero or 0 if any lane is nonzero. #[inline] #[cfg_attr(test, assert_instr(i16x8.all_true))] -pub fn i16x8_all_true(a: v128) -> i32 { - unsafe { llvm_i16x8_all_true(a.as_i16x8()) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_all_true(a: v128) -> i32 { + llvm_i16x8_all_true(a.as_i16x8()) +} + +// FIXME: not available in our LLVM yet +// /// Extracts the high bit for each lane in `a` and produce a scalar mask with +// /// all bits concatenated. +// #[inline] +// #[cfg_attr(test, assert_instr(i16x8.all_true))] +// pub unsafe fn i16x8_bitmask(a: v128) -> i32 { +// llvm_bitmask_i16x8(transmute(a)) +// } + +/// Converts two input vectors into a smaller lane vector by narrowing each +/// lane. 
+/// +/// Signed saturation to 0x7fff or 0x8000 is used and the input lanes are always +/// interpreted as signed integers. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.narrow_i32x4_s))] +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_narrow_i32x4_s(a: v128, b: v128) -> v128 { + transmute(llvm_narrow_i16x8_s(transmute(a), transmute(b))) +} + +/// Converts two input vectors into a smaller lane vector by narrowing each +/// lane. +/// +/// Signed saturation to 0x0000 or 0xffff is used and the input lanes are always +/// interpreted as signed integers. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.narrow_i32x4_u))] +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_narrow_i32x4_u(a: v128, b: v128) -> v128 { + transmute(llvm_narrow_i16x8_u(transmute(a), transmute(b))) +} + +/// Converts low half of the smaller lane vector to a larger lane +/// vector, sign extended. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.widen_low_i8x16_s))] +pub unsafe fn i16x8_widen_low_i8x16_s(a: v128) -> v128 { + transmute(llvm_widen_low_i16x8_s(transmute(a))) +} + +/// Converts high half of the smaller lane vector to a larger lane +/// vector, sign extended. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.widen_high_i8x16_s))] +pub unsafe fn i16x8_widen_high_i8x16_s(a: v128) -> v128 { + transmute(llvm_widen_high_i16x8_s(transmute(a))) +} + +/// Converts low half of the smaller lane vector to a larger lane +/// vector, zero extended. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.widen_low_i8x16_u))] +pub unsafe fn i16x8_widen_low_i8x16_u(a: v128) -> v128 { + transmute(llvm_widen_low_i16x8_u(transmute(a))) +} + +/// Converts high half of the smaller lane vector to a larger lane +/// vector, zero extended. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.widen_high_i8x16_u))] +pub unsafe fn i16x8_widen_high_i8x16_u(a: v128) -> v128 { + transmute(llvm_widen_high_i16x8_u(transmute(a))) } /// Shifts each lane to the left by the specified number of bits. @@ -1131,10 +1732,10 @@ pub fn i16x8_all_true(a: v128) -> i32 { /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(i16x8.shl))] -pub fn i16x8_shl(a: v128, amt: u32) -> v128 { - unsafe { transmute(simd_shl(a.as_i16x8(), i16x8::splat(amt as i16))) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_shl(a: v128, amt: u32) -> v128 { + transmute(simd_shl(a.as_i16x8(), i16x8::splat(amt as i16))) } /// Shifts each lane to the right by the specified number of bits, sign @@ -1143,10 +1744,10 @@ pub fn i16x8_shl(a: v128, amt: u32) -> v128 { /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr(i16x8.shl))] -pub fn i16x8_shr_s(a: v128, amt: u32) -> v128 { - unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(amt as i16))) } +#[cfg_attr(test, assert_instr(i16x8.shr_s))] +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_shr_s(a: v128, amt: u32) -> v128 { + transmute(simd_shr(a.as_i16x8(), i16x8::splat(amt as i16))) } /// Shifts each lane to the right by the specified number of bits, shifting in @@ -1155,85 +1756,202 @@ pub fn i16x8_shr_s(a: v128, amt: u32) -> v128 { /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. 
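To show how the widen and narrow intrinsics above fit together, here is a sketch (not from the patch) of a round trip that holds whenever the intermediate 16-bit values stay within i8 range.

#[cfg(target_arch = "wasm32")]
#[target_feature(enable = "simd128")]
unsafe fn widen_then_narrow(a: v128) -> v128 {
    // Split the sixteen i8 lanes into two sign-extended i16x8 halves...
    let lo = i16x8_widen_low_i8x16_s(a);
    let hi = i16x8_widen_high_i8x16_s(a);
    // ...then pack them back with signed saturation.
    i8x16_narrow_i16x8_s(lo, hi)
}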
#[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr(i16x8.shl))] -pub fn i16x8_shr_u(a: v128, amt: u32) -> v128 { - unsafe { transmute(simd_shr(a.as_u16x8(), u16x8::splat(amt as u16))) } +#[cfg_attr(test, assert_instr(i16x8.shr_u))] +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_shr_u(a: v128, amt: u32) -> v128 { + transmute(simd_shr(a.as_u16x8(), u16x8::splat(amt as u16))) } /// Adds two 128-bit vectors as if they were two packed eight 16-bit integers. #[inline] #[cfg_attr(test, assert_instr(i16x8.add))] -pub fn i16x8_add(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_add(a: v128, b: v128) -> v128 { + transmute(simd_add(a.as_i16x8(), b.as_i16x8())) } /// Adds two 128-bit vectors as if they were two packed eight 16-bit signed /// integers, saturating on overflow to `i16::MAX`. #[inline] #[cfg_attr(test, assert_instr(i16x8.add_saturate_s))] -pub fn i16x8_add_saturate_s(a: v128, b: v128) -> v128 { - unsafe { transmute(llvm_i16x8_add_saturate_s(a.as_i16x8(), b.as_i16x8())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_add_saturate_s(a: v128, b: v128) -> v128 { + transmute(llvm_i16x8_add_saturate_s(a.as_i16x8(), b.as_i16x8())) } /// Adds two 128-bit vectors as if they were two packed eight 16-bit unsigned /// integers, saturating on overflow to `u16::MAX`. #[inline] #[cfg_attr(test, assert_instr(i16x8.add_saturate_u))] -pub fn i16x8_add_saturate_u(a: v128, b: v128) -> v128 { - unsafe { transmute(llvm_i16x8_add_saturate_u(a.as_i16x8(), b.as_i16x8())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_add_saturate_u(a: v128, b: v128) -> v128 { + transmute(llvm_i16x8_add_saturate_u(a.as_i16x8(), b.as_i16x8())) } /// Subtracts two 128-bit vectors as if they were two packed eight 16-bit integers. #[inline] #[cfg_attr(test, assert_instr(i16x8.sub))] -pub fn i16x8_sub(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_sub(a: v128, b: v128) -> v128 { + transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) } /// Subtracts two 128-bit vectors as if they were two packed eight 16-bit /// signed integers, saturating on overflow to `i16::MIN`. #[inline] #[cfg_attr(test, assert_instr(i16x8.sub_saturate_s))] -pub fn i16x8_sub_saturate_s(a: v128, b: v128) -> v128 { - unsafe { transmute(llvm_i16x8_sub_saturate_s(a.as_i16x8(), b.as_i16x8())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_sub_saturate_s(a: v128, b: v128) -> v128 { + transmute(llvm_i16x8_sub_saturate_s(a.as_i16x8(), b.as_i16x8())) } /// Subtracts two 128-bit vectors as if they were two packed eight 16-bit /// unsigned integers, saturating on overflow to 0. #[inline] #[cfg_attr(test, assert_instr(i16x8.sub_saturate_u))] -pub fn i16x8_sub_saturate_u(a: v128, b: v128) -> v128 { - unsafe { transmute(llvm_i16x8_sub_saturate_u(a.as_i16x8(), b.as_i16x8())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_sub_saturate_u(a: v128, b: v128) -> v128 { + transmute(llvm_i16x8_sub_saturate_u(a.as_i16x8(), b.as_i16x8())) } /// Multiplies two 128-bit vectors as if they were two packed eight 16-bit /// signed integers. 
#[inline] #[cfg_attr(test, assert_instr(i16x8.mul))] -pub fn i16x8_mul(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_mul(a: v128, b: v128) -> v128 { + transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) +} + +/// Compares lane-wise signed integers, and returns the minimum of +/// each pair. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.min_s))] +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_min_s(a: v128, b: v128) -> v128 { + let a = transmute::<_, i16x8>(a); + let b = transmute::<_, i16x8>(b); + transmute(simd_select::(simd_lt(a, b), a, b)) +} + +/// Compares lane-wise unsigned integers, and returns the minimum of +/// each pair. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.min_u))] +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_min_u(a: v128, b: v128) -> v128 { + let a = transmute::<_, u16x8>(a); + let b = transmute::<_, u16x8>(b); + transmute(simd_select::(simd_lt(a, b), a, b)) +} + +/// Compares lane-wise signed integers, and returns the maximum of +/// each pair. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.max_s))] +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_max_s(a: v128, b: v128) -> v128 { + let a = transmute::<_, i16x8>(a); + let b = transmute::<_, i16x8>(b); + transmute(simd_select::(simd_gt(a, b), a, b)) +} + +/// Compares lane-wise unsigned integers, and returns the maximum of +/// each pair. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.max_u))] +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_max_u(a: v128, b: v128) -> v128 { + let a = transmute::<_, u16x8>(a); + let b = transmute::<_, u16x8>(b); + transmute(simd_select::(simd_gt(a, b), a, b)) +} + +/// Lane-wise rounding average. +#[inline] +#[cfg_attr(test, assert_instr(i16x8.avgr_u))] +#[target_feature(enable = "simd128")] +pub unsafe fn i16x8_avgr_u(a: v128, b: v128) -> v128 { + transmute(llvm_avgr_u_i16x8(transmute(a), transmute(b))) +} + +/// Lane-wise wrapping absolute value. +#[inline] +// #[cfg_attr(test, assert_instr(i32x4.abs))] // FIXME support not in our LLVM yet +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_abs(a: v128) -> v128 { + let a = transmute::<_, i32x4>(a); + let zero = i32x4::splat(0); + transmute(simd_select::( + simd_lt(a, zero), + simd_sub(zero, a), + a, + )) } /// Negates a 128-bit vectors intepreted as four 32-bit signed integers #[inline] #[cfg_attr(test, assert_instr(i32x4.neg))] -pub fn i32x4_neg(a: v128) -> v128 { - unsafe { transmute(simd_mul(a.as_i32x4(), i32x4::splat(-1))) } +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_neg(a: v128) -> v128 { + transmute(simd_mul(a.as_i32x4(), i32x4::splat(-1))) } /// Returns 1 if any lane is nonzero or 0 if all lanes are zero. #[inline] #[cfg_attr(test, assert_instr(i32x4.any_true))] -pub fn i32x4_any_true(a: v128) -> i32 { - unsafe { llvm_i32x4_any_true(a.as_i32x4()) } +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_any_true(a: v128) -> i32 { + llvm_i32x4_any_true(a.as_i32x4()) } /// Returns 1 if all lanes are nonzero or 0 if any lane is nonzero. 
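The min/max pairs above compose into a per-lane clamp; an illustrative sketch (name invented, assumes `lo <= hi` lane-wise):

#[cfg(target_arch = "wasm32")]
#[target_feature(enable = "simd128")]
unsafe fn u16x8_clamp(v: v128, lo: v128, hi: v128) -> v128 {
    // Raise each unsigned 16-bit lane to at least `lo`, then cap it at `hi`.
    i16x8_min_u(i16x8_max_u(v, lo), hi)
}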
#[inline] #[cfg_attr(test, assert_instr(i32x4.all_true))] -pub fn i32x4_all_true(a: v128) -> i32 { - unsafe { llvm_i32x4_all_true(a.as_i32x4()) } +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_all_true(a: v128) -> i32 { + llvm_i32x4_all_true(a.as_i32x4()) +} + +// FIXME: not available in our LLVM yet +// /// Extracts the high bit for each lane in `a` and produce a scalar mask with +// /// all bits concatenated. +// #[inline] +// #[cfg_attr(test, assert_instr(i32x4.all_true))] +// pub unsafe fn i32x4_bitmask(a: v128) -> i32 { +// llvm_bitmask_i32x4(transmute(a)) +// } + +/// Converts low half of the smaller lane vector to a larger lane +/// vector, sign extended. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.widen_low_i16x8_s))] +pub unsafe fn i32x4_widen_low_i16x8_s(a: v128) -> v128 { + transmute(llvm_widen_low_i32x4_s(transmute(a))) +} + +/// Converts high half of the smaller lane vector to a larger lane +/// vector, sign extended. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.widen_high_i16x8_s))] +pub unsafe fn i32x4_widen_high_i16x8_s(a: v128) -> v128 { + transmute(llvm_widen_high_i32x4_s(transmute(a))) +} + +/// Converts low half of the smaller lane vector to a larger lane +/// vector, zero extended. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.widen_low_i16x8_u))] +pub unsafe fn i32x4_widen_low_i16x8_u(a: v128) -> v128 { + transmute(llvm_widen_low_i32x4_u(transmute(a))) +} + +/// Converts high half of the smaller lane vector to a larger lane +/// vector, zero extended. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.widen_high_i16x8_u))] +pub unsafe fn i32x4_widen_high_i16x8_u(a: v128) -> v128 { + transmute(llvm_widen_high_i32x4_u(transmute(a))) } /// Shifts each lane to the left by the specified number of bits. @@ -1241,10 +1959,10 @@ pub fn i32x4_all_true(a: v128) -> i32 { /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(i32x4.shl))] -pub fn i32x4_shl(a: v128, amt: u32) -> v128 { - unsafe { transmute(simd_shl(a.as_i32x4(), i32x4::splat(amt as i32))) } +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_shl(a: v128, amt: u32) -> v128 { + transmute(simd_shl(a.as_i32x4(), i32x4::splat(amt as i32))) } /// Shifts each lane to the right by the specified number of bits, sign @@ -1253,10 +1971,10 @@ pub fn i32x4_shl(a: v128, amt: u32) -> v128 { /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr(i32x4.shl))] -pub fn i32x4_shr_s(a: v128, amt: u32) -> v128 { - unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(amt as i32))) } +#[cfg_attr(test, assert_instr(i32x4.shr_s))] +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_shr_s(a: v128, amt: u32) -> v128 { + transmute(simd_shr(a.as_i32x4(), i32x4::splat(amt as i32))) } /// Shifts each lane to the right by the specified number of bits, shifting in @@ -1265,56 +1983,87 @@ pub fn i32x4_shr_s(a: v128, amt: u32) -> v128 { /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. 
#[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr(i32x4.shl))] -pub fn i32x4_shr_u(a: v128, amt: u32) -> v128 { - unsafe { transmute(simd_shr(a.as_u32x4(), u32x4::splat(amt as u32))) } +#[cfg_attr(test, assert_instr(i32x4.shr_u))] +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_shr_u(a: v128, amt: u32) -> v128 { + transmute(simd_shr(a.as_u32x4(), u32x4::splat(amt as u32))) } /// Adds two 128-bit vectors as if they were two packed four 32-bit integers. #[inline] #[cfg_attr(test, assert_instr(i32x4.add))] -pub fn i32x4_add(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_add(a: v128, b: v128) -> v128 { + transmute(simd_add(a.as_i32x4(), b.as_i32x4())) } /// Subtracts two 128-bit vectors as if they were two packed four 32-bit integers. #[inline] #[cfg_attr(test, assert_instr(i32x4.sub))] -pub fn i32x4_sub(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_sub(a: v128, b: v128) -> v128 { + transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) } /// Multiplies two 128-bit vectors as if they were two packed four 32-bit /// signed integers. #[inline] #[cfg_attr(test, assert_instr(i32x4.mul))] -pub fn i32x4_mul(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_mul(a.as_i32x4(), b.as_i32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_mul(a: v128, b: v128) -> v128 { + transmute(simd_mul(a.as_i32x4(), b.as_i32x4())) } -/// Negates a 128-bit vectors intepreted as two 64-bit signed integers +/// Compares lane-wise signed integers, and returns the minimum of +/// each pair. #[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr(i32x4.neg))] -pub fn i64x2_neg(a: v128) -> v128 { - unsafe { transmute(simd_mul(a.as_i64x2(), i64x2::splat(-1))) } +#[cfg_attr(test, assert_instr(i32x4.min_s))] +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_min_s(a: v128, b: v128) -> v128 { + let a = transmute::<_, i32x4>(a); + let b = transmute::<_, i32x4>(b); + transmute(simd_select::(simd_lt(a, b), a, b)) } -/// Returns 1 if any lane is nonzero or 0 if all lanes are zero. +/// Compares lane-wise unsigned integers, and returns the minimum of +/// each pair. #[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr(i64x2.any_true))] -pub fn i64x2_any_true(a: v128) -> i32 { - unsafe { llvm_i64x2_any_true(a.as_i64x2()) } +#[cfg_attr(test, assert_instr(i32x4.min_u))] +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_min_u(a: v128, b: v128) -> v128 { + let a = transmute::<_, u32x4>(a); + let b = transmute::<_, u32x4>(b); + transmute(simd_select::(simd_lt(a, b), a, b)) } -/// Returns 1 if all lanes are nonzero or 0 if any lane is nonzero. +/// Compares lane-wise signed integers, and returns the maximum of +/// each pair. +#[inline] +#[cfg_attr(test, assert_instr(i32x4.max_s))] +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_max_s(a: v128, b: v128) -> v128 { + let a = transmute::<_, i32x4>(a); + let b = transmute::<_, i32x4>(b); + transmute(simd_select::(simd_gt(a, b), a, b)) +} + +/// Compares lane-wise unsigned integers, and returns the maximum of +/// each pair. 
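Since the logical right shift above brings in zero bits, it doubles as an unsigned power-of-two divide; a small sketch (illustrative only, valid for k < 32):

#[cfg(target_arch = "wasm32")]
#[target_feature(enable = "simd128")]
unsafe fn u32x4_div_pow2(v: v128, k: u32) -> v128 {
    // Each unsigned 32-bit lane becomes lane >> k, i.e. lane / 2^k.
    i32x4_shr_u(v, k)
}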
#[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr(i64x2.all_true))] -pub fn i64x2_all_true(a: v128) -> i32 { - unsafe { llvm_i64x2_all_true(a.as_i64x2()) } +#[cfg_attr(test, assert_instr(i32x4.max_u))] +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_max_u(a: v128, b: v128) -> v128 { + let a = transmute::<_, u32x4>(a); + let b = transmute::<_, u32x4>(b); + transmute(simd_select::(simd_gt(a, b), a, b)) +} + +/// Negates a 128-bit vectors intepreted as two 64-bit signed integers +#[inline] +#[cfg_attr(test, assert_instr(i64x2.neg))] +#[target_feature(enable = "simd128")] +pub unsafe fn i64x2_neg(a: v128) -> v128 { + transmute(simd_mul(a.as_i64x2(), i64x2::splat(-1))) } /// Shifts each lane to the left by the specified number of bits. @@ -1322,10 +2071,10 @@ pub fn i64x2_all_true(a: v128) -> i32 { /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(i64x2.shl))] -pub fn i64x2_shl(a: v128, amt: u32) -> v128 { - unsafe { transmute(simd_shl(a.as_i64x2(), i64x2::splat(amt as i64))) } +#[target_feature(enable = "simd128")] +pub unsafe fn i64x2_shl(a: v128, amt: u32) -> v128 { + transmute(simd_shl(a.as_i64x2(), i64x2::splat(amt as i64))) } /// Shifts each lane to the right by the specified number of bits, sign @@ -1334,10 +2083,10 @@ pub fn i64x2_shl(a: v128, amt: u32) -> v128 { /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr(i64x2.shl))] -pub fn i64x2_shr_s(a: v128, amt: u32) -> v128 { - unsafe { transmute(simd_shr(a.as_i64x2(), i64x2::splat(amt as i64))) } +#[cfg_attr(test, assert_instr(i64x2.shr_s))] +#[target_feature(enable = "simd128")] +pub unsafe fn i64x2_shr_s(a: v128, amt: u32) -> v128 { + transmute(simd_shr(a.as_i64x2(), i64x2::splat(amt as i64))) } /// Shifts each lane to the right by the specified number of bits, shifting in @@ -1346,181 +2095,196 @@ pub fn i64x2_shr_s(a: v128, amt: u32) -> v128 { /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr(i64x2.shl))] -pub fn i64x2_shr_u(a: v128, amt: u32) -> v128 { - unsafe { transmute(simd_shr(a.as_u64x2(), u64x2::splat(amt as u64))) } +#[cfg_attr(test, assert_instr(i64x2.shr_u))] +#[target_feature(enable = "simd128")] +pub unsafe fn i64x2_shr_u(a: v128, amt: u32) -> v128 { + transmute(simd_shr(a.as_u64x2(), u64x2::splat(amt as u64))) } /// Adds two 128-bit vectors as if they were two packed two 64-bit integers. #[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(i64x2.add))] -pub fn i64x2_add(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i64x2_add(a: v128, b: v128) -> v128 { + transmute(simd_add(a.as_i64x2(), b.as_i64x2())) } /// Subtracts two 128-bit vectors as if they were two packed two 64-bit integers. 
#[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(i64x2.sub))] -pub fn i64x2_sub(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i64x2_sub(a: v128, b: v128) -> v128 { + transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) +} + +/// Multiplies two 128-bit vectors as if they were two packed two 64-bit integers. +#[inline] +// #[cfg_attr(test, assert_instr(i64x2.mul))] // FIXME: not present in our LLVM +#[target_feature(enable = "simd128")] +pub unsafe fn i64x2_mul(a: v128, b: v128) -> v128 { + transmute(simd_mul(a.as_i64x2(), b.as_i64x2())) } /// Calculates the absolute value of each lane of a 128-bit vector interpreted /// as four 32-bit floating point numbers. #[inline] #[cfg_attr(test, assert_instr(f32x4.abs))] -pub fn f32x4_abs(a: v128) -> v128 { - unsafe { transmute(llvm_f32x4_abs(a.as_f32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f32x4_abs(a: v128) -> v128 { + transmute(llvm_f32x4_abs(a.as_f32x4())) } /// Negates each lane of a 128-bit vector interpreted as four 32-bit floating /// point numbers. #[inline] #[cfg_attr(test, assert_instr(f32x4.neg))] -pub fn f32x4_neg(a: v128) -> v128 { - unsafe { f32x4_mul(a, transmute(f32x4(-1.0, -1.0, -1.0, -1.0))) } +#[target_feature(enable = "simd128")] +pub unsafe fn f32x4_neg(a: v128) -> v128 { + f32x4_mul(a, transmute(f32x4(-1.0, -1.0, -1.0, -1.0))) } /// Calculates the square root of each lane of a 128-bit vector interpreted as /// four 32-bit floating point numbers. #[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(f32x4.sqrt))] -pub fn f32x4_sqrt(a: v128) -> v128 { - unsafe { transmute(llvm_f32x4_sqrt(a.as_f32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f32x4_sqrt(a: v128) -> v128 { + transmute(llvm_f32x4_sqrt(a.as_f32x4())) } /// Adds pairwise lanes of two 128-bit vectors interpreted as four 32-bit /// floating point numbers. #[inline] #[cfg_attr(test, assert_instr(f32x4.add))] -pub fn f32x4_add(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_add(a.as_f32x4(), b.as_f32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f32x4_add(a: v128, b: v128) -> v128 { + transmute(simd_add(a.as_f32x4(), b.as_f32x4())) } /// Subtracts pairwise lanes of two 128-bit vectors interpreted as four 32-bit /// floating point numbers. #[inline] #[cfg_attr(test, assert_instr(f32x4.sub))] -pub fn f32x4_sub(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_sub(a.as_f32x4(), b.as_f32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f32x4_sub(a: v128, b: v128) -> v128 { + transmute(simd_sub(a.as_f32x4(), b.as_f32x4())) } /// Multiplies pairwise lanes of two 128-bit vectors interpreted as four 32-bit /// floating point numbers. #[inline] #[cfg_attr(test, assert_instr(f32x4.mul))] -pub fn f32x4_mul(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_mul(a.as_f32x4(), b.as_f32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f32x4_mul(a: v128, b: v128) -> v128 { + transmute(simd_mul(a.as_f32x4(), b.as_f32x4())) } /// Divides pairwise lanes of two 128-bit vectors interpreted as four 32-bit /// floating point numbers. 
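A brief sketch of the f32x4 arithmetic above used together (not part of the patch; the helper name is invented):

#[cfg(target_arch = "wasm32")]
#[target_feature(enable = "simd128")]
unsafe fn f32x4_hypot(x: v128, y: v128) -> v128 {
    // Per-lane sqrt(x*x + y*y) for four (x, y) pairs at once.
    f32x4_sqrt(f32x4_add(f32x4_mul(x, x), f32x4_mul(y, y)))
}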
#[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(f32x4.div))] -pub fn f32x4_div(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_div(a.as_f32x4(), b.as_f32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f32x4_div(a: v128, b: v128) -> v128 { + transmute(simd_div(a.as_f32x4(), b.as_f32x4())) } /// Calculates the minimum of pairwise lanes of two 128-bit vectors interpreted /// as four 32-bit floating point numbers. #[inline] #[cfg_attr(test, assert_instr(f32x4.min))] -pub fn f32x4_min(a: v128, b: v128) -> v128 { - unsafe { transmute(llvm_f32x4_min(a.as_f32x4(), b.as_f32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f32x4_min(a: v128, b: v128) -> v128 { + transmute(llvm_f32x4_min(a.as_f32x4(), b.as_f32x4())) } /// Calculates the maximum of pairwise lanes of two 128-bit vectors interpreted /// as four 32-bit floating point numbers. #[inline] #[cfg_attr(test, assert_instr(f32x4.max))] -pub fn f32x4_max(a: v128, b: v128) -> v128 { - unsafe { transmute(llvm_f32x4_max(a.as_f32x4(), b.as_f32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f32x4_max(a: v128, b: v128) -> v128 { + transmute(llvm_f32x4_max(a.as_f32x4(), b.as_f32x4())) } /// Calculates the absolute value of each lane of a 128-bit vector interpreted /// as two 64-bit floating point numbers. #[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(f64x2.abs))] -pub fn f64x2_abs(a: v128) -> v128 { - unsafe { transmute(llvm_f64x2_abs(a.as_f64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f64x2_abs(a: v128) -> v128 { + transmute(llvm_f64x2_abs(a.as_f64x2())) } /// Negates each lane of a 128-bit vector interpreted as two 64-bit floating /// point numbers. #[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr(f64x2.abs))] -pub fn f64x2_neg(a: v128) -> v128 { - unsafe { f64x2_mul(a, transmute(f64x2(-1.0, -1.0))) } +#[cfg_attr(test, assert_instr(f64x2.neg))] +#[target_feature(enable = "simd128")] +pub unsafe fn f64x2_neg(a: v128) -> v128 { + f64x2_mul(a, transmute(f64x2(-1.0, -1.0))) } /// Calculates the square root of each lane of a 128-bit vector interpreted as /// two 64-bit floating point numbers. #[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(f64x2.sqrt))] -pub fn f64x2_sqrt(a: v128) -> v128 { - unsafe { transmute(llvm_f64x2_sqrt(a.as_f64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f64x2_sqrt(a: v128) -> v128 { + transmute(llvm_f64x2_sqrt(a.as_f64x2())) } /// Adds pairwise lanes of two 128-bit vectors interpreted as two 64-bit /// floating point numbers. #[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(f64x2.add))] -pub fn f64x2_add(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_add(a.as_f64x2(), b.as_f64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f64x2_add(a: v128, b: v128) -> v128 { + transmute(simd_add(a.as_f64x2(), b.as_f64x2())) } /// Subtracts pairwise lanes of two 128-bit vectors interpreted as two 64-bit /// floating point numbers. 
#[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(f64x2.sub))] -pub fn f64x2_sub(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_sub(a.as_f64x2(), b.as_f64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f64x2_sub(a: v128, b: v128) -> v128 { + transmute(simd_sub(a.as_f64x2(), b.as_f64x2())) } /// Multiplies pairwise lanes of two 128-bit vectors interpreted as two 64-bit /// floating point numbers. #[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(f64x2.mul))] -pub fn f64x2_mul(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_mul(a.as_f64x2(), b.as_f64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f64x2_mul(a: v128, b: v128) -> v128 { + transmute(simd_mul(a.as_f64x2(), b.as_f64x2())) } /// Divides pairwise lanes of two 128-bit vectors interpreted as two 64-bit /// floating point numbers. #[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(f64x2.div))] -pub fn f64x2_div(a: v128, b: v128) -> v128 { - unsafe { transmute(simd_div(a.as_f64x2(), b.as_f64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f64x2_div(a: v128, b: v128) -> v128 { + transmute(simd_div(a.as_f64x2(), b.as_f64x2())) } /// Calculates the minimum of pairwise lanes of two 128-bit vectors interpreted /// as two 64-bit floating point numbers. #[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(f64x2.min))] -pub fn f64x2_min(a: v128, b: v128) -> v128 { - unsafe { transmute(llvm_f64x2_min(a.as_f64x2(), b.as_f64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f64x2_min(a: v128, b: v128) -> v128 { + transmute(llvm_f64x2_min(a.as_f64x2(), b.as_f64x2())) } /// Calculates the maximum of pairwise lanes of two 128-bit vectors interpreted /// as two 64-bit floating point numbers. #[inline] -#[cfg(not(only_node_compatible_functions))] #[cfg_attr(test, assert_instr(f64x2.max))] -pub fn f64x2_max(a: v128, b: v128) -> v128 { - unsafe { transmute(llvm_f64x2_max(a.as_f64x2(), b.as_f64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f64x2_max(a: v128, b: v128) -> v128 { + transmute(llvm_f64x2_max(a.as_f64x2(), b.as_f64x2())) } /// Converts a 128-bit vector interpreted as four 32-bit floating point numbers @@ -1530,8 +2294,9 @@ pub fn f64x2_max(a: v128, b: v128) -> v128 { /// representable intger. #[inline] #[cfg_attr(test, assert_instr("i32x4.trunc_sat_f32x4_s"))] -pub fn i32x4_trunc_s_f32x4_sat(a: v128) -> v128 { - unsafe { transmute(simd_cast::<_, i32x4>(a.as_f32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_trunc_sat_f32x4_s(a: v128) -> v128 { + transmute(simd_cast::<_, i32x4>(a.as_f32x4())) } /// Converts a 128-bit vector interpreted as four 32-bit floating point numbers @@ -1541,66 +2306,27 @@ pub fn i32x4_trunc_s_f32x4_sat(a: v128) -> v128 { /// representable intger. #[inline] #[cfg_attr(test, assert_instr("i32x4.trunc_sat_f32x4_u"))] -pub fn i32x4_trunc_u_f32x4_sat(a: v128) -> v128 { - unsafe { transmute(simd_cast::<_, u32x4>(a.as_f32x4())) } -} - -/// Converts a 128-bit vector interpreted as two 64-bit floating point numbers -/// into a 128-bit vector of two 64-bit signed integers. -/// -/// NaN is converted to 0 and if it's out of bounds it becomes the nearest -/// representable intger. 
-#[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr("i64x2.trunc_s/f64x2:sat"))] -pub fn i64x2_trunc_s_f64x2_sat(a: v128) -> v128 { - unsafe { transmute(simd_cast::<_, i64x2>(a.as_f64x2())) } -} - -/// Converts a 128-bit vector interpreted as two 64-bit floating point numbers -/// into a 128-bit vector of two 64-bit unsigned integers. -/// -/// NaN is converted to 0 and if it's out of bounds it becomes the nearest -/// representable intger. -#[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr("i64x2.trunc_u/f64x2:sat"))] -pub fn i64x2_trunc_u_f64x2_sat(a: v128) -> v128 { - unsafe { transmute(simd_cast::<_, u64x2>(a.as_f64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn i32x4_trunc_sat_f32x4_u(a: v128) -> v128 { + transmute(simd_cast::<_, u32x4>(a.as_f32x4())) } /// Converts a 128-bit vector interpreted as four 32-bit signed integers into a /// 128-bit vector of four 32-bit floating point numbers. #[inline] #[cfg_attr(test, assert_instr("f32x4.convert_i32x4_s"))] -pub fn f32x4_convert_i32x4_s(a: v128) -> v128 { - unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f32x4_convert_i32x4_s(a: v128) -> v128 { + transmute(simd_cast::<_, f32x4>(a.as_i32x4())) } /// Converts a 128-bit vector interpreted as four 32-bit unsigned integers into a /// 128-bit vector of four 32-bit floating point numbers. #[inline] #[cfg_attr(test, assert_instr("f32x4.convert_i32x4_u"))] -pub fn f32x4_convert_i32x4_u(a: v128) -> v128 { - unsafe { transmute(simd_cast::<_, f32x4>(a.as_u32x4())) } -} - -/// Converts a 128-bit vector interpreted as two 64-bit signed integers into a -/// 128-bit vector of two 64-bit floating point numbers. -#[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr("f64x2.convert_s/i64x2"))] -pub fn f64x2_convert_s_i64x2(a: v128) -> v128 { - unsafe { transmute(simd_cast::<_, f64x2>(a.as_i64x2())) } -} - -/// Converts a 128-bit vector interpreted as two 64-bit unsigned integers into a -/// 128-bit vector of two 64-bit floating point numbers. -#[inline] -#[cfg(not(only_node_compatible_functions))] -#[cfg_attr(test, assert_instr("f64x2.convert_u/i64x2"))] -pub fn f64x2_convert_u_i64x2(a: v128) -> v128 { - unsafe { transmute(simd_cast::<_, f64x2>(a.as_u64x2())) } +#[target_feature(enable = "simd128")] +pub unsafe fn f32x4_convert_i32x4_u(a: v128) -> v128 { + transmute(simd_cast::<_, f32x4>(a.as_u32x4())) } #[cfg(test)] @@ -1610,7 +2336,6 @@ pub mod tests { use std::mem; use std::num::Wrapping; use std::prelude::v1::*; - use wasm_bindgen_test::*; fn compare_bytes(a: v128, b: v128) { let a: [u8; 16] = unsafe { transmute(a) }; @@ -1618,23 +2343,22 @@ pub mod tests { assert_eq!(a, b); } - #[wasm_bindgen_test] - #[cfg(not(only_node_compatible_functions))] + #[test] fn test_v128_const() { const A: v128 = - unsafe { super::v128_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) }; + unsafe { super::i8x16_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) }; compare_bytes(A, A); } macro_rules! 
test_splat { ($test_id:ident: $val:expr => $($vals:expr),*) => { - #[wasm_bindgen_test] + #[test] fn $test_id() { + unsafe { let a = super::$test_id($val); - let b: v128 = unsafe { - transmute([$($vals as u8),*]) - }; + let b: v128 = transmute([$($vals as u8),*]); compare_bytes(a, b); + } } } } @@ -1642,10 +2366,8 @@ pub mod tests { test_splat!(i8x16_splat: 42 => 42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42); test_splat!(i16x8_splat: 42 => 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0); test_splat!(i32x4_splat: 42 => 42, 0, 0, 0, 42, 0, 0, 0, 42, 0, 0, 0, 42, 0, 0, 0); - #[cfg(not(only_node_compatible_functions))] test_splat!(i64x2_splat: 42 => 42, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0); test_splat!(f32x4_splat: 42. => 0, 0, 40, 66, 0, 0, 40, 66, 0, 0, 40, 66, 0, 0, 40, 66); - #[cfg(not(only_node_compatible_functions))] test_splat!(f64x2_splat: 42. => 0, 0, 0, 0, 0, 0, 69, 64, 0, 0, 0, 0, 0, 0, 69, 64); // tests extract and replace lanes @@ -1658,25 +2380,25 @@ pub mod tests { count: $count:expr, indices: [$($idx:expr),*], ) => { - #[wasm_bindgen_test] + #[test] fn $test_id() { unsafe { let arr: [$elem; $count] = [123 as $elem; $count]; let vec: v128 = transmute(arr); $( - assert_eq!($extract(vec, $idx), 123 as $elem); - )*; + assert_eq!($extract::<$idx>(vec), 123 as $elem); + )* // create a vector from array and check that the indices contain // the same values as in the array: let arr: [$elem; $count] = [$($idx as $elem),*]; let vec: v128 = transmute(arr); $( - assert_eq!($extract(vec, $idx), $idx as $elem); + assert_eq!($extract::<$idx>(vec), $idx as $elem); - let tmp = $replace(vec, $idx, 124 as $elem); - assert_eq!($extract(tmp, $idx), 124 as $elem); - )*; + let tmp = $replace::<$idx>(vec, 124 as $elem); + assert_eq!($extract::<$idx>(tmp), 124 as $elem); + )* } } } @@ -1706,7 +2428,6 @@ pub mod tests { count: 4, indices: [0, 1, 2, 3], } - #[cfg(not(only_node_compatible_functions))] test_extract! { name: test_i64x2_extract_replace, extract: i64x2_extract_lane, @@ -1723,7 +2444,6 @@ pub mod tests { count: 4, indices: [0, 1, 2, 3], } - #[cfg(not(only_node_compatible_functions))] test_extract! 
{ name: test_f64x2_extract_replace, extract: f64x2_extract_lane, @@ -1739,7 +2459,7 @@ pub mod tests { $([$($vec1:tt)*] ($op:tt | $f:ident) [$($vec2:tt)*],)* })* ) => ($( - #[wasm_bindgen_test] + #[test] fn $name() { unsafe { $( @@ -1768,7 +2488,7 @@ pub mod tests { $(($op:tt | $f:ident) [$($vec1:tt)*],)* })* ) => ($( - #[wasm_bindgen_test] + #[test] fn $name() { unsafe { $( @@ -1816,19 +2536,6 @@ pub mod tests { (- | i8x16_sub) [-127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 4, 8], } - test_i8x16_mul => { - [0i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - (* | i8x16_mul) - [1i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - - [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] - (* | i8x16_mul) - [-2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -18], - - [1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] - (* | i8x16_mul) - [-127, -44, 43, 126, 4, 2, 9, -3, -59, -43, 39, -69, 79, -3, 30, 3], - } test_i16x8_add => { [0i16, 0, 0, 0, 0, 0, 0, 0] @@ -1910,425 +2617,401 @@ pub mod tests { // TODO: test_i64x2_neg } - // #[wasm_bindgen_test] - // fn v8x16_shuffle() { - // unsafe { - // let a = [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; - // let b = [ - // 16_u8, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, - // 31, - // ]; - // - // let vec_a: v128 = transmute(a); - // let vec_b: v128 = transmute(b); - // - // let vec_r = v8x16_shuffle!( - // vec_a, - // vec_b, - // [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] - // ); - // - // let e = - // [0_u8, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]; - // let vec_e: v128 = transmute(e); - // compare_bytes(vec_r, vec_e); - // } - // } - // - // macro_rules! floating_point { - // (f32) => { - // true - // }; - // (f64) => { - // true - // }; - // ($id:ident) => { - // false - // }; - // } - // - // trait IsNan: Sized { - // fn is_nan(self) -> bool { - // false - // } - // } - // impl IsNan for i8 {} - // impl IsNan for i16 {} - // impl IsNan for i32 {} - // impl IsNan for i64 {} - // - // macro_rules! test_bop { - // ($id:ident[$ety:ident; $ecount:expr] | - // $binary_op:ident [$op_test_id:ident] : - // ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => { - // test_bop!( - // $id[$ety; $ecount] => $ety | $binary_op [ $op_test_id ]: - // ([$($in_a),*], [$($in_b),*]) => [$($out),*] - // ); - // - // }; - // ($id:ident[$ety:ident; $ecount:expr] => $oty:ident | - // $binary_op:ident [$op_test_id:ident] : - // ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => { - // #[wasm_bindgen_test] - // fn $op_test_id() { - // unsafe { - // let a_input: [$ety; $ecount] = [$($in_a),*]; - // let b_input: [$ety; $ecount] = [$($in_b),*]; - // let output: [$oty; $ecount] = [$($out),*]; - // - // let a_vec_in: v128 = transmute(a_input); - // let b_vec_in: v128 = transmute(b_input); - // let vec_res: v128 = $id::$binary_op(a_vec_in, b_vec_in); - // - // let res: [$oty; $ecount] = transmute(vec_res); - // - // if !floating_point!($ety) { - // assert_eq!(res, output); - // } else { - // for i in 0..$ecount { - // let r = res[i]; - // let o = output[i]; - // assert_eq!(r.is_nan(), o.is_nan()); - // if !r.is_nan() { - // assert_eq!(r, o); - // } - // } - // } - // } - // } - // } - // } - // - // macro_rules! 
test_bops { - // ($id:ident[$ety:ident; $ecount:expr] | - // $binary_op:ident [$op_test_id:ident]: - // ([$($in_a:expr),*], $in_b:expr) => [$($out:expr),*]) => { - // #[wasm_bindgen_test] - // fn $op_test_id() { - // unsafe { - // let a_input: [$ety; $ecount] = [$($in_a),*]; - // let output: [$ety; $ecount] = [$($out),*]; - // - // let a_vec_in: v128 = transmute(a_input); - // let vec_res: v128 = $id::$binary_op(a_vec_in, $in_b); - // - // let res: [$ety; $ecount] = transmute(vec_res); - // assert_eq!(res, output); - // } - // } - // } - // } - // - // macro_rules! test_uop { - // ($id:ident[$ety:ident; $ecount:expr] | - // $unary_op:ident [$op_test_id:ident]: [$($in_a:expr),*] => [$($out:expr),*]) => { - // #[wasm_bindgen_test] - // fn $op_test_id() { - // unsafe { - // let a_input: [$ety; $ecount] = [$($in_a),*]; - // let output: [$ety; $ecount] = [$($out),*]; - // - // let a_vec_in: v128 = transmute(a_input); - // let vec_res: v128 = $id::$unary_op(a_vec_in); - // - // let res: [$ety; $ecount] = transmute(vec_res); - // assert_eq!(res, output); - // } - // } - // } - // } - // - // - // - // test_bops!(i8x16[i8; 16] | shl[i8x16_shl_test]: - // ([0, -1, 2, 3, 4, 5, 6, i8::MAX, 1, 1, 1, 1, 1, 1, 1, 1], 1) => - // [0, -2, 4, 6, 8, 10, 12, -2, 2, 2, 2, 2, 2, 2, 2, 2]); - // test_bops!(i16x8[i16; 8] | shl[i16x8_shl_test]: - // ([0, -1, 2, 3, 4, 5, 6, i16::MAX], 1) => - // [0, -2, 4, 6, 8, 10, 12, -2]); - // test_bops!(i32x4[i32; 4] | shl[i32x4_shl_test]: - // ([0, -1, 2, 3], 1) => [0, -2, 4, 6]); - // test_bops!(i64x2[i64; 2] | shl[i64x2_shl_test]: - // ([0, -1], 1) => [0, -2]); - // - // test_bops!(i8x16[i8; 16] | shr_s[i8x16_shr_s_test]: - // ([0, -1, 2, 3, 4, 5, 6, i8::MAX, 1, 1, 1, 1, 1, 1, 1, 1], 1) => - // [0, -1, 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]); - // test_bops!(i16x8[i16; 8] | shr_s[i16x8_shr_s_test]: - // ([0, -1, 2, 3, 4, 5, 6, i16::MAX], 1) => - // [0, -1, 1, 1, 2, 2, 3, i16::MAX / 2]); - // test_bops!(i32x4[i32; 4] | shr_s[i32x4_shr_s_test]: - // ([0, -1, 2, 3], 1) => [0, -1, 1, 1]); - // test_bops!(i64x2[i64; 2] | shr_s[i64x2_shr_s_test]: - // ([0, -1], 1) => [0, -1]); - // - // test_bops!(i8x16[i8; 16] | shr_u[i8x16_uhr_u_test]: - // ([0, -1, 2, 3, 4, 5, 6, i8::MAX, 1, 1, 1, 1, 1, 1, 1, 1], 1) => - // [0, i8::MAX, 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]); - // test_bops!(i16x8[i16; 8] | shr_u[i16x8_uhr_u_test]: - // ([0, -1, 2, 3, 4, 5, 6, i16::MAX], 1) => - // [0, i16::MAX, 1, 1, 2, 2, 3, i16::MAX / 2]); - // test_bops!(i32x4[i32; 4] | shr_u[i32x4_uhr_u_test]: - // ([0, -1, 2, 3], 1) => [0, i32::MAX, 1, 1]); - // test_bops!(i64x2[i64; 2] | shr_u[i64x2_uhr_u_test]: - // ([0, -1], 1) => [0, i64::MAX]); - // - // #[wasm_bindgen_test] - // fn v128_bitwise_logical_ops() { - // unsafe { - // let a: [u32; 4] = [u32::MAX, 0, u32::MAX, 0]; - // let b: [u32; 4] = [u32::MAX; 4]; - // let c: [u32; 4] = [0; 4]; - // - // let vec_a: v128 = transmute(a); - // let vec_b: v128 = transmute(b); - // let vec_c: v128 = transmute(c); - // - // let r: v128 = v128::and(vec_a, vec_a); - // compare_bytes(r, vec_a); - // let r: v128 = v128::and(vec_a, vec_b); - // compare_bytes(r, vec_a); - // let r: v128 = v128::or(vec_a, vec_b); - // compare_bytes(r, vec_b); - // let r: v128 = v128::not(vec_b); - // compare_bytes(r, vec_c); - // let r: v128 = v128::xor(vec_a, vec_c); - // compare_bytes(r, vec_a); - // - // let r: v128 = v128::bitselect(vec_b, vec_c, vec_b); - // compare_bytes(r, vec_b); - // let r: v128 = v128::bitselect(vec_b, vec_c, vec_c); - // compare_bytes(r, vec_c); - // let r: 
v128 = v128::bitselect(vec_b, vec_c, vec_a); - // compare_bytes(r, vec_a); - // } - // } - // - // macro_rules! test_bool_red { - // ($id:ident[$test_id:ident] | [$($true:expr),*] | [$($false:expr),*] | [$($alt:expr),*]) => { - // #[wasm_bindgen_test] - // fn $test_id() { - // unsafe { - // let vec_a: v128 = transmute([$($true),*]); // true - // let vec_b: v128 = transmute([$($false),*]); // false - // let vec_c: v128 = transmute([$($alt),*]); // alternating - // - // assert_eq!($id::any_true(vec_a), 1); - // assert_eq!($id::any_true(vec_b), 0); - // assert_eq!($id::any_true(vec_c), 1); - // - // assert_eq!($id::all_true(vec_a), 1); - // assert_eq!($id::all_true(vec_b), 0); - // assert_eq!($id::all_true(vec_c), 0); - // } - // } - // } - // } - // - // test_bool_red!( - // i8x16[i8x16_boolean_reductions] - // | [1_i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - // | [0_i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - // | [1_i8, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] - // ); - // test_bool_red!( - // i16x8[i16x8_boolean_reductions] - // | [1_i16, 1, 1, 1, 1, 1, 1, 1] - // | [0_i16, 0, 0, 0, 0, 0, 0, 0] - // | [1_i16, 0, 1, 0, 1, 0, 1, 0] - // ); - // test_bool_red!( - // i32x4[i32x4_boolean_reductions] - // | [1_i32, 1, 1, 1] - // | [0_i32, 0, 0, 0] - // | [1_i32, 0, 1, 0] - // ); - // test_bool_red!( - // i64x2[i64x2_boolean_reductions] | [1_i64, 1] | [0_i64, 0] | [1_i64, 0] - // ); - // - // test_bop!(i8x16[i8; 16] | eq[i8x16_eq_test]: - // ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], - // [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => - // [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); - // test_bop!(i16x8[i16; 8] | eq[i16x8_eq_test]: - // ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => - // [-1, 0, -1, 0 ,-1, 0, -1, -1]); - // test_bop!(i32x4[i32; 4] | eq[i32x4_eq_test]: - // ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]); - // test_bop!(i64x2[i64; 2] | eq[i64x2_eq_test]: ([0, 1], [0, 2]) => [-1, 0]); - // test_bop!(f32x4[f32; 4] => i32 | eq[f32x4_eq_test]: - // ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]); - // test_bop!(f64x2[f64; 2] => i64 | eq[f64x2_eq_test]: ([0., 1.], [0., 2.]) => [-1, 0]); - // - // test_bop!(i8x16[i8; 16] | ne[i8x16_ne_test]: - // ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], - // [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => - // [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); - // test_bop!(i16x8[i16; 8] | ne[i16x8_ne_test]: - // ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => - // [0, -1, 0, -1 ,0, -1, 0, 0]); - // test_bop!(i32x4[i32; 4] | ne[i32x4_ne_test]: - // ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]); - // test_bop!(i64x2[i64; 2] | ne[i64x2_ne_test]: ([0, 1], [0, 2]) => [0, -1]); - // test_bop!(f32x4[f32; 4] => i32 | ne[f32x4_ne_test]: - // ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]); - // test_bop!(f64x2[f64; 2] => i64 | ne[f64x2_ne_test]: ([0., 1.], [0., 2.]) => [0, -1]); - // - // test_bop!(i8x16[i8; 16] | lt[i8x16_lt_test]: - // ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], - // [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => - // [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); - // test_bop!(i16x8[i16; 8] | lt[i16x8_lt_test]: - // ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => - // [0, -1, 0, -1 ,0, -1, 0, 0]); - // test_bop!(i32x4[i32; 4] | lt[i32x4_lt_test]: - // ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]); - // test_bop!(i64x2[i64; 2] | lt[i64x2_lt_test]: ([0, 1], [0, 2]) 
=> [0, -1]); - // test_bop!(f32x4[f32; 4] => i32 | lt[f32x4_lt_test]: - // ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]); - // test_bop!(f64x2[f64; 2] => i64 | lt[f64x2_lt_test]: ([0., 1.], [0., 2.]) => [0, -1]); - // - // test_bop!(i8x16[i8; 16] | gt[i8x16_gt_test]: - // ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15], - // [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) => - // [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); - // test_bop!(i16x8[i16; 8] | gt[i16x8_gt_test]: - // ([0, 2, 2, 4, 4, 6, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7]) => - // [0, -1, 0, -1 ,0, -1, 0, 0]); - // test_bop!(i32x4[i32; 4] | gt[i32x4_gt_test]: - // ([0, 2, 2, 4], [0, 1, 2, 3]) => [0, -1, 0, -1]); - // test_bop!(i64x2[i64; 2] | gt[i64x2_gt_test]: ([0, 2], [0, 1]) => [0, -1]); - // test_bop!(f32x4[f32; 4] => i32 | gt[f32x4_gt_test]: - // ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [0, -1, 0, -1]); - // test_bop!(f64x2[f64; 2] => i64 | gt[f64x2_gt_test]: ([0., 2.], [0., 1.]) => [0, -1]); - // - // test_bop!(i8x16[i8; 16] | ge[i8x16_ge_test]: - // ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], - // [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => - // [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); - // test_bop!(i16x8[i16; 8] | ge[i16x8_ge_test]: - // ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => - // [-1, 0, -1, 0 ,-1, 0, -1, -1]); - // test_bop!(i32x4[i32; 4] | ge[i32x4_ge_test]: - // ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]); - // test_bop!(i64x2[i64; 2] | ge[i64x2_ge_test]: ([0, 1], [0, 2]) => [-1, 0]); - // test_bop!(f32x4[f32; 4] => i32 | ge[f32x4_ge_test]: - // ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]); - // test_bop!(f64x2[f64; 2] => i64 | ge[f64x2_ge_test]: ([0., 1.], [0., 2.]) => [-1, 0]); - // - // test_bop!(i8x16[i8; 16] | le[i8x16_le_test]: - // ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15], - // [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - // ) => - // [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); - // test_bop!(i16x8[i16; 8] | le[i16x8_le_test]: - // ([0, 2, 2, 4, 4, 6, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7]) => - // [-1, 0, -1, 0 ,-1, 0, -1, -1]); - // test_bop!(i32x4[i32; 4] | le[i32x4_le_test]: - // ([0, 2, 2, 4], [0, 1, 2, 3]) => [-1, 0, -1, 0]); - // test_bop!(i64x2[i64; 2] | le[i64x2_le_test]: ([0, 2], [0, 1]) => [-1, 0]); - // test_bop!(f32x4[f32; 4] => i32 | le[f32x4_le_test]: - // ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [-1, 0, -1, -0]); - // test_bop!(f64x2[f64; 2] => i64 | le[f64x2_le_test]: ([0., 2.], [0., 1.]) => [-1, 0]); - // - // #[wasm_bindgen_test] - // fn v128_bitwise_load_store() { - // unsafe { - // let mut arr: [i32; 4] = [0, 1, 2, 3]; - // - // let vec = v128::load(arr.as_ptr() as *const v128); - // let vec = i32x4::add(vec, vec); - // v128::store(arr.as_mut_ptr() as *mut v128, vec); - // - // assert_eq!(arr, [0, 2, 4, 6]); - // } - // } - // - // test_uop!(f32x4[f32; 4] | neg[f32x4_neg_test]: [0., 1., 2., 3.] => [ 0., -1., -2., -3.]); - // test_uop!(f32x4[f32; 4] | abs[f32x4_abs_test]: [0., -1., 2., -3.] 
=> [ 0., 1., 2., 3.]); - // test_bop!(f32x4[f32; 4] | min[f32x4_min_test]: - // ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., -3., -4., 8.]); - // test_bop!(f32x4[f32; 4] | min[f32x4_min_test_nan]: - // ([0., -1., 7., 8.], [1., -3., -4., std::f32::NAN]) - // => [0., -3., -4., std::f32::NAN]); - // test_bop!(f32x4[f32; 4] | max[f32x4_max_test]: - // ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -1., 7., 10.]); - // test_bop!(f32x4[f32; 4] | max[f32x4_max_test_nan]: - // ([0., -1., 7., 8.], [1., -3., -4., std::f32::NAN]) - // => [1., -1., 7., std::f32::NAN]); - // test_bop!(f32x4[f32; 4] | add[f32x4_add_test]: - // ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -4., 3., 18.]); - // test_bop!(f32x4[f32; 4] | sub[f32x4_sub_test]: - // ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [-1., 2., 11., -2.]); - // test_bop!(f32x4[f32; 4] | mul[f32x4_mul_test]: - // ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., 3., -28., 80.]); - // test_bop!(f32x4[f32; 4] | div[f32x4_div_test]: - // ([0., -8., 70., 8.], [1., 4., 10., 2.]) => [0., -2., 7., 4.]); - // - // test_uop!(f64x2[f64; 2] | neg[f64x2_neg_test]: [0., 1.] => [ 0., -1.]); - // test_uop!(f64x2[f64; 2] | abs[f64x2_abs_test]: [0., -1.] => [ 0., 1.]); - // test_bop!(f64x2[f64; 2] | min[f64x2_min_test]: - // ([0., -1.], [1., -3.]) => [0., -3.]); - // test_bop!(f64x2[f64; 2] | min[f64x2_min_test_nan]: - // ([7., 8.], [-4., std::f64::NAN]) - // => [ -4., std::f64::NAN]); - // test_bop!(f64x2[f64; 2] | max[f64x2_max_test]: - // ([0., -1.], [1., -3.]) => [1., -1.]); - // test_bop!(f64x2[f64; 2] | max[f64x2_max_test_nan]: - // ([7., 8.], [ -4., std::f64::NAN]) - // => [7., std::f64::NAN]); - // test_bop!(f64x2[f64; 2] | add[f64x2_add_test]: - // ([0., -1.], [1., -3.]) => [1., -4.]); - // test_bop!(f64x2[f64; 2] | sub[f64x2_sub_test]: - // ([0., -1.], [1., -3.]) => [-1., 2.]); - // test_bop!(f64x2[f64; 2] | mul[f64x2_mul_test]: - // ([0., -1.], [1., -3.]) => [0., 3.]); - // test_bop!(f64x2[f64; 2] | div[f64x2_div_test]: - // ([0., -8.], [1., 4.]) => [0., -2.]); - // - // macro_rules! test_conv { - // ($test_id:ident | $conv_id:ident | $to_ty:ident | $from:expr, $to:expr) => { - // #[wasm_bindgen_test] - // fn $test_id() { - // unsafe { - // let from: v128 = transmute($from); - // let to: v128 = transmute($to); - // - // let r: v128 = $to_ty::$conv_id(from); - // - // compare_bytes(r, to); - // } - // } - // }; - // } - // - // test_conv!( - // f32x4_convert_s_i32x4 | convert_s_i32x4 | f32x4 | [1_i32, 2, 3, 4], - // [1_f32, 2., 3., 4.] - // ); - // test_conv!( - // f32x4_convert_u_i32x4 - // | convert_u_i32x4 - // | f32x4 - // | [u32::MAX, 2, 3, 4], - // [u32::MAX as f32, 2., 3., 4.] - // ); - // test_conv!( - // f64x2_convert_s_i64x2 | convert_s_i64x2 | f64x2 | [1_i64, 2], - // [1_f64, 2.] - // ); + #[test] + fn test_v8x16_shuffle() { + unsafe { + let a = [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + let b = [ + 16_u8, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + ]; + + let vec_a: v128 = transmute(a); + let vec_b: v128 = transmute(b); + + let vec_r = v8x16_shuffle::<0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30>( + vec_a, vec_b, + ); + + let e = [0_u8, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]; + let vec_e: v128 = transmute(e); + compare_bytes(vec_r, vec_e); + } + } + + macro_rules! 
floating_point { + (f32) => { + true + }; + (f64) => { + true + }; + ($id:ident) => { + false + }; + } + + trait IsNan: Sized { + fn is_nan(self) -> bool { + false + } + } + impl IsNan for i8 {} + impl IsNan for i16 {} + impl IsNan for i32 {} + impl IsNan for i64 {} + + macro_rules! test_bop { + ($id:ident[$ety:ident; $ecount:expr] | + $binary_op:ident [$op_test_id:ident] : + ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => { + test_bop!( + $id[$ety; $ecount] => $ety | $binary_op [ $op_test_id ]: + ([$($in_a),*], [$($in_b),*]) => [$($out),*] + ); + + }; + ($id:ident[$ety:ident; $ecount:expr] => $oty:ident | + $binary_op:ident [$op_test_id:ident] : + ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => { + #[test] + fn $op_test_id() { + unsafe { + let a_input: [$ety; $ecount] = [$($in_a),*]; + let b_input: [$ety; $ecount] = [$($in_b),*]; + let output: [$oty; $ecount] = [$($out),*]; + + let a_vec_in: v128 = transmute(a_input); + let b_vec_in: v128 = transmute(b_input); + let vec_res: v128 = $binary_op(a_vec_in, b_vec_in); + + let res: [$oty; $ecount] = transmute(vec_res); + + if !floating_point!($ety) { + assert_eq!(res, output); + } else { + for i in 0..$ecount { + let r = res[i]; + let o = output[i]; + assert_eq!(r.is_nan(), o.is_nan()); + if !r.is_nan() { + assert_eq!(r, o); + } + } + } + } + } + } + } + + macro_rules! test_bops { + ($id:ident[$ety:ident; $ecount:expr] | + $binary_op:ident [$op_test_id:ident]: + ([$($in_a:expr),*], $in_b:expr) => [$($out:expr),*]) => { + #[test] + fn $op_test_id() { + unsafe { + let a_input: [$ety; $ecount] = [$($in_a),*]; + let output: [$ety; $ecount] = [$($out),*]; + + let a_vec_in: v128 = transmute(a_input); + let vec_res: v128 = $binary_op(a_vec_in, $in_b); + + let res: [$ety; $ecount] = transmute(vec_res); + assert_eq!(res, output); + } + } + } + } + + macro_rules! 
test_uop { + ($id:ident[$ety:ident; $ecount:expr] | + $unary_op:ident [$op_test_id:ident]: [$($in_a:expr),*] => [$($out:expr),*]) => { + #[test] + fn $op_test_id() { + unsafe { + let a_input: [$ety; $ecount] = [$($in_a),*]; + let output: [$ety; $ecount] = [$($out),*]; + + let a_vec_in: v128 = transmute(a_input); + let vec_res: v128 = $unary_op(a_vec_in); + + let res: [$ety; $ecount] = transmute(vec_res); + assert_eq!(res, output); + } + } + } + } + + test_bops!(i8x16[i8; 16] | i8x16_shl[i8x16_shl_test]: + ([0, -1, 2, 3, 4, 5, 6, i8::MAX, 1, 1, 1, 1, 1, 1, 1, 1], 1) => + [0, -2, 4, 6, 8, 10, 12, -2, 2, 2, 2, 2, 2, 2, 2, 2]); + test_bops!(i16x8[i16; 8] | i16x8_shl[i16x8_shl_test]: + ([0, -1, 2, 3, 4, 5, 6, i16::MAX], 1) => + [0, -2, 4, 6, 8, 10, 12, -2]); + test_bops!(i32x4[i32; 4] | i32x4_shl[i32x4_shl_test]: + ([0, -1, 2, 3], 1) => [0, -2, 4, 6]); + test_bops!(i64x2[i64; 2] | i64x2_shl[i64x2_shl_test]: + ([0, -1], 1) => [0, -2]); + + test_bops!(i8x16[i8; 16] | i8x16_shr_s[i8x16_shr_s_test]: + ([0, -1, 2, 3, 4, 5, 6, i8::MAX, 1, 1, 1, 1, 1, 1, 1, 1], 1) => + [0, -1, 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]); + test_bops!(i16x8[i16; 8] | i16x8_shr_s[i16x8_shr_s_test]: + ([0, -1, 2, 3, 4, 5, 6, i16::MAX], 1) => + [0, -1, 1, 1, 2, 2, 3, i16::MAX / 2]); + test_bops!(i32x4[i32; 4] | i32x4_shr_s[i32x4_shr_s_test]: + ([0, -1, 2, 3], 1) => [0, -1, 1, 1]); + test_bops!(i64x2[i64; 2] | i64x2_shr_s[i64x2_shr_s_test]: + ([0, -1], 1) => [0, -1]); + + test_bops!(i8x16[i8; 16] | i8x16_shr_u[i8x16_uhr_u_test]: + ([0, -1, 2, 3, 4, 5, 6, i8::MAX, 1, 1, 1, 1, 1, 1, 1, 1], 1) => + [0, i8::MAX, 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]); + test_bops!(i16x8[i16; 8] | i16x8_shr_u[i16x8_uhr_u_test]: + ([0, -1, 2, 3, 4, 5, 6, i16::MAX], 1) => + [0, i16::MAX, 1, 1, 2, 2, 3, i16::MAX / 2]); + test_bops!(i32x4[i32; 4] | i32x4_shr_u[i32x4_uhr_u_test]: + ([0, -1, 2, 3], 1) => [0, i32::MAX, 1, 1]); + test_bops!(i64x2[i64; 2] | i64x2_shr_u[i64x2_uhr_u_test]: + ([0, -1], 1) => [0, i64::MAX]); + + #[test] + fn v128_bitwise_logical_ops() { + unsafe { + let a: [u32; 4] = [u32::MAX, 0, u32::MAX, 0]; + let b: [u32; 4] = [u32::MAX; 4]; + let c: [u32; 4] = [0; 4]; + + let vec_a: v128 = transmute(a); + let vec_b: v128 = transmute(b); + let vec_c: v128 = transmute(c); + + let r: v128 = v128_and(vec_a, vec_a); + compare_bytes(r, vec_a); + let r: v128 = v128_and(vec_a, vec_b); + compare_bytes(r, vec_a); + let r: v128 = v128_or(vec_a, vec_b); + compare_bytes(r, vec_b); + let r: v128 = v128_not(vec_b); + compare_bytes(r, vec_c); + let r: v128 = v128_xor(vec_a, vec_c); + compare_bytes(r, vec_a); + + let r: v128 = v128_bitselect(vec_b, vec_c, vec_b); + compare_bytes(r, vec_b); + let r: v128 = v128_bitselect(vec_b, vec_c, vec_c); + compare_bytes(r, vec_c); + let r: v128 = v128_bitselect(vec_b, vec_c, vec_a); + compare_bytes(r, vec_a); + } + } + + macro_rules! 
test_bool_red { + ([$test_id:ident, $any:ident, $all:ident] | [$($true:expr),*] | [$($false:expr),*] | [$($alt:expr),*]) => { + #[test] + fn $test_id() { + unsafe { + let vec_a: v128 = transmute([$($true),*]); // true + let vec_b: v128 = transmute([$($false),*]); // false + let vec_c: v128 = transmute([$($alt),*]); // alternating + + assert_eq!($any(vec_a), 1); + assert_eq!($any(vec_b), 0); + assert_eq!($any(vec_c), 1); + + assert_eq!($all(vec_a), 1); + assert_eq!($all(vec_b), 0); + assert_eq!($all(vec_c), 0); + } + } + } + } + + test_bool_red!( + [i8x16_boolean_reductions, i8x16_any_true, i8x16_all_true] + | [1_i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + | [0_i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + | [1_i8, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] + ); + test_bool_red!( + [i16x8_boolean_reductions, i16x8_any_true, i16x8_all_true] + | [1_i16, 1, 1, 1, 1, 1, 1, 1] + | [0_i16, 0, 0, 0, 0, 0, 0, 0] + | [1_i16, 0, 1, 0, 1, 0, 1, 0] + ); + test_bool_red!( + [i32x4_boolean_reductions, i32x4_any_true, i32x4_all_true] + | [1_i32, 1, 1, 1] + | [0_i32, 0, 0, 0] + | [1_i32, 0, 1, 0] + ); + + test_bop!(i8x16[i8; 16] | i8x16_eq[i8x16_eq_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i16x8[i16; 8] | i16x8_eq[i16x8_eq_test]: + ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i32x4[i32; 4] | i32x4_eq[i32x4_eq_test]: + ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]); + test_bop!(f32x4[f32; 4] => i32 | f32x4_eq[f32x4_eq_test]: + ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]); + test_bop!(f64x2[f64; 2] => i64 | f64x2_eq[f64x2_eq_test]: ([0., 1.], [0., 2.]) => [-1, 0]); + + test_bop!(i8x16[i8; 16] | i8x16_ne[i8x16_ne_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i16x8[i16; 8] | i16x8_ne[i16x8_ne_test]: + ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i32x4[i32; 4] | i32x4_ne[i32x4_ne_test]: + ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]); + test_bop!(f32x4[f32; 4] => i32 | f32x4_ne[f32x4_ne_test]: + ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]); + test_bop!(f64x2[f64; 2] => i64 | f64x2_ne[f64x2_ne_test]: ([0., 1.], [0., 2.]) => [0, -1]); + + test_bop!(i8x16[i8; 16] | i8x16_lt_s[i8x16_lt_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i16x8[i16; 8] | i16x8_lt_s[i16x8_lt_test]: + ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i32x4[i32; 4] | i32x4_lt_s[i32x4_lt_test]: + ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]); + test_bop!(f32x4[f32; 4] => i32 | f32x4_lt[f32x4_lt_test]: + ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]); + test_bop!(f64x2[f64; 2] => i64 | f64x2_lt[f64x2_lt_test]: ([0., 1.], [0., 2.]) => [0, -1]); + + test_bop!(i8x16[i8; 16] | i8x16_gt_s[i8x16_gt_test]: + ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) => + [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i16x8[i16; 8] | i16x8_gt_s[i16x8_gt_test]: + ([0, 2, 2, 4, 4, 6, 6, 7], [0, 1, 2, 3, 4, 5, 
6, 7]) => + [0, -1, 0, -1 ,0, -1, 0, 0]); + test_bop!(i32x4[i32; 4] | i32x4_gt_s[i32x4_gt_test]: + ([0, 2, 2, 4], [0, 1, 2, 3]) => [0, -1, 0, -1]); + test_bop!(f32x4[f32; 4] => i32 | f32x4_gt[f32x4_gt_test]: + ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [0, -1, 0, -1]); + test_bop!(f64x2[f64; 2] => i64 | f64x2_gt[f64x2_gt_test]: ([0., 2.], [0., 1.]) => [0, -1]); + + test_bop!(i8x16[i8; 16] | i8x16_ge_s[i8x16_ge_test]: + ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i16x8[i16; 8] | i16x8_ge_s[i16x8_ge_test]: + ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i32x4[i32; 4] | i32x4_ge_s[i32x4_ge_test]: + ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]); + test_bop!(f32x4[f32; 4] => i32 | f32x4_ge[f32x4_ge_test]: + ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]); + test_bop!(f64x2[f64; 2] => i64 | f64x2_ge[f64x2_ge_test]: ([0., 1.], [0., 2.]) => [-1, 0]); + + test_bop!(i8x16[i8; 16] | i8x16_le_s[i8x16_le_test]: + ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15], + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + ) => + [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i16x8[i16; 8] | i16x8_le_s[i16x8_le_test]: + ([0, 2, 2, 4, 4, 6, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7]) => + [-1, 0, -1, 0 ,-1, 0, -1, -1]); + test_bop!(i32x4[i32; 4] | i32x4_le_s[i32x4_le_test]: + ([0, 2, 2, 4], [0, 1, 2, 3]) => [-1, 0, -1, 0]); + test_bop!(f32x4[f32; 4] => i32 | f32x4_le[f32x4_le_test]: + ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [-1, 0, -1, -0]); + test_bop!(f64x2[f64; 2] => i64 | f64x2_le[f64x2_le_test]: ([0., 2.], [0., 1.]) => [-1, 0]); + + #[test] + fn v128_bitwise_load_store() { + unsafe { + let mut arr: [i32; 4] = [0, 1, 2, 3]; + + let vec = v128_load(arr.as_ptr() as *const v128); + let vec = i32x4_add(vec, vec); + v128_store(arr.as_mut_ptr() as *mut v128, vec); + + assert_eq!(arr, [0, 2, 4, 6]); + } + } + + test_uop!(f32x4[f32; 4] | f32x4_neg[f32x4_neg_test]: [0., 1., 2., 3.] => [ 0., -1., -2., -3.]); + test_uop!(f32x4[f32; 4] | f32x4_abs[f32x4_abs_test]: [0., -1., 2., -3.] => [ 0., 1., 2., 3.]); + test_bop!(f32x4[f32; 4] | f32x4_min[f32x4_min_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., -3., -4., 8.]); + test_bop!(f32x4[f32; 4] | f32x4_min[f32x4_min_test_nan]: + ([0., -1., 7., 8.], [1., -3., -4., std::f32::NAN]) + => [0., -3., -4., std::f32::NAN]); + test_bop!(f32x4[f32; 4] | f32x4_max[f32x4_max_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -1., 7., 10.]); + test_bop!(f32x4[f32; 4] | f32x4_max[f32x4_max_test_nan]: + ([0., -1., 7., 8.], [1., -3., -4., std::f32::NAN]) + => [1., -1., 7., std::f32::NAN]); + test_bop!(f32x4[f32; 4] | f32x4_add[f32x4_add_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -4., 3., 18.]); + test_bop!(f32x4[f32; 4] | f32x4_sub[f32x4_sub_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [-1., 2., 11., -2.]); + test_bop!(f32x4[f32; 4] | f32x4_mul[f32x4_mul_test]: + ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., 3., -28., 80.]); + test_bop!(f32x4[f32; 4] | f32x4_div[f32x4_div_test]: + ([0., -8., 70., 8.], [1., 4., 10., 2.]) => [0., -2., 7., 4.]); + + test_uop!(f64x2[f64; 2] | f64x2_neg[f64x2_neg_test]: [0., 1.] => [ 0., -1.]); + test_uop!(f64x2[f64; 2] | f64x2_abs[f64x2_abs_test]: [0., -1.] 
=> [ 0., 1.]); + test_bop!(f64x2[f64; 2] | f64x2_min[f64x2_min_test]: + ([0., -1.], [1., -3.]) => [0., -3.]); + test_bop!(f64x2[f64; 2] | f64x2_min[f64x2_min_test_nan]: + ([7., 8.], [-4., std::f64::NAN]) + => [ -4., std::f64::NAN]); + test_bop!(f64x2[f64; 2] | f64x2_max[f64x2_max_test]: + ([0., -1.], [1., -3.]) => [1., -1.]); + test_bop!(f64x2[f64; 2] | f64x2_max[f64x2_max_test_nan]: + ([7., 8.], [ -4., std::f64::NAN]) + => [7., std::f64::NAN]); + test_bop!(f64x2[f64; 2] | f64x2_add[f64x2_add_test]: + ([0., -1.], [1., -3.]) => [1., -4.]); + test_bop!(f64x2[f64; 2] | f64x2_sub[f64x2_sub_test]: + ([0., -1.], [1., -3.]) => [-1., 2.]); + test_bop!(f64x2[f64; 2] | f64x2_mul[f64x2_mul_test]: + ([0., -1.], [1., -3.]) => [0., 3.]); + test_bop!(f64x2[f64; 2] | f64x2_div[f64x2_div_test]: + ([0., -8.], [1., 4.]) => [0., -2.]); + + macro_rules! test_conv { + ($test_id:ident | $conv_id:ident | $to_ty:ident | $from:expr, $to:expr) => { + #[test] + fn $test_id() { + unsafe { + let from: v128 = transmute($from); + let to: v128 = transmute($to); + + let r: v128 = $conv_id(from); + + compare_bytes(r, to); + } + } + }; + } + + test_conv!( + f32x4_convert_s_i32x4 | f32x4_convert_i32x4_s | f32x4 | [1_i32, 2, 3, 4], + [1_f32, 2., 3., 4.] + ); + test_conv!( + f32x4_convert_u_i32x4 | f32x4_convert_i32x4_u | f32x4 | [u32::MAX, 2, 3, 4], + [u32::MAX as f32, 2., 3., 4.] + ); + + // FIXME: this fails, and produces 0 instead of saturating at i32::MAX // test_conv!( - // f64x2_convert_u_i64x2 - // | convert_u_i64x2 - // | f64x2 - // | [u64::MAX, 2], - // [18446744073709552000.0, 2.] + // i32x4_trunc_s_f32x4_sat + // | i32x4_trunc_sat_f32x4_s + // | i32x4 + // | [f32::NAN, 2., (i32::MAX as f32 + 1.), 4.], + // [0, 2, i32::MAX, 4] // ); - // - // // FIXME: this fails, and produces -2147483648 instead of saturating at - // // i32::MAX test_conv!(i32x4_trunc_s_f32x4_sat | trunc_s_f32x4_sat - // // | i32x4 | [1_f32, 2., (i32::MAX as f32 + 1.), 4.], - // // [1_i32, 2, i32::MAX, 4]); FIXME: add other saturating tests + // FIXME: add other saturating tests } diff --git a/crates/std_detect/src/detect/mod.rs b/crates/std_detect/src/detect/mod.rs index 77d1f7c506..c44f44c1b3 100644 --- a/crates/std_detect/src/detect/mod.rs +++ b/crates/std_detect/src/detect/mod.rs @@ -56,6 +56,7 @@ cfg_if! { mod arch; } else { // Unimplemented architecture: + #[allow(dead_code)] mod arch { #[doc(hidden)] pub(crate) enum Feature { @@ -117,6 +118,7 @@ cfg_if! { /// Performs run-time feature detection. #[inline] +#[allow(dead_code)] fn check_for(x: Feature) -> bool { cache::test(x as u32, self::os::detect_features) } diff --git a/crates/std_detect/src/detect/os/other.rs b/crates/std_detect/src/detect/os/other.rs index bf7be87f07..091fafc4eb 100644 --- a/crates/std_detect/src/detect/os/other.rs +++ b/crates/std_detect/src/detect/os/other.rs @@ -2,6 +2,7 @@ use crate::detect::cache; +#[allow(dead_code)] pub(crate) fn detect_features() -> cache::Initializer { cache::Initializer::default() } diff --git a/crates/stdarch-test/Cargo.toml b/crates/stdarch-test/Cargo.toml index 2b445f8dc5..9eb6b64d16 100644 --- a/crates/stdarch-test/Cargo.toml +++ b/crates/stdarch-test/Cargo.toml @@ -11,10 +11,13 @@ lazy_static = "1.0" rustc-demangle = "0.1.8" cfg-if = "0.1" -[target.wasm32-unknown-unknown.dependencies] -wasm-bindgen = "0.2.47" -js-sys = "0.3" -console_error_panic_hook = "0.1" +# We use a crates.io dependency to disassemble wasm binaries to look for +# instructions for `#[assert_instr]`. 
Note that we use an `=` dependency here +# instead of a floating dependency because the text format for wasm changes over +# time, and we want to make updates to this explicit rather than automatically +# picking up updates which might break CI with new instruction names. +[target.'cfg(target_arch = "wasm32")'.dependencies] +wasmprinter = "=0.2.6" [features] default = [] diff --git a/crates/stdarch-test/src/lib.rs b/crates/stdarch-test/src/lib.rs index fa73a7bba6..c66b6a8d9d 100644 --- a/crates/stdarch-test/src/lib.rs +++ b/crates/stdarch-test/src/lib.rs @@ -3,7 +3,6 @@ //! This basically just disassembles the current executable and then parses the //! output once globally and then provides the `assert` function which makes //! assertions about the disassembly of a function. -#![feature(const_transmute)] #![feature(vec_leak)] #![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)] @@ -20,19 +19,8 @@ pub use assert_instr_macro::*; pub use simd_test_macro::*; use std::{cmp, collections::HashSet, env, hash, str, sync::atomic::AtomicPtr}; -// `println!` doesn't work on wasm32 right now, so shadow the compiler's `println!` -// macro with our own shim that redirects to `console.log`. -#[allow(unused)] -#[cfg(target_arch = "wasm32")] -#[macro_export] -macro_rules! println { - ($($args:tt)*) => (crate::wasm::js_console_log(&format!($($args)*))) -} - cfg_if! { if #[cfg(target_arch = "wasm32")] { - extern crate wasm_bindgen; - extern crate console_error_panic_hook; pub mod wasm; use wasm::disassemble_myself; } else { diff --git a/crates/stdarch-test/src/wasm.rs b/crates/stdarch-test/src/wasm.rs index 612ff10d90..bf411c1214 100644 --- a/crates/stdarch-test/src/wasm.rs +++ b/crates/stdarch-test/src/wasm.rs @@ -1,49 +1,17 @@ //! Disassembly calling function for `wasm32` targets. -use wasm_bindgen::prelude::*; use crate::Function; use std::collections::HashSet; -#[wasm_bindgen(module = "child_process")] -extern "C" { - #[wasm_bindgen(js_name = execFileSync)] - fn exec_file_sync(cmd: &str, args: &js_sys::Array, opts: &js_sys::Object) -> Buffer; -} - -#[wasm_bindgen(module = "buffer")] -extern "C" { - type Buffer; - #[wasm_bindgen(method, js_name = toString)] - fn to_string(this: &Buffer) -> String; -} - -#[wasm_bindgen] -extern "C" { - #[wasm_bindgen(js_namespace = require)] - fn resolve(module: &str) -> String; - #[wasm_bindgen(js_namespace = console, js_name = log)] - pub fn js_console_log(s: &str); -} - pub(crate) fn disassemble_myself() -> HashSet { - use std::path::Path; - ::console_error_panic_hook::set_once(); - // Our wasm module in the wasm-bindgen test harness is called - // "wasm-bindgen-test_bg". When running in node this is actually a shim JS - // file. Ask node where that JS file is, and then we use that with a wasm - // extension to find the wasm file itself. - let js_shim = resolve("wasm-bindgen-test"); - let js_shim = Path::new(&js_shim).with_file_name("wasm-bindgen-test_bg.wasm"); - - // Execute `wasm2wat` synchronously, waiting for and capturing all of its - // output. Note that we pass in a custom `maxBuffer` parameter because we're - // generating a ton of output that needs to be buffered. - let args = js_sys::Array::new(); - args.push(&js_shim.display().to_string().into()); - args.push(&"--enable-simd".into()); - let opts = js_sys::Object::new(); - js_sys::Reflect::set(&opts, &"maxBuffer".into(), &(200 * 1024 * 1024).into()).unwrap(); - let output = exec_file_sync("wasm2wat", &args, &opts).to_string(); + // Use `std::env::args` to find the path to our executable. 
Assume the + // environment is configured such that we can read that file. Read it and + // use the `wasmprinter` crate to transform the binary to text, then search + // the text for appropriately named functions. + let me = std::env::args() + .next() + .expect("failed to find current wasm file"); + let output = wasmprinter::print_file(&me).unwrap(); let mut ret: HashSet = HashSet::new(); let mut lines = output.lines().map(|s| s.trim()); diff --git a/examples/Cargo.toml b/examples/Cargo.toml index 6f00d46230..72599b4182 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -7,16 +7,14 @@ authors = [ "Gonzalo Brito Gadeschi ", ] description = "Examples of the stdarch crate." +edition = "2018" [dependencies] core_arch = { path = "../crates/core_arch" } std_detect = { path = "../crates/std_detect" } -quickcheck = "0.8" +quickcheck = "0.9" rand = "0.7" -[target.'cfg(target_arch = "wasm32")'.dependencies] -rand = { version = "0.6", features = ["wasm-bindgen"] } - [[bin]] name = "hex" path = "hex.rs" diff --git a/examples/hex.rs b/examples/hex.rs index b3d6fb0786..43826989b5 100644 --- a/examples/hex.rs +++ b/examples/hex.rs @@ -12,8 +12,9 @@ //! //! and you should see `746573740a` get printed out. -#![feature(stdsimd)] +#![feature(stdsimd, wasm_target_feature)] #![cfg_attr(test, feature(test))] +#![cfg_attr(target_arch = "wasm32", feature(wasm_simd))] #![allow( clippy::result_unwrap_used, clippy::print_stdout, @@ -25,25 +26,15 @@ clippy::missing_docs_in_private_items )] -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -#[macro_use(is_x86_feature_detected)] -extern crate std_detect; - -extern crate core_arch; - -#[cfg(test)] -#[macro_use] -extern crate quickcheck; - use std::{ io::{self, Read}, str, }; #[cfg(target_arch = "x86")] -use core_arch::x86::*; +use {core_arch::arch::x86::*, std_detect::is_x86_feature_detected}; #[cfg(target_arch = "x86_64")] -use core_arch::x86_64::*; +use {core_arch::arch::x86_64::*, std_detect::is_x86_feature_detected}; fn main() { let mut input = Vec::new(); @@ -68,6 +59,12 @@ fn hex_encode<'a>(src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> { return unsafe { hex_encode_sse41(src, dst) }; } } + #[cfg(target_arch = "wasm32")] + { + if true { + return unsafe { hex_encode_simd128(src, dst) }; + } + } hex_encode_fallback(src, dst) } @@ -157,6 +154,54 @@ unsafe fn hex_encode_sse41<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2])) } +#[cfg(target_arch = "wasm32")] +#[target_feature(enable = "simd128")] +unsafe fn hex_encode_simd128<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> { + use core_arch::arch::wasm32::*; + + let ascii_zero = i8x16_splat(b'0' as i8); + let nines = i8x16_splat(9); + let ascii_a = i8x16_splat((b'a' - 9 - 1) as i8); + let and4bits = i8x16_splat(0xf); + + let mut i = 0_isize; + while src.len() >= 16 { + let invec = v128_load(src.as_ptr() as *const _); + + let masked1 = v128_and(invec, and4bits); + let masked2 = v128_and(i8x16_shr_u(invec, 4), and4bits); + + // return 0xff corresponding to the elements > 9, or 0x00 otherwise + let cmpmask1 = i8x16_gt_u(masked1, nines); + let cmpmask2 = i8x16_gt_u(masked2, nines); + + // add '0' or the offset depending on the masks + let masked1 = i8x16_add(masked1, v128_bitselect(ascii_a, ascii_zero, cmpmask1)); + let masked2 = i8x16_add(masked2, v128_bitselect(ascii_a, ascii_zero, cmpmask2)); + + // Next we need to shuffle around masked{1,2} to get back to the + // original source text order. 
The first element (res1) we'll store uses + // all the low bytes from the 2 masks and the second element (res2) uses + // all the upper bytes. + let res1 = v8x16_shuffle::<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23>( + masked2, masked1, + ); + let res2 = v8x16_shuffle::<8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31>( + masked2, masked1, + ); + + v128_store(dst.as_mut_ptr().offset(i * 2) as *mut _, res1); + v128_store(dst.as_mut_ptr().offset(i * 2 + 16) as *mut _, res2); + src = &src[16..]; + i += 16; + } + + let i = i as usize; + let _ = hex_encode_fallback(src, &mut dst[i * 2..]); + + Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2])) +} + fn hex_encode_fallback<'a>(src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> { fn hex(byte: u8) -> u8 { static TABLE: &[u8] = b"0123456789abcdef"; @@ -186,10 +231,10 @@ mod tests { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] unsafe { - if is_x86_feature_detected!("avx2") { + if self::is_x86_feature_detected!("avx2") { assert_eq!(hex_encode_avx2(input, &mut tmp()).unwrap(), output); } - if is_x86_feature_detected!("sse4.1") { + if self::is_x86_feature_detected!("sse4.1") { assert_eq!(hex_encode_sse41(input, &mut tmp()).unwrap(), output); } } @@ -236,7 +281,7 @@ mod tests { ); } - quickcheck! { + quickcheck::quickcheck! { fn encode_equals_fallback(input: Vec) -> bool { let mut space1 = vec![0; input.len() * 2]; let mut space2 = vec![0; input.len() * 2]; @@ -247,7 +292,7 @@ mod tests { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] fn avx_equals_fallback(input: Vec) -> bool { - if !is_x86_feature_detected!("avx2") { + if !self::is_x86_feature_detected!("avx2") { return true } let mut space1 = vec![0; input.len() * 2]; @@ -259,7 +304,7 @@ mod tests { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] fn sse41_equals_fallback(input: Vec) -> bool { - if !is_x86_feature_detected!("avx2") { + if !self::is_x86_feature_detected!("avx2") { return true } let mut space1 = vec![0; input.len() * 2]; @@ -328,28 +373,28 @@ mod benches { #[bench] fn small_avx2(b: &mut test::Bencher) { - if is_x86_feature_detected!("avx2") { + if self::is_x86_feature_detected!("avx2") { doit(b, SMALL_LEN, hex_encode_avx2); } } #[bench] fn small_sse41(b: &mut test::Bencher) { - if is_x86_feature_detected!("sse4.1") { + if self::is_x86_feature_detected!("sse4.1") { doit(b, SMALL_LEN, hex_encode_sse41); } } #[bench] fn large_avx2(b: &mut test::Bencher) { - if is_x86_feature_detected!("avx2") { + if self::is_x86_feature_detected!("avx2") { doit(b, LARGE_LEN, hex_encode_avx2); } } #[bench] fn large_sse41(b: &mut test::Bencher) { - if is_x86_feature_detected!("sse4.1") { + if self::is_x86_feature_detected!("sse4.1") { doit(b, LARGE_LEN, hex_encode_sse41); } } diff --git a/examples/wasm.rs b/examples/wasm.rs index 53f9c55d4e..6b92ae9b87 100644 --- a/examples/wasm.rs +++ b/examples/wasm.rs @@ -3,11 +3,9 @@ #![feature(stdsimd)] #![cfg(target_arch = "wasm32")] -extern crate core_arch; - use std::ptr; -use core_arch::wasm32::*; +use core_arch::arch::wasm32::*; static mut HEAD: *mut *mut u8 = 0 as _;