Skip to content

Commit

Permalink
implement floor and ceil for f64 and f32 types (#183)
Browse files Browse the repository at this point in the history
* implement floor and ceil

* implement floor and ceil for neon

* implement floor and ceil for no std

* fix typo

* include std implementation

* fix workflow for wasm targets

* check for wasip1
  • Loading branch information
kralverde authored Dec 3, 2024
1 parent 3752fd0 commit a9b883f
Show file tree
Hide file tree
Showing 9 changed files with 237 additions and 3 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ jobs:
# wasm32
- { target: wasm32-wasi, toolchain: "1.61", os: ubuntu-latest, wasmtime: v5.0.0 }
- { target: wasm32-wasi, toolchain: stable, os: ubuntu-latest, wasmtime: v5.0.0 }
- { target: wasm32-wasi, toolchain: beta, os: ubuntu-latest, wasmtime: v5.0.0 }
- { target: wasm32-wasi, toolchain: nightly, os: ubuntu-latest, wasmtime: v5.0.0 }
- { target: wasm32-wasip1, toolchain: beta, os: ubuntu-latest, wasmtime: v5.0.0 }
- { target: wasm32-wasip1, toolchain: nightly, os: ubuntu-latest, wasmtime: v5.0.0 }
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@master
Expand All @@ -43,7 +43,7 @@ jobs:
target: ${{ matrix.rust.target }}

- name: Install wasmtime
if: matrix.rust.target == 'wasm32-wasi'
if: matrix.rust.target == 'wasm32-wasi' || matrix.rust.target == 'wasm32-wasip1'
run: |
curl https://wasmtime.dev/install.sh -sSf | bash -s -- --version ${{ matrix.rust.wasmtime }}
echo "$HOME/.wasmtime/bin" >> $GITHUB_PATH
Expand Down
50 changes: 50 additions & 0 deletions src/f32x4_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,56 @@ impl f32x4 {
}
}
}
/// Rounds every lane down to the nearest whole number (toward negative
/// infinity).
#[inline]
#[must_use]
pub fn floor(self) -> Self {
  pick! {
    if #[cfg(target_feature="simd128")] {
      let simd = f32x4_floor(self.simd);
      Self { simd }
    } else if #[cfg(target_feature="sse4.1")] {
      let sse = floor_m128(self.sse);
      Self { sse }
    } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
      // SAFETY: this branch is only compiled when the `neon` target feature
      // is enabled on aarch64, as required by the cfg guard above.
      let neon = unsafe { vrndmq_f32(self.neon) };
      Self { neon }
    } else if #[cfg(feature="std")] {
      // Scalar fallback using the standard library's `f32::floor`.
      let [l0, l1, l2, l3]: [f32; 4] = cast(self);
      cast([l0.floor(), l1.floor(), l2.floor(), l3.floor()])
    } else {
      // Without `std`: start from `round()` and step down by one wherever
      // rounding landed above the original lane value.
      let lanes: [f32; 4] = cast(self);
      let mut stepped: [f32; 4] = cast(self.round());
      for (out, lane) in stepped.iter_mut().zip(lanes.iter()) {
        if *lane < *out {
          *out -= 1.0;
        }
      }
      cast(stepped)
    }
  }
}
/// Rounds every lane up to the nearest whole number (toward positive
/// infinity).
#[inline]
#[must_use]
pub fn ceil(self) -> Self {
  pick! {
    if #[cfg(target_feature="simd128")] {
      let simd = f32x4_ceil(self.simd);
      Self { simd }
    } else if #[cfg(target_feature="sse4.1")] {
      let sse = ceil_m128(self.sse);
      Self { sse }
    } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
      // SAFETY: this branch is only compiled when the `neon` target feature
      // is enabled on aarch64, as required by the cfg guard above.
      let neon = unsafe { vrndpq_f32(self.neon) };
      Self { neon }
    } else if #[cfg(feature="std")] {
      // Scalar fallback using the standard library's `f32::ceil`.
      let [l0, l1, l2, l3]: [f32; 4] = cast(self);
      cast([l0.ceil(), l1.ceil(), l2.ceil(), l3.ceil()])
    } else {
      // Without `std`: start from `round()` and step up by one wherever
      // rounding landed below the original lane value.
      let lanes: [f32; 4] = cast(self);
      let mut stepped: [f32; 4] = cast(self.round());
      for (out, lane) in stepped.iter_mut().zip(lanes.iter()) {
        if *lane > *out {
          *out += 1.0;
        }
      }
      cast(stepped)
    }
  }
}

/// Calculates the lanewise maximum of both vectors. This is a faster
/// implementation than `max`, but it doesn't specify any behavior if NaNs are
Expand Down
28 changes: 28 additions & 0 deletions src/f32x8_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,34 @@ impl f32x8 {
}
}
}
/// Rounds every lane down to the nearest whole number (toward negative
/// infinity).
///
/// With AVX the whole vector is handled by a single call; otherwise the
/// two halves `a` and `b` are floored independently.
#[inline]
#[must_use]
pub fn floor(self) -> Self {
  pick! {
    if #[cfg(target_feature="avx")] {
      let avx = floor_m256(self.avx);
      Self { avx }
    } else {
      let (a, b) = (self.a.floor(), self.b.floor());
      Self { a, b }
    }
  }
}
/// Rounds every lane up to the nearest whole number (toward positive
/// infinity).
///
/// With AVX the whole vector is handled by a single call; otherwise the
/// two halves `a` and `b` are rounded up independently.
#[inline]
#[must_use]
pub fn ceil(self) -> Self {
  pick! {
    if #[cfg(target_feature="avx")] {
      let avx = ceil_m256(self.avx);
      Self { avx }
    } else {
      let (a, b) = (self.a.ceil(), self.b.ceil());
      Self { a, b }
    }
  }
}

/// Calculates the lanewise maximum of both vectors. This is a faster
/// implementation than `max`, but it doesn't specify any behavior if NaNs are
Expand Down
46 changes: 46 additions & 0 deletions src/f64x2_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,52 @@ impl f64x2 {
}
}
}
/// Rounds both lanes down to the nearest whole number (toward negative
/// infinity).
#[inline]
#[must_use]
pub fn floor(self) -> Self {
  pick! {
    if #[cfg(target_feature="simd128")] {
      let simd = f64x2_floor(self.simd);
      Self { simd }
    } else if #[cfg(target_feature="sse4.1")] {
      let sse = floor_m128d(self.sse);
      Self { sse }
    } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
      // SAFETY: this branch is only compiled when the `neon` target feature
      // is enabled on aarch64, as required by the cfg guard above.
      let neon = unsafe { vrndmq_f64(self.neon) };
      Self { neon }
    } else if #[cfg(feature="std")] {
      // Scalar fallback using the standard library's `f64::floor`.
      let [l0, l1]: [f64; 2] = cast(self);
      cast([l0.floor(), l1.floor()])
    } else {
      // Without `std`: start from `round()` and step down by one wherever
      // rounding landed above the original lane value.
      let lanes: [f64; 2] = cast(self);
      let mut stepped: [f64; 2] = cast(self.round());
      for (out, lane) in stepped.iter_mut().zip(lanes.iter()) {
        if *lane < *out {
          *out -= 1.0;
        }
      }
      cast(stepped)
    }
  }
}
/// Rounds both lanes up to the nearest whole number (toward positive
/// infinity).
#[inline]
#[must_use]
pub fn ceil(self) -> Self {
  pick! {
    if #[cfg(target_feature="simd128")] {
      let simd = f64x2_ceil(self.simd);
      Self { simd }
    } else if #[cfg(target_feature="sse4.1")] {
      let sse = ceil_m128d(self.sse);
      Self { sse }
    } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
      // SAFETY: this branch is only compiled when the `neon` target feature
      // is enabled on aarch64, as required by the cfg guard above.
      let neon = unsafe { vrndpq_f64(self.neon) };
      Self { neon }
    } else if #[cfg(feature="std")] {
      // Scalar fallback using the standard library's `f64::ceil`.
      let [l0, l1]: [f64; 2] = cast(self);
      cast([l0.ceil(), l1.ceil()])
    } else {
      // Without `std`: start from `round()` and step up by one wherever
      // rounding landed below the original lane value.
      let lanes: [f64; 2] = cast(self);
      let mut stepped: [f64; 2] = cast(self.round());
      for (out, lane) in stepped.iter_mut().zip(lanes.iter()) {
        if *lane > *out {
          *out += 1.0;
        }
      }
      cast(stepped)
    }
  }
}

/// Calculates the lanewise maximum of both vectors. This is a faster
/// implementation than `max`, but it doesn't specify any behavior if NaNs are
Expand Down
29 changes: 29 additions & 0 deletions src/f64x4_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,35 @@ impl f64x4 {
}
}

/// Rounds every lane down to the nearest whole number (toward negative
/// infinity).
///
/// With AVX the whole vector is handled by a single call; otherwise the
/// two halves `a` and `b` are floored independently.
#[inline]
#[must_use]
pub fn floor(self) -> Self {
  pick! {
    if #[cfg(target_feature="avx")] {
      let avx = floor_m256d(self.avx);
      Self { avx }
    } else {
      let (a, b) = (self.a.floor(), self.b.floor());
      Self { a, b }
    }
  }
}
/// Rounds every lane up to the nearest whole number (toward positive
/// infinity).
///
/// With AVX the whole vector is handled by a single call; otherwise the
/// two halves `a` and `b` are rounded up independently.
#[inline]
#[must_use]
pub fn ceil(self) -> Self {
  pick! {
    if #[cfg(target_feature="avx")] {
      let avx = ceil_m256d(self.avx);
      Self { avx }
    } else {
      let (a, b) = (self.a.ceil(), self.b.ceil());
      Self { a, b }
    }
  }
}

/// Calculates the lanewise maximum of both vectors. This is a faster
/// implementation than `max`, but it doesn't specify any behavior if NaNs are
/// involved.
Expand Down
16 changes: 16 additions & 0 deletions tests/all_tests/t_f32x4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,22 @@ fn impl_f32x4_abs() {
assert_eq!(expected, actual);
}

// `floor` must round negative and positive fractions toward negative
// infinity and leave positive infinity untouched.
#[test]
fn impl_f32x4_floor() {
  let input = f32x4::from([-1.1, 60.9, 1.1, f32::INFINITY]);
  let want = f32x4::from([-2.0, 60.0, 1.0, f32::INFINITY]);
  assert_eq!(want, input.floor());
}

// `ceil` must round negative and positive fractions toward positive
// infinity and leave negative infinity untouched.
#[test]
fn impl_f32x4_ceil() {
  let input = f32x4::from([-1.1, 60.9, 1.1, f32::NEG_INFINITY]);
  let want = f32x4::from([-1.0, 61.0, 2.0, f32::NEG_INFINITY]);
  assert_eq!(want, input.ceil());
}

#[test]
fn impl_f32x4_fast_max() {
let a = f32x4::from([1.0, 5.0, 3.0, -4.0]);
Expand Down
19 changes: 19 additions & 0 deletions tests/all_tests/t_f32x8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,25 @@ fn impl_f32x8_abs() {
assert_eq!(expected, actual);
}

// `floor` across all eight lanes: fractions go toward negative infinity,
// positive infinity is left untouched.
#[test]
fn impl_f32x8_floor() {
  let input =
    f32x8::from([-1.1, 60.9, 1.1, f32::INFINITY, 96.6, -53.2, 0.1, 9.2]);
  let want =
    f32x8::from([-2.0, 60.0, 1.0, f32::INFINITY, 96.0, -54.0, 0.0, 9.0]);
  assert_eq!(want, input.floor());
}

// `ceil` across all eight lanes: fractions go toward positive infinity,
// negative infinity is left untouched.
//
// This test exercises `f32x8::ceil`; it was previously misnamed
// `impl_f64x4_ceil`, which made failures point at the wrong type.
#[test]
fn impl_f32x8_ceil() {
  let a =
    f32x8::from([-1.1, 60.9, 1.1, f32::NEG_INFINITY, 96.6, -53.2, 0.1, 9.2]);
  let expected =
    f32x8::from([-1.0, 61.0, 2.0, f32::NEG_INFINITY, 97.0, -53.0, 1.0, 10.0]);
  let actual = a.ceil();
  assert_eq!(expected, actual);
}

#[test]
fn impl_f32x8_fast_max() {
let a = f32x8::from([1.0, 5.0, 3.0, 0.0, 6.0, -8.0, 12.0, 9.0]);
Expand Down
28 changes: 28 additions & 0 deletions tests/all_tests/t_f64x2.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use core::f64;

use wide::*;

use bytemuck::*;
Expand Down Expand Up @@ -215,6 +217,32 @@ fn impl_f64x2_abs() {
assert_eq!(expected, actual);
}

// `floor` on two-lane vectors: a negative fraction, an already-whole value,
// a positive fraction, and positive infinity.
#[test]
fn impl_f64x2_floor() {
  let input = f64x2::from([-1.1, 2.0]);
  let want = f64x2::from([-2.0, 2.0]);
  assert_eq!(want, input.floor());

  let input = f64x2::from([60.9, f64::INFINITY]);
  let want = f64x2::from([60.0, f64::INFINITY]);
  assert_eq!(want, input.floor());
}

// `ceil` on two-lane vectors: a negative fraction, an already-whole value,
// a positive fraction, and negative infinity.
#[test]
fn impl_f64x2_ceil() {
  let input = f64x2::from([-1.1, 2.0]);
  let want = f64x2::from([-1.0, 2.0]);
  assert_eq!(want, input.ceil());

  let input = f64x2::from([60.9, f64::NEG_INFINITY]);
  let want = f64x2::from([61.0, f64::NEG_INFINITY]);
  assert_eq!(want, input.ceil());
}

#[test]
fn impl_f64x2_fast_max() {
let a = f64x2::from([-0.0, -5.0]);
Expand Down
18 changes: 18 additions & 0 deletions tests/all_tests/t_f64x4.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use core::f64;

use wide::*;

use bytemuck::*;
Expand Down Expand Up @@ -183,6 +185,22 @@ fn impl_f64x4_abs() {
assert_eq!(expected, actual);
}

// `floor` must round negative and positive fractions toward negative
// infinity and leave positive infinity untouched.
#[test]
fn impl_f64x4_floor() {
  let input = f64x4::from([-1.1, 60.9, 1.1, f64::INFINITY]);
  let want = f64x4::from([-2.0, 60.0, 1.0, f64::INFINITY]);
  assert_eq!(want, input.floor());
}

// `ceil` must round negative and positive fractions toward positive
// infinity and leave negative infinity untouched.
#[test]
fn impl_f64x4_ceil() {
  let input = f64x4::from([-1.1, 60.9, 1.1, f64::NEG_INFINITY]);
  let want = f64x4::from([-1.0, 61.0, 2.0, f64::NEG_INFINITY]);
  assert_eq!(want, input.ceil());
}

#[test]
fn impl_f64x4_fast_max() {
let a = f64x4::from([1.0, 5.0, 3.0, -0.0]);
Expand Down

0 comments on commit a9b883f

Please sign in to comment.