implement floor and ceil for f64 and f32 types (#183)

* implement floor and ceil
* implement floor and ceil for neon
* implement floor and ceil for no std
* fix typo
* include std implementation
* fix workflow for wasm targets
* check for wasip1
Lokathor · Dec 3, 2024 · a9b883f · a9b883f
1 parent 3752fd0
commit a9b883f
Show file tree

Hide file tree

Showing 9 changed files with 237 additions and 3 deletions.
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
@@ -33,8 +33,8 @@ jobs:
         # wasm32
         - { target: wasm32-wasi, toolchain: "1.61", os: ubuntu-latest, wasmtime: v5.0.0 }
         - { target: wasm32-wasi, toolchain: stable, os: ubuntu-latest, wasmtime: v5.0.0 }
-        - { target: wasm32-wasi, toolchain: beta, os: ubuntu-latest, wasmtime: v5.0.0 }
-        - { target: wasm32-wasi, toolchain: nightly, os: ubuntu-latest, wasmtime: v5.0.0 }
+        - { target: wasm32-wasip1, toolchain: beta, os: ubuntu-latest, wasmtime: v5.0.0 }
+        - { target: wasm32-wasip1, toolchain: nightly, os: ubuntu-latest, wasmtime: v5.0.0 }
     steps:
     - uses: actions/checkout@v4
     - uses: dtolnay/rust-toolchain@master
@@ -43,7 +43,7 @@ jobs:
         target:  ${{ matrix.rust.target }}
 
     - name: Install wasmtime
-      if: matrix.rust.target == 'wasm32-wasi'
+      if: matrix.rust.target == 'wasm32-wasi' || matrix.rust.target == 'wasm32-wasip1'
       run: |
         curl https://wasmtime.dev/install.sh -sSf | bash -s -- --version ${{ matrix.rust.wasmtime }}
         echo "$HOME/.wasmtime/bin" >> $GITHUB_PATH

diff --git a/src/f32x4_.rs b/src/f32x4_.rs
@@ -506,6 +506,56 @@ impl f32x4 {
       }
     }
   }
+  #[inline]
+  #[must_use]
+  pub fn floor(self) -> Self { // Lanewise floor of the four f32 lanes; the implementation is selected by cfg inside `pick!`.
+    pick! {
+      if #[cfg(target_feature="simd128")] { // WebAssembly SIMD build
+        Self { simd: f32x4_floor(self.simd) }
+      } else if #[cfg(target_feature="sse4.1")] { // x86 build with SSE4.1
+        Self { sse: floor_m128(self.sse) }
+      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{ // AArch64 build with NEON
+        unsafe {Self { neon: vrndmq_f32(self.neon) }} // NOTE(review): presumably sound because this branch is only built with NEON enabled; confirm
+      } else if #[cfg(feature="std")] { // no SIMD path, `std` available: scalar `f32::floor` per lane
+        let base: [f32; 4] = cast(self);
+        cast(base.map(|val| val.floor()))
+      } else { // no SIMD path and no `std`: derive floor from `round()`
+        let base: [f32; 4] = cast(self);
+        let rounded: [f32; 4] = cast(self.round());
+        cast([ // a lane whose rounded value is above the input was rounded up, so step it down by one
+          if base[0] < rounded[0] { rounded[0] - 1.0 } else { rounded[0] },
+          if base[1] < rounded[1] { rounded[1] - 1.0 } else { rounded[1] },
+          if base[2] < rounded[2] { rounded[2] - 1.0 } else { rounded[2] },
+          if base[3] < rounded[3] { rounded[3] - 1.0 } else { rounded[3] },
+        ])
+      }
+    }
+  }
+  #[inline]
+  #[must_use]
+  pub fn ceil(self) -> Self { // Lanewise ceiling of the four f32 lanes; the implementation is selected by cfg inside `pick!`.
+    pick! {
+      if #[cfg(target_feature="simd128")] { // WebAssembly SIMD build
+        Self { simd: f32x4_ceil(self.simd) }
+      } else if #[cfg(target_feature="sse4.1")] { // x86 build with SSE4.1
+        Self { sse: ceil_m128(self.sse) }
+      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{ // AArch64 build with NEON
+        unsafe {Self { neon: vrndpq_f32(self.neon) }} // NOTE(review): presumably sound because this branch is only built with NEON enabled; confirm
+      } else if #[cfg(feature="std")] { // no SIMD path, `std` available: scalar `f32::ceil` per lane
+        let base: [f32; 4] = cast(self);
+        cast(base.map(|val| val.ceil()))
+      } else { // no SIMD path and no `std`: derive ceil from `round()`
+        let base: [f32; 4] = cast(self);
+        let rounded: [f32; 4] = cast(self.round());
+        cast([ // a lane whose rounded value is below the input was rounded down, so step it up by one
+          if base[0] > rounded[0] { rounded[0] + 1.0 } else { rounded[0] },
+          if base[1] > rounded[1] { rounded[1] + 1.0 } else { rounded[1] },
+          if base[2] > rounded[2] { rounded[2] + 1.0 } else { rounded[2] },
+          if base[3] > rounded[3] { rounded[3] + 1.0 } else { rounded[3] },
+        ])
+      }
+    }
+  }
 
   /// Calculates the lanewise maximum of both vectors. This is a faster
   /// implementation than `max`, but it doesn't specify any behavior if NaNs are

diff --git a/src/f32x8_.rs b/src/f32x8_.rs
@@ -389,6 +389,34 @@ impl f32x8 {
       }
     }
   }
+  #[inline]
+  #[must_use]
+  pub fn floor(self) -> Self { // Lanewise floor of the eight f32 lanes.
+    pick! {
+      if #[cfg(target_feature="avx")] { // AVX build: one 256-bit operation
+        Self { avx: floor_m256(self.avx) }
+      } else { // otherwise delegate to `floor` on each of the two halves `a` and `b`
+        Self {
+          a : self.a.floor(),
+          b : self.b.floor(),
+        }
+      }
+    }
+  }
+  #[inline]
+  #[must_use]
+  pub fn ceil(self) -> Self { // Lanewise ceiling of the eight f32 lanes.
+    pick! {
+      if #[cfg(target_feature="avx")] { // AVX build: one 256-bit operation
+        Self { avx: ceil_m256(self.avx) }
+      } else { // otherwise delegate to `ceil` on each of the two halves `a` and `b`
+        Self {
+          a : self.a.ceil(),
+          b : self.b.ceil(),
+        }
+      }
+    }
+  }
 
   /// Calculates the lanewise maximum of both vectors. This is a faster
   /// implementation than `max`, but it doesn't specify any behavior if NaNs are

diff --git a/src/f64x2_.rs b/src/f64x2_.rs
@@ -484,6 +484,52 @@ impl f64x2 {
       }
     }
   }
+  #[inline]
+  #[must_use]
+  pub fn floor(self) -> Self { // Lanewise floor of the two f64 lanes; the implementation is selected by cfg inside `pick!`.
+    pick! {
+      if #[cfg(target_feature="simd128")] { // WebAssembly SIMD build
+        Self { simd: f64x2_floor(self.simd) }
+      } else if #[cfg(target_feature="sse4.1")] { // x86 build with SSE4.1
+        Self { sse: floor_m128d(self.sse) }
+      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{ // AArch64 build with NEON
+        unsafe {Self { neon: vrndmq_f64(self.neon) }} // NOTE(review): presumably sound because this branch is only built with NEON enabled; confirm
+      } else if #[cfg(feature="std")] { // no SIMD path, `std` available: scalar `f64::floor` per lane
+        let base: [f64; 2] = cast(self);
+        cast(base.map(|val| val.floor()))
+      } else { // no SIMD path and no `std`: derive floor from `round()`
+        let base: [f64; 2] = cast(self);
+        let rounded: [f64; 2] = cast(self.round());
+        cast([ // a lane whose rounded value is above the input was rounded up, so step it down by one
+          if base[0] < rounded[0] { rounded[0] - 1.0 } else { rounded[0] },
+          if base[1] < rounded[1] { rounded[1] - 1.0 } else { rounded[1] },
+        ])
+      }
+    }
+  }
+  #[inline]
+  #[must_use]
+  pub fn ceil(self) -> Self { // Lanewise ceiling of the two f64 lanes; the implementation is selected by cfg inside `pick!`.
+    pick! {
+      if #[cfg(target_feature="simd128")] { // WebAssembly SIMD build
+        Self { simd: f64x2_ceil(self.simd) }
+      } else if #[cfg(target_feature="sse4.1")] { // x86 build with SSE4.1
+        Self { sse: ceil_m128d(self.sse) }
+      } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{ // AArch64 build with NEON
+        unsafe {Self { neon: vrndpq_f64(self.neon) }} // NOTE(review): presumably sound because this branch is only built with NEON enabled; confirm
+      } else if #[cfg(feature="std")] { // no SIMD path, `std` available: scalar `f64::ceil` per lane
+        let base: [f64; 2] = cast(self);
+        cast(base.map(|val| val.ceil()))
+      } else { // no SIMD path and no `std`: derive ceil from `round()`
+        let base: [f64; 2] = cast(self);
+        let rounded: [f64; 2] = cast(self.round());
+        cast([ // a lane whose rounded value is below the input was rounded down, so step it up by one
+          if base[0] > rounded[0] { rounded[0] + 1.0 } else { rounded[0] },
+          if base[1] > rounded[1] { rounded[1] + 1.0 } else { rounded[1] },
+        ])
+      }
+    }
+  }
 
   /// Calculates the lanewise maximum of both vectors. This is a faster
   /// implementation than `max`, but it doesn't specify any behavior if NaNs are

diff --git a/src/f64x4_.rs b/src/f64x4_.rs
@@ -390,6 +390,35 @@ impl f64x4 {
     }
   }
 
+  #[inline]
+  #[must_use]
+  pub fn floor(self) -> Self { // Lanewise floor of the four f64 lanes.
+    pick! {
+      if #[cfg(target_feature="avx")] { // AVX build: one 256-bit operation
+        Self { avx: floor_m256d(self.avx) }
+      } else { // otherwise delegate to `floor` on each of the two halves `a` and `b`
+        Self {
+          a : self.a.floor(),
+          b : self.b.floor(),
+        }
+      }
+    }
+  }
+  #[inline]
+  #[must_use]
+  pub fn ceil(self) -> Self { // Lanewise ceiling of the four f64 lanes.
+    pick! {
+      if #[cfg(target_feature="avx")] { // AVX build: one 256-bit operation
+        Self { avx: ceil_m256d(self.avx) }
+      } else { // otherwise delegate to `ceil` on each of the two halves `a` and `b`
+        Self {
+          a : self.a.ceil(),
+          b : self.b.ceil(),
+        }
+      }
+    }
+  }
+
   /// Calculates the lanewise maximum of both vectors. This is a faster
   /// implementation than `max`, but it doesn't specify any behavior if NaNs are
   /// involved.

diff --git a/tests/all_tests/t_f32x4.rs b/tests/all_tests/t_f32x4.rs
@@ -191,6 +191,22 @@ fn impl_f32x4_abs() {
   assert_eq!(expected, actual);
 }
 
+#[test]
+fn impl_f32x4_floor() { // Checks `f32x4::floor` on a negative fraction, two positive fractions, and +infinity.
+  let a = f32x4::from([-1.1, 60.9, 1.1, f32::INFINITY]);
+  let expected = f32x4::from([-2.0, 60.0, 1.0, f32::INFINITY]); // infinity must pass through unchanged
+  let actual = a.floor();
+  assert_eq!(expected, actual);
+}
+
+#[test]
+fn impl_f32x4_ceil() { // Checks `f32x4::ceil` on a negative fraction, two positive fractions, and -infinity.
+  let a = f32x4::from([-1.1, 60.9, 1.1, f32::NEG_INFINITY]);
+  let expected = f32x4::from([-1.0, 61.0, 2.0, f32::NEG_INFINITY]); // negative infinity must pass through unchanged
+  let actual = a.ceil();
+  assert_eq!(expected, actual);
+}
+
 #[test]
 fn impl_f32x4_fast_max() {
   let a = f32x4::from([1.0, 5.0, 3.0, -4.0]);

diff --git a/tests/all_tests/t_f32x8.rs b/tests/all_tests/t_f32x8.rs
@@ -195,6 +195,25 @@ fn impl_f32x8_abs() {
   assert_eq!(expected, actual);
 }
 
+#[test]
+fn impl_f32x8_floor() { // Checks `f32x8::floor` across all eight lanes: mixed-sign fractions plus +infinity.
+  let a = f32x8::from([-1.1, 60.9, 1.1, f32::INFINITY, 96.6, -53.2, 0.1, 9.2]);
+  let expected =
+    f32x8::from([-2.0, 60.0, 1.0, f32::INFINITY, 96.0, -54.0, 0.0, 9.0]); // infinity must pass through unchanged
+  let actual = a.floor();
+  assert_eq!(expected, actual);
+}
+
+#[test]
+fn impl_f32x8_ceil() { // Checks `f32x8::ceil` across all eight lanes: mixed-sign fractions plus -infinity.
+  let a =
+    f32x8::from([-1.1, 60.9, 1.1, f32::NEG_INFINITY, 96.6, -53.2, 0.1, 9.2]);
+  let expected =
+    f32x8::from([-1.0, 61.0, 2.0, f32::NEG_INFINITY, 97.0, -53.0, 1.0, 10.0]); // negative infinity must pass through unchanged
+  let actual = a.ceil();
+  assert_eq!(expected, actual);
+}
+
 #[test]
 fn impl_f32x8_fast_max() {
   let a = f32x8::from([1.0, 5.0, 3.0, 0.0, 6.0, -8.0, 12.0, 9.0]);

diff --git a/tests/all_tests/t_f64x2.rs b/tests/all_tests/t_f64x2.rs
@@ -1,3 +1,5 @@
+use core::f64;
+
 use wide::*;
 
 use bytemuck::*;
@@ -215,6 +217,32 @@ fn impl_f64x2_abs() {
   assert_eq!(expected, actual);
 }
 
+#[test]
+fn impl_f64x2_floor() { // Checks `f64x2::floor` with two input vectors, since each holds only two lanes.
+  let a = f64x2::from([-1.1, 2.0]); // a negative fraction and an already-integral value
+  let expected = f64x2::from([-2.0, 2.0]);
+  let actual = a.floor();
+  assert_eq!(expected, actual);
+  //
+  let a = f64x2::from([60.9, f64::INFINITY]); // a positive fraction and +infinity
+  let expected = f64x2::from([60.0, f64::INFINITY]);
+  let actual = a.floor();
+  assert_eq!(expected, actual);
+}
+
+#[test]
+fn impl_f64x2_ceil() { // Checks `f64x2::ceil` with two input vectors, since each holds only two lanes.
+  let a = f64x2::from([-1.1, 2.0]); // a negative fraction and an already-integral value
+  let expected = f64x2::from([-1.0, 2.0]);
+  let actual = a.ceil();
+  assert_eq!(expected, actual);
+  //
+  let a = f64x2::from([60.9, f64::NEG_INFINITY]); // a positive fraction and -infinity
+  let expected = f64x2::from([61.0, f64::NEG_INFINITY]);
+  let actual = a.ceil();
+  assert_eq!(expected, actual);
+}
+
 #[test]
 fn impl_f64x2_fast_max() {
   let a = f64x2::from([-0.0, -5.0]);

diff --git a/tests/all_tests/t_f64x4.rs b/tests/all_tests/t_f64x4.rs
@@ -1,3 +1,5 @@
+use core::f64;
+
 use wide::*;
 
 use bytemuck::*;
@@ -183,6 +185,22 @@ fn impl_f64x4_abs() {
   assert_eq!(expected, actual);
 }
 
+#[test]
+fn impl_f64x4_floor() { // Checks `f64x4::floor` on a negative fraction, two positive fractions, and +infinity.
+  let a = f64x4::from([-1.1, 60.9, 1.1, f64::INFINITY]);
+  let expected = f64x4::from([-2.0, 60.0, 1.0, f64::INFINITY]); // infinity must pass through unchanged
+  let actual = a.floor();
+  assert_eq!(expected, actual);
+}
+
+#[test]
+fn impl_f64x4_ceil() { // Checks `f64x4::ceil` on a negative fraction, two positive fractions, and -infinity.
+  let a = f64x4::from([-1.1, 60.9, 1.1, f64::NEG_INFINITY]);
+  let expected = f64x4::from([-1.0, 61.0, 2.0, f64::NEG_INFINITY]); // negative infinity must pass through unchanged
+  let actual = a.ceil();
+  assert_eq!(expected, actual);
+}
+
 #[test]
 fn impl_f64x4_fast_max() {
   let a = f64x4::from([1.0, 5.0, 3.0, -0.0]);