Skip to content

Commit

Permalink
pathfinder_simd fixes for ARM (#572)
Browse files Browse the repository at this point in the history
* Add missing methods

* Fix arm F32x4::concat_xy_xy
  • Loading branch information
mischnic authored Sep 18, 2024
1 parent 45b7a89 commit 1b7c8bc
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 5 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 38 additions & 4 deletions simd/src/arm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@

use std::arch::aarch64::{self, float32x2_t, float32x4_t, int32x2_t, int32x4_t};
use std::arch::aarch64::{uint32x2_t, uint32x4_t};
use std::intrinsics::simd::*;
use std::f32;
use std::fmt::{self, Debug, Formatter};
use std::intrinsics::simd::*;
use std::mem;
use std::ops::{Add, BitAnd, BitOr, Div, Index, IndexMut, Mul, Not, Shr, Sub};

Expand Down Expand Up @@ -201,7 +201,6 @@ impl IndexMut<usize> for F32x2 {
}
}


impl Add<F32x2> for F32x2 {
type Output = F32x2;
#[inline]
Expand Down Expand Up @@ -352,7 +351,7 @@ impl F32x4 {

/// Builds a vector from the first two lanes of `self` followed by the first two lanes of
/// `other`, i.e. `<self.x, self.y, other.x, other.y>`.
///
/// Shuffle indices 0-3 select lanes from `self` and indices 4-7 select lanes from `other`, so
/// the upper half of the result must use indices 4 and 5. The earlier `[0, 1, 2, 3]` index list
/// simply returned `self` unchanged and ignored `other`.
#[inline]
pub fn concat_xy_xy(self, other: F32x4) -> F32x4 {
    // SAFETY: both operands are four-lane float vectors and every shuffle index is a constant
    // in the valid range 0..8.
    unsafe { F32x4(simd_shuffle4!(self.0, other.0, [0, 1, 4, 5])) }
}

#[inline]
Expand All @@ -365,6 +364,11 @@ impl F32x4 {
unsafe { F32x4(simd_shuffle4!(self.0, other.0, [2, 3, 6, 7])) }
}

/// Builds a vector from lanes 3 and 2 of `self` followed by lanes 1 and 0 of `other`, i.e.
/// `<self.w, self.z, other.y, other.x>`.
///
/// Shuffle indices 0-3 select lanes from `self`; indices 4-7 select lanes from `other`.
#[inline]
pub fn concat_wz_yx(self, other: F32x4) -> F32x4 {
    // SAFETY: both operands are four-lane float vectors and every shuffle index is a constant
    // in the valid range 0..8.
    F32x4(unsafe { simd_shuffle4!(self.0, other.0, [3, 2, 5, 4]) })
}

// Conversions

/// Converts these packed floats to integers via rounding.
Expand Down Expand Up @@ -832,13 +836,22 @@ impl BitOr<U32x2> for U32x2 {
}
}


// Four 32-bit unsigned integers

/// Four 32-bit unsigned integers, wrapping an AArch64 `uint32x4_t` vector.
///
/// The inner vector is public so that callers can pass it directly to `std::arch::aarch64`
/// intrinsics.
#[derive(Clone, Copy)]
pub struct U32x4(pub uint32x4_t);

impl U32x4 {
/// Creates a vector whose lanes are, in order, `a`, `b`, `c`, and `d`.
#[inline]
pub fn new(a: u32, b: u32, c: u32, d: u32) -> U32x4 {
    let lanes = [a, b, c, d];
    // SAFETY: `[u32; 4]` and the wrapped vector type are both 16 bytes of plain integer data,
    // so reinterpreting the array as the vector is valid for every bit pattern.
    U32x4(unsafe { mem::transmute(lanes) })
}

#[inline]
pub fn splat(x: u32) -> U32x4 {
U32x4::new(x, x, x, x)
}

/// Returns true if all four booleans in this vector are true.
///
/// The result is *undefined* if all four values in this vector are not booleans. A boolean is
Expand All @@ -856,6 +869,20 @@ impl U32x4 {
pub fn all_false(&self) -> bool {
unsafe { aarch64::vmaxvq_u32(self.0) == 0 }
}

// Packed comparisons

/// Compares `self` and `other` lane by lane for equality and returns the per-lane result as
/// a vector.
///
/// NOTE(review): per the `simd_eq` intrinsic, a lane is expected to be all ones where the
/// inputs are equal and zero where they differ — confirm against the intrinsic documentation.
#[inline]
pub fn packed_eq(self, other: U32x4) -> U32x4 {
    // SAFETY: both operands are four-lane vectors of `u32`, and the result has the same lane
    // count and lane width.
    U32x4(unsafe { simd_eq(self.0, other.0) })
}
}

/// Formats the vector as `<a, b, c, d>`, listing the lanes in index order.
impl Debug for U32x4 {
    #[inline]
    fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
        // Read each lane through the `Index` impl before handing them to the formatter.
        let (lane0, lane1) = (self[0], self[1]);
        let (lane2, lane3) = (self[2], self[3]);
        write!(f, "<{}, {}, {}, {}>", lane0, lane1, lane2, lane3)
    }
}

impl Index<usize> for U32x4 {
Expand All @@ -870,6 +897,13 @@ impl Index<usize> for U32x4 {
}
}

/// Two vectors are equal when the lane-wise comparison reports every lane as equal.
impl PartialEq for U32x4 {
    #[inline]
    fn eq(&self, other: &U32x4) -> bool {
        // Compare lane by lane, then collapse the per-lane results into a single boolean.
        let lane_mask = self.packed_eq(*other);
        lane_mask.all_true()
    }
}

extern "C" {
#[link_name = "llvm.fabs.v2f32"]
fn fabs_v2f32(a: float32x2_t) -> float32x2_t;
Expand Down

0 comments on commit 1b7c8bc

Please sign in to comment.