Skip to content

Commit

Permalink
Add SSE2/AVX2/WASM SIMD support (#86)
Browse files Browse the repository at this point in the history
  • Loading branch information
james7132 authored Mar 19, 2024
1 parent 61550cd commit d7ae91f
Show file tree
Hide file tree
Showing 9 changed files with 670 additions and 152 deletions.
26 changes: 20 additions & 6 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ jobs:
strategy:
matrix:
rust: [1.56.0, stable, nightly]

features: ["+avx2", "+sse2", "-avx2,-sse2"]
env:
  # Cargo reads extra compiler flags from RUSTFLAGS; the rustc codegen option
  # is `-C target-feature` (singular), and workflow expressions must be
  # written as `${{ ... }}` to be substituted with the matrix value.
  RUSTFLAGS: "-C target-feature=${{ matrix.features }}"
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
profile: minimal
Expand All @@ -43,7 +45,7 @@ jobs:
rust: [stable]

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
profile: minimal
Expand All @@ -64,7 +66,7 @@ jobs:
rust: [stable]

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
profile: minimal
Expand All @@ -85,7 +87,7 @@ jobs:
rust: [stable]

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
profile: minimal
Expand All @@ -95,4 +97,16 @@ jobs:
- name: Run Clippy
run: |
cd benches
cargo bench --bench benches --no-run
cargo bench --bench benches --no-run
# Checks that the crate type-checks for the wasm32-unknown-unknown target.
build-wasm:
runs-on: ubuntu-latest
timeout-minutes: 30
# Scheduled only after the `build` job.
needs: build
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
# Requests the wasm32 target in addition to the stable toolchain.
target: wasm32-unknown-unknown
- name: Check wasm
# `cargo check` only type-checks the crate; no tests are run for wasm here.
run: cargo check --target wasm32-unknown-unknown
108 changes: 108 additions & 0 deletions src/block/avx2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
use core::{
cmp::Ordering,
hash::{Hash, Hasher},
iter::Iterator,
ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not},
};

/// A block of bits backed by a single 256-bit AVX2 integer vector.
#[derive(Copy, Clone, Debug)]
#[repr(transparent)]
pub struct Block(__m256i);

impl Block {
    /// Number of `usize` words that fit in one block.
    pub const USIZE_COUNT: usize = core::mem::size_of::<Self>() / core::mem::size_of::<usize>();
    /// Block with every bit cleared.
    pub const NONE: Self = Self::from_usize_array([0; Self::USIZE_COUNT]);
    /// Block with every bit set.
    pub const ALL: Self = Self::from_usize_array([usize::MAX; Self::USIZE_COUNT]);
    /// Number of bits held by one block.
    pub const BITS: usize = 8 * core::mem::size_of::<Self>();

    /// Builds a block from its `usize` words.
    #[inline]
    pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
        // SAFETY: `USIZE_COUNT` is derived from the size of `Self`, so the array
        // and the vector have the same size, and every bit pattern is valid for
        // an integer vector.
        let vector: __m256i = unsafe { core::mem::transmute(array) };
        Self(vector)
    }

    /// Reinterprets the block as an array of `usize` words.
    #[inline]
    pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
        // SAFETY: same size on both sides (see `USIZE_COUNT`), and every bit
        // pattern is a valid `usize`.
        let words: [usize; Self::USIZE_COUNT] = unsafe { core::mem::transmute(self.0) };
        words
    }

    /// Returns `true` when no bit in the block is set.
    #[inline]
    pub fn is_empty(self) -> bool {
        // `testz(a, a)` yields 1 exactly when `a & a` is all zeros, 0 otherwise.
        // SAFETY: this backend is only selected when the `avx2` target feature
        // is enabled at compile time.
        let zero_flag = unsafe { _mm256_testz_si256(self.0, self.0) };
        zero_flag != 0
    }

    /// Returns `self & !other`.
    #[inline]
    pub fn andnot(self, other: Self) -> Self {
        // The intrinsic negates its FIRST operand, hence the swapped order.
        // SAFETY: this backend is only selected when the `avx2` target feature
        // is enabled at compile time.
        let cleared = unsafe { _mm256_andnot_si256(other.0, self.0) };
        Self(cleared)
    }
}

/// Bitwise complement of every bit in the block.
impl Not for Block {
    type Output = Block;

    #[inline]
    fn not(self) -> Self::Output {
        // XOR against the all-ones block flips every bit.
        let ones = Self::ALL.0;
        // SAFETY: this backend is only selected when the `avx2` target feature
        // is enabled at compile time.
        let flipped = unsafe { _mm256_xor_si256(self.0, ones) };
        Self(flipped)
    }
}

/// Bitwise intersection of two blocks.
impl BitAnd for Block {
    type Output = Block;

    #[inline]
    fn bitand(self, rhs: Self) -> Self::Output {
        // SAFETY: this backend is only selected when the `avx2` target feature
        // is enabled at compile time.
        let both = unsafe { _mm256_and_si256(self.0, rhs.0) };
        Self(both)
    }
}

/// In-place bitwise intersection.
impl BitAndAssign for Block {
    #[inline]
    fn bitand_assign(&mut self, rhs: Self) {
        // SAFETY: this backend is only selected when the `avx2` target feature
        // is enabled at compile time.
        let both = unsafe { _mm256_and_si256(self.0, rhs.0) };
        self.0 = both;
    }
}

/// Bitwise union of two blocks.
impl BitOr for Block {
    type Output = Block;

    #[inline]
    fn bitor(self, rhs: Self) -> Self::Output {
        // SAFETY: this backend is only selected when the `avx2` target feature
        // is enabled at compile time.
        let either = unsafe { _mm256_or_si256(self.0, rhs.0) };
        Self(either)
    }
}

/// In-place bitwise union.
impl BitOrAssign for Block {
    #[inline]
    fn bitor_assign(&mut self, rhs: Self) {
        // SAFETY: this backend is only selected when the `avx2` target feature
        // is enabled at compile time.
        let either = unsafe { _mm256_or_si256(self.0, rhs.0) };
        self.0 = either;
    }
}

/// Bitwise symmetric difference of two blocks.
impl BitXor for Block {
    type Output = Block;

    #[inline]
    fn bitxor(self, rhs: Self) -> Self::Output {
        // SAFETY: this backend is only selected when the `avx2` target feature
        // is enabled at compile time.
        let differing = unsafe { _mm256_xor_si256(self.0, rhs.0) };
        Self(differing)
    }
}

/// In-place bitwise symmetric difference.
impl BitXorAssign for Block {
    #[inline]
    fn bitxor_assign(&mut self, rhs: Self) {
        // SAFETY: this backend is only selected when the `avx2` target feature
        // is enabled at compile time.
        let differing = unsafe { _mm256_xor_si256(self.0, rhs.0) };
        self.0 = differing;
    }
}

/// Two blocks are equal when all 256 bits match.
impl PartialEq for Block {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        // The XOR of two vectors is all zeros exactly when they are identical;
        // `testz(d, d)` returns 1 exactly when `d` is all zeros.
        // SAFETY: this backend is only selected when the `avx2` target feature
        // is enabled at compile time.
        unsafe {
            let diff = _mm256_xor_si256(self.0, other.0);
            _mm256_testz_si256(diff, diff) == 1
        }
    }
}
76 changes: 76 additions & 0 deletions src/block/default.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
use core::iter::Iterator;
use core::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not};

/// Scalar fallback block: a single `usize` word of bits.
#[derive(Copy, Clone, PartialEq, Debug)]
#[repr(transparent)]
pub struct Block(usize);

impl Block {
    /// Number of `usize` words that make up one block (always one here).
    pub const USIZE_COUNT: usize = 1;
    /// Block with every bit cleared.
    pub const NONE: Self = Block(0);
    /// Block with every bit set.
    pub const ALL: Self = Block(!0);
    /// Number of bits held by one block.
    pub const BITS: usize = core::mem::size_of::<Self>() * 8;

    /// Returns the block's contents as an array of `usize` words.
    ///
    /// The shared `Ord` and `Hash` impls in the parent module call this method,
    /// so every backend has to provide it.
    #[inline]
    pub fn into_usize_array(self) -> [usize; Self::USIZE_COUNT] {
        [self.0]
    }

    /// Builds a block from an array of `usize` words.
    #[inline]
    pub const fn from_usize_array(array: [usize; Self::USIZE_COUNT]) -> Self {
        Self(array[0])
    }

    /// Returns `true` when no bit in the block is set.
    #[inline]
    pub const fn is_empty(self) -> bool {
        self.0 == Self::NONE.0
    }

    /// Returns `self & !other`.
    #[inline]
    pub fn andnot(self, other: Self) -> Self {
        Self(!other.0 & self.0)
    }
}

impl Not for Block {
type Output = Block;
#[inline]
fn not(self) -> Self::Output {
Self(self.0.not())
}
}

impl BitAnd for Block {
type Output = Block;
#[inline]
fn bitand(self, other: Self) -> Self::Output {
Self(self.0.bitand(other.0))
}
}

/// In-place bitwise intersection.
impl BitAndAssign for Block {
    #[inline]
    fn bitand_assign(&mut self, rhs: Self) {
        self.0 &= rhs.0;
    }
}

impl BitOr for Block {
type Output = Block;
#[inline]
fn bitor(self, other: Self) -> Self::Output {
Self(self.0.bitor(other.0))
}
}

/// In-place bitwise union.
impl BitOrAssign for Block {
    #[inline]
    fn bitor_assign(&mut self, rhs: Self) {
        self.0 |= rhs.0;
    }
}

impl BitXor for Block {
type Output = Block;
#[inline]
fn bitxor(self, other: Self) -> Self::Output {
Self(self.0.bitxor(other.0))
}
}

/// In-place bitwise symmetric difference.
impl BitXorAssign for Block {
    #[inline]
    fn bitxor_assign(&mut self, rhs: Self) {
        self.0 ^= rhs.0;
    }
}
76 changes: 76 additions & 0 deletions src/block/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
use core::cmp::Ordering;
use core::hash::{Hash, Hasher};

// Backend selection: exactly one of the modules below is compiled and its
// items (including `Block`) are re-exported from this module.

// Scalar fallback: any non-wasm32 target with neither `sse2` nor `avx2` enabled.
#[cfg(all(
not(target_arch = "wasm32"),
not(target_feature = "sse2"),
not(target_feature = "avx2"),
))]
mod default;
#[cfg(all(
not(target_arch = "wasm32"),
not(target_feature = "sse2"),
not(target_feature = "avx2"),
))]
pub use self::default::*;

// SSE2 backend: `sse2` is enabled but `avx2` is not.
#[cfg(all(
not(target_arch = "wasm32"),
target_feature = "sse2",
not(target_feature = "avx2"),
))]
mod sse2;
#[cfg(all(
not(target_arch = "wasm32"),
target_feature = "sse2",
not(target_feature = "avx2"),
))]
pub use self::sse2::*;

// AVX2 backend: chosen whenever `avx2` is enabled, regardless of `sse2`.
#[cfg(all(not(target_arch = "wasm32"), target_feature = "avx2",))]
mod avx2;
#[cfg(all(not(target_arch = "wasm32"), target_feature = "avx2",))]
pub use self::avx2::*;

// wasm32 backend: chosen for every wasm32 target; no target feature is checked here.
#[cfg(target_arch = "wasm32")]
mod wasm32;
#[cfg(target_arch = "wasm32")]
pub use self::wasm32::*;

// Marker impl shared by all backends; `Eq` requires `PartialEq`, which the
// selected backend module must provide for `Block`.
impl Eq for Block {}

/// Partial ordering that always defers to the total order defined by `Ord`.
impl PartialOrd for Block {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let ordering = Ord::cmp(self, other);
        Some(ordering)
    }
}

/// Total order over blocks: the `usize` words are compared lexicographically,
/// starting from index 0.
impl Ord for Block {
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        let lhs_words = self.into_usize_array();
        let rhs_words = other.into_usize_array();
        // Array comparison is lexicographic: the first differing word decides,
        // and equal arrays compare as `Ordering::Equal`.
        lhs_words.cmp(&rhs_words)
    }
}

impl Default for Block {
#[inline]
fn default() -> Self {
Self::NONE
}
}

/// Hashes a block through its `usize` word representation.
impl Hash for Block {
    #[inline]
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        let words = self.into_usize_array();
        Hash::hash(&words, hasher);
    }
}
Loading

0 comments on commit d7ae91f

Please sign in to comment.