From c36db9f21dc5249e6fe9f27a6c821585ce5e92e3 Mon Sep 17 00:00:00 2001 From: Yacin Tmimi Date: Tue, 9 Apr 2024 00:21:20 -0400 Subject: [PATCH 1/2] remove `packed_simd` in favor of `std::simd` Fixes 91 As mentioned in the `packed_simd` README, the crate is superseded by `#![feature(portable_simd)]`. --- Cargo.toml | 3 +-- src/lib.rs | 2 ++ src/simd/generic.rs | 29 +++++++++++++++-------------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1b24de7..38b1bc7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,12 +18,11 @@ appveyor = { repository = "llogiq/bytecount" } bench = false [features] -generic-simd = ["packed_simd"] +generic-simd = [] runtime-dispatch-simd = [] html_report = [] [dependencies] -packed_simd = { version = "0.3.8", optional = true } [dev-dependencies] quickcheck = "1.0" diff --git a/src/lib.rs b/src/lib.rs index e24262b..ba7408c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,6 +31,8 @@ //! [`naive_count_32`](fn.naive_count_32.html) method can be faster //! still on small strings. 
+#![cfg_attr(feature = "generic-simd", feature(portable_simd))] + #![deny(missing_docs)] #![cfg_attr(not(feature = "runtime-dispatch-simd"), no_std)] diff --git a/src/simd/generic.rs b/src/simd/generic.rs index 640ccd8..cc2dd69 100644 --- a/src/simd/generic.rs +++ b/src/simd/generic.rs @@ -1,11 +1,12 @@ -extern crate packed_simd; + #[cfg(not(feature = "runtime-dispatch-simd"))] -use core::mem; +use core::{mem, simd}; + #[cfg(feature = "runtime-dispatch-simd")] -use std::mem; +use std::{mem, simd}; -use self::packed_simd::{u8x32, u8x64, FromCast}; +use simd::{u8x32, u8x64, cmp::SimdPartialEq, num::SimdInt}; const MASK: [u8; 64] = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -14,20 +15,20 @@ const MASK: [u8; 64] = [ ]; unsafe fn u8x64_from_offset(slice: &[u8], offset: usize) -> u8x64 { - u8x64::from_slice_unaligned_unchecked(slice.get_unchecked(offset..)) + u8x64::from_slice(slice.get_unchecked(offset..)) } unsafe fn u8x32_from_offset(slice: &[u8], offset: usize) -> u8x32 { - u8x32::from_slice_unaligned_unchecked(slice.get_unchecked(offset..)) + u8x32::from_slice(slice.get_unchecked(offset..)) } fn sum_x64(u8s: &u8x64) -> usize { let mut store = [0; mem::size_of::<u8x64>()]; - u8s.write_to_slice_unaligned(&mut store); + u8s.copy_to_slice(&mut store); store.iter().map(|&e| e as usize).sum() } fn sum_x32(u8s: &u8x32) -> usize { let mut store = [0; mem::size_of::<u8x32>()]; - u8s.write_to_slice_unaligned(&mut store); + u8s.copy_to_slice(&mut store); store.iter().map(|&e| e as usize).sum() } @@ -44,7 +45,7 @@ pub fn chunk_count(haystack: &[u8], needle: u8) -> usize { while haystack.len() >= offset + 64 * 255 { let mut counts = u8x64::splat(0); for _ in 0..255 { - counts -= u8x64::from_cast(u8x64_from_offset(haystack, offset).eq(needles_x64)); + counts -= u8x64_from_offset(haystack, offset).simd_eq(needles_x64).to_int().cast(); offset += 64; } count += sum_x64(&counts); @@ -54,7 +55,7 @@ pub fn chunk_count(haystack: &[u8], needle: 
u8) -> usize { if haystack.len() >= offset + 64 * 128 { let mut counts = u8x64::splat(0); for _ in 0..128 { - counts -= u8x64::from_cast(u8x64_from_offset(haystack, offset).eq(needles_x64)); + counts -= u8x64_from_offset(haystack, offset).simd_eq(needles_x64).to_int().cast(); offset += 64; } count += sum_x64(&counts); @@ -66,7 +67,7 @@ pub fn chunk_count(haystack: &[u8], needle: u8) -> usize { let mut counts = u8x32::splat(0); for i in 0..(haystack.len() - offset) / 32 { counts -= - u8x32::from_cast(u8x32_from_offset(haystack, offset + i * 32).eq(needles_x32)); + u8x32_from_offset(haystack, offset + i * 32).simd_eq(needles_x32).to_int().cast(); } count += sum_x32(&counts); @@ -74,7 +75,7 @@ pub fn chunk_count(haystack: &[u8], needle: u8) -> usize { counts = u8x32::splat(0); if haystack.len() % 32 != 0 { counts -= - u8x32::from_cast(u8x32_from_offset(haystack, haystack.len() - 32).eq(needles_x32)) + u8x32_from_offset(haystack, haystack.len() - 32).simd_eq(needles_x32).to_int().cast() & u8x32_from_offset(&MASK, haystack.len() % 32); } count += sum_x32(&counts); @@ -84,11 +85,11 @@ pub fn chunk_count(haystack: &[u8], needle: u8) -> usize { } fn is_leading_utf8_byte_x64(u8s: u8x64) -> u8x64 { - u8x64::from_cast((u8s & u8x64::splat(0b1100_0000)).ne(u8x64::splat(0b1000_0000))) + (u8s & u8x64::splat(0b1100_0000)).simd_ne(u8x64::splat(0b1000_0000)).to_int().cast() } fn is_leading_utf8_byte_x32(u8s: u8x32) -> u8x32 { - u8x32::from_cast((u8s & u8x32::splat(0b1100_0000)).ne(u8x32::splat(0b1000_0000))) + (u8s & u8x32::splat(0b1100_0000)).simd_ne(u8x32::splat(0b1000_0000)).to_int().cast() } pub fn chunk_num_chars(utf8_chars: &[u8]) -> usize { From 32a098dfd2f058bcf4f42ec354bb71b18c76da0d Mon Sep 17 00:00:00 2001 From: Yacin Tmimi Date: Mon, 15 Apr 2024 21:33:11 -0400 Subject: [PATCH 2/2] update docs to mention `std::simd` and `#![feature(portable_simd)]` This replaces the simd docs that used to reference packed_simd. 
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ee028de..12301e9 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ Your users can then compile with runtime dispatch using: cargo build --release --features runtime-dispatch-simd ``` -The second, `generic-simd`, uses `packed_simd` to provide a fast +The second, `generic-simd`, uses [`std::simd`](https://doc.rust-lang.org/std/simd/index.html) and [`#![feature(portable_simd)]`](https://github.com/rust-lang/rust/issues/86656) to provide a fast architecture-agnostic SIMD codepath, but requires running on nightly. Your users can compile with this codepath using: