Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use stable SIMD intrinsics with runtime detection #8

Merged
merged 8 commits into from
Nov 17, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "twoway"
version = "0.1.8"
authors = ["bluss"]

description = "Fast substring search for strings and byte strings. Optional SSE4.2 acceleration (requires nightly and cargo feature flag pcmp) using pcmpestri. Memchr is the only mandatory dependency. The two way algorithm is also used by rust's libstd itself, but here it is exposed both for byte strings, using memchr, and optionally using a SSE4.2 accelerated version."
description = "Fast substring search for strings and byte strings. Optional SSE4.2 acceleration (if detected at runtime) using pcmpestri. Memchr is the only mandatory dependency. The two way algorithm is also used by rust's libstd itself, but here it is exposed both for byte strings, using memchr, and optionally using a SSE4.2 accelerated version."

license = "MIT/Apache-2.0"
repository = "https://github.com/bluss/twoway"
Expand All @@ -14,7 +14,7 @@ categories = ["algorithms", "no-std"]

[dependencies]
memchr = { version = "2.0", default-features = false }
unchecked-index = { version = "0.2.2", optional = true }
unchecked-index = { version = "0.2.2" }
jetscii = {version = "0.3", features= ["unstable"], optional = true }
galil-seiferas = { version = "0.1.1", optional = true }

Expand All @@ -31,14 +31,11 @@ quickcheck = { version = "0.5", default-features = false }
default = ["use_std"]
use_std = ["memchr/use_std"]

# pcmpestri, requires nightly
pcmp = ["unchecked-index"]

# Internal features for testing & benchmarking & development
pattern = []
test-set = []
benchmarks = ["galil-seiferas", "pattern", "unchecked-index"]
all = ["jetscii", "pcmp", "pattern", "test-set"]
benchmarks = ["galil-seiferas", "pattern"]
all = ["jetscii", "pattern", "test-set"]


[package.metadata.release]
Expand Down
4 changes: 1 addition & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,9 @@ This is the same code as is included in Rust's libstd to “power” ``str::find
but here it is exposed with some improvements:

- Available for byte string searches using ``&[u8]``
- Having an optional SSE4.2 accelerated version which is even faster.
- Having an optional SSE4.2 accelerated version (if detected at runtime) which is even faster.
- Using ``memchr`` for the single byte case, which is ultra fast.

Use cargo feature ``pcmp`` to enable SSE4.2 / pcmpestri accelerated version (only the forward search).

- ``twoway::find_bytes(text: &[u8], pattern: &[u8]) -> Option<usize>``
- ``twoway::rfind_bytes(text: &[u8], pattern: &[u8]) -> Option<usize>``
- ``twoway::find_str(text: &str, pattern: &str) -> Option<usize>``
Expand Down
5 changes: 3 additions & 2 deletions benches/pathology.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,11 @@ use test::{Bencher, black_box};
use twoway::find_str as tw_find;
use twoway::rfind_str as tw_rfind;

/*
pub fn is_prefix(text: &str, pattern: &str) -> bool {
Str(pattern).is_prefix_of(text)
}
*/

pub fn memmem(text: &str, pattern: &str) -> bool {
#[allow(improper_ctypes)]
Expand Down Expand Up @@ -162,7 +164,6 @@ macro_rules! bench_contains_vs_tw {
}
*/

#[cfg(feature = "pcmp")]
#[bench]
pub fn pcmp_find(b: &mut Bencher) {
let haystack = black_box($hay);
Expand Down Expand Up @@ -535,7 +536,7 @@ pub fn rfind_char_1(b: &mut Bencher) {
t
});
b.bytes = haystack.len() as u64;
}
}

#[cfg(feature = "test-set")]
fn bench_data() -> Vec<u8> {
    // Zero-filled buffer of 256 KiB (256 * 1024 bytes).
    let len = 256 * 1024;
    vec![0u8; len]
}
Expand Down
3 changes: 0 additions & 3 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@ path = ".."
[dependencies.libfuzzer-sys]
git = "https://github.com/rust-fuzz/libfuzzer-sys.git"

[features]
pcmp = ["twoway/pcmp"]

# Prevent this from interfering with workspaces
[workspace]
members = ["."]
Expand Down
2 changes: 1 addition & 1 deletion fuzz/run1_pcmp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

DIR=$(dirname "$0")
V=$(cat "$DIR"/nightly-version)
cargo +$V fuzz run --features=pcmp -O -a fuzz_target_1 -- -only_ascii=1 -max_len=5000 "$@"
cargo +$V fuzz run -O -a fuzz_target_1 -- -only_ascii=1 -max_len=5000 "$@"
2 changes: 1 addition & 1 deletion fuzz/run_substring_pcmp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

DIR=$(dirname "$0")
V=$(cat "$DIR"/nightly-version)
cargo +$V fuzz run --features=pcmp -O substring -- -only_ascii=1 -max_len=256 "$@"
cargo +$V fuzz run -O substring -- -only_ascii=1 -max_len=256 "$@"
32 changes: 14 additions & 18 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
#![cfg_attr(not(test), no_std)]
#![cfg_attr(not(feature = "use_std"), no_std)]
#![cfg_attr(feature = "pattern", feature(pattern))]
#![cfg_attr(feature = "pcmp", feature(asm))]

#[cfg(not(test))]
#[cfg(not(feature = "use_std"))]
extern crate core as std;

use std::cmp;
Expand All @@ -11,7 +10,7 @@ use std::usize;
extern crate memchr;

mod tw;
#[cfg(feature = "pcmp")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub mod pcmp;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This mod should now preferably not be public. We need some way to benchmark it still, or maybe not? Just use the simd enable/disable override?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought so too, but noticed you're exposing other algorithms as public modules and thought it's intentional and has to be preserved.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll need to think over it again now that it's going stable. It's not 1.0 though, so we can live. For example using doc(hidden).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO now that this is stable, ideally the SSE4 helpers should become just part of the normal two-way code, since the algorithm is the same between both and only substring searches should be done in different ways based on runtime feature detection.

pub mod bmh;
#[cfg(feature = "test-set")]
Expand All @@ -28,31 +27,28 @@ use std::str::pattern::{

/// `find_str` finds the first occurrence of `pattern` in the `text`.
///
/// Uses the SSE42 version if it is compiled in.
/// Uses the SSE42 version if it is available at runtime.
#[inline]
pub fn find_str(text: &str, pattern: &str) -> Option<usize> {
    // The string search is carried out on the underlying bytes; whatever
    // `find_bytes` returns is forwarded unchanged.
    let haystack = text.as_bytes();
    let needle = pattern.as_bytes();
    find_bytes(haystack, needle)
}

/// `find_bytes` finds the first occurrence of `pattern` in the `text`.
///
/// Uses the SSE42 version if it is compiled in.
#[cfg(feature = "pcmp")]
#[inline]
pub fn find_bytes(text: &[u8], pattern: &[u8]) -> Option<usize> {
pcmp::find(text, pattern)
}

/// `find_bytes` finds the first occurrence of `pattern` in the `text`.
///
/// Uses the SSE42 version if it is compiled in.
#[cfg(not(feature = "pcmp"))]
/// Uses the SSE42 version if it is available at runtime.
pub fn find_bytes(text: &[u8], pattern: &[u8]) -> Option<usize> {
if pattern.is_empty() {
Some(0)
} else if text.len() < pattern.len() {
return None;
} else if pattern.len() == 1 {
memchr::memchr(pattern[0], text)
} else {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
if pcmp::is_supported() {
return unsafe { pcmp::find_inner(text, pattern) };
}
}
let mut searcher = TwoWaySearcher::new(pattern, text.len());
let is_long = searcher.memory == usize::MAX;
// write out `true` and `false` cases to encourage the compiler
Expand Down Expand Up @@ -492,7 +488,7 @@ impl TwoWaySearcher {
}

/// Return the zero-based critical position and period of the provided needle.
///
///
/// The returned period is incorrect when the actual period is "long." In
/// that case the approximation must be computed separately.
#[inline(always)]
Expand Down Expand Up @@ -913,7 +909,7 @@ fn test_contains() {
assert!(contains(h, n));
assert!(contains_rev(h, n));

let h = "\u{0}\u{0}\u{0}\u{0}";
let h = "\u{0}\u{0}\u{0}\u{0}";
let n = "\u{0}";
assert!(contains(h, n));
assert!(contains_rev(h, n));
Expand Down
Loading