Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modify Teddy's match verification. #275

Closed
wants to merge 2 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 37 additions & 56 deletions src/simd_accel/teddy128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ better. Namely:
1. Teddy's core algorithm scans the haystack in 16 byte chunks. 16 is
significant because it corresponds to the number of bytes in a SIMD vector.
If one used AVX2 instructions, then we could scan the haystack in 32 byte
chunks. Similarly, if one used AVX512 instructions, we could sca the
chunks. Similarly, if one used AVX512 instructions, we could scan the
haystack in 64 byte chunks. Hyperscan implements SIMD + AVX2; we only
implement SIMD for the moment. (The author doesn't have a CPU with AVX2
support... yet.)
Expand Down Expand Up @@ -326,11 +326,10 @@ References
// TODO: Make the inner loop do aligned loads.

use std::cmp;
use std::mem::transmute;
use std::ptr;

use simd::u8x16;
use simd::x86::sse2::u64x2;
use simd::x86::sse2::Sse2Bool8ix16;
use simd::x86::ssse3::Ssse3U8x16;

use syntax;
Expand Down Expand Up @@ -456,8 +455,9 @@ impl Teddy {
// N.B. `res0` is our `C` in the module documentation.
let res0 = self.masks.members1(h);
// Only do expensive verification if there are any non-zero bits.
if res0.ne(zero).any() {
if let Some(m) = self.verify_128(haystack, pos, res0) {
let bitfield = res0.ne(zero).move_mask();
if bitfield != 0 {
if let Some(m) = self.verify(haystack, pos, res0, bitfield) {
return Some(m);
}
}
Expand Down Expand Up @@ -510,9 +510,11 @@ impl Teddy {
// `AND`'s our `C` values together.
let res = res0prev0 & res1;
prev0 = res0;
if res.ne(zero).any() {

let bitfield = res.ne(zero).move_mask();
if bitfield != 0 {
let pos = pos.checked_sub(1).unwrap();
if let Some(m) = self.verify_128(haystack, pos, res) {
if let Some(m) = self.verify(haystack, pos, res, bitfield) {
return Some(m);
}
}
Expand Down Expand Up @@ -568,9 +570,11 @@ impl Teddy {

prev0 = res0;
prev1 = res1;
if res.ne(zero).any() {

let bitfield = res.ne(zero).move_mask();
if bitfield != 0 {
let pos = pos.checked_sub(2).unwrap();
if let Some(m) = self.verify_128(haystack, pos, res) {
if let Some(m) = self.verify(haystack, pos, res, bitfield) {
return Some(m);
}
}
Expand All @@ -585,63 +589,40 @@ impl Teddy {

/// Runs the verification procedure on `res` (i.e., `C` from the module
/// documentation), where the haystack block starts at `pos` in
/// `haystack`.
/// `haystack`. Bit `i` of `bitfield` is set exactly when byte `i` of `res`
/// is non-zero.
///
/// If a match exists, it returns the first one.
#[inline(always)]
fn verify_128(
fn verify(
&self,
haystack: &[u8],
pos: usize,
res: u8x16,
mut bitfield: u32,
) -> Option<Match> {
// The verification procedure is more amenable to standard 64 bit
// values, so get those.
let res64: u64x2 = unsafe { transmute(res) };
let reshi = res64.extract(0);
let reslo = res64.extract(1);
if let Some(m) = self.verify_64(haystack, pos, reshi, 0) {
return Some(m);
}
if let Some(m) = self.verify_64(haystack, pos, reslo, 8) {
return Some(m);
}
None
}

/// Runs the verification procedure on half of `C`.
///
/// If a match exists, it returns the first one.
///
/// `offset` is an additional byte offset to add to the position before
/// substring match verification.
#[inline(always)]
fn verify_64(
&self,
haystack: &[u8],
pos: usize,
mut res: u64,
offset: usize,
) -> Option<Match> {
// There's a possible match so long as there's at least one bit set.
while res != 0 {
// The next possible match is at the least significant bit.
let bit = res.trailing_zeros();
// The position of the bit in its corresponding lane gives us the
// corresponding bucket.
let bucket = (bit % 8) as usize;
// The lane that the bit is in gives us its offset.
let bytei = (bit / 8) as usize;
// Compute the start of where a substring would start.
let start = pos + offset + bytei;
// Kill off this bit. If we couldn't match anything, we'll go to
// the next bit.
res &= !(1 << bit);
// Actual substring search verification.
if let Some(m) = self.verify_bucket(haystack, bucket, start) {
return Some(m);
while bitfield != 0 {
// The next offset, relative to pos, where some fingerprint matched.
let byte_pos = bitfield.trailing_zeros();
bitfield &= !(1 << byte_pos);

// Offset relative to the beginning of the haystack.
let start = pos + byte_pos as usize;

// Each set bit in this byte of `res` identifies a bucket whose fingerprint matched at this
// starting position.
let mut patterns = res.extract(byte_pos);
while patterns != 0 {
let bucket = patterns.trailing_zeros() as usize;
patterns &= !(1 << bucket);

// Actual substring search verification.
if let Some(m) = self.verify_bucket(haystack, bucket, start) {
return Some(m);
}
}
}

None
}

Expand Down