Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

correct target features in hash_calc #161

Merged
merged 1 commit into from
Aug 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions zlib-rs/src/deflate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use crate::{

use self::{
algorithm::CONFIGURATION_TABLE,
hash_calc::{Crc32HashCalc, HashCalc, HashCalcVariant, RollHashCalc, StandardHashCalc},
hash_calc::{Crc32HashCalc, HashCalcVariant, RollHashCalc, StandardHashCalc},
pending::Pending,
trees_tbl::STATIC_LTREE,
window::Window,
Expand Down Expand Up @@ -1338,23 +1338,23 @@ impl<'a> State<'a> {
pub(crate) fn update_hash(&self, h: u32, val: u32) -> u32 {
match self.hash_calc_variant {
HashCalcVariant::Standard => StandardHashCalc::update_hash(h, val),
HashCalcVariant::Crc32 => Crc32HashCalc::update_hash(h, val),
HashCalcVariant::Crc32 => unsafe { Crc32HashCalc::update_hash(h, val) },
HashCalcVariant::Roll => RollHashCalc::update_hash(h, val),
}
}

pub(crate) fn quick_insert_string(&mut self, string: usize) -> u16 {
match self.hash_calc_variant {
HashCalcVariant::Standard => StandardHashCalc::quick_insert_string(self, string),
HashCalcVariant::Crc32 => Crc32HashCalc::quick_insert_string(self, string),
HashCalcVariant::Crc32 => unsafe { Crc32HashCalc::quick_insert_string(self, string) },
HashCalcVariant::Roll => RollHashCalc::quick_insert_string(self, string),
}
}

pub(crate) fn insert_string(&mut self, string: usize, count: usize) {
match self.hash_calc_variant {
HashCalcVariant::Standard => StandardHashCalc::insert_string(self, string, count),
HashCalcVariant::Crc32 => Crc32HashCalc::insert_string(self, string, count),
HashCalcVariant::Crc32 => unsafe { Crc32HashCalc::insert_string(self, string, count) },
HashCalcVariant::Roll => RollHashCalc::insert_string(self, string, count),
}
}
Expand Down
166 changes: 107 additions & 59 deletions zlib-rs/src/deflate/hash_calc.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#![warn(unsafe_op_in_unsafe_fn)]
use crate::deflate::{State, HASH_SIZE, STD_MIN_MATCH};

#[derive(Debug, Clone, Copy)]
Expand Down Expand Up @@ -27,21 +28,27 @@ impl HashCalcVariant {
}
}

pub trait HashCalc {
const HASH_CALC_OFFSET: usize;
const HASH_CALC_MASK: u32;
pub struct StandardHashCalc;

impl StandardHashCalc {
const HASH_CALC_OFFSET: usize = 0;

const HASH_CALC_MASK: u32 = (HASH_SIZE - 1) as u32;

fn hash_calc(h: u32, val: u32) -> u32;
fn hash_calc(_: u32, val: u32) -> u32 {
const HASH_SLIDE: u32 = 16;
val.wrapping_mul(2654435761) >> HASH_SLIDE
}

fn update_hash(h: u32, val: u32) -> u32 {
pub fn update_hash(h: u32, val: u32) -> u32 {
Self::hash_calc(h, val) & Self::HASH_CALC_MASK
}

fn quick_insert_string(state: &mut State, string: usize) -> u16 {
pub fn quick_insert_string(state: &mut State, string: usize) -> u16 {
let slice = &state.window.filled()[string + Self::HASH_CALC_OFFSET..];
let val = u32::from_le_bytes(slice[..4].try_into().unwrap());

let hm = (Self::hash_calc(0, val) & Self::HASH_CALC_MASK) as usize;
let hm = Self::update_hash(0, val) as usize;

let head = state.head[hm];
if head != string as u16 {
Expand All @@ -52,7 +59,7 @@ pub trait HashCalc {
head
}

fn insert_string(state: &mut State, string: usize, count: usize) {
pub fn insert_string(state: &mut State, string: usize, count: usize) {
let slice = &state.window.filled()[string + Self::HASH_CALC_OFFSET..];

// .take(count) generates worse assembly
Expand All @@ -61,7 +68,7 @@ pub trait HashCalc {

let val = u32::from_le_bytes(w.try_into().unwrap());

let hm = (Self::hash_calc(0, val) & Self::HASH_CALC_MASK) as usize;
let hm = Self::update_hash(0, val) as usize;

let head = state.head[hm];
if head != idx {
Expand All @@ -72,22 +79,9 @@ pub trait HashCalc {
}
}

pub struct StandardHashCalc;

impl HashCalc for StandardHashCalc {
const HASH_CALC_OFFSET: usize = 0;

const HASH_CALC_MASK: u32 = (HASH_SIZE - 1) as u32;

fn hash_calc(_: u32, val: u32) -> u32 {
const HASH_SLIDE: u32 = 16;
val.wrapping_mul(2654435761) >> HASH_SLIDE
}
}

pub struct RollHashCalc;

impl HashCalc for RollHashCalc {
impl RollHashCalc {
const HASH_CALC_OFFSET: usize = STD_MIN_MATCH - 1;

const HASH_CALC_MASK: u32 = (1 << 15) - 1;
Expand All @@ -97,7 +91,11 @@ impl HashCalc for RollHashCalc {
(h << HASH_SLIDE) ^ val
}

fn quick_insert_string(state: &mut State, string: usize) -> u16 {
/// Folds `val` into the running hash `h` with `hash_calc` and masks the
/// result with `HASH_CALC_MASK`.
pub fn update_hash(h: u32, val: u32) -> u32 {
    let rolled = Self::hash_calc(h, val);
    rolled & Self::HASH_CALC_MASK
}

pub fn quick_insert_string(state: &mut State, string: usize) -> u16 {
let val = state.window.filled()[string + Self::HASH_CALC_OFFSET] as u32;

state.ins_h = Self::hash_calc(state.ins_h as u32, val) as usize;
Expand All @@ -114,7 +112,7 @@ impl HashCalc for RollHashCalc {
head
}

fn insert_string(state: &mut State, string: usize, count: usize) {
pub fn insert_string(state: &mut State, string: usize, count: usize) {
let slice = &state.window.filled()[string + Self::HASH_CALC_OFFSET..][..count];

for (i, val) in slice.iter().copied().enumerate() {
Expand All @@ -136,7 +134,7 @@ impl HashCalc for RollHashCalc {
pub struct Crc32HashCalc;

impl Crc32HashCalc {
pub fn is_supported() -> bool {
fn is_supported() -> bool {
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "std"))]
return std::arch::is_x86_feature_detected!("sse4.2");

Expand All @@ -147,33 +145,81 @@ impl Crc32HashCalc {
#[allow(unreachable_code)]
false
}
}

impl HashCalc for Crc32HashCalc {
const HASH_CALC_OFFSET: usize = 0;

const HASH_CALC_MASK: u32 = (HASH_SIZE - 1) as u32;

#[cfg(target_arch = "x86")]
fn hash_calc(h: u32, val: u32) -> u32 {
#[target_feature(enable = "sse4.2")]
unsafe fn hash_calc(h: u32, val: u32) -> u32 {
unsafe { core::arch::x86::_mm_crc32_u32(h, val) }
}

#[cfg(target_arch = "x86_64")]
fn hash_calc(h: u32, val: u32) -> u32 {
#[target_feature(enable = "sse4.2")]
unsafe fn hash_calc(h: u32, val: u32) -> u32 {
unsafe { core::arch::x86_64::_mm_crc32_u32(h, val) }
}

#[cfg(target_arch = "aarch64")]
fn hash_calc(h: u32, val: u32) -> u32 {
#[target_feature(enable = "neon")]
unsafe fn hash_calc(h: u32, val: u32) -> u32 {
unsafe { crate::crc32::acle::__crc32w(h, val) }
}

#[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))]
fn hash_calc(_h: u32, _val: u32) -> u32 {
unsafe fn hash_calc(_h: u32, _val: u32) -> u32 {
assert!(!Self::is_supported());
unimplemented!("there is no hardware support on this platform")
}

/// Computes `hash_calc(h, val)` and masks the result with `HASH_CALC_MASK`.
///
/// # Safety
///
/// On x86/x86_64 this function is compiled with the `sse4.2` target feature
/// and on aarch64 with `neon`; the caller must ensure the running CPU
/// supports the feature enabled for the current architecture.
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon"))]
#[cfg_attr(target_arch = "x86", target_feature(enable = "sse4.2"))]
#[cfg_attr(target_arch = "x86_64", target_feature(enable = "sse4.2"))]
pub unsafe fn update_hash(h: u32, val: u32) -> u32 {
    // SAFETY: on x86/x86_64/aarch64 `hash_calc` is compiled with the same
    // target feature as this function, which the caller guarantees is present.
    let crc = unsafe { Self::hash_calc(h, val) };
    crc & Self::HASH_CALC_MASK
}

/// Hashes the 4-byte little-endian word found at window position
/// `string + HASH_CALC_OFFSET` and makes `string` the head of that hash bucket.
///
/// Returns the bucket's previous head. When the previous head differs from
/// `string as u16`, it is recorded in `state.prev[string & state.w_mask]`
/// before the bucket is overwritten; otherwise nothing is modified.
///
/// # Panics
///
/// Panics if fewer than 4 filled window bytes are available at that position.
///
/// # Safety
///
/// On x86/x86_64 this function is compiled with the `sse4.2` target feature
/// and on aarch64 with `neon`; the caller must ensure the running CPU
/// supports the feature enabled for the current architecture.
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon"))]
#[cfg_attr(target_arch = "x86", target_feature(enable = "sse4.2"))]
#[cfg_attr(target_arch = "x86_64", target_feature(enable = "sse4.2"))]
pub unsafe fn quick_insert_string(state: &mut State, string: usize) -> u16 {
    let pos = string as u16;

    let tail = &state.window.filled()[string + Self::HASH_CALC_OFFSET..];
    let word = u32::from_le_bytes(tail[..4].try_into().unwrap());

    // SAFETY: `update_hash` carries the same target-feature attributes as
    // this function, and the caller guarantees that feature is available.
    let bucket = unsafe { Self::update_hash(0, word) } as usize;

    let previous = state.head[bucket];
    if previous != pos {
        // Link the old head behind the new one, then install the new head.
        state.prev[string & state.w_mask] = previous;
        state.head[bucket] = pos;
    }

    previous
}

/// Inserts up to `count` consecutive window positions, starting at `string`,
/// into the hash chains.
///
/// For each position, the 4-byte little-endian word starting there is hashed
/// with `update_hash(0, word)`. If the head of that hash bucket differs from
/// the position, the old head is stored in `state.prev` (indexed by the
/// position masked with `state.w_mask`) and the position becomes the new head.
///
/// Hashing `count` positions needs `count + 3` filled bytes. When fewer are
/// available, only the positions that still have a complete 4-byte word are
/// inserted instead of panicking on an out-of-range slice.
///
/// # Safety
///
/// On x86/x86_64 this function is compiled with the `sse4.2` target feature
/// and on aarch64 with `neon`; the caller must ensure the running CPU
/// supports the feature enabled for the current architecture.
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon"))]
#[cfg_attr(target_arch = "x86", target_feature(enable = "sse4.2"))]
#[cfg_attr(target_arch = "x86_64", target_feature(enable = "sse4.2"))]
pub unsafe fn insert_string(state: &mut State, string: usize, count: usize) {
    let slice = &state.window.filled()[string + Self::HASH_CALC_OFFSET..];

    // It can happen that fewer than `count + 3` bytes are filled; clamp the
    // range so `windows(4)` simply yields fewer words in that case.
    // .take(count) generates worse assembly
    let slice = &slice[..Ord::min(slice.len(), count + 3)];

    for (i, w) in slice.windows(4).enumerate() {
        let idx = string as u16 + i as u16;

        let val = u32::from_le_bytes(w.try_into().unwrap());

        // SAFETY: `update_hash` carries the same target-feature attributes as
        // this function, and the caller guarantees that feature is available.
        let hm = unsafe { Self::update_hash(0, val) } as usize;

        let head = state.head[hm];
        if head != idx {
            state.prev[idx as usize & state.w_mask] = head;
            state.head[hm] = idx;
        }
    }
}
}

#[cfg(test)]
Expand All @@ -190,33 +236,35 @@ mod tests {
return;
}

if cfg!(target_arch = "x86") || cfg!(target_arch = "x86_64") {
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 1452438466);
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 435552201);
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 1452438466);
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 435552201);
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 1452438466);
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 435552201);
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 1452438466);
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 435552201);
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
assert_eq!(Crc32HashCalc::hash_calc(0, 170926112), 500028708);
assert_eq!(Crc32HashCalc::hash_calc(0, 537538592), 3694129053);
assert_eq!(Crc32HashCalc::hash_calc(0, 538970672), 373925026);
assert_eq!(Crc32HashCalc::hash_calc(0, 538976266), 4149335727);
assert_eq!(Crc32HashCalc::hash_calc(0, 538976288), 1767342659);
assert_eq!(Crc32HashCalc::hash_calc(0, 941629472), 4090502627);
assert_eq!(Crc32HashCalc::hash_calc(0, 775430176), 1744703325);
} else {
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2067507791);
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 2086141925);
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 716394180);
assert_eq!(Crc32HashCalc::hash_calc(0, 775430176), 1396070634);
assert_eq!(Crc32HashCalc::hash_calc(0, 941629472), 637105634);
unsafe {
if cfg!(target_arch = "x86") || cfg!(target_arch = "x86_64") {
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 1452438466);
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 435552201);
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 1452438466);
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 435552201);
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 1452438466);
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 435552201);
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 1452438466);
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 435552201);
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
assert_eq!(Crc32HashCalc::hash_calc(0, 170926112), 500028708);
assert_eq!(Crc32HashCalc::hash_calc(0, 537538592), 3694129053);
assert_eq!(Crc32HashCalc::hash_calc(0, 538970672), 373925026);
assert_eq!(Crc32HashCalc::hash_calc(0, 538976266), 4149335727);
assert_eq!(Crc32HashCalc::hash_calc(0, 538976288), 1767342659);
assert_eq!(Crc32HashCalc::hash_calc(0, 941629472), 4090502627);
assert_eq!(Crc32HashCalc::hash_calc(0, 775430176), 1744703325);
} else {
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2067507791);
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 2086141925);
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 716394180);
assert_eq!(Crc32HashCalc::hash_calc(0, 775430176), 1396070634);
assert_eq!(Crc32HashCalc::hash_calc(0, 941629472), 637105634);
}
}
}

Expand Down
Loading