Skip to content

Commit

Permalink
added comments
Browse files Browse the repository at this point in the history
  • Loading branch information
mcroomp committed Oct 20, 2024
1 parent c74ff46 commit 0e57e32
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 6 deletions.
2 changes: 2 additions & 0 deletions src/u32x4_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,8 @@ impl u32x4 {
}
}

/// Multiplies 32x32 bit to 64 bit and then only keeps the high 32 bits of the result.
/// Useful for implementing division by a constant value (see the t_usefulness example).
#[inline]
#[must_use]
pub fn mul_keep_high(self, rhs: Self) -> Self {
Expand Down
10 changes: 10 additions & 0 deletions src/u32x8_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,16 @@ impl u32x8 {
}
}

/// Lane-wise 32x32 -> 64 bit multiply that discards the low half of every
/// product and returns only the upper 32 bits.
///
/// Handy for dividing by a constant through a "magic" multiplier (see the
/// t_usefulness example).
#[inline]
#[must_use]
pub fn mul_keep_high(self: u32x8, rhs: u32x8) -> u32x8 {
  // No dedicated avx2 path here: the u32x4 implementation is already using
  // it, so an avx2 version would not benefit the multiply itself (though it
  // might help with the shuffling afterwards). Each half is simply delegated.
  let a = self.a.mul_keep_high(rhs.a);
  let b = self.b.mul_keep_high(rhs.b);
  u32x8 { a, b }
}

#[inline]
#[must_use]
pub fn blend(self, t: Self, f: Self) -> Self {
Expand Down
8 changes: 8 additions & 0 deletions tests/all_tests/t_u32x8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -315,3 +315,11 @@ fn test_u32x8_mul_widen_even() {
let actual = a.mul_widen_even(b);
assert_eq!(expected, actual);
}

// Checks `u32x8::mul_keep_high` against a scalar reference: widen both
// operands to u64, multiply, and keep the upper 32 bits of the product.
#[test]
fn impl_u32x8_mul_keep_high() {
  crate::test_random_vector_vs_scalar(
    |lhs: u32x8, rhs| lhs.mul_keep_high(rhs),
    |x, y| {
      let wide = u64::from(x) * u64::from(y);
      (wide >> 32) as u32
    },
  );
}
7 changes: 1 addition & 6 deletions tests/all_tests/t_usefulness.rs
Original file line number Diff line number Diff line change
Expand Up @@ -391,12 +391,7 @@ fn generate_branch_free_divide_magic_shift(denom: u32x8) -> (u32x8, u32x8) {

// using the previously generated magic and shift, calculate the division
fn branch_free_divide(numerator: u32x8, magic: u32x8, shift: u32x8) -> u32x8 {
let a: [u32x4; 2] = cast(numerator);
let b: [u32x4; 2] = cast(magic);

let q = [a[0].mul_keep_high(b[0]), a[1].mul_keep_high(b[1])];

let q: u32x8 = cast(q);
let q = numerator.mul_keep_high(magic);

let t = ((numerator - q) >> 1) + q;
t >> shift
Expand Down

0 comments on commit 0e57e32

Please sign in to comment.