Skip to content

Commit

Permalink
Do part two, too
Browse files Browse the repository at this point in the history
  • Loading branch information
alion02 committed Dec 5, 2024
1 parent a0458d8 commit 995a686
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 6 deletions.
10 changes: 5 additions & 5 deletions benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@ pub fn day4(c: &mut Criterion) {
let s = s.as_str();

c.bench_function("day4 part1", |b| b.iter(|| part1(black_box(s))));
// c.bench_function("day4 part2", |b| b.iter(|| part2(black_box(s))));
c.bench_function("day4 part2", |b| b.iter(|| part2(black_box(s))));

assert_eq!(
part1(s).to_string(),
read_to_string("./outputs/4p1.txt").unwrap(),
);
// assert_eq!(
// part2(s).to_string(),
// read_to_string("./outputs/4p2.txt").unwrap(),
// );
assert_eq!(
part2(s).to_string(),
read_to_string("./outputs/4p2.txt").unwrap(),
);
}

criterion_group!(benches, day4);
Expand Down
43 changes: 42 additions & 1 deletion src/day4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,48 @@ unsafe fn inner1(s: &[u8]) -> u32 {

#[target_feature(enable = "avx2,bmi1,bmi2,cmpxchg16b,lzcnt,movbe,popcnt")]
unsafe fn inner2(s: &[u8]) -> u32 {
0
let r = s.as_ptr_range();
let mut ptr = r.start;
let end = r.end;
let mut sums0 = i8x32::splat(0);
let mut sums1 = i8x32::splat(0);
loop {
macro_rules! load {
($x:expr, $y:expr) => {
(ptr.add($x).add($y * 141) as *const i8x32).read_unaligned()
};
}
let v00 = load!(0, 0);
let v20 = load!(2, 0);
let v11 = load!(1, 1);
let v02 = load!(0, 2);
let v22 = load!(2, 2);
let cross0 = (v00 + v22).simd_eq(Simd::splat(b'M' + b'S').cast());
let cross1 = (v20 + v02).simd_eq(Simd::splat(b'M' + b'S').cast());
let cross = cross0 & cross1;
let has_a = v11.simd_eq(Simd::splat(b'A').cast());
sums0 -= (cross & has_a).to_int();
ptr = ptr.add(32);
let v00 = load!(0, 0);
let v20 = load!(2, 0);
let v11 = load!(1, 1);
let v02 = load!(0, 2);
let v22 = load!(2, 2);
let cross0 = (v00 + v22).simd_eq(Simd::splat(b'M' + b'S').cast());
let cross1 = (v20 + v02).simd_eq(Simd::splat(b'M' + b'S').cast());
let cross = cross0 & cross1;
let has_a = v11.simd_eq(Simd::splat(b'A').cast());
sums1 -= (cross & has_a).to_int();
ptr = ptr.add(32);
if ptr >= end {
break;
}
}
let sums0: u16x16 = _mm256_maddubs_epi16(sums0.into(), u8x32::splat(1).into()).into();
let sums1: u16x16 = _mm256_maddubs_epi16(sums1.into(), u8x32::splat(1).into()).into();
let sums = sums0 + sums1;
let sums: u32x8 = _mm256_madd_epi16(sums.into(), u16x16::splat(1).into()).into();
sums.reduce_sum()
}

pub fn part1(s: &str) -> impl Display {
Expand Down

0 comments on commit 995a686

Please sign in to comment.