Skip to content

Commit

Permalink
Make faster
Browse files: browse the repository at this point in the history
  • Loading branch information
alion02 committed Dec 3, 2024
1 parent 99a7aaa commit c4e7221
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 18 deletions.
59 changes: 42 additions & 17 deletions src/day2.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::intrinsics::unlikely;

use super::*;

#[target_feature(enable = "avx2,bmi1,bmi2,cmpxchg16b,lzcnt,movbe,popcnt")]
Expand Down Expand Up
@@ -25,33 +27,56 @@ unsafe fn inner1(s: &str) -> u32 {
};
}

step!(mut prev);
let mut sign = 0;
step!(mut v0);
step!(mut v1);

for num_idx in 1.. {
step!(value);
if v1 as i32 - v0 as i32 > 0 {
loop {
let diff = v1.wrapping_sub(v0).wrapping_sub(1);

let diff = value.wrapping_sub(prev) as i32;
if diff > 2 {
let chunk =
(s.get_unchecked(i - 1) as *const _ as *const u8x32).read_unaligned();

if num_idx == 1 {
sign = diff;
}
let newlines = chunk.simd_eq(Simd::splat(b'\n')).to_bitmask() as u32;

if diff ^ sign < 0 || value.abs_diff(prev).wrapping_sub(1) > 2 {
let chunk = (s.get_unchecked(i - 1) as *const _ as *const u8x32).read_unaligned();
i += newlines.trailing_zeros() as usize;

let newlines = chunk.simd_eq(Simd::splat(b'\n')).to_bitmask() as u32;
break;
}

i += newlines.trailing_zeros() as usize;
if *s.get_unchecked(i - 1) == b'\n' {
sum += 1;
break;
}

break;
step!(next);
v0 = v1;
v1 = next;
}
} else {
loop {
let diff = v0.wrapping_sub(v1).wrapping_sub(1);

if diff > 2 {
let chunk =
(s.get_unchecked(i - 1) as *const _ as *const u8x32).read_unaligned();

let newlines = chunk.simd_eq(Simd::splat(b'\n')).to_bitmask() as u32;

i += newlines.trailing_zeros() as usize;

break;
}

prev = value;
if *s.get_unchecked(i - 1) == b'\n' {
sum += 1;
break;
}

if *s.get_unchecked(i - 1) == b'\n' {
sum += 1;
break;
step!(next);
v0 = v1;
v1 = next;
}
}

Expand Down
3 changes: 2 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#![feature(thread_local, portable_simd)]
#![feature(thread_local, portable_simd, core_intrinsics)]
#![allow(
internal_features,
clippy::missing_safety_doc,
clippy::identity_op,
clippy::zero_prefixed_literal
Expand Down

0 comments on commit c4e7221

Please sign in to comment.