diff --git a/src/day5.rs b/src/day5.rs index a443100..8b3f491 100644 --- a/src/day5.rs +++ b/src/day5.rs @@ -60,6 +60,51 @@ unsafe fn inner1(s: &[u8]) -> u32 { // } asm!( + "vmovdqa ymmword ptr[{table}], {zero}", + "vmovdqa ymmword ptr[{table} + 32], {zero}", + "vmovdqa ymmword ptr[{table} + 64], {zero}", + "vmovdqa ymmword ptr[{table} + 96], {zero}", + "vmovdqa ymmword ptr[{table} + 128], {zero}", + "vmovdqa ymmword ptr[{table} + 160], {zero}", + "vmovdqa ymmword ptr[{table} + 192], {zero}", + "vmovdqa ymmword ptr[{table} + 224], {zero}", + "vmovdqa ymmword ptr[{table} + 256], {zero}", + "vmovdqa ymmword ptr[{table} + 288], {zero}", + "vmovdqa ymmword ptr[{table} + 320], {zero}", + "vmovdqa ymmword ptr[{table} + 352], {zero}", + "vmovdqa ymmword ptr[{table} + 384], {zero}", + "vmovdqa ymmword ptr[{table} + 416], {zero}", + "vmovdqa ymmword ptr[{table} + 448], {zero}", + "vmovdqa ymmword ptr[{table} + 480], {zero}", + "vmovdqa ymmword ptr[{table} + 512], {zero}", + "vmovdqa ymmword ptr[{table} + 544], {zero}", + "vmovdqa ymmword ptr[{table} + 576], {zero}", + "vmovdqa ymmword ptr[{table} + 608], {zero}", + "vmovdqa ymmword ptr[{table} + 640], {zero}", + "vmovdqa ymmword ptr[{table} + 672], {zero}", + "vmovdqa ymmword ptr[{table} + 704], {zero}", + "vmovdqa ymmword ptr[{table} + 736], {zero}", + "vmovdqa ymmword ptr[{table} + 768], {zero}", + "vmovdqa ymmword ptr[{table} + 800], {zero}", + "vmovdqa ymmword ptr[{table} + 832], {zero}", + "vmovdqa ymmword ptr[{table} + 864], {zero}", + "vmovdqa ymmword ptr[{table} + 896], {zero}", + "vmovdqa ymmword ptr[{table} + 928], {zero}", + "vmovdqa ymmword ptr[{table} + 960], {zero}", + "vmovdqa ymmword ptr[{table} + 992], {zero}", + "vmovdqa ymmword ptr[{table} + 1024], {zero}", + "vmovdqa ymmword ptr[{table} + 1056], {zero}", + "vmovdqa ymmword ptr[{table} + 1088], {zero}", + "vmovdqa ymmword ptr[{table} + 1120], {zero}", + "vmovdqa ymmword ptr[{table} + 1152], {zero}", + "vmovdqa ymmword ptr[{table} + 1184], {zero}", + "vmovdqa ymmword ptr[{table} + 1216], {zero}", + "vmovdqa ymmword ptr[{table} + 1248], {zero}", + "vmovdqa ymmword ptr[{table} + 1280], {zero}", + "vmovdqa ymmword ptr[{table} + 1312], {zero}", + "vmovdqa ymmword ptr[{table} + 1344], {zero}", + "vmovdqa ymmword ptr[{table} + 1376], {zero}", + "vmovdqa ymmword ptr[{table} + 1408], {zero}", "vmovdqu {chunk:x}, xmmword ptr[{ptr}]", "vpsubb {chunk:x}, {chunk:x}, {normalize1:x}", "vpshufb {chunk:x}, {chunk:x}, {shuffle1}", @@ -120,6 +165,7 @@ unsafe fn inner1(s: &[u8]) -> u32 { "bts dword ptr[{table} + {t1}], {t2:e}", table = in(reg) matrix, ptr = inout(reg) s.as_ptr() => _, + zero = in(ymm_reg) u8x32::splat(0), normalize1 = in(xmm_reg) u8x16::from_array([b'1', b'0', 0, b'1', b'0', 0, b'1', b'0', 0, b'1', b'0', 0, 0, 0, 0, 0]), shuffle1 = in(xmm_reg) i8x16::from_array([0, 1, -1, -1, 3, 4, -1, -1, 6, 7, -1, -1, 9, 10, -1, -1]), mults1 = in(xmm_reg) u8x16::from_array([160, 16, 0, 0, 10, 1, 0, 0, 160, 16, 0, 0, 10, 1, 0, 0]),