From dc32fb1546b6f352bbb2549e50ca072b1de7984d Mon Sep 17 00:00:00 2001
From: Nick Fitzgerald
Date: Tue, 31 Jan 2023 10:53:50 -0800
Subject: [PATCH 1/2] Cranelift: Rewrite `(x>>k)<<k` into masking off the bottom `k` bits

---

+;; `(x >> k) << k` is the same as masking off the bottom `k` bits (regardless if
+;; this is a signed or unsigned shift right).
+(rule (simplify (ishl (fits_in_64 ty)
+                      (ushr ty x (iconst _ (u64_from_imm64 k)))
+                      (iconst _ (u64_from_imm64 k))))
+      (let ((mask u64 (u64_shl 0xFFFFFFFFFFFFFFFF k)))
+        (band ty x (iconst ty (imm64_masked ty mask)))))
+(rule (simplify (ishl (fits_in_64 ty)
+                      (sshr ty x (iconst _ (u64_from_imm64 k)))
+                      (iconst _ (u64_from_imm64 k))))
+      (let ((mask u64 (u64_shl 0xFFFFFFFFFFFFFFFF k)))
+        (band ty x (iconst ty (imm64_masked ty mask)))))
+
 ;; Rematerialize ALU-op-with-imm and iconsts in each block where they're
 ;; used. This is neutral (add-with-imm) or positive (iconst) for
 ;; register pressure, and these ops are very cheap.
diff --git a/cranelift/filetests/filetests/egraph/algebraic.clif b/cranelift/filetests/filetests/egraph/algebraic.clif
index 409788ce822a..33e83936c772 100644
--- a/cranelift/filetests/filetests/egraph/algebraic.clif
+++ b/cranelift/filetests/filetests/egraph/algebraic.clif
@@ -20,3 +20,69 @@ block0:
 ; check: v2 = iconst.i64 0x9876_5432
 ; check: return v2 ; v2 = 0x9876_5432
 }
+
+function %unsigned_shift_right_shift_left_i8(i8) -> i8 {
+block0(v0: i8):
+    v1 = iconst.i8 5
+    v2 = ushr v0, v1
+    v3 = ishl v2, v1
+    return v3
+    ; check: v4 = iconst.i8 224
+    ; check: v5 = band v0, v4
+    ; return v5
+}
+
+function %unsigned_shift_right_shift_left_i32(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 5
+    v2 = ushr v0, v1
+    v3 = ishl v2, v1
+    return v3
+    ; check: v4 = iconst.i32 0xffff_ffe0
+    ; check: v5 = band v0, v4
+    ; return v5
+}
+
+function %unsigned_shift_right_shift_left_i64(i64) -> i64 {
+block0(v0: i64):
+    v1 = iconst.i64 5
+    v2 = ushr v0, v1
+    v3 = ishl v2, v1
+    return v3
+    ; check: v4 = iconst.i64 -32
+    ; check: v5 = band v0, v4
+    ; return v5
+}
+
+function %signed_shift_right_shift_left_i8(i8) -> i8 {
+block0(v0: i8):
+    v1 = iconst.i8 5
+    v2 = sshr v0, v1
+    v3 = ishl v2, v1
+    return v3
+    ; check: v4 = iconst.i8 224
+    ; check: v5 = band v0, v4
+    ; return v5
+}
+
+function %signed_shift_right_shift_left_i32(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 5
+    v2 = sshr v0, v1
+    v3 = ishl v2, v1
+    return v3
+    ; check: v4 = iconst.i32 0xffff_ffe0
+    ; check: v5 = band v0, v4
+    ; return v5
+}
+
+function %signed_shift_right_shift_left_i64(i64) -> i64 {
+block0(v0: i64):
+    v1 = iconst.i64 5
+    v2 = sshr v0, v1
+    v3 = ishl v2, v1
+    return v3
+    ; check: v4 = iconst.i64 -32
+    ; check: v5 = band v0, v4
+    ; return v5
+}

From a5a43b9fb2da479b194b1bfb66ac5cf44a215ef6 Mon Sep 17 00:00:00 2001
From: Nick Fitzgerald
Date: Tue, 31 Jan 2023 12:38:17 -0800
Subject: [PATCH 2/2] Add a runtest for exercising our rewrite of `(x >> k) << k` into masking

---
 .../filetests/runtests/shift-right-left.clif  | 74 +++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 cranelift/filetests/filetests/runtests/shift-right-left.clif

diff --git a/cranelift/filetests/filetests/runtests/shift-right-left.clif b/cranelift/filetests/filetests/runtests/shift-right-left.clif
new file mode 100644
index 000000000000..258ae78d41bf
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/shift-right-left.clif
@@ -0,0 +1,74 @@
+;; Test that our rewrite of `(x >> k) << k` into masking is correct.
+
+test interpret
+test run
+target aarch64
+target x86_64
+target riscv64
+target s390x
+
+function %unsigned_shift_right_shift_left_i8(i8) -> i8 {
+block0(v0: i8):
+    v1 = iconst.i8 5
+    v2 = ushr v0, v1
+    v3 = ishl v2, v1
+    return v3
+}
+; run: %unsigned_shift_right_shift_left_i8(-1) == 0xe0
+; run: %unsigned_shift_right_shift_left_i8(0) == 0
+; run: %unsigned_shift_right_shift_left_i8(0xaa) == 0xa0
+
+function %unsigned_shift_right_shift_left_i32(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 5
+    v2 = ushr v0, v1
+    v3 = ishl v2, v1
+    return v3
+}
+; run: %unsigned_shift_right_shift_left_i32(-1) == 0xffffffe0
+; run: %unsigned_shift_right_shift_left_i32(0) == 0
+; run: %unsigned_shift_right_shift_left_i32(0xaaaaaaaa) == 0xaaaaaaa0
+
+function %unsigned_shift_right_shift_left_i64(i64) -> i64 {
+block0(v0: i64):
+    v1 = iconst.i64 5
+    v2 = ushr v0, v1
+    v3 = ishl v2, v1
+    return v3
+}
+; run: %unsigned_shift_right_shift_left_i64(-1) == 0xffffffffffffffe0
+; run: %unsigned_shift_right_shift_left_i64(0) == 0
+; run: %unsigned_shift_right_shift_left_i64(0xaaaaaaaaaaaaaaaa) == 0xaaaaaaaaaaaaaaa0
+
+function %signed_shift_right_shift_left_i8(i8) -> i8 {
+block0(v0: i8):
+    v1 = iconst.i8 5
+    v2 = sshr v0, v1
+    v3 = ishl v2, v1
+    return v3
+}
+; run: %signed_shift_right_shift_left_i8(-1) == 0xe0
+; run: %signed_shift_right_shift_left_i8(0) == 0
+; run: %signed_shift_right_shift_left_i8(0xaa) == 0xa0
+
+function %signed_shift_right_shift_left_i32(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 5
+    v2 = sshr v0, v1
+    v3 = ishl v2, v1
+    return v3
+}
+; run: %signed_shift_right_shift_left_i32(-1) == 0xffffffe0
+; run: %signed_shift_right_shift_left_i32(0) == 0
+; run: %signed_shift_right_shift_left_i32(0xaaaaaaaa) == 0xaaaaaaa0
+
+function %signed_shift_right_shift_left_i64(i64) -> i64 {
+block0(v0: i64):
+    v1 = iconst.i64 5
+    v2 = sshr v0, v1
+    v3 = ishl v2, v1
+    return v3
+}
+; run: %signed_shift_right_shift_left_i64(-1) == 0xffffffffffffffe0
+; run: %signed_shift_right_shift_left_i64(0) == 0
+; run: %signed_shift_right_shift_left_i64(0xaaaaaaaaaaaaaaaa) == 0xaaaaaaaaaaaaaaa0
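
A quick cross-check of the algebra the two ISLE rules above rely on (a standalone sketch, not part of the patch series): `(x >> k) << k` equals `x & (!0 << k)` for both logical (`ushr`-style) and arithmetic (`sshr`-style) right shifts. The Rust program below exercises that identity over every in-range 64-bit shift amount and a handful of sample values; the sample set and names are illustrative, not Cranelift APIs.

fn main() {
    // A few sample inputs, including the values exercised by the runtest above
    // (-1 is u64::MAX, plus 0 and the 0xAA... pattern).
    let samples: [u64; 5] = [0, 1, u64::MAX, 0xAAAA_AAAA_AAAA_AAAA, 0x1234_5678_9ABC_DEF0];
    for k in 0..64u32 {
        // Mask with the bottom `k` bits cleared: 0xFFFF_FFFF_FFFF_FFFF << k.
        let mask: u64 = !0u64 << k;
        for &x in &samples {
            // Logical (unsigned) right shift, then left shift.
            assert_eq!((x >> k) << k, x & mask);
            // Arithmetic (signed) right shift, then left shift.
            assert_eq!((((x as i64) >> k) << k) as u64, x & mask);
        }
    }
    println!("(x >> k) << k == x & (!0 << k) held for all tested x and k");
}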