From a2bacf8ab58af4c1a0247026ea131443d6066602 Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Fri, 8 Nov 2024 12:55:28 -0800 Subject: [PATCH] AMDGPU: Use "countMaxActiveBits() <= 5" to define uint5Bits countMaxTrailingOnes() is not the correct check: it only bounds the run of set bits at the low end of the value, so a width whose low bit is known to be zero (for example, a value that has been shifted left by 1) passes the test even though its upper bits are unknown. countMaxActiveBits() <= 5 instead requires every bit above the low five to be known zero. This patch follows the suggestion from https://github.com/llvm/llvm-project/pull/115372. --- llvm/lib/Target/AMDGPU/SIInstructions.td | 2 +- llvm/test/CodeGen/AMDGPU/extract-lowbits.ll | 25 +++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 0658e030ffa5d6..755cbb7fb65492 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3554,7 +3554,7 @@ def : AMDGPUPat < >; def uint5Bits : PatLeaf<(i32 VGPR_32:$width), [{ - return CurDAG->computeKnownBits(SDValue(N, 0)).countMaxTrailingOnes() <= 5; + return CurDAG->computeKnownBits(SDValue(N, 0)).countMaxActiveBits() <= 5; }]>; // x << (bitwidth - y) >> (bitwidth - y) diff --git a/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll b/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll index 3de8db2c6a448e..0e5a68773a6ba8 100644 --- a/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll +++ b/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll @@ -163,6 +163,31 @@ define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind { ret i32 %masked } +define i32 @bzhi32_d0_even(i32 %val, i32 %numlowbits) nounwind { +; SI-LABEL: bzhi32_d0_even: +; SI: ; %bb.0: +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1 +; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 +; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; SI-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: bzhi32_d0_even: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1 +; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 +; VI-NEXT: v_lshrrev_b32_e32 v0, v1, 
v0 +; VI-NEXT: s_setpc_b64 s[30:31] + %times2 = shl i32 %numlowbits, 1 + %numhighbits = sub i32 32, %times2 + %highbitscleared = shl i32 %val, %numhighbits + %masked = lshr i32 %highbitscleared, %numhighbits + ret i32 %masked +} + define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind { ; SI-LABEL: bzhi32_d1_indexzext: ; SI: ; %bb.0: