Skip to content

Commit 26ccc6b

Browse files
dtcxzyw authored and tstellar committed
[InstCombine] Drop nuw flag when CtlzOp is a sub nuw (#91776)
See the following case:

```
define i32 @SRC1(i32 %x) {
  %dec = sub nuw i32 -2, %x
  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
  %sub = sub nsw i32 32, %ctlz
  %shl = shl i32 1, %sub
  %ugt = icmp ult i32 %x, -2
  %sel = select i1 %ugt, i32 %shl, i32 1
  ret i32 %sel
}

define i32 @tgt1(i32 %x) {
  %dec = sub nuw i32 -2, %x
  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
  %sub = sub nsw i32 32, %ctlz
  %and = and i32 %sub, 31
  %shl = shl nuw i32 1, %and
  ret i32 %shl
}
```

`nuw` in `%dec` should be dropped after the select instruction is eliminated.

Alive2: https://alive2.llvm.org/ce/z/7S9529
Fixes #91691.

(cherry picked from commit b5f4210)
1 parent aa2549e commit 26ccc6b

File tree

2 files changed

+48
-2
lines changed

2 files changed

+48
-2
lines changed

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

+12-2
Original file line number | Diff line number | Diff line change
@@ -3201,7 +3201,8 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
32013201
// pattern.
32023202
static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
32033203
const APInt *Cond1, Value *CtlzOp,
3204-
unsigned BitWidth) {
3204+
unsigned BitWidth,
3205+
bool &ShouldDropNUW) {
32053206
// The challenge in recognizing std::bit_ceil(X) is that the operand is used
32063207
// for the CTLZ proper and select condition, each possibly with some
32073208
// operation like add and sub.
@@ -3224,6 +3225,8 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
32243225
ConstantRange CR = ConstantRange::makeExactICmpRegion(
32253226
CmpInst::getInversePredicate(Pred), *Cond1);
32263227

3228+
ShouldDropNUW = false;
3229+
32273230
// Match the operation that's used to compute CtlzOp from CommonAncestor. If
32283231
// CtlzOp == CommonAncestor, return true as no operation is needed. If a
32293232
// match is found, execute the operation on CR, update CR, and return true.
@@ -3237,6 +3240,7 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
32373240
return true;
32383241
}
32393242
if (match(CtlzOp, m_Sub(m_APInt(C), m_Specific(CommonAncestor)))) {
3243+
ShouldDropNUW = true;
32403244
CR = ConstantRange(*C).sub(CR);
32413245
return true;
32423246
}
@@ -3306,14 +3310,20 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder) {
33063310
Pred = CmpInst::getInversePredicate(Pred);
33073311
}
33083312

3313+
bool ShouldDropNUW;
3314+
33093315
if (!match(FalseVal, m_One()) ||
33103316
!match(TrueVal,
33113317
m_OneUse(m_Shl(m_One(), m_OneUse(m_Sub(m_SpecificInt(BitWidth),
33123318
m_Value(Ctlz)))))) ||
33133319
!match(Ctlz, m_Intrinsic<Intrinsic::ctlz>(m_Value(CtlzOp), m_Zero())) ||
3314-
!isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth))
3320+
!isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth,
3321+
ShouldDropNUW))
33153322
return nullptr;
33163323

3324+
if (ShouldDropNUW)
3325+
cast<Instruction>(CtlzOp)->setHasNoUnsignedWrap(false);
3326+
33173327
// Build 1 << (-CTLZ & (BitWidth-1)). The negation likely corresponds to a
33183328
// single hardware instruction as opposed to BitWidth - CTLZ, where BitWidth
33193329
// is an integer constant. Masking with BitWidth-1 comes free on some

llvm/test/Transforms/InstCombine/bit_ceil.ll

+36
Original file line number | Diff line number | Diff line change
@@ -284,6 +284,42 @@ define <4 x i32> @bit_ceil_v4i32(<4 x i32> %x) {
284284
ret <4 x i32> %sel
285285
}
286286

287+
define i32 @pr91691(i32 %0) {
288+
; CHECK-LABEL: @pr91691(
289+
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 -2, [[TMP0:%.*]]
290+
; CHECK-NEXT: [[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false)
291+
; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]]
292+
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 31
293+
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i32 1, [[TMP5]]
294+
; CHECK-NEXT: ret i32 [[TMP6]]
295+
;
296+
%2 = sub nuw i32 -2, %0
297+
%3 = tail call i32 @llvm.ctlz.i32(i32 %2, i1 false)
298+
%4 = sub i32 32, %3
299+
%5 = shl i32 1, %4
300+
%6 = icmp ult i32 %0, -2
301+
%7 = select i1 %6, i32 %5, i32 1
302+
ret i32 %7
303+
}
304+
305+
define i32 @pr91691_keep_nsw(i32 %0) {
306+
; CHECK-LABEL: @pr91691_keep_nsw(
307+
; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 -2, [[TMP0:%.*]]
308+
; CHECK-NEXT: [[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false)
309+
; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]]
310+
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 31
311+
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i32 1, [[TMP5]]
312+
; CHECK-NEXT: ret i32 [[TMP6]]
313+
;
314+
%2 = sub nsw i32 -2, %0
315+
%3 = tail call i32 @llvm.ctlz.i32(i32 %2, i1 false)
316+
%4 = sub i32 32, %3
317+
%5 = shl i32 1, %4
318+
%6 = icmp ult i32 %0, -2
319+
%7 = select i1 %6, i32 %5, i32 1
320+
ret i32 %7
321+
}
322+
287323
declare i32 @llvm.ctlz.i32(i32, i1 immarg)
288324
declare i64 @llvm.ctlz.i64(i64, i1 immarg)
289325
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)

0 commit comments

Comments
 (0)