Skip to content

Commit 1fae4b4

Browse files
committed
[InstCombine] Fold mul nuw + lshr into a single multiplication when the multiplier constant is divisible by (1 << shift amount)
If c is divisible by (1 << ShAmtC), we can fold this pattern: lshr (mul nuw x, c), ShAmtC -> mul nuw x, (c >> ShAmtC). Alive2 proof: https://alive2.llvm.org/ce/z/ox4wAt — Fixes #54824. Reviewed By: spatel, lebedev.ri, craig.topper. Differential Revision: https://reviews.llvm.org/D123453
1 parent e0ee080 commit 1fae4b4

File tree

2 files changed

+42
-17
lines changed

2 files changed

+42
-17
lines changed

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp

+25-8
Original file line numberDiff line numberDiff line change
@@ -1163,15 +1163,32 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
11631163
}
11641164
}
11651165

1166-
// Look for a "splat" mul pattern - it replicates bits across each half of
1167-
// a value, so a right shift is just a mask of the low bits:
1168-
// lshr i[2N] (mul nuw X, (2^N)+1), N --> and iN X, (2^N)-1
1169-
// TODO: Generalize to allow more than just half-width shifts?
11701166
const APInt *MulC;
1171-
if (match(Op0, m_NUWMul(m_Value(X), m_APInt(MulC))) &&
1172-
BitWidth > 2 && ShAmtC * 2 == BitWidth && (*MulC - 1).isPowerOf2() &&
1173-
MulC->logBase2() == ShAmtC)
1174-
return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2));
1167+
if (match(Op0, m_NUWMul(m_Value(X), m_APInt(MulC)))) {
1168+
// Look for a "splat" mul pattern - it replicates bits across each half of
1169+
// a value, so a right shift is just a mask of the low bits:
1170+
// lshr i[2N] (mul nuw X, (2^N)+1), N --> and iN X, (2^N)-1
1171+
// TODO: Generalize to allow more than just half-width shifts?
1172+
if (BitWidth > 2 && ShAmtC * 2 == BitWidth && (*MulC - 1).isPowerOf2() &&
1173+
MulC->logBase2() == ShAmtC)
1174+
return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2));
1175+
1176+
// The one-use check is not strictly necessary, but codegen may not be
1177+
// able to invert the transform and perf may suffer with an extra mul
1178+
// instruction.
1179+
if (Op0->hasOneUse()) {
1180+
APInt NewMulC = MulC->lshr(ShAmtC);
1181+
// If MulC is divisible by (1 << ShAmtC):
1182+
// lshr (mul nuw x, MulC), ShAmtC -> mul nuw x, (MulC >> ShAmtC)
1183+
if (MulC->eq(NewMulC.shl(ShAmtC))) {
1184+
auto *NewMul =
1185+
BinaryOperator::CreateNUWMul(X, ConstantInt::get(Ty, NewMulC));
1186+
BinaryOperator *OrigMul = cast<BinaryOperator>(Op0);
1187+
NewMul->setHasNoSignedWrap(OrigMul->hasNoSignedWrap());
1188+
return NewMul;
1189+
}
1190+
}
1191+
}
11751192

11761193
// Try to narrow a bswap:
11771194
// (bswap (zext X)) >> C --> zext (bswap X >> C')

llvm/test/Transforms/InstCombine/shift-logic.ll

+17-9
Original file line numberDiff line numberDiff line change
@@ -259,9 +259,8 @@ define i32 @PR44028(i32 %x) {
259259

260260
define i64 @lshr_mul(i64 %0) {
261261
; CHECK-LABEL: @lshr_mul(
262-
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP0:%.*]], 52
263-
; CHECK-NEXT: [[TMP3:%.*]] = lshr exact i64 [[TMP2]], 2
264-
; CHECK-NEXT: ret i64 [[TMP3]]
262+
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP0:%.*]], 13
263+
; CHECK-NEXT: ret i64 [[TMP2]]
265264
;
266265
%2 = mul nuw i64 %0, 52
267266
%3 = lshr i64 %2, 2
@@ -270,9 +269,8 @@ define i64 @lshr_mul(i64 %0) {
270269

271270
define i64 @lshr_mul_nuw_nsw(i64 %0) {
272271
; CHECK-LABEL: @lshr_mul_nuw_nsw(
273-
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[TMP0:%.*]], 52
274-
; CHECK-NEXT: [[TMP3:%.*]] = lshr exact i64 [[TMP2]], 2
275-
; CHECK-NEXT: ret i64 [[TMP3]]
272+
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[TMP0:%.*]], 13
273+
; CHECK-NEXT: ret i64 [[TMP2]]
276274
;
277275
%2 = mul nuw nsw i64 %0, 52
278276
%3 = lshr i64 %2, 2
@@ -281,9 +279,8 @@ define i64 @lshr_mul_nuw_nsw(i64 %0) {
281279

282280
define <4 x i32> @lshr_mul_vector(<4 x i32> %0) {
283281
; CHECK-LABEL: @lshr_mul_vector(
284-
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw <4 x i32> [[TMP0:%.*]], <i32 52, i32 52, i32 52, i32 52>
285-
; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <4 x i32> [[TMP2]], <i32 2, i32 2, i32 2, i32 2>
286-
; CHECK-NEXT: ret <4 x i32> [[TMP3]]
282+
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw <4 x i32> [[TMP0:%.*]], <i32 13, i32 13, i32 13, i32 13>
283+
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
287284
;
288285
%2 = mul nuw <4 x i32> %0, <i32 52, i32 52, i32 52, i32 52>
289286
%3 = lshr <4 x i32> %2, <i32 2, i32 2, i32 2, i32 2>
@@ -324,3 +321,14 @@ define i64 @lshr_mul_negative_nonuw(i64 %0) {
324321
%3 = lshr i64 %2, 2
325322
ret i64 %3
326323
}
324+
325+
define i64 @lshr_mul_negative_nsw(i64 %0) {
326+
; CHECK-LABEL: @lshr_mul_negative_nsw(
327+
; CHECK-NEXT: [[TMP2:%.*]] = mul nsw i64 [[TMP0:%.*]], 52
328+
; CHECK-NEXT: [[TMP3:%.*]] = lshr exact i64 [[TMP2]], 2
329+
; CHECK-NEXT: ret i64 [[TMP3]]
330+
;
331+
%2 = mul nsw i64 %0, 52
332+
%3 = lshr i64 %2, 2
333+
ret i64 %3
334+
}

0 commit comments

Comments
 (0)