-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Reland [SimplifyCFG] Delete the unnecessary range check for small mask operation #70542
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Allen (vfdff) Changes: Fix the compile crash when the default result does not exist for #65835. Full diff: https://github.com/llvm/llvm-project/pull/70542.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 68b5b1a78a3460e..a9585d2ed963f3a 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -6598,9 +6598,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// If the default destination is unreachable, or if the lookup table covers
// all values of the conditional variable, branch directly to the lookup table
// BB. Otherwise, check that the condition is within the case range.
- const bool DefaultIsReachable =
+ bool DefaultIsReachable =
!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
- const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
// Create the BB that does the lookups.
Module &Mod = *CommonDest->getParent()->getParent();
@@ -6631,6 +6630,27 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
BranchInst *RangeCheckBranch = nullptr;
+ // Grow the table to cover all possible index values to avoid the range check.
+ // It will use the default result to fill in the table hole later, so make
+ // sure it exist.
+ if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
+ ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false);
+ // Grow the table shouldn't have any size impact by checking
+ // WouldFitInRegister.
+ // TODO: Consider growing the table also when it doesn't fit in a register
+ // if no optsize is specified.
+ if (all_of(ResultTypes, [&](const auto &KV) {
+ return SwitchLookupTable::WouldFitInRegister(
+ DL, CR.getUpper().getLimitedValue(), KV.second /* ResultType */);
+ })) {
+ // The default branch is unreachable when we enlarge the lookup table.
+ // Adjust DefaultIsReachable to reuse code path.
+ TableSize = CR.getUpper().getZExtValue();
+ DefaultIsReachable = false;
+ }
+ }
+
+ const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
Builder.CreateBr(LookupBB);
if (DTU)
diff --git a/llvm/test/Transforms/SimplifyCFG/switch_mask.ll b/llvm/test/Transforms/SimplifyCFG/switch_mask.ll
index 8c97a0660d07074..867fbc0c5e9b6c2 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch_mask.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch_mask.ll
@@ -8,13 +8,11 @@ define i1 @switch_lookup_with_small_i1(i64 %x) {
; CHECK-LABEL: @switch_lookup_with_small_i1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[AND:%.*]] = and i64 [[X:%.*]], 15
-; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[AND]], 11
-; CHECK-NEXT: [[SWITCH_CAST:%.*]] = trunc i64 [[AND]] to i11
-; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i11 [[SWITCH_CAST]], 1
-; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i11 -1018, [[SWITCH_SHIFTAMT]]
-; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i11 [[SWITCH_DOWNSHIFT]] to i1
-; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i1 [[SWITCH_MASKED]], i1 false
-; CHECK-NEXT: ret i1 [[TMP1]]
+; CHECK-NEXT: [[SWITCH_CAST:%.*]] = trunc i64 [[AND]] to i16
+; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i16 [[SWITCH_CAST]], 1
+; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i16 1030, [[SWITCH_SHIFTAMT]]
+; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i16 [[SWITCH_DOWNSHIFT]] to i1
+; CHECK-NEXT: ret i1 [[SWITCH_MASKED]]
;
entry:
%and = and i64 %x, 15
@@ -37,13 +35,11 @@ define i8 @switch_lookup_with_small_i8(i64 %x) {
; CHECK-LABEL: @switch_lookup_with_small_i8(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[REM:%.*]] = urem i64 [[X:%.*]], 5
-; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[REM]], 3
-; CHECK-NEXT: [[SWITCH_CAST:%.*]] = trunc i64 [[REM]] to i24
-; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i24 [[SWITCH_CAST]], 8
-; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i24 460303, [[SWITCH_SHIFTAMT]]
-; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i24 [[SWITCH_DOWNSHIFT]] to i8
-; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i8 [[SWITCH_MASKED]], i8 0
-; CHECK-NEXT: ret i8 [[TMP1]]
+; CHECK-NEXT: [[SWITCH_CAST:%.*]] = trunc i64 [[REM]] to i40
+; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul nuw nsw i40 [[SWITCH_CAST]], 8
+; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i40 460303, [[SWITCH_SHIFTAMT]]
+; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i40 [[SWITCH_DOWNSHIFT]] to i8
+; CHECK-NEXT: ret i8 [[SWITCH_MASKED]]
;
entry:
%rem = urem i64 %x, 5
@@ -107,3 +103,37 @@ lor.end:
%0 = phi i8 [ 15, %sw.bb0 ], [ 6, %sw.bb1 ], [ 7, %sw.bb2 ], [ 0, %default ]
ret i8 %0
}
+
+; Negative test: The default branch is unreachable.
+define i1 @switch_lookup_with_small_i1_unreachable(i32 %x) {
+; CHECK-LABEL: @switch_lookup_with_small_i1_unreachable(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 15
+; CHECK-NEXT: ret i1 false
+;
+entry:
+ %and = and i32 %x, 15
+ switch i32 %and, label %default [
+ i32 4, label %phi.end
+ i32 2, label %phi.end
+ i32 10, label %phi.end
+ i32 9, label %phi.end
+ i32 1, label %sw.bb1.i
+ i32 3, label %sw.bb1.i
+ i32 5, label %sw.bb1.i
+ i32 0, label %sw.bb1.i
+ i32 6, label %sw.bb1.i
+ i32 7, label %sw.bb1.i
+ i32 8, label %sw.bb1.i
+ ]
+
+sw.bb1.i: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry
+ br label %phi.end
+
+default: ; preds = %entry
+ unreachable
+
+phi.end: ; preds = %sw.bb1.i, %entry, %entry, %entry, %entry
+ %retval.0.i = phi i1 [ false, %sw.bb1.i ], [ false, %entry ], [ false, %entry ], [ false, %entry ], [ false, %entry ]
+ ret i1 false
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks! The fix looks right to me. It would be good to have a test which covers the case where HasDefaultResults is false though.
hi @zmodem, the new added |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
I think I've also confused myself about
so yes, the current test covers where we asserted before, but there are other situations where |
Thanks @zmodem for your suggestion, I added a new case |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we misunderstood each other. I meant we should have a test where the default case is reachable, but does not yield a constant value. For example because it calls a function and feeds the return value to the phi node.
br label %phi.end | ||
|
||
default: ; preds = %entry | ||
unreachable |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand. The comment says the default is reachable, but here it's marked unreachable?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I marked it as reachable because `DefaultIsReachable` is true for this case, and some values, such as `i32 10`, will hit the default branch. Does this not fit the usual description?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since the `default` block starts with an `unreachable` instruction, `DefaultIsReachable` will be false:
llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
Lines 6601 to 6602 in 6477b41
const bool DefaultIsReachable = | |
!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()); |
I was thinking of something like this, where the default doesn't yield a constant result, so we can't grow the table:
// Declared only; no definition is shown here, so its result is not a
// compile-time constant from the point of view of f.
bool g(int x);
// Example switch on x % 8: cases 0-6 return constant true/false values, while
// the default case returns the result of calling g(x), i.e. a non-constant
// default result.
bool f(int x) {
switch (x % 8) {
case 0: return true;
case 1: return false;
case 2: return false;
case 3: return false;
case 4: return true;
case 5: return false;
case 6: return true;
// Every other remainder falls through to the non-constant call.
default: return g(x);
}
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, thanks very much for the detailed case; I'll update this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added `switch_lookup_with_small_i1_default_nonconst` for the above case, thanks @zmodem.
…tion When the small mask value is less than 64, we can eliminate the check for the upper limit of the range by enlarging the lookup table size to the maximum index value. (Then the final table size grows to the next pow2 value.)
```
bool f(unsigned x) {
  switch (x % 8) {
  case 0: return 1;
  case 1: return 0;
  case 2: return 0;
  case 3: return 1;
  case 4: return 1;
  case 5: return 0;
  case 6: return 1;
  // This would remove the range check:
  case 7: return 0;
  }
  return 0;
}
```
Use WouldFitInRegister instead of fitsInLegalInteger to support more result types besides bool. Fixes llvm#65120
This modification will be squashed into the previous commit if accepted. It is kept separate just to make it clearer to review.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good to me!
This change appears to cause a crash for me when building with
; ModuleID = 'gfx_v8_0.i'
source_filename = "gfx_v8_0.i"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
@max_shader_engines = dso_local global i32 0, align 4
; Function Attrs: nounwind sanitize_thread uwtable
; Loads @max_shader_engines, substitutes 1 when the loaded value is zero, and
; stores the result in %num_se. It then computes
; (num_se == 1 || num_se == 2 || num_se == 4) as a chain of compare-and-branch
; blocks merged by a phi, and stores the zero-extended result in %__ret_warn_on.
define dso_local void @gfx_v8_0_setup_rb() #0 {
entry:
%num_se = alloca i32, align 4
%tmp = alloca i32, align 4
%__ret_warn_on = alloca i32, align 4
call void @llvm.lifetime.start.p0(i64 4, ptr %num_se) #4
%0 = load i32, ptr @max_shader_engines, align 4, !tbaa !5
%tobool = icmp ne i32 %0, 0
br i1 %tobool, label %cond.true, label %cond.false
cond.true: ; preds = %entry
%1 = load i32, ptr @max_shader_engines, align 4, !tbaa !5
br label %cond.end
cond.false: ; preds = %entry
br label %cond.end
cond.end: ; preds = %cond.false, %cond.true
; %cond is the reloaded global when it was non-zero, otherwise the constant 1.
%cond = phi i32 [ %1, %cond.true ], [ 1, %cond.false ]
store i32 %cond, ptr %tmp, align 4, !tbaa !5
%2 = load i32, ptr %tmp, align 4, !tbaa !5
store i32 %2, ptr %num_se, align 4, !tbaa !5
call void @llvm.lifetime.start.p0(i64 4, ptr %__ret_warn_on) #4
%3 = load i32, ptr %num_se, align 4, !tbaa !5
%cmp = icmp eq i32 %3, 1
br i1 %cmp, label %lor.end, label %lor.lhs.false
lor.lhs.false: ; preds = %cond.end
%4 = load i32, ptr %num_se, align 4, !tbaa !5
%cmp1 = icmp eq i32 %4, 2
br i1 %cmp1, label %lor.end, label %lor.rhs
lor.rhs: ; preds = %lor.lhs.false
%5 = load i32, ptr %num_se, align 4, !tbaa !5
%cmp2 = icmp eq i32 %5, 4
br label %lor.end
lor.end: ; preds = %lor.rhs, %lor.lhs.false, %cond.end
; true when arriving from the ==1 or ==2 checks; otherwise the ==4 comparison.
%6 = phi i1 [ true, %lor.lhs.false ], [ true, %cond.end ], [ %cmp2, %lor.rhs ]
%lor.ext = zext i1 %6 to i32
store i32 %lor.ext, ptr %__ret_warn_on, align 4, !tbaa !5
%7 = load i32, ptr %__ret_warn_on, align 4, !tbaa !5
%8 = call i1 @llvm.is.constant.i32(i32 %7)
br i1 %8, label %if.then, label %if.else
if.then: ; preds = %lor.end
br label %if.end5
if.else: ; preds = %lor.end
; Non-constant path: the value is sign-extended, passed through llvm.expect
; with an expected value of 0, and an empty side-effecting inline asm is
; executed when it is non-zero.
%9 = load i32, ptr %__ret_warn_on, align 4, !tbaa !5
%conv = sext i32 %9 to i64
%expval = call i64 @llvm.expect.i64(i64 %conv, i64 0)
%tobool3 = icmp ne i64 %expval, 0
br i1 %tobool3, label %if.then4, label %if.end
if.then4: ; preds = %if.else
call void asm sideeffect "", "~{dirflag},~{fpsr},~{flags}"() #4, !srcloc !9
br label %if.end
if.end: ; preds = %if.then4, %if.else
br label %if.end5
if.end5: ; preds = %if.end, %if.then
call void @llvm.lifetime.end.p0(i64 4, ptr %__ret_warn_on) #4
call void @llvm.lifetime.end.p0(i64 4, ptr %num_se) #4
ret void
}
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
; Function Attrs: convergent nocallback nofree nosync nounwind willreturn memory(none)
declare i1 @llvm.is.constant.i32(i32) #2
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare i64 @llvm.expect.i64(i64, i64) #3
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
attributes #0 = { nounwind sanitize_thread uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #2 = { convergent nocallback nofree nosync nounwind willreturn memory(none) }
attributes #3 = { nocallback nofree nosync nounwind willreturn memory(none) }
attributes #4 = { nounwind }
!llvm.module.flags = !{!0, !1, !2, !3}
!llvm.ident = !{!4}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{!"ClangBuiltLinux clang version 18.0.0 (https://github.com/llvm/llvm-project 7c4180a36a905b7ed46c09df77af1b65e356f92a)"}
!5 = !{!6, !6, i64 0}
!6 = !{!"int", !7, i64 0}
!7 = !{!"omnipotent char", !8, i64 0}
!8 = !{!"Simple C/C++ TBAA"}
!9 = !{i64 299}
At 7c4180a:
At 1021404:
Bisect log below, if there is any additional information I can provide or patches I can test, I am more than happy to do so.
|
Thanks @nathanchance for your report; it seems to be a similar issue to #71329 |
FWIW this causes https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=278320 :
I'm currently reducing the test case for a separate ticket. |
Fix the compile crash when the default result does not exist for #65835