Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SimplifyCFG] Select the first instruction that we can handle in passingValueIsAlwaysUndefined #98802

Merged
merged 6 commits into from
Jul 16, 2024

Conversation

@llvmbot
Copy link
Member

llvmbot commented Jul 14, 2024

@llvm/pr-subscribers-llvm-transforms

Author: DianQK (DianQK)

Changes

Fixes #98799.


Full diff: https://github.com/llvm/llvm-project/pull/98802.diff

3 Files Affected:

  • (modified) llvm/lib/Transforms/Utils/SimplifyCFG.cpp (+18-6)
  • (added) llvm/test/Transforms/PhaseOrdering/pr98799-inline-simplifycfg-ub.ll (+56)
  • (modified) llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll (+66-2)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 3fa3c0f1f52b0..a743858855047 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7573,13 +7573,25 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
     return false;
 
   if (C->isNullValue() || isa<UndefValue>(C)) {
-    // Only look at the first use, avoid hurting compile time with long uselists
-    auto *Use = cast<Instruction>(*I->user_begin());
-    // Bail out if Use is not in the same BB as I or Use == I or Use comes
-    // before I in the block. The latter two can be the case if Use is a PHI
-    // node.
-    if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
+    // Only look at the first use we can hanle, avoid hurting compile time with
+    // long uselists
+    auto FindUse = llvm::find_if(I->users(), [&I](auto *U) {
+      auto *Use = cast<Instruction>(U);
+      // Bail out if Use is not in the same BB as I or Use == I or Use comes
+      // before I in the block. The latter two can be the case if Use is a
+      // PHI node.
+      if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
+        return false;
+      // Change this list when we want to add new instructions.
+      if (!isa<GetElementPtrInst>(Use) && !isa<ReturnInst>(Use) &&
+          !isa<BitCastInst>(Use) && !isa<LoadInst>(Use) &&
+          !isa<StoreInst>(Use) && !isa<AssumeInst>(Use) && !isa<CallBase>(Use))
+        return false;
+      return true;
+    });
+    if (FindUse == I->user_end())
       return false;
+    auto *Use = cast<Instruction>(*FindUse);
 
     // Now make sure that there are no instructions in between that can alter
     // control flow (eg. calls)
diff --git a/llvm/test/Transforms/PhaseOrdering/pr98799-inline-simplifycfg-ub.ll b/llvm/test/Transforms/PhaseOrdering/pr98799-inline-simplifycfg-ub.ll
new file mode 100644
index 0000000000000..17073fa198202
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/pr98799-inline-simplifycfg-ub.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=inline,simplifycfg -S | FileCheck --check-prefix=CUSTOM %s
+; RUN: opt < %s -O2 -S | FileCheck --check-prefix=O2 %s
+
+define internal ptr @bar(ptr %arg, i1 %arg1) {
+bb:
+  br i1 %arg1, label %bb4, label %bb2
+
+bb2:
+  %i = load ptr, ptr %arg, align 8
+  %i3 = getelementptr inbounds i8, ptr %i, i64 1
+  store ptr %i3, ptr %arg, align 8
+  br label %bb4
+
+bb4:
+  %i5 = phi ptr [ %i, %bb2 ], [ null, %bb ]
+  ret ptr %i5
+}
+
+define i32 @foo(ptr %arg, i1 %arg1) {
+; CUSTOM-LABEL: define i32 @foo(
+; CUSTOM-SAME: ptr [[ARG:%.*]], i1 [[ARG1:%.*]]) {
+; CUSTOM-NEXT:  [[BB:.*:]]
+; CUSTOM-NEXT:    [[TMP0:%.*]] = xor i1 [[ARG1]], true
+; CUSTOM-NEXT:    call void @llvm.assume(i1 [[TMP0]])
+; CUSTOM-NEXT:    [[I_I:%.*]] = load ptr, ptr [[ARG]], align 8
+; CUSTOM-NEXT:    [[I3_I:%.*]] = getelementptr inbounds i8, ptr [[I_I]], i64 1
+; CUSTOM-NEXT:    store ptr [[I3_I]], ptr [[ARG]], align 8
+; CUSTOM-NEXT:    [[I2:%.*]] = icmp ne ptr [[I_I]], null
+; CUSTOM-NEXT:    call void @llvm.assume(i1 [[I2]])
+; CUSTOM-NEXT:    [[I3:%.*]] = load i32, ptr [[I_I]], align 4
+; CUSTOM-NEXT:    ret i32 [[I3]]
+;
+; O2-LABEL: define i32 @foo(
+; O2-SAME: ptr nocapture [[ARG:%.*]], i1 [[ARG1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; O2-NEXT:  [[BB:.*:]]
+; O2-NEXT:    [[TMP0:%.*]] = xor i1 [[ARG1]], true
+; O2-NEXT:    tail call void @llvm.assume(i1 [[TMP0]])
+; O2-NEXT:    [[I_I:%.*]] = load ptr, ptr [[ARG]], align 8, !nonnull [[META0:![0-9]+]], !noundef [[META0]]
+; O2-NEXT:    [[I3_I:%.*]] = getelementptr inbounds i8, ptr [[I_I]], i64 1
+; O2-NEXT:    store ptr [[I3_I]], ptr [[ARG]], align 8
+; O2-NEXT:    [[I3:%.*]] = load i32, ptr [[I_I]], align 4
+; O2-NEXT:    ret i32 [[I3]]
+;
+bb:
+  %i = call ptr @bar(ptr %arg, i1 %arg1)
+  %i2 = icmp ne ptr %i, null
+  call void @llvm.assume(i1 %i2)
+  %i3 = load i32, ptr %i, align 4
+  ret i32 %i3
+}
+
+declare void @llvm.assume(i1)
+;.
+; O2: [[META0]] = !{}
+;.
diff --git a/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll b/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
index ef2d3219cca9b..c4602e72ecbce 100644
--- a/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
+++ b/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
@@ -20,7 +20,7 @@ F:
 define void @test2() personality ptr @__gxx_personality_v0 {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @test2() #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    call void @test2() #[[ATTR4:[0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -242,6 +242,8 @@ declare ptr @fn_nonnull_deref_arg(ptr nonnull dereferenceable(4) %p)
 declare ptr @fn_nonnull_deref_or_null_arg(ptr nonnull dereferenceable_or_null(4) %p)
 declare ptr @fn_nonnull_arg(ptr nonnull %p)
 declare ptr @fn_noundef_arg(ptr noundef %p)
+declare ptr @fn_ptr_arg(ptr)
+declare ptr @fn_ptr_arg_nounwind_willreturn(ptr) nounwind willreturn
 
 define void @test9(i1 %X, ptr %Y) {
 ; CHECK-LABEL: @test9(
@@ -855,10 +857,72 @@ exit:
   ret i32 %res
 }
 
+; From bb to bb5 is UB.
+define i32 @test9_null_user_order_1(ptr %arg, i1 %arg1, ptr %arg2) {
+; CHECK-LABEL: @test9_null_user_order_1(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[ARG1:%.*]], true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP0]])
+; CHECK-NEXT:    [[I:%.*]] = load ptr, ptr [[ARG:%.*]], align 8
+; CHECK-NEXT:    [[I4:%.*]] = getelementptr inbounds i8, ptr [[I]], i64 1
+; CHECK-NEXT:    store ptr [[I4]], ptr [[ARG]], align 8
+; CHECK-NEXT:    [[I7:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[I8:%.*]] = icmp ne ptr [[I]], [[ARG2:%.*]]
+; CHECK-NEXT:    call void @fn_ptr_arg(i1 [[I8]])
+; CHECK-NEXT:    ret i32 [[I7]]
+;
+bb:
+  br i1 %arg1, label %bb5, label %bb3
+
+bb3:                                              ; preds = %bb
+  %i = load ptr, ptr %arg, align 8
+  %i4 = getelementptr inbounds i8, ptr %i, i64 1
+  store ptr %i4, ptr %arg, align 8
+  br label %bb5
+
+bb5:                                              ; preds = %bb3, %bb
+  %i6 = phi ptr [ %i, %bb3 ], [ null, %bb ]
+  %i7 = load i32, ptr %i6, align 4
+  %i8 = icmp ne ptr %i6, %arg2
+  call void @fn_ptr_arg(i1 %i8)
+  ret i32 %i7
+}
+
+define i32 @test9_null_user_order_2(ptr %arg, i1 %arg1, ptr %arg2) {
+; CHECK-LABEL: @test9_null_user_order_2(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[ARG1:%.*]], true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP0]])
+; CHECK-NEXT:    [[I:%.*]] = load ptr, ptr [[ARG:%.*]], align 8
+; CHECK-NEXT:    [[I4:%.*]] = getelementptr inbounds i8, ptr [[I]], i64 1
+; CHECK-NEXT:    store ptr [[I4]], ptr [[ARG]], align 8
+; CHECK-NEXT:    [[I8:%.*]] = icmp ne ptr [[I]], [[ARG2:%.*]]
+; CHECK-NEXT:    call void @fn_ptr_arg_nounwind_willreturn(i1 [[I8]])
+; CHECK-NEXT:    [[I7:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    ret i32 [[I7]]
+;
+bb:
+  br i1 %arg1, label %bb5, label %bb3
+
+bb3:                                              ; preds = %bb
+  %i = load ptr, ptr %arg, align 8
+  %i4 = getelementptr inbounds i8, ptr %i, i64 1
+  store ptr %i4, ptr %arg, align 8
+  br label %bb5
+
+bb5:                                              ; preds = %bb3, %bb
+  %i6 = phi ptr [ %i, %bb3 ], [ null, %bb ]
+  %i8 = icmp ne ptr %i6, %arg2
+  call void @fn_ptr_arg_nounwind_willreturn(i1 %i8)
+  %i7 = load i32, ptr %i6, align 4
+  ret i32 %i7
+}
+
 attributes #0 = { null_pointer_is_valid }
 ;.
 ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
 ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 ; CHECK: attributes #[[ATTR2:[0-9]+]] = { null_pointer_is_valid }
-; CHECK: attributes #[[ATTR3]] = { nounwind }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind willreturn }
+; CHECK: attributes #[[ATTR4]] = { nounwind }
 ;.

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Jul 14, 2024
@dtcxzyw
Copy link
Member

dtcxzyw commented Jul 14, 2024

IR diff LGTM. I am running compile-time tests :)

BTW, I think most of dead blocks are created by JumpThreading. Can we avoid this to further improve compilation time?

@dianqk
Copy link
Member Author

dianqk commented Jul 14, 2024

BTW, I think most of dead blocks are created by JumpThreading. Can we avoid this to further improve compilation time?

I haven't read the JumpThreading code yet, if it can't be handled within it, perhaps we could create some sort of simple matching pattern to hand over to SimplifyCFG?

@dianqk
Copy link
Member Author

dianqk commented Jul 14, 2024

@dtcxzyw
Copy link
Member

dtcxzyw commented Jul 14, 2024

IR diff LGTM. I am running compile-time tests :)

Top 5 improvements:
  yosys/opt_muxtree.ll 6478612386 5814367885 -10.25%
  proxy/proxy_lifetime_tests.cpp.ll 7675508865 6910976274 -9.96%
  luau/InsertionOrderedMap.test.cpp.ll 1564039427 1431728556 -8.46%
  yosys/extract_reduce.ll 6042277718 5548421964 -8.17%
  yosys/attrmvcp.ll 3867732946 3646575352 -5.72%
Top 5 regressions:
  cmake/cmGeneratorExpressionLexer.cxx.ll 280987017 355193810 +26.41%
  hermes/StackPromotion.cpp.ll 5084729293 5932637578 +16.68%
  verilator/V3GraphPathChecker.cpp.ll 384704949 442379033 +14.99%
  yosys/shregmap.ll 6984227787 7798033142 +11.65%
  assimp/Exporter.cpp.ll 2987022380 3312585333 +10.90%
Overall: 0.01310176%

@dianqk
Copy link
Member Author

dianqk commented Jul 14, 2024

I can't reproduce the problem yet, can you provide a more detailed test step?
My local test commands are:

# CMake project
clang++     -D_FILE_OFFSET_BITS=64    -DCMAKE_BOOTSTRAP    -DCMake_HAVE_CXX_MAKE_UNIQUE=1 -DCMake_HAVE_CXX_FILESYSTEM=1   -I/home/dianqk/llvm/CMake/build/Bootstrap.cmk   -I/home/dianqk/llvm/CMake/Source   -I/home/dianqk/llvm/CMake/Source/LexerParser   -I/home/dianqk/llvm/CMake/Utilities/std   -I/home/dianqk/llvm/CMake/Utilities  -c /home/dianqk/llvm/CMake/Source/cmGeneratorExpressionLexer.cxx -o cmGeneratorExpressionLexer.o -emit-llvm -O3 -Xclang -disable-llvm-passes

# cmake (stage 1)
cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=OFF -DBUILD_SHARED_LIBS=ON ...
# main branch
perf stat $LLVM_PROJECT/build-base/bin/opt -O2 cmGeneratorExpressionLexer.o --disable-output
339,409,679      instructions:u
# pr
perf stat $LLVM_PROJECT/build/bin/opt -O2 cmGeneratorExpressionLexer.o --disable-output
338,379,357      instructions:u
# b686e35ec
339,831,704      instructions:u

@dtcxzyw
Copy link
Member

dtcxzyw commented Jul 15, 2024

I can't reproduce the problem yet, can you provide a more detailed test step? My local test commands are:

# CMake project
clang++     -D_FILE_OFFSET_BITS=64    -DCMAKE_BOOTSTRAP    -DCMake_HAVE_CXX_MAKE_UNIQUE=1 -DCMake_HAVE_CXX_FILESYSTEM=1   -I/home/dianqk/llvm/CMake/build/Bootstrap.cmk   -I/home/dianqk/llvm/CMake/Source   -I/home/dianqk/llvm/CMake/Source/LexerParser   -I/home/dianqk/llvm/CMake/Utilities/std   -I/home/dianqk/llvm/CMake/Utilities  -c /home/dianqk/llvm/CMake/Source/cmGeneratorExpressionLexer.cxx -o cmGeneratorExpressionLexer.o -emit-llvm -O3 -Xclang -disable-llvm-passes

# cmake (stage 1)
cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=OFF -DBUILD_SHARED_LIBS=ON ...
# main branch
perf stat $LLVM_PROJECT/build-base/bin/opt -O2 cmGeneratorExpressionLexer.o --disable-output
339,409,679      instructions:u
# pr
perf stat $LLVM_PROJECT/build/bin/opt -O2 cmGeneratorExpressionLexer.o --disable-output
338,379,357      instructions:u
# b686e35ec
339,831,704      instructions:u

Can you try with opt -O3?

@dianqk
Copy link
Member Author

dianqk commented Jul 15, 2024

Oops, I can reproduce it. b686e35 fixed.

@dtcxzyw
Copy link
Member

dtcxzyw commented Jul 15, 2024

Oops, I can reproduce it. b686e35 fixed.

Top 5 improvements:
  faiss/IndexIVFFlat.cpp.ll 1879640556 1806714892 -3.88%
  faiss/utils.cpp.ll 1392144764 1341384014 -3.65%
  faiss/IndexIVFFastScan.cpp.ll 7591452840 7321469207 -3.56%
  gromacs/statepropagatordata.cpp.ll 3289712205 3176646034 -3.44%
  rust-analyzer-rs/5ac99zaxn7b9r9xv.ll 2542407788 2477356558 -2.56%
Top 5 regressions:
  faiss/IVFlib.cpp.ll 1451629900 1522431580 +4.88%
  gromacs/constr.cpp.ll 2086259177 2159075037 +3.49%
  gromacs/muParserBase.cpp.ll 3535512461 3655648190 +3.40%
  faiss/IndexIVFAdditiveQuantizer.cpp.ll 758961920 784502318 +3.37%
  faiss/IndexAdditiveQuantizer.cpp.ll 1739627273 1791928381 +3.01%
Overall: 0.00096778%

@dianqk
Copy link
Member Author

dianqk commented Jul 15, 2024

Hmm, at least locally, I can't reproduce gromacs/constr.cpp. The new overall results seem acceptable?

Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. Thank you!

@dianqk dianqk merged commit 8afb643 into llvm:main Jul 16, 2024
7 checks passed
@dianqk dianqk deleted the simplify-ub-user-order branch July 16, 2024 11:06
yuxuanchen1997 pushed a commit that referenced this pull request Jul 25, 2024
…singValueIsAlwaysUndefined` (#98802)

Summary: Fixes #98799.

Test Plan: 

Reviewers: 

Subscribers: 

Tasks: 

Tags: 


Differential Revision: https://phabricator.intern.facebook.com/D60251495
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

[SimplifyCFG] SimplifyCFG does not eliminate the UB branch after inlining
3 participants