diff --git a/llvm/lib/Analysis/GlobalsModRef.cpp b/llvm/lib/Analysis/GlobalsModRef.cpp
index a281626505a32..270dad23303c0 100644
--- a/llvm/lib/Analysis/GlobalsModRef.cpp
+++ b/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -534,6 +534,17 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
           if (!F->isIntrinsic()) {
             KnowNothing = true;
             break;
+          } else if (F->getName().contains("nvvm.barrier") ||
+                     F->getName().contains("nvvm.membar")) {
+            // Even if it is an intrinsic, consider that nothing is known for
+            // NVVM barrier intrinsics to prevent illegal optimizations.
+            // This is a workaround for a bug on the PTX target: barriers
+            // are implemented as LLVM intrinsics, and as a result there
+            // are cases where globals alias analysis concludes that a
+            // barrier doesn't modify an internal global, which causes
+            // illegal reordering of memory accesses.
+            KnowNothing = true;
+            break;
           }
         }
         continue;
diff --git a/llvm/test/Analysis/GlobalsModRef/barrier_intrinsic.ll b/llvm/test/Analysis/GlobalsModRef/barrier_intrinsic.ll
new file mode 100644
index 0000000000000..0fa67a95706c6
--- /dev/null
+++ b/llvm/test/Analysis/GlobalsModRef/barrier_intrinsic.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -globals-aa -gvn -S | FileCheck %s
+
+; Check that the load from the global variable is not moved across the barrier.
+; The expected output keeps the load of @foo.l.0 after the barrier call.
+
+target triple = "nvptx"
+
+@foo.l.0 = internal unnamed_addr addrspace(3) global i32 undef, align 4
+
+define dso_local spir_kernel void @foo(i32 addrspace(1)* nocapture %0) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @_Z13get_global_idj(i32 0) #0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @_Z12get_local_idj(i32 0) #0
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP7:%.*]]
+; CHECK: 5:
+; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP2]], 5
+; CHECK-NEXT: store i32 [[TMP6]], i32 addrspace(3)* @foo.l.0, align 4
+; CHECK-NEXT: br label [[TMP7]]
+; CHECK: 7:
+; CHECK-NEXT: tail call void @llvm.nvvm.barrier0() #2
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32 addrspace(3)* @foo.l.0, align 4
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP0:%.*]], i32 [[TMP2]]
+; CHECK-NEXT: store i32 [[TMP8]], i32 addrspace(1)* [[TMP9]], align 4
+; CHECK-NEXT: ret void
+;
+  %2 = tail call i32 @_Z13get_global_idj(i32 0) #0
+  %3 = tail call i32 @_Z12get_local_idj(i32 0) #0
+  %4 = icmp eq i32 %3, 0
+  br i1 %4, label %5, label %7
+
+5: ; preds = %1
+  %6 = add i32 %2, 5
+  store i32 %6, i32 addrspace(3)* @foo.l.0, align 4 ; conditional store to the internal global
+  br label %7
+
+7: ; preds = %5, %1
+  tail call void @llvm.nvvm.barrier0() #1 ; the load below must not be moved above this call
+  %8 = load i32, i32 addrspace(3)* @foo.l.0, align 4 ; reads the global written in block 5
+  %9 = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 %2
+  store i32 %8, i32 addrspace(1)* %9, align 4
+  ret void
+}
+
+; Function Attrs: convergent nounwind readnone
+declare dso_local i32 @_Z13get_global_idj(i32) local_unnamed_addr #0
+
+; Function Attrs: convergent nounwind readnone
+declare dso_local i32 @_Z12get_local_idj(i32) local_unnamed_addr #0
+
+; Function Attrs: convergent
+declare dso_local void @llvm.nvvm.barrier0() local_unnamed_addr #1
+
+attributes #0 = { convergent nounwind readnone }
+attributes #1 = { convergent }