Skip to content

Commit 475c632

Browse files
authored
[flang][cuda] Use local scope to avoid duplicate definition (#166249)
1 parent 562e3bf commit 475c632

File tree

2 files changed: +16 -11 lines changed

2 files changed: +16 -11 lines changed

flang/lib/Optimizer/Builder/IntrinsicCall.cpp

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -3433,13 +3433,15 @@ IntrinsicLibrary::genBarrierTryWait(mlir::Type resultType,
34333433
builder.setInsertionPointToStart(afterBlock);
34343434
auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(builder.getContext());
34353435
auto barrier = builder.createConvert(loc, llvmPtrTy, args[0]);
3436-
mlir::Value ret =
3437-
mlir::NVVM::InlinePtxOp::create(
3438-
builder, loc, {resultType}, {barrier, args[1], ns}, {},
3439-
".reg .pred p; mbarrier.try_wait.shared.b64 p, [%1], %2, %3; "
3440-
"selp.b32 %0, 1, 0, p;",
3441-
{})
3442-
.getResult(0);
3436+
mlir::Value ret = mlir::NVVM::InlinePtxOp::create(
3437+
builder, loc, {resultType}, {barrier, args[1], ns}, {},
3438+
"{\n"
3439+
" .reg .pred p;\n"
3440+
" mbarrier.try_wait.shared.b64 p, [%1], %2, %3;\n"
3441+
" selp.b32 %0, 1, 0, p;\n"
3442+
"}",
3443+
{})
3444+
.getResult(0);
34433445
mlir::scf::YieldOp::create(builder, loc, ret);
34443446
builder.setInsertionPointAfter(whileOp);
34453447
return whileOp.getResult(0);
@@ -3454,8 +3456,11 @@ IntrinsicLibrary::genBarrierTryWaitSleep(mlir::Type resultType,
34543456
auto barrier = builder.createConvert(loc, llvmPtrTy, args[0]);
34553457
return mlir::NVVM::InlinePtxOp::create(
34563458
builder, loc, {resultType}, {barrier, args[1], args[2]}, {},
3457-
".reg .pred p; mbarrier.try_wait.shared.b64 p, [%1], %2, %3; "
3458-
"selp.b32 %0, 1, 0, p;",
3459+
"{\n"
3460+
" .reg .pred p;\n"
3461+
" mbarrier.try_wait.shared.b64 p, [%1], %2, %3;\n"
3462+
" selp.b32 %0, 1, 0, p;\n"
3463+
"}",
34593464
{})
34603465
.getResult(0);
34613466
}

flang/test/Lower/CUDA/cuda-device-proc.cuf

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -519,7 +519,7 @@ end subroutine
519519

520520
! CHECK-LABEL: func.func @_QPtest_barrier_try_wait()
521521
! CHECK: scf.while
522-
! CHECK: %{{.*}} = nvvm.inline_ptx ".reg .pred p; mbarrier.try_wait.shared.b64 p, [%{{.*}}], %{{.*}}, %{{.*}}; selp.b32 %{{.*}}, 1, 0, p;" ro(%{{.*}}, %{{.*}}, %c1000000{{.*}} : !llvm.ptr, i64, i32) -> i32
522+
! CHECK: %{{.*}} = nvvm.inline_ptx "{\0A .reg .pred p;\0A mbarrier.try_wait.shared.b64 p, [%{{.*}}], %{{.*}}, %{{.*}};\0A selp.b32 %{{.*}}, 1, 0, p;\0A}" ro(%{{.*}}, %{{.*}}, %{{.*}} : !llvm.ptr, i64, i32) -> i32
523523

524524
attributes(global) subroutine test_barrier_try_wait_sleep()
525525
integer :: istat
@@ -530,7 +530,7 @@ attributes(global) subroutine test_barrier_try_wait_sleep()
530530
end subroutine
531531

532532
! CHECK-LABEL: func.func @_QPtest_barrier_try_wait_sleep()
533-
! CHECK: %{{.*}} = nvvm.inline_ptx ".reg .pred p; mbarrier.try_wait.shared.b64 p, [%{{.*}}], %{{.*}}, %{{.*}}; selp.b32 %0, 1, 0, p;" ro(%{{.*}}, %{{.*}}, %{{.*}} : !llvm.ptr, i64, i32) -> i32
533+
! CHECK: %{{.*}} = nvvm.inline_ptx "{\0A .reg .pred p;\0A mbarrier.try_wait.shared.b64 p, [%{{.*}}], %{{.*}}, %{{.*}};\0A selp.b32 %{{.*}}, 1, 0, p;\0A}" ro(%{{.*}}, %{{.*}}, %{{.*}} : !llvm.ptr, i64, i32) -> i32
534534

535535
attributes(global) subroutine test_tma_bulk_load_c4(a, n)
536536
integer(8), shared :: barrier1

0 commit comments

Comments (0)