diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 15ea84565dd75..0c28f96552ef6 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -3333,8 +3333,7 @@ IntrinsicLibrary::genBarrierArrive(mlir::Type resultType, assert(args.size() == 1); mlir::Value barrier = convertPtrToNVVMSpace( builder, loc, args[0], mlir::NVVM::NVVMMemorySpace::Shared); - return mlir::NVVM::MBarrierArriveSharedOp::create(builder, loc, resultType, - barrier) + return mlir::NVVM::MBarrierArriveOp::create(builder, loc, resultType, barrier) .getResult(); } diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 09b4302446ee7..9bc135e0f12fd 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -436,7 +436,7 @@ end subroutine ! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref) -> !llvm.ptr ! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3> -! CHECK: %{{.*}} = nvvm.mbarrier.arrive.shared %[[SHARED_PTR]] : !llvm.ptr<3> -> i64 +! CHECK: %{{.*}} = nvvm.mbarrier.arrive %[[SHARED_PTR]] : !llvm.ptr<3> -> i64 ! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref) -> !llvm.ptr ! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3> diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index ba5e48e4ec9ba..93dfeacf6c347 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -656,8 +656,8 @@ def NVVM_MBarrierInvalOp : NVVM_Op<"mbarrier.inval">, } def NVVM_MBarrierArriveOp : NVVM_Op<"mbarrier.arrive">, - Results<(outs LLVM_Type:$res)>, - Arguments<(ins LLVM_AnyPointer:$addr)> { + Results<(outs I64:$res)>, + Arguments<(ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr)> { let summary = "MBarrier Arrive Operation"; let description = [{ The `nvvm.mbarrier.arrive` operation performs an arrive-on operation on the @@ -674,36 +674,32 @@ def NVVM_MBarrierArriveOp : NVVM_Op<"mbarrier.arrive">, value are implementation-specific. The operation takes the following operand: - - `addr`: A pointer to the memory location of the *mbarrier object*. Uses generic - addressing, but the address must still be in the shared memory space. + - `addr`: A pointer to the memory location of the *mbarrier object*. The `addr` + must be a pointer to generic or shared::cta memory. When it is generic, the + underlying address must be within the shared::cta memory space; otherwise + the behavior is undefined. [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive) }]; - string llvmBuilder = [{ - $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive, {$addr}); - }]; let assemblyFormat = "$addr attr-dict `:` type($addr) `->` type($res)"; -} -def NVVM_MBarrierArriveSharedOp : NVVM_Op<"mbarrier.arrive.shared">, - Results<(outs LLVM_Type:$res)>, - Arguments<(ins LLVM_PointerShared:$addr)> { - let summary = "Shared MBarrier Arrive Operation"; - let description = [{ - This Op is the same as `nvvm.mbarrier.arrive` except that the *mbarrier object* - should be accessed using a shared-memory pointer instead of a generic-memory pointer. - - [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive) + let extraClassDeclaration = [{ + static mlir::NVVM::IDArgPair + getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt, + llvm::IRBuilderBase& builder); }]; + string llvmBuilder = [{ - $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive_shared, {$addr}); + auto [id, args] = NVVM::MBarrierArriveOp::getIntrinsicIDAndArgs( + *op, moduleTranslation, builder); + $res = createIntrinsicCall(builder, id, args); }]; - let assemblyFormat = "$addr attr-dict `:` qualified(type($addr)) `->` type($res)"; } def NVVM_MBarrierArriveNocompleteOp : NVVM_Op<"mbarrier.arrive.nocomplete">, - Results<(outs LLVM_Type:$res)>, - Arguments<(ins LLVM_AnyPointer:$addr, I32:$count)> { + Results<(outs I64:$res)>, + Arguments<(ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr, + I32:$count)> { let summary = "MBarrier Arrive No-Complete Operation"; let description = [{ The `nvvm.mbarrier.arrive.nocomplete` operation performs an arrive-on operation @@ -721,33 +717,29 @@ def NVVM_MBarrierArriveNocompleteOp : NVVM_Op<"mbarrier.arrive.nocomplete">, captures the phase of the *mbarrier object* prior to the arrive-on operation. The operation takes the following operands: - - `addr`: A pointer to the memory location of the *mbarrier object*. Uses generic - addressing, but the address must still be in the shared memory space. + - `addr`: A pointer to the memory location of the *mbarrier object*. The `addr` + must be a pointer to generic or shared::cta memory. When it is generic, the + underlying address must be within the shared::cta memory space; otherwise + the behavior is undefined. - `count`: Integer specifying the count argument to the arrive-on operation. Must be in the valid range as specified in the *mbarrier object* contents. [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive) }]; - string llvmBuilder = [{ - $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete, {$addr, $count}); - }]; - let assemblyFormat = "$addr `,` $count attr-dict `:` type(operands) `->` type($res)"; -} -def NVVM_MBarrierArriveNocompleteSharedOp : NVVM_Op<"mbarrier.arrive.nocomplete.shared">, - Results<(outs LLVM_Type:$res)>, - Arguments<(ins LLVM_PointerShared:$addr, I32:$count)> { - let summary = "Shared MBarrier Arrive No-Complete Operation"; - let description = [{ - This Op is the same as `nvvm.mbarrier.arrive.nocomplete` except that the *mbarrier object* - should be accessed using a shared-memory pointer instead of a generic-memory pointer. + let assemblyFormat = "$addr `,` $count attr-dict `:` type(operands) `->` type($res)"; - [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive) + let extraClassDeclaration = [{ + static mlir::NVVM::IDArgPair + getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt, + llvm::IRBuilderBase& builder); }]; + string llvmBuilder = [{ - $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete_shared, {$addr, $count}); + auto [id, args] = NVVM::MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs( + *op, moduleTranslation, builder); + $res = createIntrinsicCall(builder, id, args); }]; - let assemblyFormat = "$addr `,` $count attr-dict `:` type(operands) `->` type($res)"; } def NVVM_MBarrierArriveExpectTxOp : NVVM_PTXBuilder_Op<"mbarrier.arrive.expect_tx">, @@ -896,8 +888,9 @@ def NVVM_MBarrierTryWaitParitySharedOp : NVVM_PTXBuilder_Op<"mbarrier.try_wait.p } def NVVM_MBarrierTestWaitOp : NVVM_Op<"mbarrier.test.wait">, - Results<(outs LLVM_Type:$res)>, - Arguments<(ins LLVM_AnyPointer:$addr, LLVM_Type:$state)> { + Results<(outs I1:$res)>, + Arguments<(ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr, + I64:$state)> { let summary = "MBarrier Non-Blocking Test Wait Operation"; let description = [{ The `nvvm.mbarrier.test.wait` operation performs a non-blocking test for the @@ -944,26 +937,20 @@ def NVVM_MBarrierTestWaitOp : NVVM_Op<"mbarrier.test.wait">, [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-mbarrier-test-wait-try-wait) }]; - string llvmBuilder = [{ - $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_test_wait, {$addr, $state}); - }]; - let assemblyFormat = "$addr `,` $state attr-dict `:` type(operands) `->` type($res)"; -} -def NVVM_MBarrierTestWaitSharedOp : NVVM_Op<"mbarrier.test.wait.shared">, - Results<(outs LLVM_Type:$res)>, - Arguments<(ins LLVM_PointerShared:$addr, LLVM_Type:$state)> { - let summary = "Shared MBarrier Non-Blocking Test Wait Operation"; - let description = [{ - This Op is the same as `nvvm.mbarrier.test.wait` except that the *mbarrier object* - should be accessed using a shared-memory pointer instead of a generic-memory pointer. + let assemblyFormat = "$addr `,` $state attr-dict `:` type(operands) `->` type($res)"; - [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-mbarrier-test-wait-try-wait) + let extraClassDeclaration = [{ + static mlir::NVVM::IDArgPair + getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt, + llvm::IRBuilderBase& builder); }]; + string llvmBuilder = [{ - $res = createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_test_wait_shared, {$addr, $state}); + auto [id, args] = NVVM::MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs( + *op, moduleTranslation, builder); + $res = createIntrinsicCall(builder, id, args); }]; - let assemblyFormat = "$addr `,` $state attr-dict `:` type(operands) `->` type($res)"; } //===----------------------------------------------------------------------===// @@ -1534,47 +1521,30 @@ def NVVM_CpAsyncMBarrierArriveOp : NVVM_Op<"cp.async.mbarrier.arrive"> { The `cp.async.mbarrier.arrive` Op makes the *mbarrier object* track all prior cp.async operations initiated by the executing thread. The `addr` operand specifies the address of the *mbarrier object* - in generic address space. The `noinc` attr impacts how the - mbarrier's state is updated. + in generic or shared::cta address space. When it is generic, the + underlying memory should fall within the shared::cta space; + otherwise the behavior is undefined. The `noinc` attr impacts + how the mbarrier's state is updated. [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-cp-async-mbarrier-arrive) }]; - let assemblyFormat = "$addr attr-dict `:` type(operands)"; let arguments = (ins - LLVM_AnyPointer:$addr, DefaultValuedAttr:$noinc); + AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr, + DefaultValuedAttr:$noinc); - string llvmBuilder = [{ - auto intId = $noinc ? - llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_noinc : - llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive; - - createIntrinsicCall(builder, intId, {$addr}); - }]; -} - -def NVVM_CpAsyncMBarrierArriveSharedOp : NVVM_Op<"cp.async.mbarrier.arrive.shared"> { - let summary = "NVVM Dialect Op for cp.async.mbarrier.arrive.shared"; - let description = [{ - The `cp.async.mbarrier.arrive.shared` Op makes the *mbarrier object* - track all prior cp.async operations initiated by the executing thread. - The `addr` operand specifies the address of the *mbarrier object* in - shared memory. The `noinc` attr impacts how the mbarrier's state - is updated. - - [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-cp-async-mbarrier-arrive) - }]; let assemblyFormat = "$addr attr-dict `:` type(operands)"; - let arguments = (ins - LLVM_PointerShared:$addr, DefaultValuedAttr:$noinc); + let extraClassDeclaration = [{ + static mlir::NVVM::IDArgPair + getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt, + llvm::IRBuilderBase& builder); + }]; string llvmBuilder = [{ - auto intId = $noinc ? - llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_noinc_shared : - llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_shared; - - createIntrinsicCall(builder, intId, {$addr}); + auto [id, args] = NVVM::CpAsyncMBarrierArriveOp::getIntrinsicIDAndArgs( + *op, moduleTranslation, builder); + createIntrinsicCall(builder, id, args); }]; } diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp index ec182f1db48ac..9348d3c172a07 100644 --- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp +++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp @@ -865,13 +865,7 @@ struct NVGPUMBarrierArriveLowering adaptor.getMbarId(), rewriter); Type tokenType = getTypeConverter()->convertType( nvgpu::MBarrierTokenType::get(op->getContext())); - if (isMbarrierShared(op.getBarriers().getType())) { - rewriter.replaceOpWithNewOp(op, tokenType, - barrier); - } else { - rewriter.replaceOpWithNewOp(op, tokenType, - barrier); - } + rewriter.replaceOpWithNewOp(op, tokenType, barrier); return success(); } }; @@ -892,13 +886,8 @@ struct NVGPUMBarrierArriveNoCompleteLowering Type tokenType = getTypeConverter()->convertType( nvgpu::MBarrierTokenType::get(op->getContext())); Value count = truncToI32(b, adaptor.getCount()); - if (isMbarrierShared(op.getBarriers().getType())) { - rewriter.replaceOpWithNewOp( - op, tokenType, barrier, count); - } else { - rewriter.replaceOpWithNewOp( - op, tokenType, barrier, count); - } + rewriter.replaceOpWithNewOp( + op, tokenType, barrier, count); return success(); } }; @@ -915,13 +904,8 @@ struct NVGPUMBarrierTestWaitLowering getMbarrierPtr(b, op.getBarriers().getType(), adaptor.getBarriers(), adaptor.getMbarId(), rewriter); Type retType = rewriter.getI1Type(); - if (isMbarrierShared(op.getBarriers().getType())) { - rewriter.replaceOpWithNewOp( - op, retType, barrier, adaptor.getToken()); - } else { - rewriter.replaceOpWithNewOp( - op, retType, barrier, adaptor.getToken()); - } + rewriter.replaceOpWithNewOp(op, retType, barrier, + adaptor.getToken()); return success(); } }; diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index a5ffb9e77fa9d..78b320906b638 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -1637,15 +1637,21 @@ std::string NVVM::MBarrierInitOp::getPtx() { // getIntrinsicID/getIntrinsicIDAndArgs methods //===----------------------------------------------------------------------===// +static bool isPtrInAddrSpace(mlir::Value ptr, NVVMMemorySpace targetAS) { + auto ptrTy = llvm::cast(ptr.getType()); + return ptrTy.getAddressSpace() == static_cast(targetAS); +} + +static bool isPtrInSharedCTASpace(mlir::Value ptr) { + return isPtrInAddrSpace(ptr, NVVMMemorySpace::Shared); +} + mlir::NVVM::IDArgPair MBarrierInitOp::getIntrinsicIDAndArgs( Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) { auto thisOp = cast(op); - unsigned addressSpace = - llvm::cast(thisOp.getAddr().getType()) - .getAddressSpace(); - llvm::Intrinsic::ID id = (addressSpace == NVVMMemorySpace::Shared) - ? llvm::Intrinsic::nvvm_mbarrier_init_shared - : llvm::Intrinsic::nvvm_mbarrier_init; + bool isShared = isPtrInSharedCTASpace(thisOp.getAddr()); + llvm::Intrinsic::ID id = isShared ? llvm::Intrinsic::nvvm_mbarrier_init_shared + : llvm::Intrinsic::nvvm_mbarrier_init; // Fill the Intrinsic Args llvm::SmallVector args; @@ -1658,16 +1664,72 @@ mlir::NVVM::IDArgPair MBarrierInitOp::getIntrinsicIDAndArgs( mlir::NVVM::IDArgPair MBarrierInvalOp::getIntrinsicIDAndArgs( Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) { auto thisOp = cast(op); - unsigned addressSpace = - llvm::cast(thisOp.getAddr().getType()) - .getAddressSpace(); - llvm::Intrinsic::ID id = (addressSpace == NVVMMemorySpace::Shared) + bool isShared = isPtrInSharedCTASpace(thisOp.getAddr()); + llvm::Intrinsic::ID id = isShared ? llvm::Intrinsic::nvvm_mbarrier_inval_shared : llvm::Intrinsic::nvvm_mbarrier_inval; return {id, {mt.lookupValue(thisOp.getAddr())}}; } +mlir::NVVM::IDArgPair MBarrierArriveOp::getIntrinsicIDAndArgs( + Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) { + auto thisOp = cast(op); + bool isShared = isPtrInSharedCTASpace(thisOp.getAddr()); + llvm::Intrinsic::ID id = isShared + ? llvm::Intrinsic::nvvm_mbarrier_arrive_shared + : llvm::Intrinsic::nvvm_mbarrier_arrive; + + return {id, {mt.lookupValue(thisOp.getAddr())}}; +} + +mlir::NVVM::IDArgPair MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs( + Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) { + auto thisOp = cast(op); + bool isShared = isPtrInSharedCTASpace(thisOp.getAddr()); + llvm::Intrinsic::ID id = + isShared ? llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete_shared + : llvm::Intrinsic::nvvm_mbarrier_arrive_noComplete; + // Fill the Intrinsic Args + llvm::SmallVector args; + args.push_back(mt.lookupValue(thisOp.getAddr())); + args.push_back(mt.lookupValue(thisOp.getCount())); + + return {id, std::move(args)}; +} + +mlir::NVVM::IDArgPair MBarrierTestWaitOp::getIntrinsicIDAndArgs( + Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) { + auto thisOp = cast(op); + bool isShared = isPtrInSharedCTASpace(thisOp.getAddr()); + llvm::Intrinsic::ID id = isShared + ? llvm::Intrinsic::nvvm_mbarrier_test_wait_shared + : llvm::Intrinsic::nvvm_mbarrier_test_wait; + // Fill the Intrinsic Args + llvm::SmallVector args; + args.push_back(mt.lookupValue(thisOp.getAddr())); + args.push_back(mt.lookupValue(thisOp.getState())); + + return {id, std::move(args)}; +} + +mlir::NVVM::IDArgPair CpAsyncMBarrierArriveOp::getIntrinsicIDAndArgs( + Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) { + auto thisOp = cast(op); + bool isShared = isPtrInSharedCTASpace(thisOp.getAddr()); + + llvm::Intrinsic::ID id; + if (thisOp.getNoinc()) { + id = isShared ? llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_noinc_shared + : llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_noinc; + } else { + id = isShared ? llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive_shared + : llvm::Intrinsic::nvvm_cp_async_mbarrier_arrive; + } + + return {id, {mt.lookupValue(thisOp.getAddr())}}; +} + #define CP_ASYNC_ID_IMPL(mod, size, suffix) \ llvm::Intrinsic::nvvm_cp_async_##mod##_shared_global_##size##suffix diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir index 8cce6308018e2..dcf4ddb2dd48c 100644 --- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir +++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir @@ -491,12 +491,12 @@ func.func @mbarrier() { // CHECK: %[[base2:.+]] = llvm.extractvalue %[[barStr]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> // CHECK: %[[barPtr2:.+]] = llvm.getelementptr %[[base2]][%[[mid]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i64 - // CHECK: %[[token:.+]] = nvvm.mbarrier.arrive.shared %[[barPtr2]] + // CHECK: %[[token:.+]] = nvvm.mbarrier.arrive %[[barPtr2]] %token = nvgpu.mbarrier.arrive %barrier[%c0] : !barrierType -> !tokenType // CHECK: %[[base3:.+]] = llvm.extractvalue %[[barStr]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> // CHECK: %[[barPtr3:.+]] = llvm.getelementptr %[[base3]][%[[mid]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i64 - // CHECK: nvvm.mbarrier.test.wait.shared %[[barPtr3]], %[[token]] + // CHECK: nvvm.mbarrier.test.wait %[[barPtr3]], %[[token]] %isDone = nvgpu.mbarrier.test.wait %barrier[%c0], %token : !barrierType, !tokenType func.return @@ -521,12 +521,12 @@ func.func @mbarrier_nocomplete() { // CHECK: %[[base2:.+]] = llvm.extractvalue %[[barStr]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> // CHECK: %[[barPtr2:.+]] = llvm.getelementptr %[[base2]][%[[mid]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i64 - // CHECK: %[[token:.+]] = nvvm.mbarrier.arrive.nocomplete.shared %[[barPtr2]] + // CHECK: %[[token:.+]] = nvvm.mbarrier.arrive.nocomplete %[[barPtr2]] %token = nvgpu.mbarrier.arrive.nocomplete %barrier[%c0], %count : !barrierType -> !tokenType // CHECK: %[[base3:.+]] = llvm.extractvalue %[[barStr]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> // CHECK: %[[barPtr3:.+]] = llvm.getelementptr %[[base3]][%[[mid]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i64 - // CHECK: nvvm.mbarrier.test.wait.shared %[[barPtr3]], %[[token]] + // CHECK: nvvm.mbarrier.test.wait %[[barPtr3]], %[[token]] %isDone = nvgpu.mbarrier.test.wait %barrier[%c0], %token : !barrierType, !tokenType func.return @@ -572,7 +572,7 @@ func.func @mbarrier_wait(%barriers : !nvgpu.mbarrier.group, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> // CHECK: %[[S5:.+]] = llvm.getelementptr %[[S4]][%[[S3]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i64 -// CHECK: nvvm.mbarrier.test.wait.shared {{.*}}, %[[CARG1]] +// CHECK: nvvm.mbarrier.test.wait {{.*}}, %[[CARG1]] %mbarId = arith.remui %i, %numBarriers : index %isDone = nvgpu.mbarrier.test.wait %barriers[%mbarId], %token : !nvgpu.mbarrier.group, num_barriers = 5>, !tokenType } diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir index fbc4c0af60360..a9356c5cb60bb 100644 --- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir +++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir @@ -88,10 +88,10 @@ func.func @cp_async_mbarrier_arrive(%bar_shared: !llvm.ptr<3>, %bar_gen: !llvm.p nvvm.cp.async.mbarrier.arrive %bar_gen : !llvm.ptr // CHECK: nvvm.cp.async.mbarrier.arrive %{{.*}} {noinc = true} nvvm.cp.async.mbarrier.arrive %bar_gen {noinc = true} : !llvm.ptr - // CHECK: nvvm.cp.async.mbarrier.arrive.shared %{{.*}} - nvvm.cp.async.mbarrier.arrive.shared %bar_shared : !llvm.ptr<3> - // CHECK: nvvm.cp.async.mbarrier.arrive.shared %{{.*}} {noinc = true} - nvvm.cp.async.mbarrier.arrive.shared %bar_shared {noinc = true} : !llvm.ptr<3> + // CHECK: nvvm.cp.async.mbarrier.arrive %{{.*}} + nvvm.cp.async.mbarrier.arrive %bar_shared : !llvm.ptr<3> + // CHECK: nvvm.cp.async.mbarrier.arrive %{{.*}} {noinc = true} + nvvm.cp.async.mbarrier.arrive %bar_shared {noinc = true} : !llvm.ptr<3> llvm.return } diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir index 2505e56407c2b..cd7bd37da5763 100644 --- a/mlir/test/Dialect/LLVMIR/nvvm.mlir +++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir @@ -445,8 +445,8 @@ llvm.func private @mbarrier_arrive(%barrier: !llvm.ptr) { } llvm.func private @mbarrier_arrive_shared(%barrier: !llvm.ptr<3>) { - // CHECK: nvvm.mbarrier.arrive.shared %{{.*}} : !llvm.ptr<3> - %0 = nvvm.mbarrier.arrive.shared %barrier : !llvm.ptr<3> -> i64 + // CHECK: nvvm.mbarrier.arrive %{{.*}} : !llvm.ptr<3> + %0 = nvvm.mbarrier.arrive %barrier : !llvm.ptr<3> -> i64 llvm.return } @@ -459,8 +459,8 @@ llvm.func private @mbarrier_arrive_nocomplete(%barrier: !llvm.ptr) { llvm.func private @mbarrier_arrive_nocomplete_shared(%barrier: !llvm.ptr<3>) { %count = nvvm.read.ptx.sreg.ntid.x : i32 - // CHECK: nvvm.mbarrier.arrive.nocomplete.shared %{{.*}} : !llvm.ptr<3> - %0 = nvvm.mbarrier.arrive.nocomplete.shared %barrier, %count : !llvm.ptr<3>, i32 -> i64 + // CHECK: nvvm.mbarrier.arrive.nocomplete %{{.*}} : !llvm.ptr<3> + %0 = nvvm.mbarrier.arrive.nocomplete %barrier, %count : !llvm.ptr<3>, i32 -> i64 llvm.return } @@ -472,8 +472,8 @@ llvm.func private @mbarrier_test_wait(%barrier: !llvm.ptr, %token : i64) -> i1 { llvm.func private @mbarrier_test_wait_shared(%barrier: !llvm.ptr<3>, %token : i64) { %count = nvvm.read.ptx.sreg.ntid.x : i32 - // CHECK: nvvm.mbarrier.test.wait.shared %{{.*}} - %isComplete = nvvm.mbarrier.test.wait.shared %barrier, %token : !llvm.ptr<3>, i64 -> i1 + // CHECK: nvvm.mbarrier.test.wait %{{.*}} + %isComplete = nvvm.mbarrier.test.wait %barrier, %token : !llvm.ptr<3>, i64 -> i1 llvm.return } diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index 9115de65ff0e8..3fc09f371a347 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -538,9 +538,9 @@ llvm.func @cp_async_mbarrier_arrive(%bar_shared: !llvm.ptr<3>, %bar_gen: !llvm.p // CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(ptr %{{.*}}) nvvm.cp.async.mbarrier.arrive %bar_gen {noinc = true} : !llvm.ptr // CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %{{.*}}) - nvvm.cp.async.mbarrier.arrive.shared %bar_shared : !llvm.ptr<3> + nvvm.cp.async.mbarrier.arrive %bar_shared : !llvm.ptr<3> // CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(ptr addrspace(3) %{{.*}}) - nvvm.cp.async.mbarrier.arrive.shared %bar_shared {noinc = true} : !llvm.ptr<3> + nvvm.cp.async.mbarrier.arrive %bar_shared {noinc = true} : !llvm.ptr<3> llvm.return }