diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h index e6ba71d53e92b..167e613816394 100644 --- a/flang/include/flang/Semantics/semantics.h +++ b/flang/include/flang/Semantics/semantics.h @@ -215,8 +215,10 @@ class SemanticsContext { void UseFortranBuiltinsModule(); const Scope *GetBuiltinsScope() const { return builtinsScope_; } - void UsePPCBuiltinTypesModule(); const Scope &GetCUDABuiltinsScope(); + const Scope &GetCUDADeviceScope(); + + void UsePPCBuiltinTypesModule(); void UsePPCBuiltinsModule(); Scope *GetPPCBuiltinTypesScope() { return ppcBuiltinTypesScope_; } const Scope *GetPPCBuiltinsScope() const { return ppcBuiltinsScope_; } @@ -292,6 +294,7 @@ class SemanticsContext { const Scope *builtinsScope_{nullptr}; // module __Fortran_builtins Scope *ppcBuiltinTypesScope_{nullptr}; // module __Fortran_PPC_types std::optional cudaBuiltinsScope_; // module __CUDA_builtins + std::optional cudaDeviceScope_; // module cudadevice const Scope *ppcBuiltinsScope_{nullptr}; // module __ppc_intrinsics std::list modFileParseTrees_; std::unique_ptr commonBlockMap_; diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp index 96ab902392633..2830d5f0be6ea 100644 --- a/flang/lib/Semantics/check-cuda.cpp +++ b/flang/lib/Semantics/check-cuda.cpp @@ -82,6 +82,11 @@ struct DeviceExprChecker } } } + if (sym->owner().IsModule() && + sym->owner().parent().IsIntrinsicModules() && + DEREF(sym->owner().symbol()).name() == "__cuda_device_builtins") { + return {}; + } } else if (x.GetSpecificIntrinsic()) { // TODO(CUDA): Check for unsupported intrinsics here return {}; diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 61394b0f41de7..9e32463fa54b9 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -3797,6 +3797,19 @@ bool SubprogramVisitor::Pre(const parser::PrefixSpec::Attributes &attrs) { subp->set_cudaSubprogramAttrs(attr); } } + if (auto attrs{subp->cudaSubprogramAttrs()}) { + if (*attrs == common::CUDASubprogramAttrs::Global || + *attrs == common::CUDASubprogramAttrs::Device) { + // Implicitly USE the cudadevice module by copying its symbols in the + // current scope. + const Scope &scope{context().GetCUDADeviceScope()}; + for (auto sym : scope.GetSymbols()) { + if (!currScope().FindSymbol(sym->name())) { + currScope().CopySymbol(sym); + } + } + } + } } return false; } diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp index 6ccd915c4dcbf..d51cc62d804e8 100644 --- a/flang/lib/Semantics/semantics.cpp +++ b/flang/lib/Semantics/semantics.cpp @@ -543,6 +543,14 @@ const Scope &SemanticsContext::GetCUDABuiltinsScope() { return **cudaBuiltinsScope_; } +const Scope &SemanticsContext::GetCUDADeviceScope() { + if (!cudaDeviceScope_) { + cudaDeviceScope_ = GetBuiltinModule("cudadevice"); + CHECK(cudaDeviceScope_.value() != nullptr); + } + return **cudaDeviceScope_; +} + void SemanticsContext::UsePPCBuiltinsModule() { if (ppcBuiltinsScope_ == nullptr) { ppcBuiltinsScope_ = GetBuiltinModule("__ppc_intrinsics"); diff --git a/flang/module/__cuda_device_builtins.f90 b/flang/module/__cuda_device_builtins.f90 new file mode 100644 index 0000000000000..738dc97242f2b --- /dev/null +++ b/flang/module/__cuda_device_builtins.f90 @@ -0,0 +1,74 @@ +!===-- module/__cuda_device_builtins.f90 -----------------------------------===! +! +! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +! See https://llvm.org/LICENSE.txt for license information. +! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +! +!===------------------------------------------------------------------------===! + +! CUDA Fortran procedures available in device subprogram + +module __CUDA_device_builtins + + implicit none + + ! Set PRIVATE by default to explicitly only export what is meant + ! to be exported by this MODULE. + private + + ! Synchronization Functions + + interface + subroutine __cuda_device_builtins_syncthreads() + end subroutine + end interface + public :: __cuda_device_builtins_syncthreads + + interface + integer function __cuda_device_builtins_syncthreads_and(value) + integer :: value + end function + end interface + public :: __cuda_device_builtins_syncthreads_and + + interface + integer function __cuda_device_builtins_syncthreads_count(value) + integer :: value + end function + end interface + public :: __cuda_device_builtins_syncthreads_count + + interface + integer function __cuda_device_builtins_syncthreads_or(int_value) + end function + end interface + public :: __cuda_device_builtins_syncthreads_or + + interface + subroutine __cuda_device_builtins_syncwarp(mask) + integer :: mask + end subroutine + end interface + public :: __cuda_device_builtins_syncwarp + + ! Memory Fences + + interface + subroutine __cuda_device_builtins_threadfence() + end subroutine + end interface + public :: __cuda_device_builtins_threadfence + + interface + subroutine __cuda_device_builtins_threadfence_block() + end subroutine + end interface + public :: __cuda_device_builtins_threadfence_block + + interface + subroutine __cuda_device_builtins_threadfence_system() + end subroutine + end interface + public :: __cuda_device_builtins_threadfence_system + +end module diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90 new file mode 100644 index 0000000000000..b635d77ea4529 --- /dev/null +++ b/flang/module/cudadevice.f90 @@ -0,0 +1,21 @@ +!===-- module/cudedevice.f90 -----------------------------------------------===! +! +! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +! See https://llvm.org/LICENSE.txt for license information. +! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +! +!===------------------------------------------------------------------------===! + +! CUDA Fortran procedures available in device subprogram + +module cudadevice + use __cuda_device_builtins, only: & + syncthreads => __cuda_device_builtins_syncthreads, & + syncthreads_and => __cuda_device_builtins_syncthreads_and, & + syncthreads_count => __cuda_device_builtins_syncthreads_count, & + syncthreads_or => __cuda_device_builtins_syncthreads_or, & + syncwarp => __cuda_device_builtins_syncwarp, & + threadfence => __cuda_device_builtins_threadfence, & + threadfence_block => __cuda_device_builtins_threadfence_block, & + threadfence_system => __cuda_device_builtins_threadfence_system +end module diff --git a/flang/test/Semantics/cuf-device-procedures01.cuf b/flang/test/Semantics/cuf-device-procedures01.cuf new file mode 100644 index 0000000000000..e79423e3587a1 --- /dev/null +++ b/flang/test/Semantics/cuf-device-procedures01.cuf @@ -0,0 +1,35 @@ +! RUN: %flang_fc1 -fdebug-dump-symbols %s | FileCheck %s + +! Test CUDA Fortran intrinsic can pass semantic + +attributes(global) subroutine devsub() + implicit none + integer :: ret + + ! 3.6.4. Synchronization Functions + call syncthreads() + call syncwarp(1) + call threadfence() + call threadfence_block() + call threadfence_system() + ret = syncthreads_and(1) + ret = syncthreads_count(1) + ret = syncthreads_or(1) +end + +! CHECK-LABEL: Subprogram scope: devsub +! CHECK: syncthreads, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_syncthreads in __cuda_device_builtins +! CHECK: syncthreads_and, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_and in __cuda_device_builtins +! CHECK: syncthreads_count, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_count in __cuda_device_builtins +! CHECK: syncthreads_or, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_or in __cuda_device_builtins +! CHECK: syncwarp, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_syncwarp in __cuda_device_builtins +! CHECK: threadfence, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence in __cuda_device_builtins +! CHECK: threadfence_block, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence_block in __cuda_device_builtins +! CHECK: threadfence_system, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence_system in __cuda_device_builtins + +subroutine host() + call syncthreads() +end subroutine + +! CHECK-LABEL: Subprogram scope: host +! CHECK: syncthreads, EXTERNAL: HostAssoc{{$}} diff --git a/flang/test/Semantics/cuf-device-procedures02.cuf b/flang/test/Semantics/cuf-device-procedures02.cuf new file mode 100644 index 0000000000000..ea6a094ed5c38 --- /dev/null +++ b/flang/test/Semantics/cuf-device-procedures02.cuf @@ -0,0 +1,17 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 + +module dev + integer, device :: syncthreads + +contains + + attributes(device) subroutine sub1() + syncthreads = 1 ! syncthreads not overwritten by cudadevice + end subroutine + + attributes(global) subroutine sub2() +!ERROR: 'threadfence' is use-associated from module '__cuda_device_builtins' and cannot be re-declared + integer :: threadfence + end subroutine +end module + diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt index 64815a1f5da62..0222654c8e5d8 100644 --- a/flang/tools/f18/CMakeLists.txt +++ b/flang/tools/f18/CMakeLists.txt @@ -12,6 +12,8 @@ set(MODULES "__ppc_intrinsics" "mma" "__cuda_builtins" + "__cuda_device_builtins" + "cudadevice" "ieee_arithmetic" "ieee_exceptions" "ieee_features" @@ -31,6 +33,8 @@ if (NOT CMAKE_CROSSCOMPILING) elseif(${filename} STREQUAL "__ppc_intrinsics" OR ${filename} STREQUAL "mma") set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod) + elseif(${filename} STREQUAL "cudadevice") + set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_device_builtins.mod) else() set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod) if(NOT ${filename} STREQUAL "__fortran_type_info")