Skip to content

[SYCL][CUDA] Add sub-group barrier #2606

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions libclc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -261,10 +261,13 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
foreach( d ${${t}_devices} )
# Some targets don't have a specific GPU to target
if( ${d} STREQUAL "none" OR ${ARCH} STREQUAL "spirv" OR ${ARCH} STREQUAL "spirv64" )
set( mcpu )
# Reset flags as well: it is only assigned below for NVPTX and in the
# device-specific branch, so without this a target that reaches this branch
# would reuse whatever flags the previous loop iteration left behind.
set( flags )
# FIXME: Ideally we would not be tied to a specific PTX ISA version
if( ${ARCH} STREQUAL nvptx OR ${ARCH} STREQUAL nvptx64 )
set( flags "SHELL:-Xclang -target-feature" "SHELL:-Xclang +ptx64")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is "SHELL:" used here, and why is the later string( REGEX REPLACE "SHELL:" ... ) needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add_target_options only works if the SHELL: is there, but add_custom_command only works if the SHELL: is not there.

This is definitely a bit of a hack, but it seemed less error-prone than defining the same set of flags twice. If there's a more standard way to do this, please let me know and I'll fix it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. I'm no CMake expert so I'm not quite sure how to make it better.

endif()
set( arch_suffix "${t}" )
else()
set( mcpu "-mcpu=${d}" )
set( flags "-mcpu=${d}" )
set( arch_suffix "${d}-${t}" )
endif()
message( " DEVICE: ${d} ( ${${d}_aliases} )" )
Expand All @@ -276,14 +279,14 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
# Build the SYCL device binding for NVPTX targets only.
# The operands are quoted so an empty ARCH cannot break the comparison.
if( "${ARCH}" STREQUAL "nvptx" OR "${ARCH}" STREQUAL "nvptx64" )
  # flags already holds the -mcpu option whenever a device is targeted, so
  # it is the only COMPILE_OPT list passed; repeating the keyword with
  # ${mcpu} would just duplicate that option.
  add_libclc_sycl_binding( libspirv_files
    TRIPLE ${t}
    COMPILE_OPT ${flags}
    FILES generic/libspirv/sycldevice-binding.cpp )
endif()

add_libclc_builtin_set(libspirv-${arch_suffix}
TRIPLE ${t}
TARGET_ENV libspirv
COMPILE_OPT ${mcpu}
COMPILE_OPT ${flags}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

COMPILE_OPT is a multi value option, so you should be able to add the extra flags directly.

A longer-term solution might be to define the flags per arch_suffix (so they can be accessed later), but that can wait for a follow-up, I guess.

FILES ${libspirv_files}
ALIASES ${${d}_aliases}
GENERATE_TARGET "generate_convert_spirv.cl" "generate_convert_core.cl"
Expand All @@ -292,7 +295,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
add_libclc_builtin_set(clc-${arch_suffix}
TRIPLE ${t}
TARGET_ENV clc
COMPILE_OPT ${mcpu}
COMPILE_OPT ${flags}
FILES ${lib_files}
LIB_DEP libspirv-${arch_suffix}
ALIASES ${${d}_aliases}
Expand Down
3 changes: 2 additions & 1 deletion libclc/cmake/modules/AddLibclc.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ function(add_libclc_sycl_binding OUT_LIST)
file( TO_CMAKE_PATH ${LIBCLC_ROOT_DIR}/${file} SYCLDEVICE_BINDING )
if( EXISTS ${SYCLDEVICE_BINDING} )
set( SYCLDEVICE_BINDING_OUT ${CMAKE_CURRENT_BINARY_DIR}/sycldevice-binding-${ARG_TRIPLE}/sycldevice-binding.bc )
# COMPILE_OPT entries may carry the "SHELL:" prefix. The custom command below
# needs plain arguments, so strip the prefix and split each entry into
# individual words.
# The input is quoted so that (a) an empty option list does not leave the
# command without an input argument and (b) the list elements are not
# concatenated without a separator before the replacement runs.
string( REPLACE "SHELL:" "" SYCLDEVICE_OPT "${ARG_COMPILE_OPT}" )
string( REPLACE ";" " " SYCLDEVICE_OPT "${SYCLDEVICE_OPT}" )
separate_arguments( SYCLDEVICE_OPT UNIX_COMMAND "${SYCLDEVICE_OPT}" )
add_custom_command( OUTPUT ${SYCLDEVICE_BINDING_OUT}
COMMAND ${CMAKE_COMMAND} -E make_directory
${CMAKE_CURRENT_BINARY_DIR}/sycldevice-binding-${ARG_TRIPLE}
Expand All @@ -223,7 +224,7 @@ function(add_libclc_sycl_binding OUT_LIST)
-fsycl-device-only
-Dcl_khr_fp64
-I${LIBCLC_ROOT_DIR}/generic/include
${ARG_COMPILE_OPT}
${SYCLDEVICE_OPT}
${SYCLDEVICE_BINDING}
-o ${SYCLDEVICE_BINDING_OUT}
MAIN_DEPENDENCY ${SYCLDEVICE_BINDING}
Expand Down
10 changes: 9 additions & 1 deletion libclc/ptx-nvidiacl/libspirv/synchronization/barrier.cl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

_CLC_OVERLOAD _CLC_DEF void __spirv_MemoryBarrier(unsigned int memory,
unsigned int semantics) {
Expand All @@ -16,5 +17,12 @@ _CLC_OVERLOAD _CLC_DEF void __spirv_MemoryBarrier(unsigned int memory,
// Execution barrier selected by SPIR-V scope.
//
//   scope      compared against Subgroup to choose the barrier primitive.
//   memory     not consulted by this implementation.
//   semantics  not consulted by this implementation.
//
// A Subgroup scope issues a warp-level sync only; every other scope falls
// back to __syncthreads(). There is deliberately no unconditional
// __syncthreads() ahead of the scope check, since that would make the
// sub-group path perform a second, wider barrier as well.
_CLC_OVERLOAD _CLC_DEF _CLC_CONVERGENT void
__spirv_ControlBarrier(unsigned int scope, unsigned int memory,
                       unsigned int semantics) {
  if (scope == Subgroup) {
    // Shift the all-ones mask right by the number of unused lanes.
    // NOTE(review): this leaves exactly sg_size low bits set only if
    // max_size equals the 32-bit mask width - confirm for the target.
    uint FULL_MASK = 0xFFFFFFFF;
    uint max_size = __spirv_SubgroupMaxSize();
    uint sg_size = __spirv_SubgroupSize();
    __nvvm_bar_warp_sync(FULL_MASK >> (max_size - sg_size));
  } else {
    __syncthreads();
  }
}
5 changes: 1 addition & 4 deletions sycl/test/sub_group/barrier.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
// UNSUPPORTED: cuda
// CUDA compilation and runtime do not yet support sub-groups.
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
Expand Down Expand Up @@ -73,7 +70,7 @@ template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
}
int main() {
queue Queue;
if (!core_sg_supported(Queue.get_device())) {
if (Queue.get_device().is_host()) {
std::cout << "Skipping test\n";
return 0;
}
Expand Down