|
1 |
| -# Build a list of gencode arguments, based on CUDA verison. |
2 |
| -# Accepts user override via CUDA_ARCH |
3 |
| - |
4 |
| -# CMAKE > 3.18 introduces CUDA_ARCHITECTURES as a cmake-native way of generating gencodes (Policy CMP0104). Set the value to OFF to prevent errors for it being not provided. |
5 |
| -if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.18") |
6 |
| - set(CMAKE_CUDA_ARCHITECTURES "OFF") |
7 |
| -endif() |
8 |
| - |
9 |
| - |
10 |
| -# Check if any have been provided by the users |
11 |
| -string(LENGTH "${CUDA_ARCH}" CUDA_ARCH_LENGTH) |
12 |
| - |
13 |
| -# Define the default compute capabilites incase not provided by the user |
14 |
| -set(DEFAULT_CUDA_ARCH "35;50;60;70;80;") |
15 |
| - |
16 |
| -# Get the valid options for the current compiler. |
17 |
| -# Run nvcc --help to get the help string which contains all valid compute_ sm_ for that version. |
18 |
| -execute_process(COMMAND ${CMAKE_CUDA_COMPILER} "--help" OUTPUT_VARIABLE NVCC_HELP_STR ERROR_VARIABLE NVCC_HELP_STR) |
19 |
| -# Match all comptue_XX or sm_XXs |
20 |
| -string(REGEX MATCHALL "'(sm|compute)_[0-9]+'" SUPPORTED_CUDA_ARCH "${NVCC_HELP_STR}" ) |
21 |
| -# Strip just the numeric component |
22 |
| -string(REGEX REPLACE "'(sm|compute)_([0-9]+)'" "\\2" SUPPORTED_CUDA_ARCH "${SUPPORTED_CUDA_ARCH}" ) |
23 |
| -# Remove dupes and sort to build the correct list of supported CUDA_ARCH. |
24 |
| -list(REMOVE_DUPLICATES SUPPORTED_CUDA_ARCH) |
25 |
| -list(REMOVE_ITEM SUPPORTED_CUDA_ARCH "") |
26 |
| -list(SORT SUPPORTED_CUDA_ARCH) |
27 |
| - |
28 |
| -# Update defaults to only be those supported |
29 |
| -# @todo might be better to instead do a dry run compilation with each gencode to validate? |
30 |
| -foreach(ARCH IN LISTS DEFAULT_CUDA_ARCH) |
31 |
| - if (NOT ARCH IN_LIST SUPPORTED_CUDA_ARCH) |
32 |
| - list(REMOVE_ITEM DEFAULT_CUDA_ARCH "${ARCH}") |
| 1 | +# Provides a per-target macro to set gencode compiler and linker options. |
| 2 | +# Macro to apply -gencode flags and a MIN_CUDA_ARCH compile definition to a given target: SetCUDAGencodes(TARGET <target>) |
| 3 | +# If the CMake variable CUDA_ARCH is set to a non-empty list (or a space- or comma-separated string), it is used instead of the default architectures. |
| 4 | +# @todo - find a way to warn about deprecated architectures once and only once (at cmake time?) Might need to just try compiling with old warnings and capture / post process the output. |
| 5 | +# @todo - figure out how to do this once and only once as a function rather than a macro. |
| 6 | +macro(SetCUDAGencodes) |
| 7 | + # @todo - only get the available gencodes from nvcc once, rather than per target. |
| 8 | + |
| 9 | + # Parse the expected arguments, prefixing variables. |
| 10 | + cmake_parse_arguments( |
| 11 | + SCG |
| 12 | + "" |
| 13 | + "TARGET" |
| 14 | + "" |
| 15 | + ${ARGN} |
| 16 | + ) |
| 17 | + # Ensure that a target has been passed, and that it is a valid target. |
| 18 | + if(NOT SCG_TARGET) |
| 19 | + message( FATAL_ERROR "SetCUDAGencodes: 'TARGET' argument required." ) |
| 20 | + elseif(NOT TARGET ${SCG_TARGET} ) |
| 21 | + message( FATAL_ERROR "SetCUDAGencodes: TARGET '${SCG_TARGET}' is not a valid target" ) |
33 | 22 | endif()
|
34 |
| - list(REMOVE_DUPLICATES CUDA_ARCH) |
35 |
| - list(REMOVE_ITEM CUDA_ARCH "") |
36 |
| - list(SORT CUDA_ARCH) |
37 |
| -endforeach() |
38 | 23 |
|
39 |
| - |
40 |
| -if(NOT CUDA_ARCH_LENGTH EQUAL 0) |
41 |
| - # Convert user provided string argument to a list. |
42 |
| - string (REPLACE " " ";" CUDA_ARCH "${CUDA_ARCH}") |
43 |
| - string (REPLACE "," ";" CUDA_ARCH "${CUDA_ARCH}") |
44 |
| - |
45 |
| - # Remove duplicates, empty items and sort. |
46 |
| - list(REMOVE_DUPLICATES CUDA_ARCH) |
47 |
| - list(REMOVE_ITEM CUDA_ARCH "") |
48 |
| - list(SORT CUDA_ARCH) |
49 |
| - |
50 |
| - # Validate the list. |
51 |
| - foreach(ARCH IN LISTS CUDA_ARCH) |
52 |
| - if (NOT ARCH IN_LIST SUPPORTED_CUDA_ARCH) |
53 |
| - message(WARNING |
54 |
| - " CUDA_ARCH '${ARCH}' not supported by CUDA ${CMAKE_CUDA_COMPILER_VERSION} and is being ignored.\n" |
55 |
| - " Choose from: ${SUPPORTED_CUDA_ARCH}") |
56 |
| - list(REMOVE_ITEM CUDA_ARCH "${ARCH}") |
57 |
| - endif() |
58 |
| - endforeach() |
59 |
| - |
60 |
| - # @todo - validate that the CUDA_ARCH provided are supported by the compiler |
61 |
| -endif() |
62 |
| - |
63 |
| -# If the list is empty post validation, set it to the (validated) defaults |
64 |
| -list(LENGTH CUDA_ARCH CUDA_ARCH_LENGTH) |
65 |
| -if(CUDA_ARCH_LENGTH EQUAL 0) |
66 |
| - set(CUDA_ARCH ${DEFAULT_CUDA_ARCH}) |
67 |
| -endif() |
68 |
| - |
69 |
| -# Propagate the validated values to the parent scope, to reduce warning duplication. |
70 |
| -get_directory_property(hasParent PARENT_DIRECTORY) |
71 |
| -if(hasParent) |
72 |
| - set(CUDA_ARCH ${CUDA_ARCH} PARENT_SCOPE) |
73 |
| -endif() |
74 |
| -# If the list is somehow empty now, do not set any gencodes arguments, instead using the compiler defaults. |
75 |
| -list(LENGTH CUDA_ARCH CUDA_ARCH_LENGTH) |
76 |
| -if(NOT CUDA_ARCH_LENGTH EQUAL 0) |
77 |
| - # Only do this if required.I.e. CUDA_ARCH is the same as the last time this file was included |
78 |
| - if(NOT CUDA_ARCH_APPLIED EQUAL CUDA_ARCH) |
79 |
| - message(STATUS "Generating Compute Capabilities: ${CUDA_ARCH}") |
| 24 | + # CMake >= 3.18 introduces CUDA_ARCHITECTURES as a CMake-native way of generating gencodes (Policy CMP0104). Set the value to OFF to prevent errors when it is not provided. |
| 25 | + # We manually set gencode arguments, so we can (potentially) use LTO and are not restricted to CMake's available options. |
| 26 | + set_property(TARGET ${SCG_TARGET} PROPERTY CUDA_ARCHITECTURES OFF) |
| 27 | + |
| 28 | + # Define the default compute capabilities in case they are not provided by the user |
| 29 | + set(DEFAULT_CUDA_ARCH "35;50;60;70;80;") |
| 30 | + |
| 31 | + # Determine if the user has provided a non default CUDA_ARCH value |
| 32 | + string(LENGTH "${CUDA_ARCH}" CUDA_ARCH_LENGTH) |
| 33 | + |
| 34 | + # Query NVCC in order to filter the provided list. |
| 35 | + # @todo only do this once, and re-use the output for a given cmake configure? |
| 36 | + |
| 37 | + # Get the valid options for the current compiler. |
| 38 | + # Run nvcc --help to get the help string which contains all valid compute_ sm_ for that version. |
| 39 | + if(NOT DEFINED SUPPORTED_CUDA_ARCH) |
| 40 | + execute_process(COMMAND ${CMAKE_CUDA_COMPILER} "--help" OUTPUT_VARIABLE NVCC_HELP_STR ERROR_VARIABLE NVCC_HELP_STR) |
| 41 | + # Match all compute_XX or sm_XX entries |
| 42 | + string(REGEX MATCHALL "'(sm|compute)_[0-9]+'" SUPPORTED_CUDA_ARCH "${NVCC_HELP_STR}" ) |
| 43 | + # Strip just the numeric component |
| 44 | + string(REGEX REPLACE "'(sm|compute)_([0-9]+)'" "\\2" SUPPORTED_CUDA_ARCH "${SUPPORTED_CUDA_ARCH}" ) |
| 45 | + # Remove dupes and sort to build the correct list of supported CUDA_ARCH. |
| 46 | + list(REMOVE_DUPLICATES SUPPORTED_CUDA_ARCH) |
| 47 | + list(REMOVE_ITEM SUPPORTED_CUDA_ARCH "") |
| 48 | + list(SORT SUPPORTED_CUDA_ARCH) |
| 49 | + |
| 50 | + # Store the supported archs once and only once. This could be a cache variable, given that the CUDA compiler should not be able to change without clearing the cache? |
| 51 | + get_directory_property(hasParent PARENT_DIRECTORY) |
80 | 52 | if(hasParent)
|
81 |
| - set(CUDA_ARCH_APPLIED "${CUDA_ARCH}" PARENT_SCOPE ) |
| 53 | + set(SUPPORTED_CUDA_ARCH ${SUPPORTED_CUDA_ARCH} PARENT_SCOPE) |
82 | 54 | endif()
|
83 | 55 | endif()
|
84 |
| - set(GENCODES_FLAGS) |
85 |
| - set(MIN_CUDA_ARCH) |
86 |
| - # Convert to gencode arguments |
87 |
| - |
88 |
| - foreach(ARCH IN LISTS CUDA_ARCH) |
89 |
| - set(GENCODES_FLAGS "${GENCODES_FLAGS} -gencode arch=compute_${ARCH},code=sm_${ARCH}") |
| 56 | + |
| 57 | + |
| 58 | + # Update defaults to only be those supported |
| 59 | + # @todo might be better to instead do a dry run compilation with each gencode to validate? |
| 60 | + foreach(ARCH IN LISTS DEFAULT_CUDA_ARCH) |
| 61 | + if (NOT ARCH IN_LIST SUPPORTED_CUDA_ARCH) |
| 62 | + list(REMOVE_ITEM DEFAULT_CUDA_ARCH "${ARCH}") |
| 63 | + endif() |
| 64 | + list(REMOVE_DUPLICATES CUDA_ARCH) |
| 65 | + list(REMOVE_ITEM CUDA_ARCH "") |
| 66 | + list(SORT CUDA_ARCH) |
90 | 67 | endforeach()
|
91 | 68 |
|
92 |
| - # Add the last arch again as compute_, compute_ to enable forward looking JIT |
93 |
| - list(GET CUDA_ARCH -1 LAST_ARCH) |
94 |
| - set(GENCODES_FLAGS "${GENCODES_FLAGS} -gencode arch=compute_${LAST_ARCH},code=compute_${LAST_ARCH}") |
95 |
| - |
96 |
| - # Get the minimum device architecture to pass through to nvcc to enable graceful failure prior to cuda execution. |
97 |
| - list(GET CUDA_ARCH 0 MIN_CUDA_ARCH) |
98 |
| - |
99 |
| - # Set the gencode flags on NVCC |
100 |
| - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${GENCODES_FLAGS}") |
| 69 | + if(NOT CUDA_ARCH_LENGTH EQUAL 0) |
| 70 | + # Convert user provided string argument to a list. |
| 71 | + string (REPLACE " " ";" CUDA_ARCH "${CUDA_ARCH}") |
| 72 | + string (REPLACE "," ";" CUDA_ARCH "${CUDA_ARCH}") |
| 73 | + |
| 74 | + # Remove duplicates, empty items and sort. |
| 75 | + list(REMOVE_DUPLICATES CUDA_ARCH) |
| 76 | + list(REMOVE_ITEM CUDA_ARCH "") |
| 77 | + list(SORT CUDA_ARCH) |
| 78 | + |
| 79 | + # Validate the list. |
| 80 | + foreach(ARCH IN LISTS CUDA_ARCH) |
| 81 | + if (NOT ARCH IN_LIST SUPPORTED_CUDA_ARCH) |
| 82 | + message(WARNING |
| 83 | + " CUDA_ARCH '${ARCH}' not supported by CUDA ${CMAKE_CUDA_COMPILER_VERSION} and is being ignored.\n" |
| 84 | + " Choose from: ${SUPPORTED_CUDA_ARCH}") |
| 85 | + list(REMOVE_ITEM CUDA_ARCH "${ARCH}") |
| 86 | + endif() |
| 87 | + endforeach() |
| 88 | + endif() |
101 | 89 |
|
102 |
| - # Set the minimum arch flags for all compilers |
103 |
| - set(CMAKE_CC_FLAGS "${CMAKE_C_FLAGS} -DMIN_CUDA_ARCH=${MIN_CUDA_ARCH}") |
104 |
| - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMIN_CUDA_ARCH=${MIN_CUDA_ARCH}") |
105 |
| - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DMIN_CUDA_ARCH=${MIN_CUDA_ARCH}") |
106 |
| -else() |
107 |
| - message(STATUS "Generating default CUDA Compute Capabilities ${CUDA_ARCH}") |
108 |
| -endif() |
109 | 90 |
|
110 |
| -# Supress deprecated architecture warnings, as they are not fitered out by checking against nvcc help. |
111 |
| -# Ideally a warning would be output once at config time (i.e. above) and not at every file compilation. |
112 |
| -# But this is challenging due to multiline string detection. |
113 |
| -# Could potentially compile a simple program, without this flag to detect if its valid/deprecated? Would likely increase build time. |
114 |
| -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") |
| 91 | + # If the list is empty post validation, set it to the (validated) defaults |
| 92 | + list(LENGTH CUDA_ARCH CUDA_ARCH_LENGTH) |
| 93 | + if(CUDA_ARCH_LENGTH EQUAL 0) |
| 94 | + set(CUDA_ARCH ${DEFAULT_CUDA_ARCH}) |
| 95 | + endif() |
115 | 96 |
|
116 |
| -# If CUDA 11.2+, can build multiple architectures in parallel. Note this will be multiplicative against the number of threads launched for parallel cmake build, which may anger some systems. |
117 |
| -if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.2" AND USE_NVCC_THREADS AND DEFINED NVCC_THREADS AND NVCC_THREADS GREATER_EQUAL 0) |
118 |
| - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --threads ${NVCC_THREADS}") |
119 |
| -endif() |
| 97 | + # Propagate the validated values to the parent scope, to reduce warning duplication. |
| 98 | + get_directory_property(hasParent PARENT_DIRECTORY) |
| 99 | + if(hasParent) |
| 100 | + set(CUDA_ARCH ${CUDA_ARCH} PARENT_SCOPE) |
| 101 | + endif() |
120 | 102 |
|
| 103 | + # If the list is somehow empty now, do not set any gencodes arguments, instead using the compiler defaults. |
| 104 | + list(LENGTH CUDA_ARCH CUDA_ARCH_LENGTH) |
| 105 | + if(NOT CUDA_ARCH_LENGTH EQUAL 0) |
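| 106 | + # Only do this if required, i.e. CUDA_ARCH is not the same as the value last applied (CUDA_ARCH_APPLIED). NOTE(review): EQUAL is a numeric comparison; STREQUAL may be intended for these list values - confirm. |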
| 107 | + if(NOT CUDA_ARCH_APPLIED EQUAL CUDA_ARCH) |
| 108 | + message(STATUS "Generating Compute Capabilities: ${CUDA_ARCH}") |
| 109 | + if(hasParent) |
| 110 | + set(CUDA_ARCH_APPLIED "${CUDA_ARCH}" PARENT_SCOPE ) |
| 111 | + endif() |
| 112 | + endif() |
| 113 | + set(MIN_CUDA_ARCH) |
| 114 | + # Convert to gencode arguments |
| 115 | + |
| 116 | + foreach(ARCH IN LISTS CUDA_ARCH) |
| 117 | + target_compile_options(${SCG_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-gencode arch=compute_${ARCH}$<COMMA>code=sm_${ARCH}>") |
| 118 | + target_link_options(${SCG_TARGET} PRIVATE "$<DEVICE_LINK:SHELL:-gencode arch=compute_${ARCH}$<COMMA>code=sm_${ARCH}>") |
| 119 | + endforeach() |
| 120 | + |
| 121 | + # Add the last arch again as arch=compute_XX,code=compute_XX to enable forward-looking JIT |
| 122 | + list(GET CUDA_ARCH -1 LAST_ARCH) |
| 123 | + target_compile_options(${SCG_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-gencode arch=compute_${LAST_ARCH}$<COMMA>code=compute_${LAST_ARCH}>") |
| 124 | + target_link_options(${SCG_TARGET} PRIVATE "$<DEVICE_LINK:SHELL:-gencode arch=compute_${LAST_ARCH}$<COMMA>code=compute_${LAST_ARCH}>") |
| 125 | + |
| 126 | + # Get the minimum device architecture to pass through to nvcc to enable graceful failure prior to cuda execution. |
| 127 | + list(GET CUDA_ARCH 0 MIN_CUDA_ARCH) |
| 128 | + |
| 129 | + # Set the minimum arch flags for all compilers |
| 130 | + target_compile_definitions(${SCG_TARGET} PRIVATE -DMIN_CUDA_ARCH=${MIN_CUDA_ARCH}) |
| 131 | + else() |
| 132 | + message(STATUS "Generating default CUDA Compute Capabilities ${CUDA_ARCH}") |
| 133 | + endif() |
| 134 | +endmacro() |
0 commit comments