forked from horovod/horovod
-
Notifications
You must be signed in to change notification settings - Fork 2
/
CMakeLists.txt
362 lines (330 loc) · 15.4 KB
/
CMakeLists.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
# Require CMake 3.13 or newer to configure this project.
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)

# CMP0074 (introduced in the CMake 3.12 series): find_package() also checks
# <PackageName>_ROOT variables. Only set it when the running CMake knows it.
if(POLICY CMP0074)
    cmake_policy(SET CMP0074 NEW)
endif()
# Use ccache as the compiler launcher for C, C++ and CUDA when it is
# installed; otherwise just print a hint.
find_program(CCACHE_PROGRAM ccache)
if(NOT CCACHE_PROGRAM)
    message(STATUS "Could not find CCache. Consider installing CCache to speed up compilation.")
else()
    set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
    set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
    set(CMAKE_CUDA_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
endif()
project(horovod CXX)

# Compile as C++14, require that standard, and disable compiler extensions.
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 14)

# Configure path to modules (for find_package): keep whatever is already on
# the module path and add the project's own module directories after it.
list(APPEND CMAKE_MODULE_PATH
     "${PROJECT_SOURCE_DIR}/cmake/Modules/"
     "${PROJECT_SOURCE_DIR}/cmake/upstream/")

# Project helper commands. create_metadata() is not defined in this file;
# presumably it comes from Utilities.cmake -- TODO confirm.
include(cmake/Utilities.cmake)
create_metadata()
# 3rd-parties
# Header search paths for the bundled libraries, added in the order
# HTTPRequest, the boost components listed below, then lbfgs.
include_directories("third_party/HTTPRequest/include")
foreach(_hvd_boost_component IN ITEMS
        assert
        config
        core
        detail
        iterator
        lockfree
        mpl
        parameter
        predef
        preprocessor
        static_assert
        type_traits
        utility)
    include_directories("third_party/boost/${_hvd_boost_component}/include")
endforeach()
include_directories("third_party/lbfgs/include")

# Predefined Eigen and Flatbuffers headers path, they could be replaced by tensorflow headers path
set(FLATBUFFERS_INCLUDE_PATH "${PROJECT_SOURCE_DIR}/third_party/flatbuffers/include")
set(EIGEN_INCLUDE_PATH "${PROJECT_SOURCE_DIR}/third_party/eigen")
# Sources
# Core source files, given relative to horovod/common/ and appended to SOURCES
# as absolute paths in the listed order.
foreach(_hvd_common_source IN ITEMS
        common.cc
        controller.cc
        fusion_buffer_manager.cc
        group_table.cc
        half.cc
        logging.cc
        message.cc
        operations.cc
        parameter_manager.cc
        process_set.cc
        response_cache.cc
        stall_inspector.cc
        thread_pool.cc
        timeline.cc
        tensor_queue.cc
        ops/collective_operations.cc
        ops/operation_manager.cc
        optim/bayesian_optimization.cc
        optim/gaussian_process.cc
        utils/env_parser.cc)
    list(APPEND SOURCES "${PROJECT_SOURCE_DIR}/horovod/common/${_hvd_common_source}")
endforeach()
# Default Macro
add_definitions(-DEIGEN_MPL2_ONLY=1)

# Remove platform default std: strip every "-std=..." token from the C++ flags.
string(REGEX REPLACE "-std=[^ ]+" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")

# Pickup ar from environmental variable if available
if(DEFINED ENV{AR})
    set(CMAKE_AR $ENV{AR})
endif()

# Add default project CXX flags
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -fPIC -Wall -ftree-vectorize")

# RelWithDebInfo uses -O2, prefer performance over debug info in RelWithDebInfo build type.
# The same -O2 -> -O3 substitution is applied to the C, C++ and CUDA flag sets.
foreach(_hvd_flag_lang IN ITEMS C CXX CUDA)
    string(REPLACE "-O2" "-O3"
           CMAKE_${_hvd_flag_lang}_FLAGS_RELWITHDEBINFO
           "${CMAKE_${_hvd_flag_lang}_FLAGS_RELWITHDEBINFO}")
endforeach()

# Add architecture specific optimization flags.
# set_build_arch_flags() is a helper not defined in this file; the list is
# handed over as a single quoted argument.
set(ARCH_FLAGS "-mf16c" "-mavx" "-mfma")
set_build_arch_flags("${ARCH_FLAGS}")
# Specify Horovod exports
# Restrict the symbols exported from shared libraries: on Darwin through the
# exported-symbols list horovod.exp, elsewhere through the linker version
# script horovod.lds. Both files are looked up relative to this project's
# root (PROJECT_SOURCE_DIR) so the paths stay valid when Horovod is built as
# a subproject of another CMake tree.
# CMAKE_SYSTEM_NAME is passed by name: writing ${CMAKE_SYSTEM_NAME} would
# expand it first and allow if() to dereference the result a second time.
if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -undefined dynamic_lookup -Wl,-exported_symbols_list,${PROJECT_SOURCE_DIR}/horovod.exp")
    set(CMAKE_MACOSX_RPATH TRUE)
else()
    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--version-script=${PROJECT_SOURCE_DIR}/horovod.lds -Wl,-Bsymbolic-functions -Wl,-z,relro,-z,now")
endif()
# GPU Operations
# HOROVOD_GPU and HOROVOD_GPU_OPERATIONS are read from the environment at
# configure time. set(<var> $ENV{...}) with an unset or empty environment
# variable leaves <var> undefined, which is what the DEFINED checks rely on.
set(HOROVOD_GPU $ENV{HOROVOD_GPU})
set(HOROVOD_GPU_OPERATIONS $ENV{HOROVOD_GPU_OPERATIONS})
if(DEFINED HOROVOD_GPU_OPERATIONS AND NOT "${HOROVOD_GPU_OPERATIONS}" MATCHES "^(MPI|NCCL)$")
    message(FATAL_ERROR "HOROVOD_GPU_OPERATIONS=${HOROVOD_GPU_OPERATIONS} is invalid, supported values are '', 'MPI', and 'NCCL'.")
endif()

# set_gpu_op() is a helper not defined in this file; presumably it resolves
# each per-collective variable and validates it against the listed backends
# -- TODO confirm against its definition.
set_gpu_op(HOROVOD_GPU_ALLREDUCE "MPI;NCCL;DDL")
set_gpu_op(HOROVOD_GPU_ALLGATHER "MPI;NCCL")
set_gpu_op(HOROVOD_GPU_BROADCAST "MPI;NCCL")
set_gpu_op(HOROVOD_GPU_ALLTOALL "MPI;NCCL")
set_gpu_op(HOROVOD_GPU_REDUCESCATTER "MPI;NCCL")

# For every collective that has a backend selected, shorten the value to its
# first character (e.g. "N", "M", "D") and publish it to the compiler as
# -D<VAR>=<ASCII_DEC>, where ASCII_DEC is produced by convert_to_ascii_dec().
# The keyword must be upper-case IN: foreach() keywords are case-sensitive,
# and a lower-case "in" would make the loop also visit the literal items
# "in" and "ITEMS".
foreach(VAR IN ITEMS HOROVOD_GPU_ALLREDUCE HOROVOD_GPU_ALLGATHER HOROVOD_GPU_BROADCAST HOROVOD_GPU_ALLTOALL HOROVOD_GPU_REDUCESCATTER)
    if(DEFINED ${VAR})
        string(SUBSTRING "${${VAR}}" 0 1 ${VAR})
        convert_to_ascii_dec(ASCII_DEC ${${VAR}})
        add_definitions(-D${VAR}=${ASCII_DEC})
    endif()
endforeach()
# PYTHON
# PY_EXE is the interpreter used by later configure steps: a caller-provided
# PYTHON_EXECUTABLE wins, otherwise a Python >= 3.6 interpreter is searched for.
if(PYTHON_EXECUTABLE)
    set(PY_EXE ${PYTHON_EXECUTABLE})
else()
    find_package(Python 3.6 COMPONENTS Interpreter REQUIRED)
    set(PY_EXE ${Python_EXECUTABLE})
endif()
message(STATUS "Using command ${PY_EXE}")
# MPI
# HOROVOD_WITHOUT_MPI=1 skips MPI entirely; HOROVOD_WITH_MPI=1 turns a missing
# MPI into a configure error; otherwise MPI is used only when it is found.
if(NOT "$ENV{HOROVOD_WITHOUT_MPI}" STREQUAL "1")
    if("$ENV{HOROVOD_WITH_MPI}" STREQUAL "1")
        set(MPI_REQUIRED "REQUIRED")
    else()
        set(MPI_REQUIRED "")
    endif()

    find_package(MPI ${MPI_REQUIRED})
    if(MPI_FOUND)
        include_directories(SYSTEM ${MPI_INCLUDE_PATH})
        list(APPEND LINKER_LIBS ${MPI_LIBRARIES})
        list(APPEND SOURCES
             "${PROJECT_SOURCE_DIR}/horovod/common/mpi/mpi_context.cc"
             "${PROJECT_SOURCE_DIR}/horovod/common/mpi/mpi_controller.cc"
             "${PROJECT_SOURCE_DIR}/horovod/common/ops/mpi_operations.cc"
             "${PROJECT_SOURCE_DIR}/horovod/common/ops/adasum/adasum_mpi.cc"
             "${PROJECT_SOURCE_DIR}/horovod/common/ops/adasum_mpi_operations.cc")
        add_definitions(-DHAVE_MPI=1)
        set(HAVE_MPI TRUE)
    endif()
endif()
# CUDA and ROCM
# Use the project's C++ compiler as the CUDA host compiler.
set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}")

# Default the CUDA runtime linkage unless the caller already chose one.
if(NOT DEFINED CMAKE_CUDA_RUNTIME_LIBRARY)
    set(CMAKE_CUDA_RUNTIME_LIBRARY "Shared") # Set to "Static" or "Shared"
endif()

# HOROVOD_CUDA_HOME, when set in the environment, pins the nvcc to use.
if(DEFINED ENV{HOROVOD_CUDA_HOME})
    set(CMAKE_CUDA_COMPILER "$ENV{HOROVOD_CUDA_HOME}/bin/nvcc")
endif()

# Probe for a CUDA compiler; if none is known yet, retry with the nvcc path
# inside the CUDA Toolkit binary directory.
include(CheckLanguage)
check_language(CUDA)
if(NOT CMAKE_CUDA_COMPILER)
    find_package(CUDAToolkit)
    if(CUDAToolkit_BIN_DIR)
        message("CUDA compiler was not found in $PATH, but searching again in CUDA Toolkit binary directory")
        set(CMAKE_CUDA_COMPILER "${CUDAToolkit_BIN_DIR}/nvcc")
        check_language(CUDA)
    endif()
endif()

# Enable the CUDA language once a compiler is known. With GNU newer than 8.0
# on ppc64le, -std=c++11 is appended to the CUDA flags first.
if(CMAKE_CUDA_COMPILER)
    if((CMAKE_CXX_COMPILER_ID MATCHES GNU)
       AND (CMAKE_SYSTEM_PROCESSOR MATCHES ppc64le)
       AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0))
        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -std=c++11")
    endif()
    enable_language(CUDA)
endif()
# ADD_CUDA()
#
# Wires CUDA into the calling scope: requires the CUDA Toolkit, adds its
# include directories, appends the CUDA runtime library chosen by
# CMAKE_CUDA_RUNTIME_LIBRARY ("static" or "shared", compared case-insensitively;
# any other value links no runtime here) to LINKER_LIBS, appends the CUDA/GPU
# operation sources to SOURCES, defines HAVE_CUDA/HAVE_GPU and sets HAVE_CUDA.
# This is a macro, so every variable it sets lands in the caller's scope.
macro(ADD_CUDA)
    find_package(CUDAToolkit REQUIRED)
    include_directories(SYSTEM ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})

    string(TOLOWER "${CMAKE_CUDA_RUNTIME_LIBRARY}" lowercase_CMAKE_CUDA_RUNTIME_LIBRARY)
    if(lowercase_CMAKE_CUDA_RUNTIME_LIBRARY STREQUAL "shared")
        list(APPEND LINKER_LIBS CUDA::cudart)
    elseif(lowercase_CMAKE_CUDA_RUNTIME_LIBRARY STREQUAL "static")
        list(APPEND LINKER_LIBS CUDA::cudart_static)
    endif()

    list(APPEND SOURCES
         "${PROJECT_SOURCE_DIR}/horovod/common/ops/cuda_operations.cc"
         "${PROJECT_SOURCE_DIR}/horovod/common/ops/gpu_operations.cc")

    # CUDA + MPI
    if(HAVE_MPI)
        list(APPEND SOURCES "${PROJECT_SOURCE_DIR}/horovod/common/ops/mpi_gpu_operations.cc")
    endif()

    add_definitions(-DHAVE_CUDA=1 -DHAVE_GPU=1)
    set(HAVE_CUDA TRUE)

    # When expanded outside the top-level source directory, also tell the
    # parent scope that a sub-project uses CUDA.
    if(NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
        set(HAVE_SUB_PROJECT_CUDA TRUE PARENT_SCOPE)
    endif()
endmacro()
# Pick the GPU platform as soon as any collective has a GPU backend selected:
# HOROVOD_GPU=ROCM uses ROCm, HOROVOD_GPU=CUDA or an undefined HOROVOD_GPU uses
# CUDA, and any other value is a configure error.
if(DEFINED HOROVOD_GPU_ALLREDUCE OR DEFINED HOROVOD_GPU_ALLGATHER OR DEFINED HOROVOD_GPU_BROADCAST OR DEFINED HOROVOD_GPU_ALLTOALL OR DEFINED HOROVOD_GPU_REDUCESCATTER)
    if(HOROVOD_GPU STREQUAL "ROCM")
        find_package(ROCM REQUIRED)
        include_directories(SYSTEM ${ROCM_INCLUDE_DIRS})
        list(APPEND LINKER_LIBS ${ROCM_LIBRARIES})
        # ROCm compile flags go in front of the existing C++ flags.
        set(CMAKE_CXX_FLAGS "${ROCM_COMPILE_FLAGS} ${CMAKE_CXX_FLAGS}")
        list(APPEND SOURCES
             "${PROJECT_SOURCE_DIR}/horovod/common/ops/hip_operations.cc"
             "${PROJECT_SOURCE_DIR}/horovod/common/ops/gpu_operations.cc")
        add_definitions(-DHAVE_ROCM=1 -DHAVE_GPU=1)
        set(HAVE_ROCM TRUE)
    elseif(NOT DEFINED HOROVOD_GPU OR HOROVOD_GPU STREQUAL "CUDA")
        add_cuda()
    else()
        message(FATAL_ERROR "Unknown HOROVOD_GPU type: ${HOROVOD_GPU}")
    endif()
endif()
# NCCL
# Needed when any collective selected backend "N". CUDA builds use NCCL
# (major version 2 or newer); ROCm builds use rccl instead.
if(HOROVOD_GPU_ALLREDUCE STREQUAL "N" OR HOROVOD_GPU_ALLGATHER STREQUAL "N" OR HOROVOD_GPU_BROADCAST STREQUAL "N" OR HOROVOD_GPU_ALLTOALL STREQUAL "N" OR HOROVOD_GPU_REDUCESCATTER STREQUAL "N")
    if(NOT HAVE_ROCM)
        find_package(NCCL REQUIRED)
        if(NCCL_MAJOR_VERSION LESS "2")
            message(FATAL_ERROR "Horovod requires NCCL 2.0 or later version please upgrade.")
        endif()
        include_directories(SYSTEM ${NCCL_INCLUDE_DIRS})
        list(APPEND LINKER_LIBS ${NCCL_LIBRARIES})
    else()
        find_package(rccl REQUIRED)
        include_directories(SYSTEM ${RCCL_INCLUDE_DIRS})
        list(APPEND LINKER_LIBS roc::rccl)
    endif()

    list(APPEND SOURCES "${PROJECT_SOURCE_DIR}/horovod/common/ops/nccl_operations.cc")
    add_definitions(-DHAVE_NCCL=1)
    set(HAVE_NCCL TRUE)
endif()
# DDL
# Deprecated backend, enabled when HOROVOD_GPU_ALLREDUCE resolved to "D".
# Its libraries are taken from the CUDA Toolkit library directory.
if(HOROVOD_GPU_ALLREDUCE STREQUAL "D")
    message(DEPRECATION
            "DDL backend has been deprecated. Please, start using the NCCL backend by building Horovod with "
            "'HOROVOD_GPU_OPERATIONS=NCCL'. Will be removed in v0.21.0.")
    list(APPEND LINKER_LIBS
         "${CUDAToolkit_LIBRARY_DIR}/libddl.so"
         "${CUDAToolkit_LIBRARY_DIR}/libddl_pack.so")
    list(APPEND SOURCES
         "${PROJECT_SOURCE_DIR}/horovod/common/mpi/ddl_mpi_context_manager.cc"
         "${PROJECT_SOURCE_DIR}/horovod/common/ops/ddl_operations.cc")
    set(HAVE_DDL TRUE)
    add_definitions(-DHAVE_DDL=1)
endif()
# oneCCL
# Enabled when CCL_ROOT is defined after being read from the environment.
# CCL_CONFIGURATION (also from the environment) selects the sub-directory
# used under include/ and lib/.
set(CCL_ROOT $ENV{CCL_ROOT})
if(DEFINED CCL_ROOT)
    set(CCL_CONFIGURATION $ENV{CCL_CONFIGURATION})
    include_directories(${CCL_ROOT}/include/${CCL_CONFIGURATION})
    list(APPEND LINKER_LIBS "${CCL_ROOT}/lib/${CCL_CONFIGURATION}/libccl.so")
    list(APPEND SOURCES "${PROJECT_SOURCE_DIR}/horovod/common/ops/ccl_operations.cc")
    set(HAVE_CCL TRUE)
    add_definitions(-DHAVE_CCL=1)
endif()
# Refuse to combine an NCCL allreduce ("N") with any MPI-backed ("M") GPU
# collective unless HOROVOD_ALLOW_MIXED_GPU_IMPL=1 is set in the environment.
set(HOROVOD_ALLOW_MIXED_GPU_IMPL $ENV{HOROVOD_ALLOW_MIXED_GPU_IMPL})
if(HOROVOD_GPU_ALLREDUCE STREQUAL "N" AND NOT HOROVOD_ALLOW_MIXED_GPU_IMPL STREQUAL "1")
    if(HOROVOD_GPU_ALLGATHER STREQUAL "M" OR HOROVOD_GPU_BROADCAST STREQUAL "M" OR HOROVOD_GPU_ALLTOALL STREQUAL "M" OR HOROVOD_GPU_REDUCESCATTER STREQUAL "M")
        message(FATAL_ERROR "You should not mix NCCL and MPI GPU due to a possible deadlock.\n"
                            "If you are sure you want to mix them, set the "
                            "HOROVOD_ALLOW_MIXED_GPU_IMPL environment variable to '1'.")
    endif()
endif()
# NVTX
# HOROVOD_WITHOUT_NVTX=1 skips NVTX; HOROVOD_WITH_NVTX=1 makes a missing NVTX
# a configure error; otherwise NVTX is used only when it is found.
if(NOT "$ENV{HOROVOD_WITHOUT_NVTX}" STREQUAL "1")
    if("$ENV{HOROVOD_WITH_NVTX}" STREQUAL "1")
        set(NVTX_REQUIRED "REQUIRED")
    else()
        set(NVTX_REQUIRED "")
    endif()

    find_package(NVTX ${NVTX_REQUIRED})
    if(NVTX_FOUND)
        include_directories(SYSTEM ${NVTX_INCLUDE_DIRS})
        list(APPEND LINKER_LIBS ${NVTX_LIBRARIES})
        add_definitions(-DHAVE_NVTX=1)
        list(APPEND SOURCES "${PROJECT_SOURCE_DIR}/horovod/common/nvtx_op_range.cc")
        set(HAVE_NVTX TRUE)
    endif()
endif()
# Gloo
# Built from the bundled third_party/gloo unless HOROVOD_WITHOUT_GLOO=1.
if(NOT "$ENV{HOROVOD_WITHOUT_GLOO}" STREQUAL "1")
    # USE_MPI mirrors whether MPI was found above; presumably it is read by
    # the gloo sub-project -- TODO confirm in third_party/gloo.
    if(HAVE_MPI)
        set(USE_MPI TRUE)
    else()
        set(USE_MPI FALSE)
    endif()

    # On Darwin, force the libuv transport option on.
    # CMAKE_SYSTEM_NAME is passed by name: writing ${CMAKE_SYSTEM_NAME} would
    # expand it first and allow if() to dereference the result a second time.
    if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
        set(USE_LIBUV ON CACHE BOOL "use libuv for gloo transport" FORCE)
    endif()

    # Default CMP0074 to NEW for sub-projects that do not set it themselves.
    set(CMAKE_POLICY_DEFAULT_CMP0074 NEW)
    add_subdirectory(third_party/gloo)
    include_directories(third_party/gloo)
    # This gloo target is compiled with _GLIBCXX_USE_CXX11_ABI=1.
    target_compile_definitions(gloo PRIVATE _GLIBCXX_USE_CXX11_ABI=1)

    list(APPEND SOURCES
         "${PROJECT_SOURCE_DIR}/horovod/common/gloo/gloo_context.cc"
         "${PROJECT_SOURCE_DIR}/horovod/common/gloo/gloo_controller.cc"
         "${PROJECT_SOURCE_DIR}/horovod/common/gloo/http_store.cc"
         "${PROJECT_SOURCE_DIR}/horovod/common/gloo/memory_store.cc"
         "${PROJECT_SOURCE_DIR}/horovod/common/ops/gloo_operations.cc")
    add_definitions(-DHAVE_GLOO=1)
    set(HAVE_GLOO TRUE)
endif()
# NCCL + MPI
# The Adasum GPU operations are compiled only when both backends are enabled.
if(HAVE_MPI AND HAVE_NCCL)
    list(APPEND SOURCES "${PROJECT_SOURCE_DIR}/horovod/common/ops/adasum_gpu_operations.cc")
endif()
# Default CPU operations backend, read from the HOROVOD_CPU_OPERATIONS
# environment variable at configure time. Recognized values are MPI, CCL and
# GLOO; each requires the matching backend to have been enabled above and
# defines HOROVOD_CPU_OPERATIONS_DEFAULT to M, C or G respectively. MLSL is
# rejected with an upgrade hint. Any other value only produces a warning and
# leaves HOROVOD_CPU_OPERATIONS_DEFAULT undefined.
set(HOROVOD_CPU_OPERATIONS $ENV{HOROVOD_CPU_OPERATIONS})
if(DEFINED HOROVOD_CPU_OPERATIONS)
    message(STATUS "Set default CPU operation to " ${HOROVOD_CPU_OPERATIONS})
    if(HOROVOD_CPU_OPERATIONS STREQUAL "MPI")
        if(NOT HAVE_MPI)
            message(FATAL_ERROR "MPI is not installed, try changing HOROVOD_CPU_OPERATIONS.")
        endif()
        add_definitions(-DHOROVOD_CPU_OPERATIONS_DEFAULT=M)
    elseif(HOROVOD_CPU_OPERATIONS STREQUAL "MLSL")
        message(FATAL_ERROR "Intel(R) MLSL was removed. Upgrade to oneCCL and set HOROVOD_CPU_OPERATIONS=CCL.")
    elseif(HOROVOD_CPU_OPERATIONS STREQUAL "CCL")
        if(NOT HAVE_CCL)
            message(FATAL_ERROR "oneCCL is not installed, try changing HOROVOD_CPU_OPERATIONS.")
        endif()
        add_definitions(-DHOROVOD_CPU_OPERATIONS_DEFAULT=C)
    elseif(HOROVOD_CPU_OPERATIONS STREQUAL "GLOO")
        if(NOT HAVE_GLOO)
            message(FATAL_ERROR "Cannot set both HOROVOD_WITHOUT_GLOO and HOROVOD_CPU_OPERATIONS=GLOO.")
        endif()
        add_definitions(-DHOROVOD_CPU_OPERATIONS_DEFAULT=G)
    else()
        # Previously an unrecognized value was dropped without any notice,
        # right after the status line above claimed it had been set.
        message(WARNING "HOROVOD_CPU_OPERATIONS=${HOROVOD_CPU_OPERATIONS} is not recognized, "
                        "supported values are 'MPI', 'GLOO', and 'CCL'. "
                        "No default CPU operation will be compiled in.")
    endif()
endif()
# Get Python suffix
# Ask the interpreter for the extension-module suffix: the first non-empty of
# sysconfig's EXT_SUFFIX, sysconfig's SO, and the literal '.so'. Interpreter
# errors are silenced, in which case Python_SUFFIX ends up empty.
set(_hvd_suffix_snippet "import sysconfig; print(next(x for x in [sysconfig.get_config_var('EXT_SUFFIX'), sysconfig.get_config_var('SO'), '.so'] if x))")
execute_process(
    COMMAND ${PY_EXE} -c "${_hvd_suffix_snippet}"
    OUTPUT_VARIABLE Python_SUFFIX
    OUTPUT_STRIP_TRAILING_WHITESPACE
    ERROR_QUIET)
# Framework bindings, configured in this order: TF, PyTorch, MXNet.
foreach(_hvd_framework_dir IN ITEMS tensorflow torch mxnet)
    add_subdirectory(horovod/${_hvd_framework_dir})
endforeach()

# Correctly wrap up json format: append a final dummy entry and the closing
# brace to metadata.json once the framework directories have been processed.
file(APPEND "${CMAKE_LIBRARY_OUTPUT_DIRECTORY_ROOT}/metadata.json" "\"dummy\": \"none\"\n}")

# CUDA kernels
# Needed when CUDA was enabled here or reported by a sub-project.
if(HAVE_SUB_PROJECT_CUDA OR HAVE_CUDA)
    add_subdirectory(horovod/common/ops/cuda)
endif()
# if we need compatible c++ abi
# Duplicate gloo folder and add it as a new sub-project
#
# Runs when Gloo is enabled and at least one of Tensorflow_CXX11, Pytorch_CXX11
# or Mxnet_CXX11 is defined and false. Those variables are not set in this
# file; presumably the framework sub-directories added earlier publish them
# -- TODO confirm.
# NOTE(review): the copy and the rewritten CMakeLists.txt are written under
# PROJECT_SOURCE_DIR, i.e. into the source tree, at configure time.
if(HAVE_GLOO AND ((DEFINED Tensorflow_CXX11 AND NOT Tensorflow_CXX11) OR (DEFINED Pytorch_CXX11 AND NOT Pytorch_CXX11) OR (DEFINED Mxnet_CXX11 AND NOT Mxnet_CXX11)))
# Copy the contents of third_party/gloo into third_party/compatible_gloo.
file(COPY ${PROJECT_SOURCE_DIR}/third_party/gloo/ DESTINATION ${PROJECT_SOURCE_DIR}/third_party/compatible_gloo)
# Rewrite the copied gloo/CMakeLists.txt, replacing every "gloo " (with the
# trailing space) by "compatible_gloo ", so the copy defines a target with a
# different name from the original.
file(READ ${PROJECT_SOURCE_DIR}/third_party/compatible_gloo/gloo/CMakeLists.txt GLOO_CMAKE)
string(REPLACE "gloo " "compatible_gloo " GLOO_CMAKE "${GLOO_CMAKE}")
file(WRITE ${PROJECT_SOURCE_DIR}/third_party/compatible_gloo/gloo/CMakeLists.txt "${GLOO_CMAKE}")
add_subdirectory(third_party/compatible_gloo)
# The duplicate is compiled with _GLIBCXX_USE_CXX11_ABI=0.
target_compile_definitions(compatible_gloo PRIVATE _GLIBCXX_USE_CXX11_ABI=0)
endif()