
cmake: migrate from FindCUDA to CUDA language #2634

Merged (12 commits) on Jun 26, 2023
4 changes: 2 additions & 2 deletions doc/install/install-from-source.md
@@ -74,7 +74,7 @@ One may set the following environment variables before executing `pip`:
| Environment variables | Allowed value | Default value | Usage |
| --------------------- | ---------------------- | ------------- | -------------------------- |
| DP_VARIANT | `cpu`, `cuda`, `rocm` | `cpu` | Build CPU variant or GPU variant with CUDA or ROCM support. |
-| CUDA_TOOLKIT_ROOT_DIR | Path | Detected automatically | The path to the CUDA toolkit directory. CUDA 7.0 or later is supported. NVCC is required. |
+| CUDAToolkit_ROOT | Path | Detected automatically | The path to the CUDA toolkit directory. CUDA 7.0 or later is supported. NVCC is required. |
| ROCM_ROOT | Path | Detected automatically | The path to the ROCM toolkit directory. |
| TENSORFLOW_ROOT | Path | Detected automatically | The path to TensorFlow Python library. By default the installer only finds TensorFlow under user site-package directory (`site.getusersitepackages()`) or system site-package directory (`sysconfig.get_path("purelib")`) due to limitation of [PEP-517](https://peps.python.org/pep-0517/). If not found, the latest TensorFlow (or the environment variable `TENSORFLOW_VERSION` if given) from PyPI will be built against.|
| DP_ENABLE_NATIVE_OPTIMIZATION | 0, 1 | 0 | Enable compilation optimization for the native machine's CPU type. Do not enable it if generated code will run on different CPUs. |
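For example, building the CUDA variant from source with these variables might look like the following (a sketch; the toolkit path is illustrative, substitute your own installation):

```sh
# Illustrative toolkit path; point CUDAToolkit_ROOT at your CUDA installation.
export DP_VARIANT=cuda
export CUDAToolkit_ROOT=/usr/local/cuda-11.8
pip install .
```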
@@ -180,7 +180,7 @@ One may add the following arguments to `cmake`:
| -DTENSORFLOW_ROOT=<value> | Path | - | The Path to TensorFlow's C++ interface. |
| -DCMAKE_INSTALL_PREFIX=<value> | Path | - | The Path where DeePMD-kit will be installed. |
| -DUSE_CUDA_TOOLKIT=<value> | `TRUE` or `FALSE` | `FALSE` | If `TRUE`, Build GPU support with CUDA toolkit. |
-| -DCUDA_TOOLKIT_ROOT_DIR=<value> | Path | Detected automatically | The path to the CUDA toolkit directory. CUDA 7.0 or later is supported. NVCC is required. |
+| -DCUDAToolkit_ROOT=<value> | Path | Detected automatically | The path to the CUDA toolkit directory. CUDA 7.0 or later is supported. NVCC is required. |
| -DUSE_ROCM_TOOLKIT=<value> | `TRUE` or `FALSE` | `FALSE` | If `TRUE`, Build GPU support with ROCM toolkit. |
| -DCMAKE_HIP_COMPILER_ROCM_ROOT=<value> | Path | Detected automatically | The path to the ROCM toolkit directory. |
| -DLAMMPS_SOURCE_ROOT=<value> | Path | - | Only necessary for LAMMPS plugin mode. The path to the [LAMMPS source code](install-lammps.md). LAMMPS 8Apr2021 or later is supported. If not assigned, the plugin mode will not be enabled. |
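Combining the renamed option with the others above, a configure step might look like this (a sketch; all paths are illustrative):

```sh
# Illustrative paths; substitute your TensorFlow and CUDA locations.
cmake -DUSE_CUDA_TOOLKIT=TRUE \
      -DCUDAToolkit_ROOT=/usr/local/cuda-11.8 \
      -DTENSORFLOW_ROOT=/path/to/tensorflow \
      -DCMAKE_INSTALL_PREFIX=/path/to/deepmd-kit \
      ..
```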
2 changes: 1 addition & 1 deletion doc/install/install-gromacs.md
@@ -27,7 +27,7 @@ cd build
cmake3 .. -DCMAKE_CXX_STANDARD=14 \ # not required, but c++14 seems to be more compatible with higher versions of tensorflow
          -DGMX_MPI=ON \
          -DGMX_GPU=CUDA \ # Gromacs on ROCm has not been fully developed yet
-         -DCUDA_TOOLKIT_ROOT_DIR=/path/to/cuda \
+         -DCUDAToolkit_ROOT=/path/to/cuda \
          -DCMAKE_INSTALL_PREFIX=/path/to/gromacs-2020.2-deepmd
make -j
make install
10 changes: 6 additions & 4 deletions setup.py
@@ -27,13 +27,15 @@
# get variant option from the environment variables, available: cpu, cuda, rocm
dp_variant = os.environ.get("DP_VARIANT", "cpu").lower()
if dp_variant == "cpu" or dp_variant == "":
-    pass
+    cmake_minimum_required_version = "3.16"
elif dp_variant == "cuda":
+    cmake_minimum_required_version = "3.23"
    cmake_args.append("-DUSE_CUDA_TOOLKIT:BOOL=TRUE")
-    cuda_root = os.environ.get("CUDA_TOOLKIT_ROOT_DIR")
+    cuda_root = os.environ.get("CUDAToolkit_ROOT")
    if cuda_root:
-        cmake_args.append(f"-DCUDA_TOOLKIT_ROOT_DIR:STRING={cuda_root}")
+        cmake_args.append(f"-DCUDAToolkit_ROOT:STRING={cuda_root}")
elif dp_variant == "rocm":
+    cmake_minimum_required_version = "3.21"
    cmake_args.append("-DUSE_ROCM_TOOLKIT:BOOL=TRUE")
    rocm_root = os.environ.get("ROCM_ROOT")
    if rocm_root:
@@ -111,7 +113,7 @@ def get_tag(self):
        *cmake_args,
    ],
    cmake_source_dir="source",
-    cmake_minimum_required_version="3.16",
+    cmake_minimum_required_version=cmake_minimum_required_version,
    extras_require={
        "test": ["dpdata>=0.1.9", "ase", "pytest", "pytest-cov", "pytest-sugar"],
        "docs": [
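With `cmake_minimum_required_version` now chosen per variant, a CUDA build requires CMake 3.23 or newer (for `CMAKE_CUDA_ARCHITECTURES all`) and a ROCm build 3.21 or newer. If the system CMake is older, one option (a sketch, not the only way) is the PyPI wheel:

```sh
# Pull a sufficiently new CMake into the build environment.
pip install "cmake>=3.23"
cmake --version  # should now report 3.23 or later
```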
13 changes: 8 additions & 5 deletions source/CMakeLists.txt
@@ -97,12 +97,15 @@ set(DP_VARIANT "cpu")

# define USE_CUDA_TOOLKIT
if(USE_CUDA_TOOLKIT)
-  set(CUDA_USE_STATIC_CUDA_RUNTIME
-      OFF
-      CACHE INTERNAL "")
-  find_package(CUDA REQUIRED)
+  find_package(CUDAToolkit REQUIRED)
+  if(NOT DEFINED CMAKE_CUDA_COMPILER)
+    set(CMAKE_CUDA_COMPILER ${CUDAToolkit_NVCC_EXECUTABLE})
+  endif()
+  if(NOT DEFINED CMAKE_CUDA_HOST_COMPILER)
+    set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
+  endif()
  add_definitions("-DGOOGLE_CUDA")
-  message(STATUS "Found CUDA in ${CUDA_TOOLKIT_ROOT_DIR}, build nv GPU support")
+  message(STATUS "Found CUDA in ${CUDAToolkit_BIN_DIR}, build nv GPU support")
  set(DP_VARIANT "cuda")
else()
  message(STATUS "Will not build nv GPU support")
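This hunk is the heart of the migration: the long-deprecated `FindCUDA` module (`find_package(CUDA)`) gives way to `find_package(CUDAToolkit)` (available since CMake 3.17) plus CUDA as a first-class language. A minimal self-contained sketch of the pattern, with hypothetical target and file names:

```cmake
cmake_minimum_required(VERSION 3.23)
# Enable CUDA as a first-class language; CMake locates nvcc itself.
project(example LANGUAGES CXX CUDA)

# Provides imported targets such as CUDA::cudart instead of FindCUDA variables.
find_package(CUDAToolkit REQUIRED)

# Hypothetical target: .cu sources are compiled by the CUDA language rules,
# so no cuda_add_library() wrapper is needed.
add_library(example_op SHARED example_op.cu)
target_link_libraries(example_op PRIVATE CUDA::cudart)
```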
2 changes: 1 addition & 1 deletion source/install/package_c.sh
@@ -20,7 +20,7 @@ cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
-DPACKAGE_C=TRUE \
-DUSE_TF_PYTHON_LIBS=TRUE \
..
-make -j${NPROC}
+make -j${NPROC} VERBOSE=1
make install

#------------------
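`VERBOSE=1` makes the generated Makefiles echo every full compile and link command, which is handy for confirming that `.cu` files are now built by the CUDA language rules rather than the old FindCUDA wrappers. The generator-agnostic equivalent (an aside, not part of this script) is:

```sh
# Same effect without passing VERBOSE=1 to make:
cmake --build . --verbose
```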
2 changes: 1 addition & 1 deletion source/lib/CMakeLists.txt
@@ -16,7 +16,7 @@ if(USE_CUDA_TOOLKIT)
  target_link_libraries(${libname} INTERFACE deepmd_dyn_cudart ${EXTRA_LIBS})
  # gpu_cuda.h
  target_include_directories(
-    ${libname} PUBLIC $<BUILD_INTERFACE:${CUDA_INCLUDE_DIRS}>
+    ${libname} PUBLIC $<BUILD_INTERFACE:${CUDAToolkit_INCLUDE_DIRS}>
    $<INSTALL_INTERFACE:include>)
endif()

215 changes: 16 additions & 199 deletions source/lib/src/cuda/CMakeLists.txt
@@ -1,23 +1,20 @@
-# required cmake version
-cmake_minimum_required(VERSION 3.16)
+# required cmake version 3.23: CMAKE_CUDA_ARCHITECTURES all
+cmake_minimum_required(VERSION 3.23)
# project name
project(deepmd_op_cuda)

-# SET(CUDA_SEPARABLE_COMPILATION ON)
-find_package(CUDA REQUIRED)
-if(NOT CUDA_FOUND)
-  message(STATUS "CUDA not found. Project will not be built.")
-endif(NOT CUDA_FOUND)
+set(CMAKE_CUDA_ARCHITECTURES all)
+enable_language(CUDA)
+set(CMAKE_CUDA_STANDARD 11)
+add_compile_definitions(
+  "$<$<COMPILE_LANGUAGE:CUDA>:_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI}>")
+
+find_package(CUDAToolkit REQUIRED)

# take dynamic open cudart library replace of static one so it's not required
# when using CPUs
add_subdirectory(cudart)
-# important: it must be before cuda_add_library and any link target to cudart
-set(CUDA_LIBRARIES deepmd_dyn_cudart)
-
-# set c++ version c++11
-set(CMAKE_CXX_STANDARD 11)
-set(CMAKE_CUDA_STANDARD 11)
# nvcc -o libdeepmd_op_cuda.so -I/usr/local/cub-1.8.0 -rdc=true -DHIGH_PREC=true
# -gencode arch=compute_61,code=sm_61 -shared -Xcompiler -fPIC deepmd_op.cu
# -L/usr/local/cuda/lib64 -lcudadevrt very important here! Include path to cub.
@@ -26,201 +23,21 @@ set(CMAKE_CUDA_STANDARD 11)

# cub has been included in CUDA Toolkit 11, we do not need to include it any
# more see https://github.com/NVIDIA/cub
-if(${CUDA_VERSION_MAJOR} LESS_EQUAL "10")
+if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS "11")
include_directories(cub)
endif()

message(STATUS "CUDA major version is " ${CUDA_VERSION_MAJOR})

if(${CUDA_VERSION_MAJOR} GREATER "11"
OR (${CUDA_VERSION_MAJOR} STREQUAL "11" AND ${CUDA_VERSION_MINOR}
GREATER_EQUAL "5"))
# nvcc flags
set(CUDA_NVCC_FLAGS
-arch=all; # embeds a compiled code image for all supported architectures
# (sm_*), and a PTX program for the highest major virtual
# architecture
-O3;
-Xcompiler
-fPIC;
${CUDA_NVCC_FLAGS})
elseif(${CUDA_VERSION_MAJOR} STREQUAL "11" AND ${CUDA_VERSION_MINOR} GREATER
"0")
# nvcc flags
set(CUDA_NVCC_FLAGS
-gencode
arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
-gencode
arch=compute_53,code=sm_53;
-gencode
arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic
# Pascal)
-gencode
arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX
# 1050, GTX 1030, Titan Xp, Tesla P40, Tesla
# P4, Discrete GPU on the NVIDIA Drive PX2
-gencode
arch=compute_70,code=sm_70; # Volta - GV100/Tesla V100, GTX 1180 (GV104)
-gencode
arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro R8000
-gencode
arch=compute_80,code=sm_80; # Anpere - A100
-gencode
arch=compute_86,code=sm_86; # Anpere - RTX 3090
-O3;
-Xcompiler
-fPIC;
${CUDA_NVCC_FLAGS})
elseif(${CUDA_VERSION_MAJOR} STREQUAL "11" AND ${CUDA_VERSION_MINOR} STREQUAL
"0")
# nvcc flags
set(CUDA_NVCC_FLAGS
-gencode
arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
-gencode
arch=compute_53,code=sm_53;
-gencode
arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic
# Pascal)
-gencode
arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX
# 1050, GTX 1030, Titan Xp, Tesla P40, Tesla
# P4, Discrete GPU on the NVIDIA Drive PX2
-gencode
arch=compute_70,code=sm_70; # Volta - GV100/Tesla V100, GTX 1180 (GV104)
-gencode
arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro R8000
-gencode
arch=compute_80,code=sm_80; # Anpere - A100
-O3;
-Xcompiler
-fPIC;
${CUDA_NVCC_FLAGS})
elseif(${CUDA_VERSION_MAJOR} STREQUAL "10")
set(CUDA_NVCC_FLAGS
-gencode
arch=compute_30,code=sm_30; # Tesla K10, Quadro K600 K420 K410,
-gencode
arch=compute_35,code=sm_35; # Tesla K20 K40, TITAN Z Black, GTX 780Ti 780
-gencode
arch=compute_37,code=sm_37; # Tesla K80
-gencode
arch=compute_50,code=sm_50; # Quadro 620 1200
-gencode
arch=compute_52,code=sm_52; # Tesla M40 M40, Quadro M6000 M5000 M4000
# M2000, TITAN X, GTX 980Ti 980 970 960 950
-gencode
arch=compute_53,code=sm_53; # Jetson TX1, Tegra X1
-gencode
arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic
# Pascal)
-gencode
arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX
# 1050, GTX 1030, Titan Xp, Tesla P40, Tesla
# P4, Discrete GPU on the NVIDIA Drive PX2
-gencode
arch=compute_70,code=sm_70; # Volta - GV100/Tesla V100, GTX 1180 (GV104)
-gencode
arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro R8000
-O3;
-Xcompiler
-fPIC;
${CUDA_NVCC_FLAGS})
elseif(${CUDA_VERSION_MAJOR} STREQUAL "9")
set(CUDA_NVCC_FLAGS
-gencode
arch=compute_30,code=sm_30;
-gencode
arch=compute_35,code=sm_35;
-gencode
arch=compute_37,code=sm_37;
-gencode
arch=compute_50,code=sm_50;
-gencode
arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
-gencode
arch=compute_53,code=sm_53;
-gencode
arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic
# Pascal)
-gencode
arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX
# 1050, GTX 1030, Titan Xp, Tesla P40, Tesla
# P4, Discrete GPU on the NVIDIA Drive PX2
-gencode
arch=compute_70,code=sm_70; # Volta - GV100/Tesla V100, GTX 1180 (GV104)
-O3;
-Xcompiler
-fPIC;
${CUDA_NVCC_FLAGS})
elseif(${CUDA_VERSION_MAJOR} STREQUAL "8")
set(CUDA_NVCC_FLAGS
-gencode
arch=compute_30,code=sm_30;
-gencode
arch=compute_35,code=sm_35;
-gencode
arch=compute_37,code=sm_37;
-gencode
arch=compute_50,code=sm_50;
-gencode
arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
-gencode
arch=compute_53,code=sm_53;
-gencode
arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic
# Pascal)
-gencode
arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX
# 1050, GTX 1030, Titan Xp, Tesla P40, Tesla
# P4, Discrete GPU on the NVIDIA Drive PX2
-O3;
-Xcompiler
-fPIC;
${CUDA_NVCC_FLAGS})
elseif(${CUDA_VERSION_MAJOR} STREQUAL "7")
set(CUDA_NVCC_FLAGS
-gencode
arch=compute_30,code=sm_30;
-gencode
arch=compute_35,code=sm_35;
-gencode
arch=compute_37,code=sm_37;
-gencode
arch=compute_50,code=sm_50;
-gencode
arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
-gencode
arch=compute_53,code=sm_53;
-O3;
-Xcompiler
-fPIC;
${CUDA_NVCC_FLAGS})
else()
message(FATAL_ERROR "unsupported CUDA_VERSION " ${CUDA_VERSION}
", please use a newer version (>=7.0) of CUDA toolkit!")
endif()
message(STATUS "NVCC version is " ${CMAKE_CUDA_COMPILER_VERSION})

-set(CMAKE_CXX_FLAGS
-    "${CMAKE_CXX_FLAGS} -std=c++11 -DCUB_IGNORE_DEPRECATED_CPP_DIALECT -DCUB_IGNORE_DEPRECATED_CPP_DIALECT"
+# arch will be configured by CMAKE_CUDA_ARCHITECTURES
+set(CMAKE_CUDA_FLAGS
+    "${CMAKE_CUDA_FLAGS} -DCUB_IGNORE_DEPRECATED_CPP_DIALECT -DCUB_IGNORE_DEPRECATED_CPP_DIALECT"
)

-if(${CUDA_VERSION_MAJOR} LESS_EQUAL "11")
-  # check unsupported -std=c++17
-  set(CMAKE_CXX_FLAGS_LIST "${CMAKE_CXX_FLAGS}")
-  separate_arguments(CMAKE_CXX_FLAGS_LIST)
-  if("-std=c++17" IN_LIST CMAKE_CXX_FLAGS_LIST)
-    message(
-      WARNING
-        "Environment variable CXXFLAGS contains flag --std=c++17 which is unsupported by CUDA ${CUDA_VERSION}. Such flag will be removed automatically."
-    )
-    string(REPLACE "-std=c++17" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
-  endif()
-endif()

file(GLOB SOURCE_FILES "*.cu")

-cuda_add_library(deepmd_op_cuda SHARED ${SOURCE_FILES})
+add_library(deepmd_op_cuda SHARED ${SOURCE_FILES})
+target_link_libraries(deepmd_op_cuda PRIVATE deepmd_dyn_cudart)
target_include_directories(
deepmd_op_cuda
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../../include/>
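Here the `cuda_add_library` macro is replaced by a plain `add_library` plus explicit linking, and the project deliberately links its own `deepmd_dyn_cudart` dlopen stub so the binaries still load on machines without CUDA. With `FindCUDAToolkit`, the conventional alternative would be the imported runtime targets (a sketch of what upstream CMake offers, not what this PR does):

```cmake
# Conventional runtime linking under FindCUDAToolkit:
target_link_libraries(deepmd_op_cuda PRIVATE CUDA::cudart) # shared runtime
# ...or CUDA::cudart_static for the static runtime.
```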
5 changes: 2 additions & 3 deletions source/lib/src/cuda/cudart/CMakeLists.txt
@@ -1,8 +1,7 @@
add_library(deepmd_dyn_cudart SHARED cudart_stub.cc)
-target_include_directories(deepmd_dyn_cudart PUBLIC ${CUDA_INCLUDE_DIRS})
-get_filename_component(CUDA_LIBRARY_DIR ${CUDA_cudart_static_LIBRARY} DIRECTORY)
+target_include_directories(deepmd_dyn_cudart PUBLIC ${CUDAToolkit_INCLUDE_DIRS})
set_target_properties(deepmd_dyn_cudart PROPERTIES INSTALL_RPATH
-                      "${CUDA_LIBRARY_DIR}")
+                      "${CUDAToolkit_LIBRARY_DIR}")
if(BUILD_CPP_IF AND NOT BUILD_PY_IF)
install(
TARGETS deepmd_dyn_cudart