
cmake: migrate from FindCUDA to CUDA language #2634

Merged (12 commits) on Jun 26, 2023
4 changes: 2 additions & 2 deletions doc/install/install-from-source.md
@@ -74,7 +74,7 @@ One may set the following environment variables before executing `pip`:
| Environment variables | Allowed value | Default value | Usage |
| --------------------- | ---------------------- | ------------- | -------------------------- |
| DP_VARIANT | `cpu`, `cuda`, `rocm` | `cpu` | Build CPU variant or GPU variant with CUDA or ROCM support. |
-| CUDA_TOOLKIT_ROOT_DIR | Path | Detected automatically | The path to the CUDA toolkit directory. CUDA 7.0 or later is supported. NVCC is required. |
+| CUDAToolkit_ROOT | Path | Detected automatically | The path to the CUDA toolkit directory. CUDA 7.0 or later is supported. NVCC is required. |
| ROCM_ROOT | Path | Detected automatically | The path to the ROCM toolkit directory. |
| TENSORFLOW_ROOT | Path | Detected automatically | The path to TensorFlow Python library. By default the installer only finds TensorFlow under user site-package directory (`site.getusersitepackages()`) or system site-package directory (`sysconfig.get_path("purelib")`) due to limitation of [PEP-517](https://peps.python.org/pep-0517/). If not found, the latest TensorFlow (or the environment variable `TENSORFLOW_VERSION` if given) from PyPI will be built against.|
| DP_ENABLE_NATIVE_OPTIMIZATION | 0, 1 | 0 | Enable compilation optimization for the native machine's CPU type. Do not enable it if generated code will run on different CPUs. |
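For example, building the CUDA variant from source with these variables might look like the following (a sketch; the toolkit path is illustrative, substitute your own installation):

```sh
# Illustrative toolkit path; point CUDAToolkit_ROOT at your CUDA installation.
export DP_VARIANT=cuda
export CUDAToolkit_ROOT=/usr/local/cuda-11.8
pip install .
```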
@@ -180,7 +180,7 @@ One may add the following arguments to `cmake`:
| -DTENSORFLOW_ROOT=<value> | Path | - | The Path to TensorFlow's C++ interface. |
| -DCMAKE_INSTALL_PREFIX=<value> | Path | - | The Path where DeePMD-kit will be installed. |
| -DUSE_CUDA_TOOLKIT=<value> | `TRUE` or `FALSE` | `FALSE` | If `TRUE`, Build GPU support with CUDA toolkit. |
-| -DCUDA_TOOLKIT_ROOT_DIR=<value> | Path | Detected automatically | The path to the CUDA toolkit directory. CUDA 7.0 or later is supported. NVCC is required. |
+| -DCUDAToolkit_ROOT=<value> | Path | Detected automatically | The path to the CUDA toolkit directory. CUDA 7.0 or later is supported. NVCC is required. |
| -DUSE_ROCM_TOOLKIT=<value> | `TRUE` or `FALSE` | `FALSE` | If `TRUE`, Build GPU support with ROCM toolkit. |
| -DCMAKE_HIP_COMPILER_ROCM_ROOT=<value> | Path | Detected automatically | The path to the ROCM toolkit directory. |
| -DLAMMPS_SOURCE_ROOT=<value> | Path | - | Only necessary for LAMMPS plugin mode. The path to the [LAMMPS source code](install-lammps.md). LAMMPS 8Apr2021 or later is supported. If not assigned, the plugin mode will not be enabled. |
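Combining the renamed option with the others above, a configure step might look like this (a sketch; all paths are illustrative):

```sh
# Illustrative paths; substitute your TensorFlow and CUDA locations.
cmake -DUSE_CUDA_TOOLKIT=TRUE \
      -DCUDAToolkit_ROOT=/usr/local/cuda-11.8 \
      -DTENSORFLOW_ROOT=/path/to/tensorflow \
      -DCMAKE_INSTALL_PREFIX=/path/to/deepmd-kit \
      ..
```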
2 changes: 1 addition & 1 deletion doc/install/install-gromacs.md
@@ -27,7 +27,7 @@ cd build
cmake3 .. -DCMAKE_CXX_STANDARD=14 \ # not required, but c++14 seems to be more compatible with higher versions of tensorflow
          -DGMX_MPI=ON \
          -DGMX_GPU=CUDA \ # Gromacs on ROCm has not been fully developed yet
-         -DCUDA_TOOLKIT_ROOT_DIR=/path/to/cuda \
+         -DCUDAToolkit_ROOT=/path/to/cuda \
          -DCMAKE_INSTALL_PREFIX=/path/to/gromacs-2020.2-deepmd
make -j
make install
10 changes: 6 additions & 4 deletions setup.py
@@ -27,13 +27,15 @@
# get variant option from the environment variables, available: cpu, cuda, rocm
dp_variant = os.environ.get("DP_VARIANT", "cpu").lower()
if dp_variant == "cpu" or dp_variant == "":
-    pass
+    cmake_minimum_required_version = "3.16"
elif dp_variant == "cuda":
+    cmake_minimum_required_version = "3.23"
    cmake_args.append("-DUSE_CUDA_TOOLKIT:BOOL=TRUE")
-    cuda_root = os.environ.get("CUDA_TOOLKIT_ROOT_DIR")
+    cuda_root = os.environ.get("CUDAToolkit_ROOT")
    if cuda_root:
-        cmake_args.append(f"-DCUDA_TOOLKIT_ROOT_DIR:STRING={cuda_root}")
+        cmake_args.append(f"-DCUDAToolkit_ROOT:STRING={cuda_root}")
elif dp_variant == "rocm":
+    cmake_minimum_required_version = "3.21"
    cmake_args.append("-DUSE_ROCM_TOOLKIT:BOOL=TRUE")
    rocm_root = os.environ.get("ROCM_ROOT")
    if rocm_root:
@@ -111,7 +113,7 @@ def get_tag(self):
        *cmake_args,
    ],
    cmake_source_dir="source",
-    cmake_minimum_required_version="3.16",
+    cmake_minimum_required_version=cmake_minimum_required_version,
    extras_require={
        "test": ["dpdata>=0.1.9", "ase", "pytest", "pytest-cov", "pytest-sugar"],
        "docs": [
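With `cmake_minimum_required_version` now chosen per variant, a CUDA build requires CMake 3.23 or newer (for `CMAKE_CUDA_ARCHITECTURES all`) and a ROCm build 3.21 or newer. If the system CMake is older, one option (a sketch, not the only way) is the PyPI wheel:

```sh
# Pull a sufficiently new CMake into the build environment.
pip install "cmake>=3.23"
cmake --version  # should now report 3.23 or later
```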
13 changes: 8 additions & 5 deletions source/CMakeLists.txt
@@ -97,12 +97,15 @@ set(DP_VARIANT "cpu")

# define USE_CUDA_TOOLKIT
if(USE_CUDA_TOOLKIT)
-  set(CUDA_USE_STATIC_CUDA_RUNTIME
-      OFF
-      CACHE INTERNAL "")
-  find_package(CUDA REQUIRED)
+  find_package(CUDAToolkit REQUIRED)
+  if(NOT DEFINED CMAKE_CUDA_COMPILER)
+    set(CMAKE_CUDA_COMPILER ${CUDAToolkit_NVCC_EXECUTABLE})
+  endif()
+  if(NOT DEFINED CMAKE_CUDA_HOST_COMPILER)
+    set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
+  endif()
  add_definitions("-DGOOGLE_CUDA")
-  message(STATUS "Found CUDA in ${CUDA_TOOLKIT_ROOT_DIR}, build nv GPU support")
+  message(STATUS "Found CUDA in ${CUDAToolkit_BIN_DIR}, build nv GPU support")
  set(DP_VARIANT "cuda")
else()
  message(STATUS "Will not build nv GPU support")
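This hunk is the heart of the migration: the long-deprecated `FindCUDA` module (`find_package(CUDA)`) gives way to `find_package(CUDAToolkit)` (available since CMake 3.17) plus CUDA as a first-class language. A minimal self-contained sketch of the pattern, with hypothetical target and file names:

```cmake
cmake_minimum_required(VERSION 3.23)
# Enable CUDA as a first-class language; CMake locates nvcc itself.
project(example LANGUAGES CXX CUDA)

# Provides imported targets such as CUDA::cudart instead of FindCUDA variables.
find_package(CUDAToolkit REQUIRED)

# Hypothetical target: .cu sources are compiled by the CUDA language rules,
# so no cuda_add_library() wrapper is needed.
add_library(example_op SHARED example_op.cu)
target_link_libraries(example_op PRIVATE CUDA::cudart)
```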
2 changes: 1 addition & 1 deletion source/install/package_c.sh
@@ -20,7 +20,7 @@ cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
-DPACKAGE_C=TRUE \
-DUSE_TF_PYTHON_LIBS=TRUE \
..
-make -j${NPROC}
+make -j${NPROC} VERBOSE=1
make install

#------------------
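`VERBOSE=1` makes the generated Makefiles echo every full compile and link command, which is handy for confirming that `.cu` files are now built by the CUDA language rules rather than the old FindCUDA wrappers. The generator-agnostic equivalent (an aside, not part of this script) is:

```sh
# Same effect without passing VERBOSE=1 to make:
cmake --build . --verbose
```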
2 changes: 1 addition & 1 deletion source/lib/CMakeLists.txt
@@ -16,7 +16,7 @@ if(USE_CUDA_TOOLKIT)
  target_link_libraries(${libname} INTERFACE deepmd_dyn_cudart ${EXTRA_LIBS})
  # gpu_cuda.h
  target_include_directories(
-    ${libname} PUBLIC $<BUILD_INTERFACE:${CUDA_INCLUDE_DIRS}>
+    ${libname} PUBLIC $<BUILD_INTERFACE:${CUDAToolkit_INCLUDE_DIRS}>
    $<INSTALL_INTERFACE:include>)
endif()

215 changes: 16 additions & 199 deletions source/lib/src/cuda/CMakeLists.txt
@@ -1,23 +1,20 @@
-# required cmake version
-cmake_minimum_required(VERSION 3.16)
+# required cmake version 3.23: CMAKE_CUDA_ARCHITECTURES all
+cmake_minimum_required(VERSION 3.23)
# project name
project(deepmd_op_cuda)

-# SET(CUDA_SEPARABLE_COMPILATION ON)
-find_package(CUDA REQUIRED)
-if(NOT CUDA_FOUND)
-  message(STATUS "CUDA not found. Project will not be built.")
-endif(NOT CUDA_FOUND)
+set(CMAKE_CUDA_ARCHITECTURES all)
+enable_language(CUDA)
+set(CMAKE_CUDA_STANDARD 11)
+add_compile_definitions(
+  "$<$<COMPILE_LANGUAGE:CUDA>:_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI}>")
+
+find_package(CUDAToolkit REQUIRED)

# take dynamic open cudart library replace of static one so it's not required
# when using CPUs
add_subdirectory(cudart)
-# important: it must be before cuda_add_library and any link target to cudart
-set(CUDA_LIBRARIES deepmd_dyn_cudart)
-
-# set c++ version c++11
-set(CMAKE_CXX_STANDARD 11)
-set(CMAKE_CUDA_STANDARD 11)
# nvcc -o libdeepmd_op_cuda.so -I/usr/local/cub-1.8.0 -rdc=true -DHIGH_PREC=true
# -gencode arch=compute_61,code=sm_61 -shared -Xcompiler -fPIC deepmd_op.cu
# -L/usr/local/cuda/lib64 -lcudadevrt very important here! Include path to cub.
@@ -26,201 +23,21 @@ set(CMAKE_CUDA_STANDARD 11)

# cub has been included in CUDA Toolkit 11, we do not need to include it any
# more see https://github.com/NVIDIA/cub
-if(${CUDA_VERSION_MAJOR} LESS_EQUAL "10")
+if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS "11")
include_directories(cub)
endif()

message(STATUS "CUDA major version is " ${CUDA_VERSION_MAJOR})

if(${CUDA_VERSION_MAJOR} GREATER "11"
OR (${CUDA_VERSION_MAJOR} STREQUAL "11" AND ${CUDA_VERSION_MINOR}
GREATER_EQUAL "5"))
# nvcc flags
set(CUDA_NVCC_FLAGS
-arch=all; # embeds a compiled code image for all supported architectures
# (sm_*), and a PTX program for the highest major virtual
# architecture
-O3;
-Xcompiler
-fPIC;
${CUDA_NVCC_FLAGS})
elseif(${CUDA_VERSION_MAJOR} STREQUAL "11" AND ${CUDA_VERSION_MINOR} GREATER
"0")
# nvcc flags
set(CUDA_NVCC_FLAGS
-gencode
arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
-gencode
arch=compute_53,code=sm_53;
-gencode
arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic
# Pascal)
-gencode
arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX
# 1050, GTX 1030, Titan Xp, Tesla P40, Tesla
# P4, Discrete GPU on the NVIDIA Drive PX2
-gencode
arch=compute_70,code=sm_70; # Volta - GV100/Tesla V100, GTX 1180 (GV104)
-gencode
arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro R8000
-gencode
arch=compute_80,code=sm_80; # Anpere - A100
-gencode
arch=compute_86,code=sm_86; # Anpere - RTX 3090
-O3;
-Xcompiler
-fPIC;
${CUDA_NVCC_FLAGS})
elseif(${CUDA_VERSION_MAJOR} STREQUAL "11" AND ${CUDA_VERSION_MINOR} STREQUAL
"0")
# nvcc flags
set(CUDA_NVCC_FLAGS
-gencode
arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
-gencode
arch=compute_53,code=sm_53;
-gencode
arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic
# Pascal)
-gencode
arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX
# 1050, GTX 1030, Titan Xp, Tesla P40, Tesla
# P4, Discrete GPU on the NVIDIA Drive PX2
-gencode
arch=compute_70,code=sm_70; # Volta - GV100/Tesla V100, GTX 1180 (GV104)
-gencode
arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro R8000
-gencode
arch=compute_80,code=sm_80; # Anpere - A100
-O3;
-Xcompiler
-fPIC;
${CUDA_NVCC_FLAGS})
elseif(${CUDA_VERSION_MAJOR} STREQUAL "10")
set(CUDA_NVCC_FLAGS
-gencode
arch=compute_30,code=sm_30; # Tesla K10, Quadro K600 K420 K410,
-gencode
arch=compute_35,code=sm_35; # Tesla K20 K40, TITAN Z Black, GTX 780Ti 780
-gencode
arch=compute_37,code=sm_37; # Tesla K80
-gencode
arch=compute_50,code=sm_50; # Quadro 620 1200
-gencode
arch=compute_52,code=sm_52; # Tesla M40 M40, Quadro M6000 M5000 M4000
# M2000, TITAN X, GTX 980Ti 980 970 960 950
-gencode
arch=compute_53,code=sm_53; # Jetson TX1, Tegra X1
-gencode
arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic
# Pascal)
-gencode
arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX
# 1050, GTX 1030, Titan Xp, Tesla P40, Tesla
# P4, Discrete GPU on the NVIDIA Drive PX2
-gencode
arch=compute_70,code=sm_70; # Volta - GV100/Tesla V100, GTX 1180 (GV104)
-gencode
arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro R8000
-O3;
-Xcompiler
-fPIC;
${CUDA_NVCC_FLAGS})
elseif(${CUDA_VERSION_MAJOR} STREQUAL "9")
set(CUDA_NVCC_FLAGS
-gencode
arch=compute_30,code=sm_30;
-gencode
arch=compute_35,code=sm_35;
-gencode
arch=compute_37,code=sm_37;
-gencode
arch=compute_50,code=sm_50;
-gencode
arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
-gencode
arch=compute_53,code=sm_53;
-gencode
arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic
# Pascal)
-gencode
arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX
# 1050, GTX 1030, Titan Xp, Tesla P40, Tesla
# P4, Discrete GPU on the NVIDIA Drive PX2
-gencode
arch=compute_70,code=sm_70; # Volta - GV100/Tesla V100, GTX 1180 (GV104)
-O3;
-Xcompiler
-fPIC;
${CUDA_NVCC_FLAGS})
elseif(${CUDA_VERSION_MAJOR} STREQUAL "8")
set(CUDA_NVCC_FLAGS
-gencode
arch=compute_30,code=sm_30;
-gencode
arch=compute_35,code=sm_35;
-gencode
arch=compute_37,code=sm_37;
-gencode
arch=compute_50,code=sm_50;
-gencode
arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
-gencode
arch=compute_53,code=sm_53;
-gencode
arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic
# Pascal)
-gencode
arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX
# 1050, GTX 1030, Titan Xp, Tesla P40, Tesla
# P4, Discrete GPU on the NVIDIA Drive PX2
-O3;
-Xcompiler
-fPIC;
${CUDA_NVCC_FLAGS})
elseif(${CUDA_VERSION_MAJOR} STREQUAL "7")
set(CUDA_NVCC_FLAGS
-gencode
arch=compute_30,code=sm_30;
-gencode
arch=compute_35,code=sm_35;
-gencode
arch=compute_37,code=sm_37;
-gencode
arch=compute_50,code=sm_50;
-gencode
arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
-gencode
arch=compute_53,code=sm_53;
-O3;
-Xcompiler
-fPIC;
${CUDA_NVCC_FLAGS})
else()
message(FATAL_ERROR "unsupported CUDA_VERSION " ${CUDA_VERSION}
", please use a newer version (>=7.0) of CUDA toolkit!")
endif()
message(STATUS "NVCC version is " ${CMAKE_CUDA_COMPILER_VERSION})

-set(CMAKE_CXX_FLAGS
-    "${CMAKE_CXX_FLAGS} -std=c++11 -DCUB_IGNORE_DEPRECATED_CPP_DIALECT -DCUB_IGNORE_DEPRECATED_CPP_DIALECT"
+# arch will be configured by CMAKE_CUDA_ARCHITECTURES
+set(CMAKE_CUDA_FLAGS
+    "${CMAKE_CUDA_FLAGS} -DCUB_IGNORE_DEPRECATED_CPP_DIALECT -DCUB_IGNORE_DEPRECATED_CPP_DIALECT"
)

-if(${CUDA_VERSION_MAJOR} LESS_EQUAL "11")
-  # check unsupported -std=c++17
-  set(CMAKE_CXX_FLAGS_LIST "${CMAKE_CXX_FLAGS}")
-  separate_arguments(CMAKE_CXX_FLAGS_LIST)
-  if("-std=c++17" IN_LIST CMAKE_CXX_FLAGS_LIST)
-    message(
-      WARNING
-        "Environment variable CXXFLAGS contains flag --std=c++17 which is unsupported by CUDA ${CUDA_VERSION}. Such flag will be removed automatically."
-    )
-    string(REPLACE "-std=c++17" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
-  endif()
-endif()

file(GLOB SOURCE_FILES "*.cu")

-cuda_add_library(deepmd_op_cuda SHARED ${SOURCE_FILES})
+add_library(deepmd_op_cuda SHARED ${SOURCE_FILES})
+target_link_libraries(deepmd_op_cuda PRIVATE deepmd_dyn_cudart)
target_include_directories(
deepmd_op_cuda
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../../include/>
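Here the `cuda_add_library` macro is replaced by a plain `add_library` plus explicit linking, and the project deliberately links its own `deepmd_dyn_cudart` dlopen stub so the binaries still load on machines without CUDA. With `FindCUDAToolkit`, the conventional alternative would be the imported runtime targets (a sketch of what upstream CMake offers, not what this PR does):

```cmake
# Conventional runtime linking under FindCUDAToolkit:
target_link_libraries(deepmd_op_cuda PRIVATE CUDA::cudart) # shared runtime
# ...or CUDA::cudart_static for the static runtime.
```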
5 changes: 2 additions & 3 deletions source/lib/src/cuda/cudart/CMakeLists.txt
@@ -1,8 +1,7 @@
add_library(deepmd_dyn_cudart SHARED cudart_stub.cc)
-target_include_directories(deepmd_dyn_cudart PUBLIC ${CUDA_INCLUDE_DIRS})
-get_filename_component(CUDA_LIBRARY_DIR ${CUDA_cudart_static_LIBRARY} DIRECTORY)
+target_include_directories(deepmd_dyn_cudart PUBLIC ${CUDAToolkit_INCLUDE_DIRS})
set_target_properties(deepmd_dyn_cudart PROPERTIES INSTALL_RPATH
-                      "${CUDA_LIBRARY_DIR}")
+                      "${CUDAToolkit_LIBRARY_DIR}")
if(BUILD_CPP_IF AND NOT BUILD_PY_IF)
install(
TARGETS deepmd_dyn_cudart