diff --git a/backend/dp_backend.py b/backend/dp_backend.py index 2ca0ff2f93..dbd2d2a52b 100644 --- a/backend/dp_backend.py +++ b/backend/dp_backend.py @@ -7,6 +7,9 @@ from scikit_build_core import build as _orig +from .find_pytorch import ( + find_pytorch, +) from .find_tensorflow import ( find_tensorflow, ) @@ -40,10 +43,18 @@ def __dir__() -> List[str]: def get_requires_for_build_wheel( config_settings: dict, ) -> List[str]: - return _orig.get_requires_for_build_wheel(config_settings) + find_tensorflow()[1] + return ( + _orig.get_requires_for_build_wheel(config_settings) + + find_tensorflow()[1] + + find_pytorch()[1] + ) def get_requires_for_build_editable( config_settings: dict, ) -> List[str]: - return _orig.get_requires_for_build_editable(config_settings) + find_tensorflow()[1] + return ( + _orig.get_requires_for_build_editable(config_settings) + + find_tensorflow()[1] + + find_pytorch()[1] + ) diff --git a/backend/dynamic_metadata.py b/backend/dynamic_metadata.py index 2a66ff065c..83123e6e41 100644 --- a/backend/dynamic_metadata.py +++ b/backend/dynamic_metadata.py @@ -9,6 +9,9 @@ Optional, ) +from .find_pytorch import ( + get_pt_requirement, +) from .find_tensorflow import ( get_tf_requirement, ) @@ -33,7 +36,9 @@ def dynamic_metadata( settings: Optional[Dict[str, object]] = None, ): assert field in ["optional-dependencies", "entry-points", "scripts"] - _, _, find_libpython_requires, extra_scripts, tf_version = get_argument_from_env() + _, _, find_libpython_requires, extra_scripts, tf_version, pt_version = ( + get_argument_from_env() + ) with Path("pyproject.toml").open("rb") as f: pyproject = tomllib.load(f) @@ -51,4 +56,5 @@ def dynamic_metadata( return { **optional_dependencies, **get_tf_requirement(tf_version), + **get_pt_requirement(pt_version), } diff --git a/backend/find_pytorch.py b/backend/find_pytorch.py index f039b6f289..04f297a963 100644 --- a/backend/find_pytorch.py +++ b/backend/find_pytorch.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: 
LGPL-3.0-or-later +import importlib import os import site from functools import ( @@ -17,12 +18,19 @@ get_path, ) from typing import ( + List, Optional, + Tuple, + Union, +) + +from packaging.version import ( + Version, ) @lru_cache -def find_pytorch() -> Optional[str]: +def find_pytorch() -> Tuple[Optional[str], List[str]]: """Find PyTorch library. Tries to find PyTorch in the order of: @@ -39,9 +47,12 @@ def find_pytorch() -> Optional[str]: ------- str, optional PyTorch library path if found. + list of str + PyTorch requirement if not found. Empty if found. """ if os.environ.get("DP_ENABLE_PYTORCH", "0") == "0": - return None + return None, [] + requires = [] pt_spec = None if (pt_spec is None or not pt_spec) and os.environ.get("PYTORCH_ROOT") is not None: @@ -73,4 +84,62 @@ def find_pytorch() -> Optional[str]: # IndexError if submodule_search_locations is an empty list except (AttributeError, TypeError, IndexError): pt_install_dir = None - return pt_install_dir + requires.extend(get_pt_requirement()["torch"]) + return pt_install_dir, requires + + +@lru_cache +def get_pt_requirement(pt_version: str = "") -> dict: + """Get PyTorch requirement when PT is not installed. + + If pt_version is not given and the environment variable `PYTORCH_VERSION` is set, use it as the requirement. + + Parameters + ---------- + pt_version : str, optional + PT version + + Returns + ------- + dict + PyTorch requirement. + """ + if pt_version is None: + return {"torch": []} + if pt_version == "": + pt_version = os.environ.get("PYTORCH_VERSION", "") + + return { + "torch": [ + # uv has different local version behaviors, i.e. 
`==2.3.1` cannot match `==2.3.1+cpu` + # https://github.com/astral-sh/uv/blob/main/PIP_COMPATIBILITY.md#local-version-identifiers + # luckily, .* (prefix matching) defined in PEP 440 can match any local version + # https://peps.python.org/pep-0440/#version-matching + f"torch=={Version(pt_version).base_version}.*" + if pt_version != "" + else "torch>=2a", + ], + } + + +@lru_cache +def get_pt_version(pt_path: Optional[Union[str, Path]]) -> str: + """Get PT version from a PT Python library path. + + Parameters + ---------- + pt_path : str or Path + PT Python library path + + Returns + ------- + str + version + """ + if pt_path is None or pt_path == "": + return "" + version_file = Path(pt_path) / "version.py" + spec = importlib.util.spec_from_file_location("torch.version", version_file) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module.__version__ diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py index 8ba62c9814..06452ab1f5 100644 --- a/backend/find_tensorflow.py +++ b/backend/find_tensorflow.py @@ -204,7 +204,7 @@ def get_tf_requirement(tf_version: str = "") -> dict: @lru_cache -def get_tf_version(tf_path: Union[str, Path]) -> str: +def get_tf_version(tf_path: Optional[Union[str, Path]]) -> str: """Get TF version from a TF Python library path. Parameters diff --git a/backend/read_env.py b/backend/read_env.py index 14935dcc0f..c3fe2d5127 100644 --- a/backend/read_env.py +++ b/backend/read_env.py @@ -15,6 +15,7 @@ from .find_pytorch import ( find_pytorch, + get_pt_version, ) from .find_tensorflow import ( find_tensorflow, @@ -23,7 +24,7 @@ @lru_cache -def get_argument_from_env() -> Tuple[str, list, list, dict, str]: +def get_argument_from_env() -> Tuple[str, list, list, dict, str, str]: """Get the arguments from environment variables. The environment variables are assumed to be not changed during the build. 
@@ -40,6 +41,8 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]: The extra scripts to be installed. str The TensorFlow version. + str + The PyTorch version. """ cmake_args = [] extra_scripts = {} @@ -103,9 +106,8 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]: tf_version = None if os.environ.get("DP_ENABLE_PYTORCH", "0") == "1": - pt_install_dir = find_pytorch() - if pt_install_dir is None: - raise RuntimeError("Cannot find installed PyTorch.") + pt_install_dir, _ = find_pytorch() + pt_version = get_pt_version(pt_install_dir) cmake_args.extend( [ "-DENABLE_PYTORCH=ON", @@ -114,6 +116,7 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]: ) else: cmake_args.append("-DENABLE_PYTORCH=OFF") + pt_version = None cmake_args = [ "-DBUILD_PY_IF:BOOL=TRUE", @@ -125,11 +128,12 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]: find_libpython_requires, extra_scripts, tf_version, + pt_version, ) def set_scikit_build_env(): """Set scikit-build environment variables before executing scikit-build.""" - cmake_minimum_required_version, cmake_args, _, _, _ = get_argument_from_env() + cmake_minimum_required_version, cmake_args, _, _, _, _ = get_argument_from_env() os.environ["SKBUILD_CMAKE_MINIMUM_VERSION"] = cmake_minimum_required_version os.environ["SKBUILD_CMAKE_ARGS"] = ";".join(cmake_args) diff --git a/doc/install/easy-install.md b/doc/install/easy-install.md index 0c56fdb0c5..8d3ec16e36 100644 --- a/doc/install/easy-install.md +++ b/doc/install/easy-install.md @@ -132,7 +132,11 @@ pip install deepmd-kit[cpu] pip install deepmd-kit[gpu,cu12,torch,lmp,ipi] ``` -MPICH is required for parallel running. (The macOS arm64 package doesn't support MPI yet.) +MPICH is required for parallel running. + +:::{Warning} +When installing from pip, only the TensorFlow {{ tensorflow_icon }} backend is supported with LAMMPS and i-PI. +::: It is suggested to install the package into an isolated environment. 
The supported platform includes Linux x86-64 and aarch64 with GNU C Library 2.28 or above, macOS x86-64 and arm64, and Windows x86-64. diff --git a/pyproject.toml b/pyproject.toml index 2cb489ce43..d9cbeb44e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -126,9 +126,6 @@ cu12 = [ "nvidia-cudnn-cu12<9", "nvidia-cuda-nvcc-cu12", ] -torch = [ - "torch>=2a", -] [tool.deepmd_build_backend.scripts] dp = "deepmd.main:main" @@ -198,12 +195,13 @@ replacement = '\1="https://github.com/deepmodeling/deepmd-kit/raw/master/\g<2>"' [tool.cibuildwheel] test-command = [ "python -m deepmd -h", + """python -c "import deepmd.tf;import deepmd.pt" """, "dp -h", "dp_ipi", "pytest {project}/source/tests/tf/test_lammps.py" ] -test-extras = ["cpu", "test", "lmp", "ipi"] -build = ["cp310-*"] +test-extras = ["cpu", "test", "lmp", "ipi", "torch"] +build = ["cp311-*"] skip = ["*-win32", "*-manylinux_i686", "*-musllinux*"] # TODO: uncomment to use the latest image when CUDA 11 is deprecated # manylinux-x86_64-image = "manylinux_2_28" @@ -211,14 +209,27 @@ manylinux-x86_64-image = "quay.io/pypa/manylinux_2_28_x86_64:2022-11-19-1b19e81" manylinux-aarch64-image = "manylinux_2_28" [tool.cibuildwheel.macos] -environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update3", DP_ENABLE_IPI="1" } before-all = [ - """brew install mpich""", + '''pip install -i https://pypi.anaconda.org/mpi4py/simple mpich''', ] repair-wheel-command = """delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel} --ignore-missing-dependencies""" +[tool.cibuildwheel.macos.environment] +PIP_PREFER_BINARY = "1" +DP_LAMMPS_VERSION = "stable_2Aug2023_update3" +DP_ENABLE_IPI = "1" +DP_ENABLE_PYTORCH = "1" +# for unclear reason, when enabling PyTorch, OpenMP is found accidentally +CMAKE_ARGS = "-DCMAKE_DISABLE_FIND_PACKAGE_OpenMP=1" + +[[tool.cibuildwheel.overrides]] +# error: 'value' is unavailable: introduced in macOS 10.13 +select = "*-macosx_x86_64" +inherit.environment = "append" 
+environment.MACOSX_DEPLOYMENT_TARGET = "10.13" + [tool.cibuildwheel.linux] -repair-wheel-command = "auditwheel repair --exclude libtensorflow_framework.so.2 --exclude libtensorflow_framework.so.1 --exclude libtensorflow_framework.so --exclude _pywrap_tensorflow_internal.so --exclude libtensorflow_cc.so.2 -w {dest_dir} {wheel}" +repair-wheel-command = "auditwheel repair --exclude libtensorflow_framework.so.2 --exclude libtensorflow_framework.so.1 --exclude libtensorflow_framework.so --exclude _pywrap_tensorflow_internal.so --exclude libtensorflow_cc.so.2 --exclude libc10.so --exclude libtorch.so --exclude libtorch_cpu.so -w {dest_dir} {wheel}" environment-pass = [ "CIBW_BUILD", "DP_VARIANT", @@ -226,13 +237,12 @@ environment-pass = [ "DP_PKG_NAME", "SETUPTOOLS_SCM_PRETEND_VERSION", ] -environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update3", DP_ENABLE_IPI="1", MPI_HOME="/usr/lib64/mpich", PATH="/usr/lib64/mpich/bin:$PATH" } before-all = [ """if [ ! -z "${DP_PKG_NAME}" ]; then sed -i "s/name = \\"deepmd-kit\\"/name = \\"${DP_PKG_NAME}\\"/g" pyproject.toml; fi""", # https://almalinux.org/blog/2023-12-20-almalinux-8-key-update/ """rpm --import https://repo.almalinux.org/almalinux/RPM-GPG-KEY-AlmaLinux""", """{ if [ "$(uname -m)" = "x86_64" ] ; then yum config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && yum install -y cuda-nvcc-${CUDA_VERSION/./-} cuda-cudart-devel-${CUDA_VERSION/./-}; fi }""", - "yum install -y mpich-devel", + '''/opt/python/cp311-cp311/bin/python -m pip install -i https://pypi.anaconda.org/mpi4py/simple mpich''', # uv is not available in the old manylinux image """{ if [ "$(uname -m)" = "x86_64" ] ; then pipx install uv; fi }""", ] @@ -240,14 +250,29 @@ before-build = [ # old build doesn't support uv """{ if [ "$(uname -m)" = "x86_64" ] ; then uv pip install --system -U build; fi }""", ] +[tool.cibuildwheel.linux.environment] +PIP_PREFER_BINARY = "1" 
+DP_LAMMPS_VERSION = "stable_2Aug2023_update3" +DP_ENABLE_IPI = "1" +DP_ENABLE_PYTORCH = "1" +MPI_HOME = "/usr/lib64/mpich" +PATH = "/usr/lib64/mpich/bin:$PATH" +# use CPU version of torch for building, which should also work for GPU +# note: uv has different behavior from pip on extra index url +# https://github.com/astral-sh/uv/blob/main/PIP_COMPATIBILITY.md#packages-that-exist-on-multiple-indexes +UV_EXTRA_INDEX_URL = "https://download.pytorch.org/whl/cpu" +# trick to find the correct version of mpich +CMAKE_PREFIX_PATH="/opt/python/cp311-cp311/" [tool.cibuildwheel.windows] -environment = { PIP_PREFER_BINARY="1" } -test-extras = ["cpu"] +test-extras = ["cpu", "torch"] test-command = [ "python -m deepmd -h", "dp -h", ] +[tool.cibuildwheel.windows.environment] +PIP_PREFER_BINARY = "1" +DP_ENABLE_PYTORCH = "1" # One can run `tox` or `tox -e gpu` # to run pytest in an isolated environment diff --git a/source/api_cc/CMakeLists.txt b/source/api_cc/CMakeLists.txt index 6a60a91b57..228a6657d3 100644 --- a/source/api_cc/CMakeLists.txt +++ b/source/api_cc/CMakeLists.txt @@ -16,7 +16,10 @@ if(ENABLE_TENSORFLOW) TensorFlow::tensorflow_framework) target_compile_definitions(${libname} PRIVATE BUILD_TENSORFLOW) endif() -if(ENABLE_PYTORCH AND "${OP_CXX_ABI_PT}" EQUAL "${OP_CXX_ABI}") +if(ENABLE_PYTORCH + AND "${OP_CXX_ABI_PT}" EQUAL "${OP_CXX_ABI}" + # LAMMPS and i-PI in the Python package are not ready - needs more work + AND NOT BUILD_PY_IF) target_link_libraries(${libname} PRIVATE "${TORCH_LIBRARIES}") target_compile_definitions(${libname} PRIVATE BUILD_PYTORCH) endif()