Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(pt): add op library #3620

Merged
merged 3 commits into from
Mar 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions backend/find_pytorch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
import os
import site
from functools import (
lru_cache,
)
from importlib.machinery import (
FileFinder,
)
from importlib.util import (
find_spec,
)
from pathlib import (
Path,
)
from sysconfig import (
get_path,
)
from typing import (
Optional,
)


@lru_cache
def find_pytorch() -> Optional[str]:
    """Find PyTorch library.

    Tries to find PyTorch in the order of:

    1. The parent directory of the environment variable `PYTORCH_ROOT` if set
    2. The current Python environment.
    3. user site packages directory if enabled
    4. system site packages directory (purelib)

    Considering the default PyTorch package still uses old CXX11 ABI, we
    cannot install it automatically.

    The search is skipped and ``None`` is returned when the environment
    variable `DP_ENABLE_PYTORCH` is unset or equal to ``"0"``.

    The result is cached by ``lru_cache``: changes to the environment made
    after the first call are not seen until ``find_pytorch.cache_clear()``
    is called.

    Returns
    -------
    str, optional
        PyTorch library path if found.
    """
    if os.environ.get("DP_ENABLE_PYTORCH", "0") == "0":
        return None

    pt_spec = None

    # An empty PYTORCH_ROOT is treated as unset; otherwise Path("") would
    # silently make the search start from the current working directory.
    pytorch_root = os.environ.get("PYTORCH_ROOT")
    if pytorch_root:
        # PYTORCH_ROOT points at the torch package itself, so the directory
        # to search in is its parent.
        site_packages = Path(pytorch_root).parent.absolute()
        pt_spec = FileFinder(str(site_packages)).find_spec("torch")

    # get pytorch spec
    # note: isolated build will not work for backend
    if pt_spec is None:
        pt_spec = find_spec("torch")

    if pt_spec is None and site.ENABLE_USER_SITE:
        # search PyTorch in user site-packages before global site-packages
        site_packages = site.getusersitepackages()
        if site_packages:
            pt_spec = FileFinder(site_packages).find_spec("torch")

    if pt_spec is None:
        # purelib gets site-packages path
        site_packages = get_path("purelib")
        if site_packages:
            pt_spec = FileFinder(site_packages).find_spec("torch")

    # get install dir from spec
    try:
        return pt_spec.submodule_search_locations[0]  # type: ignore
    except (AttributeError, TypeError, IndexError):
        # AttributeError if pt_spec is None
        # TypeError if submodule_search_locations is None
        # IndexError if submodule_search_locations is an empty list
        return None
16 changes: 16 additions & 0 deletions backend/read_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
Version,
)

from .find_pytorch import (
find_pytorch,
)
from .find_tensorflow import (
find_tensorflow,
get_tf_version,
Expand Down Expand Up @@ -99,6 +102,19 @@ def get_argument_from_env() -> Tuple[str, list, list, dict, str]:
cmake_args.append("-DENABLE_TENSORFLOW=OFF")
tf_version = None

if os.environ.get("DP_ENABLE_PYTORCH", "0") == "1":
pt_install_dir = find_pytorch()
if pt_install_dir is None:
raise RuntimeError("Cannot find installed PyTorch.")
cmake_args.extend(
[
"-DENABLE_PYTORCH=ON",
f"-DCMAKE_PREFIX_PATH={pt_install_dir}",
]
)
else:
cmake_args.append("-DENABLE_PYTORCH=OFF")

cmake_args = [
"-DBUILD_PY_IF:BOOL=TRUE",
*cmake_args,
Expand Down
9 changes: 9 additions & 0 deletions deepmd/pt/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,10 @@
# SPDX-License-Identifier: LGPL-3.0-or-later

# import customized OPs globally
from deepmd.pt.cxx_op import (
ENABLE_CUSTOMIZED_OP,
)

# Public names of this package: the flag imported from deepmd.pt.cxx_op above.
__all__ = [
"ENABLE_CUSTOMIZED_OP",
]
43 changes: 43 additions & 0 deletions deepmd/pt/cxx_op.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
import platform

import torch

from deepmd.env import (
SHARED_LIB_DIR,
)


def load_library(module_name: str) -> bool:
    """Load an OP library from the shared library directory.

    The file name is built from a platform-dependent prefix and extension
    (no prefix and ``.dll`` on Windows, ``lib`` and ``.so`` elsewhere) and
    looked up under ``SHARED_LIB_DIR``.

    Parameters
    ----------
    module_name : str
        Name of the module, without prefix or extension

    Returns
    -------
    bool
        True if the library file exists and was passed to
        ``torch.ops.load_library``; False if the file does not exist
    """
    on_windows = platform.system() == "Windows"
    prefix, ext = ("", ".dll") if on_windows else ("lib", ".so")

    library_stem = SHARED_LIB_DIR / (prefix + module_name)
    module_file = library_stem.with_suffix(ext).resolve()

    # A missing library is not an error: the caller only gets a flag.
    if not module_file.is_file():
        return False

    torch.ops.load_library(module_file)
    return True


# Attempt to load the customized OP library when this module is imported.
# The flag is the return value of load_library: True if the library file was
# found and loaded, False otherwise.
ENABLE_CUSTOMIZED_OP = load_library("deepmd_op_pt")

__all__ = [
"ENABLE_CUSTOMIZED_OP",
]
4 changes: 4 additions & 0 deletions deepmd/pt/entrypoints/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@
from deepmd.main import (
parse_args,
)
from deepmd.pt.cxx_op import (
ENABLE_CUSTOMIZED_OP,
)
from deepmd.pt.infer import (
inference,
)
Expand Down Expand Up @@ -224,6 +227,7 @@ def get_backend_info(self) -> dict:
return {
"Backend": "PyTorch",
"PT ver": f"v{torch.__version__}-g{torch.version.git_version[:11]}",
"Enable custom OP": ENABLE_CUSTOMIZED_OP,
}


Expand Down
11 changes: 11 additions & 0 deletions doc/install/install-from-source.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,16 @@ Note that TensorFlow may have specific requirements for the compiler version to

:::

:::{tab-item} PyTorch {{ pytorch_icon }}

You can set the environment variable `DP_ENABLE_PYTORCH` to `1` (e.g. `export DP_ENABLE_PYTORCH=1`) to enable customized C++ OPs in the PyTorch backend.
Note that PyTorch may have specific requirements for the compiler version to support the C++ standard version and [`_GLIBCXX_USE_CXX11_ABI`](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html) used by PyTorch.

The customized C++ OPs are not enabled by default because the TensorFlow and PyTorch packages from PyPI use different `_GLIBCXX_USE_CXX11_ABI` flags.
We recommend conda-forge packages in this case.

:::

::::

Execute
Expand All @@ -135,6 +145,7 @@ One may set the following environment variables before executing `pip`:
| CUDAToolkit_ROOT | Path | Detected automatically | The path to the CUDA toolkit directory. CUDA 9.0 or later is supported. NVCC is required. |
| ROCM_ROOT | Path | Detected automatically | The path to the ROCM toolkit directory. |
| DP_ENABLE_TENSORFLOW | 0, 1 | 1 | {{ tensorflow_icon }} Enable the TensorFlow backend. |
| DP_ENABLE_PYTORCH | 0, 1 | 0 | {{ pytorch_icon }} Enable customized C++ OPs for the PyTorch backend. PyTorch can still run without customized C++ OPs, but features will be limited. |
| TENSORFLOW_ROOT | Path | Detected automatically | {{ tensorflow_icon }} The path to TensorFlow Python library. By default the installer only finds TensorFlow under user site-package directory (`site.getusersitepackages()`) or system site-package directory (`sysconfig.get_path("purelib")`) due to limitation of [PEP-517](https://peps.python.org/pep-0517/). If not found, the latest TensorFlow (or the environment variable `TENSORFLOW_VERSION` if given) from PyPI will be built against. |
| DP_ENABLE_NATIVE_OPTIMIZATION | 0, 1 | 0 | Enable compilation optimization for the native machine's CPU type. Do not enable it if generated code will run on different CPUs. |
| CMAKE_ARGS | str | - | Additional CMake arguments |
Expand Down
3 changes: 3 additions & 0 deletions source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,9 @@ if(NOT DEEPMD_C_ROOT)
if(ENABLE_TENSORFLOW)
add_subdirectory(op/)
endif()
if(ENABLE_PYTORCH)
add_subdirectory(op/pt/)
endif()
add_subdirectory(lib/)
endif()
if(BUILD_PY_IF)
Expand Down
2 changes: 1 addition & 1 deletion source/api_cc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ set_target_properties(
${libname}
PROPERTIES INSTALL_RPATH "$ORIGIN;${TensorFlow_LIBRARY_PATH}"
INSTALL_RPATH_USE_LINK_PATH TRUE
BUILD_RPATH "$ORIGIN/../op")
BUILD_RPATH "$ORIGIN/../op;$ORIGIN/../op/pt")
target_compile_definitions(${libname} PRIVATE TF_PRIVATE)
if(CMAKE_TESTING_ENABLED)
target_link_libraries(${libname} PRIVATE coverage_config)
Expand Down
1 change: 1 addition & 0 deletions source/api_cc/src/DeepPotPT.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ void DeepPotPT::init(const std::string& model,
<< std::endl;
return;
}
deepmd::load_op_library();
int gpu_num = torch::cuda::device_count();
if (gpu_num > 0) {
gpu_id = gpu_rank % gpu_num;
Expand Down
16 changes: 11 additions & 5 deletions source/api_cc/src/common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -377,21 +377,27 @@ void deepmd::get_env_nthreads(int& num_intra_nthreads,
}
}

// Load a single OP shared library given its base name (without the "lib"
// prefix or the platform extension). The file name is resolved by the
// platform's dynamic loader search path.
// Throws deepmd::deepmd_exception if the library cannot be loaded.
static inline void _load_single_op_library(const std::string& library_name) {
#if defined(_WIN32)
  std::string dso_path = library_name + ".dll";
  void* dso_handle = LoadLibrary(dso_path.c_str());
#else
  std::string dso_path = "lib" + library_name + ".so";
  void* dso_handle = dlopen(dso_path.c_str(), RTLD_NOW | RTLD_LOCAL);
#endif
  if (!dso_handle) {
    throw deepmd::deepmd_exception(
        dso_path +
        " is not found! You can add the library directory to LD_LIBRARY_PATH");
  }
}

// Load the OP library of every backend that was enabled at compile time.
void deepmd::load_op_library() {
#ifdef BUILD_TENSORFLOW
  _load_single_op_library("deepmd_op");
#endif
#ifdef BUILD_PYTORCH
  _load_single_op_library("deepmd_op_pt");
#endif
}

Expand Down
26 changes: 26 additions & 0 deletions source/op/pt/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Customized OP library for the PyTorch backend, built as a loadable module.
file(GLOB OP_SRC print_summary.cc)
add_library(deepmd_op_pt MODULE ${OP_SRC})

# link: libdeepmd libtorch
target_link_libraries(deepmd_op_pt PRIVATE ${TORCH_LIBRARIES} ${LIB_DEEPMD})

# Resolve dependencies relative to the installed module's own directory.
if(APPLE)
  set_property(TARGET deepmd_op_pt PROPERTY INSTALL_RPATH "@loader_path")
else()
  set_property(TARGET deepmd_op_pt PROPERTY INSTALL_RPATH "$ORIGIN")
endif()

# MPI is optional; USE_MPI is defined for the sources only when it is found.
find_package(MPI)
if(MPI_FOUND)
  # NOTE(review): INTERFACE only propagates MPI to consumers of this target;
  # confirm whether PRIVATE was intended for a MODULE library.
  target_link_libraries(deepmd_op_pt INTERFACE MPI::MPI_CXX)
  target_compile_definitions(deepmd_op_pt PRIVATE USE_MPI)
endif()

if(CMAKE_TESTING_ENABLED)
  target_link_libraries(deepmd_op_pt PRIVATE coverage_config)
endif()

# Python builds install into the package's lib directory, others into lib/.
if(BUILD_PY_IF)
  install(TARGETS deepmd_op_pt DESTINATION deepmd/lib/)
else()
  install(TARGETS deepmd_op_pt DESTINATION lib/)
endif()
14 changes: 14 additions & 0 deletions source/op/pt/print_summary.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
#include <torch/torch.h>

#include <iostream>

// Report whether this library was compiled with USE_MPI defined.
// Returns a one-element boolean tensor: 1 when USE_MPI is defined at compile
// time, 0 otherwise.
torch::Tensor enable_mpi() {
#ifdef USE_MPI
  constexpr bool kBuiltWithMpi = true;
#else
  constexpr bool kBuiltWithMpi = false;
#endif
  return kBuiltWithMpi ? torch::ones({1}, torch::kBool)
                       : torch::zeros({1}, torch::kBool);
}

// Register enable_mpi under the name "enable_mpi" in the `deepmd` library
// namespace.
TORCH_LIBRARY(deepmd, m) { m.def("enable_mpi", enable_mpi); }

Check notice

Code scanning / CodeQL

Unused static function Note

Static function TORCH_LIBRARY_init_deepmd is unreachable (
TORCH_LIBRARY_static_init_deepmd
must be removed at the same time)