merge with devel

deepmodeling · Jan 29, 2024 · ce87afc · ce87afc
2 parents 19069f3 + 4a29c8c
commit ce87afc
Show file tree

Hide file tree

Showing 227 changed files with 786 additions and 579 deletions.
diff --git a/CITATIONS.bib b/CITATIONS.bib
@@ -105,6 +105,25 @@ @misc{Zhang_2022_DPA1
     doi = {10.48550/arXiv.2208.08236},
 }
 
+@misc{Zhang_2023_DPA2,
+    annote = {DPA-2},
+    author = {Duo Zhang and Xinzijian Liu and Xiangyu Zhang and Chengqian Zhang and
+              Chun Cai and Hangrui Bi and Yiming Du and Xuejian Qin and Jiameng Huang
+              and Bowen Li and Yifan Shan and Jinzhe Zeng and Yuzhi Zhang and Siyuan
+              Liu and Yifan Li and Junhan Chang and Xinyan Wang and Shuo Zhou and
+              Jianchuan Liu and Xiaoshan Luo and Zhenyu Wang and Wanrun Jiang and Jing
+              Wu and Yudi Yang and Jiyuan Yang and Manyi Yang and Fu-Qiang Gong and
+              Linshuang Zhang and Mengchao Shi and Fu-Zhi Dai and Darrin M. York and
+              Shi Liu and Tong Zhu and Zhicheng Zhong and Jian Lv and Jun Cheng and
+              Weile Jia and Mohan Chen and Guolin Ke and Weinan E and Linfeng Zhang
+              and Han Wang},
+    title = {{DPA-2: Towards a universal large atomic model for molecular and material
+              simulation}},
+    publisher = {arXiv},
+    year = {2023},
+    doi = {10.48550/arXiv.2312.15492},
+}
+
 @article{Zhang_PhysPlasmas_2020_v27_p122704,
     annote = {frame-specific parameters (e.g. electronic temperature)},
     author = {Zhang, Yuzhi and Gao, Chang and Liu, Qianrui and Zhang, Linfeng and Wang, Han and Chen, Mohan},

diff --git a/backend/dynamic_metadata.py b/backend/dynamic_metadata.py
@@ -46,6 +46,7 @@ def dynamic_metadata(
                 "sphinx_markdown_tables",
                 "myst-nb>=1.0.0rc0",
                 "myst-parser>=0.19.2",
+                "sphinx-design",
                 "breathe",
                 "exhale",
                 "numpydoc",
@@ -90,6 +91,5 @@ def dynamic_metadata(
             ],
             "torch": [
                 "torch>=2a",
-                "tqdm",
             ],
         }
diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
@@ -14,12 +14,6 @@
 
 import numpy as np
 import torch
-from tqdm import (
-    tqdm,
-)
-from tqdm.contrib.logging import (
-    logging_redirect_tqdm,
-)
 
 from deepmd.common import (
     symlink_prefix_files,
@@ -47,7 +41,6 @@
 )
 from deepmd.pt.utils.env import (
     DEVICE,
-    DISABLE_TQDM,
     JIT,
     LOCAL_RANK,
     NUM_WORKERS,
@@ -662,29 +655,24 @@ def log_loss_valid(_task_key="Default"):
                     f.write(str(self.latest_model))
 
         self.t0 = time.time()
-        with logging_redirect_tqdm():
-            for step_id in tqdm(
-                range(self.num_steps),
-                disable=(bool(dist.get_rank()) if dist.is_initialized() else False)
-                or DISABLE_TQDM,
-            ):  # set to None to disable on non-TTY; disable on not rank 0
-                if step_id < self.start_step:
-                    continue
-                if self.multi_task:
-                    chosen_index_list = dp_random.choice(
-                        np.arange(self.num_model),
-                        p=np.array(self.model_prob),
-                        size=self.world_size,
-                        replace=True,
-                    )
-                    assert chosen_index_list.size == self.world_size
-                    model_index = chosen_index_list[self.rank]
-                    model_key = self.model_keys[model_index]
-                else:
-                    model_key = "Default"
-                step(step_id, model_key)
-                if JIT:
-                    break
+        for step_id in range(self.num_steps):
+            if step_id < self.start_step:
+                continue
+            if self.multi_task:
+                chosen_index_list = dp_random.choice(
+                    np.arange(self.num_model),
+                    p=np.array(self.model_prob),
+                    size=self.world_size,
+                    replace=True,
+                )
+                assert chosen_index_list.size == self.world_size
+                model_index = chosen_index_list[self.rank]
+                model_key = self.model_keys[model_index]
+            else:
+                model_key = "Default"
+            step(step_id, model_key)
+            if JIT:
+                break
 
         if (
             self.rank == 0 or dist.get_rank() == 0

diff --git a/deepmd/pt/utils/dataset.py b/deepmd/pt/utils/dataset.py
@@ -13,9 +13,6 @@
 from torch.utils.data import (
     Dataset,
 )
-from tqdm import (
-    trange,
-)
 
 from deepmd.pt.utils import (
     dp_random,
@@ -506,7 +503,7 @@ def preprocess(self, batch):
         assert batch["atype"].max() < len(self._type_map)
         nlist, nlist_loc, nlist_type, shift, mapping = [], [], [], [], []
 
-        for sid in trange(n_frames, disable=env.DISABLE_TQDM):
+        for sid in range(n_frames):
             region = Region3D(box[sid])
             nloc = atype[sid].shape[0]
             _coord = normalize_coord(coord[sid], region, nloc)
@@ -879,7 +876,7 @@ def __len__(self):
     def __getitem__(self, index=None):
         """Get a batch of frames from the selected system."""
         if index is None:
-            index = dp_random.choice(np.arange(self.nsystems), self.probs)
+            index = dp_random.choice(np.arange(self.nsystems), p=self.probs)
         b_data = self._data_systems[index].get_batch(self._batch_size)
         b_data["natoms"] = torch.tensor(
             self._natoms_vec[index], device=env.PREPROCESS_DEVICE
@@ -892,7 +889,7 @@ def __getitem__(self, index=None):
     def get_training_batch(self, index=None):
         """Get a batch of frames from the selected system."""
         if index is None:
-            index = dp_random.choice(np.arange(self.nsystems), self.probs)
+            index = dp_random.choice(np.arange(self.nsystems), p=self.probs)
         b_data = self._data_systems[index].get_batch_for_train(self._batch_size)
         b_data["natoms"] = torch.tensor(
             self._natoms_vec[index], device=env.PREPROCESS_DEVICE

diff --git a/deepmd/pt/utils/env.py b/deepmd/pt/utils/env.py
@@ -8,7 +8,6 @@
 GLOBAL_NP_FLOAT_PRECISION = getattr(np, PRECISION)
 GLOBAL_PT_FLOAT_PRECISION = getattr(torch, PRECISION)
 GLOBAL_ENER_FLOAT_PRECISION = getattr(np, PRECISION)
-DISABLE_TQDM = os.environ.get("DISABLE_TQDM", False)
 SAMPLER_RECORD = os.environ.get("SAMPLER_RECORD", False)
 try:
     # only linux

diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py
@@ -3,9 +3,6 @@
 
 import numpy as np
 import torch
-from tqdm import (
-    trange,
-)
 
 from deepmd.pt.utils import (
     env,
@@ -40,7 +37,7 @@ def make_stat_input(datasets, dataloaders, nbatches):
     if datasets[0].mixed_type:
         keys.append("real_natoms_vec")
     logging.info(f"Packing data for statistics from {len(datasets)} systems")
-    for i in trange(len(datasets), disable=env.DISABLE_TQDM):
+    for i in range(len(datasets)):
         sys_stat = {key: [] for key in keys}
         iterator = iter(dataloaders[i])
         for _ in range(nbatches):

diff --git a/deepmd/utils/random.py b/deepmd/utils/random.py
@@ -1,29 +1,44 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 from typing import (
     Optional,
+    Tuple,
+    Union,
 )
 
 import numpy as np
 
 _RANDOM_GENERATOR = np.random.RandomState()
 
 
-def choice(a: np.ndarray, p: Optional[np.ndarray] = None):
+def choice(
+    a: Union[np.ndarray, int],
+    size: Optional[Union[int, Tuple[int, ...]]] = None,
+    replace: bool = True,
+    p: Optional[np.ndarray] = None,
+):
     """Generates a random sample from a given 1-D array.
 
     Parameters
     ----------
-    a : np.ndarray
-        A random sample is generated from its elements.
-    p : np.ndarray
-        The probabilities associated with each entry in a.
+    a : 1-D array-like or int
+        If an ndarray, a random sample is generated from its elements. If an int,
+        the random sample is generated as if it were np.arange(a)
+    size : int or tuple of ints, optional
+        Output shape. If the given shape is, e.g., (m, n, k), then m * n * k samples
+        are drawn. Default is None, in which case a single value is returned.
+    replace : boolean, optional
+        Whether the sample is with or without replacement. Default is True, meaning
+        that a value of a can be selected multiple times.
+    p : 1-D array-like, optional
+        The probabilities associated with each entry in a. If not given, the sample
+        assumes a uniform distribution over all entries in a.
 
     Returns
     -------
     np.ndarray
         arrays with results and their shapes
     """
-    return _RANDOM_GENERATOR.choice(a, p=p)
+    return _RANDOM_GENERATOR.choice(a, size=size, replace=replace, p=p)
 
 
 def random(size=None):

diff --git a/doc/backend.md b/doc/backend.md
@@ -0,0 +1,28 @@
+# Backend
+
+## Supported backends
+
+DeePMD-kit supports multiple backends: TensorFlow and PyTorch.
+To use DeePMD-kit, you must install at least one backend.
+Not every backend supports all features.
+In the documentation, TensorFlow {{ tensorflow_icon }} and PyTorch {{ pytorch_icon }} icons are used to mark whether a backend supports a feature.
+
+### TensorFlow {{ tensorflow_icon }}
+
+TensorFlow 2.2 or above is required.
+DeePMD-kit does not use the TensorFlow v2 API but uses the TensorFlow v1 API (`tf.compat.v1`) in the graph mode.
+
+### PyTorch {{ pytorch_icon }}
+
+PyTorch 2.0 or above is required.
+
+## Switch the backend
+
+### Training
+
+When training and freezing a model, you can use `dp --tf` or `dp --pt` in the command line to switch the backend.
+
+### Inference
+
+When doing inference, DeePMD-kit detects the backend from the model filename.
+For example, when the model filename ends with `.pb` (a ProtoBuf file), DeePMD-kit treats it as a TensorFlow model and uses the TensorFlow backend.
diff --git a/doc/conf.py b/doc/conf.py
@@ -94,6 +94,7 @@ def setup(app):
     "breathe",
     "exhale",
     "sphinxcontrib.bibtex",
+    "sphinx_design",
 ]
 
 # breathe_domain_by_extension = {

diff --git a/doc/credits.rst b/doc/credits.rst
@@ -49,6 +49,13 @@ Cite DeePMD-kit and methods
 
    Zhang_2022_DPA1
 
+- If the DPA-2 descriptor (`dpa2`) is used,
+
+.. bibliography::
+   :filter: False
+
+   Zhang_2023_DPA2
+
 - If frame-specific parameters (`fparam`, e.g. electronic temperature) are used,
 
 .. bibliography::

diff --git a/doc/index.rst b/doc/index.rst
@@ -34,6 +34,7 @@ DeePMD-kit is a package written in Python/C++, designed to minimize the effort r
    :numbered:
    :caption: Advanced
 
+   backend
    install/index
    data/index
    model/index

diff --git a/doc/inference/python.md b/doc/inference/python.md
@@ -26,9 +26,14 @@ graphs = [DP("graph.000.pb"), DP("graph.001.pb")]
 model_devi = calc_model_devi(coord, cell, atype, graphs)
 ```
 
-Note that if the model inference or model deviation is performed cyclically, one should avoid calling the same model multiple times. Otherwise, tensorFlow will never release the memory and this may lead to an out-of-memory (OOM) error.
+Note that if the model inference or model deviation is performed cyclically, one should avoid calling the same model multiple times.
+Otherwise, TensorFlow or PyTorch will never release the memory, and this may lead to an out-of-memory (OOM) error.
 
-## External neighbor list algorithm
+## External neighbor list algorithm {{ tensorflow_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}
+:::
 
 The native neighbor list algorithm of the DeePMD-kit is in $O(N^2)$ complexity ($N$ is the number of atoms).
 While this is not a problem for small systems that quantum methods can afford, the large systems for molecular dynamics have slow performance.

diff --git a/doc/install/easy-install-dev.md b/doc/install/easy-install-dev.md
@@ -19,12 +19,16 @@ For CUDA 11.8 support, use the `devel_cu11` tag.
 Below is a one-line shell command to download the [artifact](https://nightly.link/deepmodeling/deepmd-kit/workflows/build_wheel/devel/artifact.zip) containing wheels and install it with `pip`:
 
 ```sh
-pip install -U --pre deepmd-kit[gpu,cu12,lmp] --extra-index-url https://deepmodeling.github.io/deepmd-kit/simple
+pip install -U --pre deepmd-kit[gpu,cu12,lmp,torch] --extra-index-url https://deepmodeling.github.io/deepmd-kit/simple
 ```
 
 `cu12` and `lmp` are optional, which is the same as the stable version.
 
-## Download pre-compiled C Library
+## Download pre-compiled C Library {{ tensorflow_icon }}
+
+:::{note}
+**Supported backends**: TensorFlow {{ tensorflow_icon }}
+:::
 
 The [pre-compiled C library](./install-from-c-library.md) can be downloaded from [here](https://nightly.link/deepmodeling/deepmd-kit/workflows/package_c/devel/libdeepmd_c-0-libdeepmd_c.tar.gz.zip), or via a shell command:
 

diff --git a/doc/install/easy-install.md b/doc/install/easy-install.md
@@ -19,9 +19,9 @@ Python 3.8 or above is required for Python interface.
 
 
 ## Install off-line packages
-Both CPU and GPU version offline packages are available in [the Releases page](https://github.com/deepmodeling/deepmd-kit/releases).
+Both CPU and GPU version offline packages are available on [the Releases page](https://github.com/deepmodeling/deepmd-kit/releases).
 
-Some packages are splited into two files due to size limit of GitHub. One may merge them into one after downloading:
+Some packages are split into two files due to the size limit of GitHub. One may merge them into one after downloading:
 ```bash
 cat deepmd-kit-2.1.1-cuda11.6_gpu-Linux-x86_64.sh.0 deepmd-kit-2.1.1-cuda11.6_gpu-Linux-x86_64.sh.1 > deepmd-kit-2.1.1-cuda11.6_gpu-Linux-x86_64.sh
 ```
@@ -73,50 +73,47 @@ A docker for installing the DeePMD-kit is available [here](https://github.com/or
 
 To pull the CPU version:
 ```bash
-docker pull ghcr.io/deepmodeling/deepmd-kit:2.1.1_cpu
+docker pull ghcr.io/deepmodeling/deepmd-kit:2.2.8_cpu
 ```
 
 To pull the GPU version:
 ```bash
-docker pull ghcr.io/deepmodeling/deepmd-kit:2.1.1_cuda11.6_gpu
-```
-
-To pull the ROCm version:
-```bash
-docker pull deepmodeling/dpmdkit-rocm:dp2.0.3-rocm4.5.2-tf2.6-lmp29Sep2021
+docker pull ghcr.io/deepmodeling/deepmd-kit:2.2.8_cuda12.0_gpu
 ```
 
 ## Install Python interface with pip
 
 If you have no existing TensorFlow installed, you can use `pip` to install the pre-built package of the Python interface with CUDA 12 support:
 
 ```bash
-pip install deepmd-kit[gpu,cu12]
+pip install deepmd-kit[gpu,cu12,torch]
 ```
 
 `cu12` is required only when CUDA Toolkit and cuDNN were not installed.
 
 To install the package built against CUDA 11.8, use
 
 ```bash
+pip install torch --index-url https://download.pytorch.org/whl/cu118
 pip install deepmd-kit-cu11[gpu,cu11]
 ```
 
 Or install the CPU version without CUDA support:
 ```bash
+pip install torch --index-url https://download.pytorch.org/whl/cpu
 pip install deepmd-kit[cpu]
 ```
 
-[The LAMMPS module](../third-party/lammps-command.md) and [the i-Pi driver](../third-party/ipi.md) are only provided on Linux and macOS. To install LAMMPS and/or i-Pi, add `lmp` and/or `ipi` to extras:
+[The LAMMPS module](../third-party/lammps-command.md) and [the i-Pi driver](../third-party/ipi.md) are only provided on Linux and macOS for the TensorFlow backend. To install LAMMPS and/or i-Pi, add `lmp` and/or `ipi` to extras:
 ```bash
-pip install deepmd-kit[gpu,cu12,lmp,ipi]
+pip install deepmd-kit[gpu,cu12,torch,lmp,ipi]
 ```
 MPICH is required for parallel running. (The macOS arm64 package doesn't support MPI yet.)
 
 It is suggested to install the package into an isolated environment.
 The supported platform includes Linux x86-64 and aarch64 with GNU C Library 2.28 or above, macOS x86-64 and arm64, and Windows x86-64.
-A specific version of TensorFlow which is compatible with DeePMD-kit will be also installed.
+Specific versions of TensorFlow and PyTorch that are compatible with DeePMD-kit will also be installed.
 
 :::{Warning}
-If your platform is not supported, or want to build against the installed TensorFlow, or want to enable ROCM support, please [build from source](install-from-source.md).
+If your platform is not supported, or you want to build against the installed TensorFlow, or you want to enable ROCM support, please [build from source](install-from-source.md).
 :::