diff --git a/README.md b/README.md index 0b84c705dd..995ac6e914 100644 --- a/README.md +++ b/README.md @@ -94,6 +94,7 @@ A full [document](doc/train/train-input-auto.rst) on options in the training inp - [Descriptor `"se_e2_r"`](doc/model/train-se-e2-r.md) - [Descriptor `"se_e3"`](doc/model/train-se-e3.md) - [Descriptor `"hybrid"`](doc/model/train-hybrid.md) + - [Descriptor `sel`](doc/model/sel.md) - [Fit energy](doc/model/train-energy.md) - [Fit `tensor` like `Dipole` and `Polarizability`](doc/model/train-fitting-tensor.md) - [Train a Deep Potential model using `type embedding` approach](doc/model/train-se-e2-a-tebd.md) diff --git a/deepmd/entrypoints/__init__.py b/deepmd/entrypoints/__init__.py index 80d0431f8c..d7559ad74b 100644 --- a/deepmd/entrypoints/__init__.py +++ b/deepmd/entrypoints/__init__.py @@ -11,6 +11,7 @@ from .transfer import transfer from ..infer.model_devi import make_model_devi from .convert import convert +from .neighbor_stat import neighbor_stat __all__ = [ "config", @@ -23,4 +24,5 @@ "doc_train_input", "make_model_devi", "convert", + "neighbor_stat", ] diff --git a/deepmd/entrypoints/main.py b/deepmd/entrypoints/main.py index 0f7372e0b9..3a9d3991ce 100644 --- a/deepmd/entrypoints/main.py +++ b/deepmd/entrypoints/main.py @@ -16,6 +16,7 @@ transfer, make_model_devi, convert, + neighbor_stat, ) from deepmd.loggers import set_log_handles @@ -408,6 +409,36 @@ def parse_args(args: Optional[List[str]] = None): type=str, help='the output model', ) + + # neighbor_stat + parser_neighbor_stat = subparsers.add_parser( + 'neighbor-stat', + parents=[parser_log], + help='Calculate neighbor statistics', + ) + parser_neighbor_stat.add_argument( + "-s", + "--system", + default=".", + type=str, + help="The system dir. Recursively detect systems in this directory", + ) + parser_neighbor_stat.add_argument( + "-r", + "--rcut", + type=float, + required=True, + help="cutoff radius", + ) + parser_neighbor_stat.add_argument( + "-t", + "--type-map", + type=str, + nargs='+', + required=True, + help="type map", + ) + # --version parser.add_argument('--version', action='version', version='DeePMD-kit v%s' % __version__) @@ -456,6 +487,8 @@ def main(): make_model_devi(**dict_args) elif args.command == "convert-from": convert(**dict_args) + elif args.command == "neighbor-stat": + neighbor_stat(**dict_args) elif args.command is None: pass else: diff --git a/deepmd/entrypoints/neighbor_stat.py b/deepmd/entrypoints/neighbor_stat.py new file mode 100644 index 0000000000..eaf8b3efe3 --- /dev/null +++ b/deepmd/entrypoints/neighbor_stat.py @@ -0,0 +1,49 @@ +import logging +from typing import List + +from deepmd.common import expand_sys_str +from deepmd.utils.data_system import DeepmdDataSystem +from deepmd.utils.neighbor_stat import NeighborStat + +log = logging.getLogger(__name__) + +def neighbor_stat( + *, + system: str, + rcut: float, + type_map: List[str], + **kwargs, +): + """Calculate neighbor statistics. + + Parameters + ---------- + system : str + system to stat + rcut : float + cutoff radius + type_map : list[str] + type map + + Examples + -------- + >>> neighbor_stat(system='.', rcut=6., type_map=["C", "H", "O", "N", "P", "S", "Mg", "Na", "HW", "OW", "mNa", "mCl", "mC", "mH", "mMg", "mN", "mO", "mP"]) + min_nbor_dist: 0.6599510670195264 + max_nbor_size: [23, 26, 19, 16, 2, 2, 1, 1, 72, 37, 5, 0, 31, 29, 1, 21, 20, 5] + """ + all_sys = expand_sys_str(system) + if not len(all_sys): + raise RuntimeError("Did not find valid system") + data = DeepmdDataSystem( + systems=all_sys, + batch_size=1, + test_size=1, + rcut=rcut, + type_map=type_map, + ) + data.get_batch() + nei = NeighborStat(data.get_ntypes(), rcut) + min_nbor_dist, max_nbor_size = nei.get_stat(data) + log.info("min_nbor_dist: %f" % min_nbor_dist) + log.info("max_nbor_size: %s" % str(max_nbor_size)) + return min_nbor_dist, max_nbor_size diff --git a/doc/model/index.md b/doc/model/index.md index cce3ec57ec..94575f67c6 100644 --- a/doc/model/index.md +++ b/doc/model/index.md @@ -5,6 +5,7 @@ - [Descriptor `"se_e2_r"`](train-se-e2-r.md) - [Descriptor `"se_e3"`](train-se-e3.md) - [Descriptor `"hybrid"`](train-hybrid.md) +- [Descriptor `sel`](sel.md) - [Fit energy](train-energy.md) - [Fit `tensor` like `Dipole` and `Polarizability`](train-fitting-tensor.md) - [Train a Deep Potential model using `type embedding` approach](train-se-e2-a-tebd.md) diff --git a/doc/model/index.rst b/doc/model/index.rst index 1cc0f7e6ea..10f41b375c 100644 --- a/doc/model/index.rst +++ b/doc/model/index.rst @@ -9,6 +9,7 @@ Model train-se-e2-r train-se-e3 train-hybrid + sel train-energy train-fitting-tensor train-se-e2-a-tebd diff --git a/doc/model/sel.md b/doc/model/sel.md new file mode 100644 index 0000000000..dce1b8f860 --- /dev/null +++ b/doc/model/sel.md @@ -0,0 +1,13 @@ +# Determine `sel` + +All descriptors require to set `sel`, which means the expected maximum number of type-i neighbors of an atom. DeePMD-kit will allocate memory according to `sel`. + +`sel` should not be too large or too small. If `sel` is too large, the computing will become much slower and cost more memory. If `sel` is not enough, the energy will be not conserved, making the accuracy of the model worse. + +To determine a proper `sel`, one can calculate the neighbor stat of the training data before training: +```sh +dp neighbor-stat -s data -r 6.0 -t O H +``` +where `data` is the directory of data, `6.0` is the cutoff radius, and `O` and `H` is the type map. The program will give the `max_nbor_size`. For example, `max_nbor_size` of the water example is `[38, 72]`, meaning an atom may have 38 O neighbors and 72 H neighbors in the training data. + +The `sel` should be set to a higher value than that of the training data, considering there may be some extreme geometries during MD simulations. As a result, we set to `[46, 92]` in the water example. diff --git a/source/tests/test_neighbor_stat.py b/source/tests/test_neighbor_stat.py new file mode 100644 index 0000000000..2c044c76a4 --- /dev/null +++ b/source/tests/test_neighbor_stat.py @@ -0,0 +1,47 @@ +import shutil +import numpy as np +import unittest +import dpdata + +from deepmd.entrypoints.neighbor_stat import neighbor_stat + +def gen_sys(nframes): + natoms = 1000 + data = {} + X, Y, Z = np.mgrid[0:9:10j, 0:9:10j, 0:9:10j] + positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T #+ 0.1 + data['coords'] = np.repeat(positions[np.newaxis, :, :], nframes, axis=0) + data['forces'] = np.random.random([nframes, natoms, 3]) + data['cells'] = np.array([10., 0., 0., 0., 10., 0., 0., 0., 10.]).reshape(1,3,3) + data['energies'] = np.random.random([nframes, 1]) + data['atom_names'] = ['TYPE'] + data['atom_numbs'] = [1000] + data['atom_types'] = np.repeat(0, 1000) + return data + + +class TestNeighborStat(unittest.TestCase): + def setUp(self): + data0 = gen_sys(1) + sys0 = dpdata.LabeledSystem() + sys0.data = data0 + sys0.to_deepmd_npy('system_0', set_size = 10) + + def tearDown(self): + shutil.rmtree('system_0') + + def test_neighbor_stat(self): + # set rcut to 0. will cause a core dumped + # TODO: check what is wrong + for rcut in (3., 6., 11.): + with self.subTest(): + rcut += 1e-3 # prevent numerical errors + min_nbor_dist, max_nbor_size = neighbor_stat(system="system_0", rcut=rcut, type_map=["TYPE"]) + upper = np.ceil(rcut)+1 + X, Y, Z = np.mgrid[-upper:upper, -upper:upper, -upper:upper] + positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T + # distance to (0,0,0) + distance = np.linalg.norm(positions, axis=1) + expected_neighbors = np.count_nonzero(np.logical_and(distance > 0, distance <= rcut)) + self.assertAlmostEqual(min_nbor_dist, 1.0, 6) + self.assertEqual(max_nbor_size, [expected_neighbors])