Skip to content

Commit 3f0087b

Browse files
Copilotnjzjzpre-commit-ci[bot]
authored
feat: Add eval-desc CLI command for descriptor evaluation with 3D output format (#4903)
This PR implements a new command-line interface for evaluating descriptors using trained DeePMD models, addressing the feature request for making the `eval_descriptor` function available from the command line. ## Overview The new `dp eval-desc` command allows users to generate descriptor matrices from their models using a simple CLI interface, similar to the existing `dp test` command. ## Usage ```bash # Basic usage dp eval-desc -m model.pb -s /path/to/system # With custom output directory dp eval-desc -m model.pth -s /path/to/system -o my_descriptors # Using datafile with multiple systems dp eval-desc -m model.pb -f systems_list.txt -o desc_output # For multi-task models dp eval-desc -m model.pth -s system_dir --head task_branch ``` ## Output Format Descriptors are saved as NumPy `.npy` files in 3D format (nframes, natoms, ndesc) preserving the natural structure of the data with separate dimensions for frames, atoms, and descriptor components. This format maintains the original data organization and is suitable for various analysis workflows. ## Implementation Details The implementation follows the same architectural pattern as the existing `dp test` command: - **CLI Parser**: Added argument parser in `deepmd/main.py` with options for model (`-m`), system (`-s`), datafile (`-f`), output (`-o`), and model branch (`--head`) - **Command Routing**: Integrated into the entrypoints system in `deepmd/entrypoints/main.py` - **Core Functionality**: New `eval_desc.py` module that uses `DeepEval.eval_descriptor()` to generate descriptors and saves them as `.npy` files in their natural 3D format - **Documentation**: Updated user guide and API documentation with output format details - **Testing**: Comprehensive tests following the pattern of existing `dp test` functionality Fixes #4503. 
<!-- START COPILOT CODING AGENT TIPS --> --- ✨ Let Copilot coding agent [set things up for you](https://github.com/deepmodeling/deepmd-kit/issues/new?title=✨+Set+up+Copilot+instructions&body=Configure%20instructions%20for%20this%20repository%20as%20documented%20in%20%5BBest%20practices%20for%20Copilot%20coding%20agent%20in%20your%20repository%5D%28https://gh.io/copilot-coding-agent-tips%29%2E%0A%0A%3COnboard%20this%20repo%3E&assignees=copilot) — coding agent works faster and does higher quality work when set up for your repo. --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 73755b3 commit 3f0087b

File tree

6 files changed

+344
-0
lines changed

6 files changed

+344
-0
lines changed

deepmd/entrypoints/eval_desc.py

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
# SPDX-License-Identifier: LGPL-3.0-or-later
2+
"""Evaluate descriptors using trained DeePMD model."""
3+
4+
import logging
5+
import os
6+
from pathlib import (
7+
Path,
8+
)
9+
from typing import (
10+
Optional,
11+
)
12+
13+
import numpy as np
14+
15+
from deepmd.common import (
16+
expand_sys_str,
17+
)
18+
from deepmd.infer.deep_eval import (
19+
DeepEval,
20+
)
21+
from deepmd.utils.data import (
22+
DeepmdData,
23+
)
24+
25+
__all__ = ["eval_desc"]
26+
27+
log = logging.getLogger(__name__)
28+
29+
30+
def eval_desc(
    *,
    model: str,
    system: str,
    datafile: Optional[str],
    output: str = "desc",
    head: Optional[str] = None,
    **kwargs,
) -> None:
    """Evaluate descriptors for given systems and save them as ``.npy`` files.

    Parameters
    ----------
    model : str
        path where model is stored
    system : str
        system directory; only used when ``datafile`` is None
    datafile : Optional[str]
        the path to the list of systems to process, one system path per
        line; blank lines are ignored. When given, ``system`` is not used.
    output : str
        output directory for descriptor files; created if it does not exist
    head : Optional[str], optional
        (Supported backend: PyTorch) Task head if in multi-task mode.
    **kwargs
        additional arguments (accepted and ignored so that the whole CLI
        argument dictionary can be forwarded)

    Notes
    -----
    Descriptors are saved as 3D numpy arrays with shape (nframes, natoms, ndesc)
    where each frame contains the descriptors for all atoms.

    One file named ``<system_name>.npy`` is written per system, where
    ``system_name`` is the last component of the system path. Systems that
    share this name are written to the same file; a warning is logged when
    that happens.

    Raises
    ------
    RuntimeError
        if no valid system was found
    """
    if datafile is not None:
        with open(datafile) as datalist:
            # Drop blank/whitespace-only lines (e.g. a trailing empty line) so
            # they are not treated as an empty system path later on.
            all_sys = [
                line.strip()
                for line in datalist.read().splitlines()
                if line.strip()
            ]
    else:
        all_sys = expand_sys_str(system)

    if len(all_sys) == 0:
        raise RuntimeError("Did not find valid system")

    # init model; these model properties do not depend on the system, so they
    # are queried once instead of once per system
    dp = DeepEval(model, head=head)
    tmap = dp.get_type_map()
    need_fparam = dp.get_dim_fparam() > 0
    need_aparam = dp.get_dim_aparam() > 0

    # create output directory
    output_dir = Path(output)
    output_dir.mkdir(parents=True, exist_ok=True)

    # output file stem -> system path that produced it, to detect collisions
    written_from = {}

    for system_path in all_sys:
        log.info("# -------output of dp eval_desc------- ")
        log.info(f"# processing system : {system_path}")

        # create data class
        data = DeepmdData(
            system_path,
            set_prefix="set",
            shuffle_test=False,
            type_map=tmap,
            sort_atoms=False,
        )

        # get test data
        test_data = data.get_test()
        nframes = test_data["box"].shape[0]

        # prepare input data
        coord = test_data["coord"].reshape([nframes, -1])
        # non-periodic systems are evaluated without a cell
        box = test_data["box"] if data.pbc else None
        if data.mixed_type:
            # per-frame atom types
            atype = test_data["type"].reshape([nframes, -1])
        else:
            # types are identical across frames; pass the first frame only
            atype = test_data["type"][0]

        # handle optional parameters
        # NOTE(review): this function never calls data.add("fparam"/"aparam",
        # ...), so whether these keys can appear in test_data depends on what
        # DeepmdData loads by default -- confirm against `dp test`.
        fparam = None
        if need_fparam and "fparam" in test_data:
            fparam = test_data["fparam"]

        aparam = None
        if need_aparam and "aparam" in test_data:
            aparam = test_data["aparam"]

        # evaluate descriptors
        log.info(f"# evaluating descriptors for {nframes} frames")
        descriptors = dp.eval_descriptor(
            coord,
            box,
            atype,
            fparam=fparam,
            aparam=aparam,
        )

        # descriptors are kept in 3D format (nframes, natoms, ndesc)

        # save descriptors; abspath normalises "." and trailing separators so
        # the file is named after the actual directory rather than "" or "."
        system_name = os.path.basename(os.path.abspath(system_path))
        desc_file = output_dir / f"{system_name}.npy"
        if system_name in written_from:
            log.warning(
                f"# {desc_file} was already written for system "
                f"{written_from[system_name]}; overwriting it with {system_path}"
            )
        written_from[system_name] = system_path
        np.save(desc_file, descriptors)

        log.info(f"# descriptors saved to {desc_file}")
        log.info(f"# descriptor shape: {descriptors.shape}")
        log.info("# ----------------------------------- ")

    log.info("# eval_desc completed successfully")

deepmd/entrypoints/main.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@
1818
from deepmd.entrypoints.doc import (
1919
doc_train_input,
2020
)
21+
from deepmd.entrypoints.eval_desc import (
22+
eval_desc,
23+
)
2124
from deepmd.entrypoints.gui import (
2225
start_dpgui,
2326
)
@@ -65,6 +68,14 @@ def main(args: argparse.Namespace) -> None:
6568
strict_prefer=False,
6669
)
6770
test(**dict_args)
71+
elif args.command == "eval-desc":
72+
dict_args["model"] = format_model_suffix(
73+
dict_args["model"],
74+
feature=Backend.Feature.DEEP_EVAL,
75+
preferred_backend=args.backend,
76+
strict_prefer=False,
77+
)
78+
eval_desc(**dict_args)
6879
elif args.command == "doc-train-input":
6980
doc_train_input(**dict_args)
7081
elif args.command == "model-devi":

deepmd/main.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,56 @@ def main_parser() -> argparse.ArgumentParser:
416416
help="(Supported backend: PyTorch) Task head (alias: model branch) to test if in multi-task mode.",
417417
)
418418

419+
# * eval_desc script ***************************************************************
420+
parser_eval_desc = subparsers.add_parser(
421+
"eval-desc",
422+
parents=[parser_log],
423+
help="evaluate descriptors using the model",
424+
formatter_class=RawTextArgumentDefaultsHelpFormatter,
425+
epilog=textwrap.dedent(
426+
"""\
427+
examples:
428+
dp eval-desc -m graph.pb -s /path/to/system -o desc
429+
"""
430+
),
431+
)
432+
parser_eval_desc.add_argument(
433+
"-m",
434+
"--model",
435+
default="frozen_model",
436+
type=str,
437+
help="Frozen model file (prefix) to import. TensorFlow backend: suffix is .pb; PyTorch backend: suffix is .pth.",
438+
)
439+
parser_eval_desc_subgroup = parser_eval_desc.add_mutually_exclusive_group()
440+
parser_eval_desc_subgroup.add_argument(
441+
"-s",
442+
"--system",
443+
default=".",
444+
type=str,
445+
help="The system dir. Recursively detect systems in this directory",
446+
)
447+
parser_eval_desc_subgroup.add_argument(
448+
"-f",
449+
"--datafile",
450+
default=None,
451+
type=str,
452+
help="The path to the datafile, each line of which is a path to one data system.",
453+
)
454+
parser_eval_desc.add_argument(
455+
"-o",
456+
"--output",
457+
default="desc",
458+
type=str,
459+
help="Output directory for descriptor files. Descriptors will be saved as desc/(system_name).npy",
460+
)
461+
parser_eval_desc.add_argument(
462+
"--head",
463+
"--model-branch",
464+
default=None,
465+
type=str,
466+
help="(Supported backend: PyTorch) Task head (alias: model branch) to use if in multi-task mode.",
467+
)
468+
419469
# * compress model *****************************************************************
420470
# Compress a model, which including tabulating the embedding-net.
421471
# The table is composed of fifth-order polynomial coefficients and is assembled
@@ -909,6 +959,7 @@ def main(args: Optional[list[str]] = None) -> None:
909959

910960
if args.command in (
911961
"test",
962+
"eval-desc",
912963
"doc-train-input",
913964
"model-devi",
914965
"neighbor-stat",

doc/inference/python.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,21 @@ e, f, v = dp.eval(coord, cell, atype)
1919

2020
where `e`, `f` and `v` are predicted energy, force and virial of the system, respectively.
2121

22+
One can also evaluate the descriptors of the model:
23+
24+
```python
25+
from deepmd.infer import DeepPot
26+
import numpy as np
27+
28+
dp = DeepPot("graph.pb")
29+
coord = np.array([[1, 0, 0], [0, 0, 1.5], [1, 0, 3]]).reshape([1, -1])
30+
cell = np.diag(10 * np.ones(3)).reshape([1, -1])
31+
atype = [1, 0, 1]
32+
descriptors = dp.eval_descriptor(coord, cell, atype)
33+
```
34+
35+
where `descriptors` is the descriptor matrix of the system, a 3D array with shape (nframes, natoms, ndesc). The same evaluation is also available from the command line via `dp eval-desc`, as described in the [test documentation](../test/test.md).
36+
2237
Furthermore, one can use the python interface to calculate model deviation.
2338

2439
```python

doc/test/test.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,25 @@ An explanation will be provided
1717
```{program-output} dp test -h
1818
1919
```
20+
21+
## Evaluate descriptors
22+
23+
The descriptors of a model can be evaluated and saved using `dp eval-desc`. A typical usage of `dp eval-desc` is
24+
25+
```bash
26+
dp eval-desc -m graph.pb -s /path/to/system -o desc
27+
```
28+
29+
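where `-m` gives the model file, `-s` the path to the system directory (or, alternatively, `-f` a datafile containing one system path per line), and `-o` the output directory where descriptor files will be saved. The descriptors of each system are saved as `(system_name).npy` inside the output directory, i.e. `desc/(system_name).npy` in the example above, where `system_name` is the last component of the system path. Systems that share the same directory name are therefore written to the same file, with later ones overwriting earlier ones. Each descriptor file contains a 3D array with shape (nframes, natoms, ndesc).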
30+
31+
Several other command line options can be passed to `dp eval-desc`, which can be checked with
32+
33+
```bash
34+
$ dp eval-desc --help
35+
```
36+
37+
An explanation of each option is provided below:
38+
39+
```{program-output} dp eval-desc -h
40+
41+
```

source/tests/pt/test_eval_desc.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
# SPDX-License-Identifier: LGPL-3.0-or-later
2+
import json
3+
import os
4+
import shutil
5+
import tempfile
6+
import unittest
7+
from copy import (
8+
deepcopy,
9+
)
10+
from pathlib import (
11+
Path,
12+
)
13+
14+
import numpy as np
15+
import torch
16+
17+
from deepmd.entrypoints.eval_desc import (
18+
eval_desc,
19+
)
20+
from deepmd.pt.entrypoints.main import (
21+
get_trainer,
22+
)
23+
24+
from .model.test_permutation import (
25+
model_se_e2_a,
26+
)
27+
28+
29+
class DPEvalDesc:
    """Shared ``eval_desc`` checks, mixed into concrete ``unittest.TestCase`` classes.

    The concrete class is expected to set ``self.config``, ``self.output_dir``
    and ``self.input_json`` in its ``setUp``.
    """

    def test_dp_eval_desc_1_frame(self) -> None:
        """Script the model, run ``eval_desc`` on one system, check the saved array."""
        trainer = get_trainer(deepcopy(self.config))
        with torch.device("cpu"):
            input_dict, _, _ = trainer.get_data(is_train=False)
        has_spin = getattr(trainer.model, "has_spin", False)
        if callable(has_spin):
            has_spin = has_spin()
        if not has_spin:
            input_dict.pop("spin", None)
        input_dict["do_atomic_virial"] = True
        # eager forward pass kept as a smoke check; its output is not inspected
        trainer.model(**input_dict)
        model = torch.jit.script(trainer.model)

        system_dir = self.config["training"]["validation_data"]["systems"][0]

        # Reserve a unique path and close the handle right away: the open
        # handle was previously leaked, and an open NamedTemporaryFile cannot
        # be reopened by name on every platform.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pth") as tmp_model:
            model_path = tmp_model.name
        try:
            torch.jit.save(model, model_path)

            # Test eval_desc
            eval_desc(
                model=model_path,
                system=system_dir,
                datafile=None,
                output=self.output_dir,
            )
        finally:
            # remove the temporary model even when saving or evaluation fails
            os.unlink(model_path)

        # Check that descriptor file was created
        system_name = os.path.basename(system_dir.rstrip("/"))
        desc_file = os.path.join(self.output_dir, f"{system_name}.npy")
        self.assertTrue(os.path.exists(desc_file))

        # Load and validate descriptor
        descriptors = np.load(desc_file)
        self.assertIsInstance(descriptors, np.ndarray)
        # Descriptors should be 3D: (nframes, natoms, ndesc)
        self.assertEqual(len(descriptors.shape), 3)  # Should be 3D array
        self.assertGreater(descriptors.shape[0], 0)  # Should have frames
        self.assertGreater(descriptors.shape[1], 0)  # Should have atoms
        self.assertGreater(descriptors.shape[2], 0)  # Should have descriptor dimensions

    def tearDown(self) -> None:
        """Remove files created in the working directory and the output directory."""
        for f in os.listdir("."):
            if f.startswith("model") and f.endswith(".pt"):
                os.remove(f)
            if f in ["lcurve.out", self.input_json]:
                os.remove(f)
            if f in ["stat_files"]:
                shutil.rmtree(f)
        # Clean up output directory
        if hasattr(self, "output_dir") and os.path.exists(self.output_dir):
            shutil.rmtree(self.output_dir)
81+
82+
83+
class TestDPEvalDescSeA(DPEvalDesc, unittest.TestCase):
    """Run the shared eval-desc checks with the ``model_se_e2_a`` model definition."""

    def setUp(self) -> None:
        """Build a one-step training config on the ``single`` water system and dump it."""
        self.output_dir = "test_eval_desc_output"

        water_dir = Path(__file__).parent / "water"
        with open(str(water_dir / "se_atten.json")) as f:
            self.config = json.load(f)

        # train for a single step only; the test just needs a model instance
        training_cfg = self.config["training"]
        training_cfg["numb_steps"] = 1
        training_cfg["save_freq"] = 1

        # the same system list serves as training and validation data
        systems = [str(water_dir / "data" / "single")]
        training_cfg["training_data"]["systems"] = systems
        training_cfg["validation_data"]["systems"] = systems

        # replace the model section loaded from the JSON file
        self.config["model"] = deepcopy(model_se_e2_a)

        self.input_json = "test_eval_desc.json"
        with open(self.input_json, "w") as fp:
            json.dump(self.config, fp, indent=4)
100+
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)