diff --git a/deepmd/common.py b/deepmd/common.py index 098bb0ed11..aa76c5dd08 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -16,7 +16,6 @@ Any, Dict, List, - Optional, Set, TypeVar, Union, @@ -39,8 +38,6 @@ ) __all__ = [ - "data_requirement", - "add_data_requirement", "select_idx_map", "make_default_mesh", "j_must_have", @@ -78,64 +75,6 @@ ) -# TODO: refactor data_requirement to make it not a global variable -# this is not a good way to do things. This is some global variable to which -# anyone can write and there is no good way to keep track of the changes -data_requirement = {} - - -def add_data_requirement( - key: str, - ndof: int, - atomic: bool = False, - must: bool = False, - high_prec: bool = False, - type_sel: Optional[bool] = None, - repeat: int = 1, - default: float = 0.0, - dtype: Optional[np.dtype] = None, - output_natoms_for_type_sel: bool = False, -): - """Specify data requirements for training. - - Parameters - ---------- - key : str - type of data stored in corresponding `*.npy` file e.g. `forces` or `energy` - ndof : int - number of the degrees of freedom, this is tied to `atomic` parameter e.g. forces - have `atomic=True` and `ndof=3` - atomic : bool, optional - specifies whwther the `ndof` keyworrd applies to per atom quantity or not, - by default False - must : bool, optional - specifi if the `*.npy` data file must exist, by default False - high_prec : bool, optional - if true load data to `np.float64` else `np.float32`, by default False - type_sel : bool, optional - select only certain type of atoms, by default None - repeat : int, optional - if specify repaeat data `repeat` times, by default 1 - default : float, optional, default=0. 
- default value of data - dtype : np.dtype, optional - the dtype of data, overwrites `high_prec` if provided - output_natoms_for_type_sel : bool, optional - if True and type_sel is True, the atomic dimension will be natoms instead of nsel - """ - data_requirement[key] = { - "ndof": ndof, - "atomic": atomic, - "must": must, - "high_prec": high_prec, - "type_sel": type_sel, - "repeat": repeat, - "default": default, - "dtype": dtype, - "output_natoms_for_type_sel": output_natoms_for_type_sel, - } - - def select_idx_map(atom_types: np.ndarray, select_types: np.ndarray) -> np.ndarray: """Build map of indices for element supplied element types from all atoms list. diff --git a/deepmd/tf/common.py b/deepmd/tf/common.py index 06be22a2ee..f4c6d5b0ce 100644 --- a/deepmd/tf/common.py +++ b/deepmd/tf/common.py @@ -23,8 +23,6 @@ from deepmd.common import ( VALID_ACTIVATION, VALID_PRECISION, - add_data_requirement, - data_requirement, expand_sys_str, get_np_precision, j_loader, @@ -47,8 +45,6 @@ __all__ = [ # from deepmd.common - "data_requirement", - "add_data_requirement", "select_idx_map", "make_default_mesh", "j_must_have", @@ -291,8 +287,6 @@ def wrapper(self, *args, **kwargs): def clear_session(): """Reset all state generated by DeePMD-kit.""" tf.reset_default_graph() - # TODO: remove this line when data_requirement is not a global variable - data_requirement.clear() _TF_VERSION = Version(TF_VERSION) if _TF_VERSION < Version("2.4.0"): tf.train.experimental.disable_mixed_precision_graph_rewrite() diff --git a/deepmd/tf/descriptor/descriptor.py b/deepmd/tf/descriptor/descriptor.py index fd6f6729e8..fabaf78c85 100644 --- a/deepmd/tf/descriptor/descriptor.py +++ b/deepmd/tf/descriptor/descriptor.py @@ -23,6 +23,9 @@ from deepmd.tf.utils import ( PluginVariant, ) +from deepmd.utils.data import ( + DataRequirementItem, +) from deepmd.utils.plugin import ( make_plugin_registry, ) @@ -512,3 +515,8 @@ def serialize(self, suffix: str = "") -> dict: Name suffix to identify this 
descriptor """ raise NotImplementedError(f"Not implemented in class {self.__name__}") + + @property + def input_requirement(self) -> List[DataRequirementItem]: + """Return data requirements needed for the model input.""" + return [] diff --git a/deepmd/tf/descriptor/se_a_ebd.py b/deepmd/tf/descriptor/se_a_ebd.py index f252bf114c..c558cd285e 100644 --- a/deepmd/tf/descriptor/se_a_ebd.py +++ b/deepmd/tf/descriptor/se_a_ebd.py @@ -6,9 +6,6 @@ import numpy as np -from deepmd.tf.common import ( - add_data_requirement, -) from deepmd.tf.env import ( GLOBAL_TF_FLOAT_PRECISION, op_module, @@ -18,6 +15,9 @@ embedding_net, one_layer, ) +from deepmd.utils.data import ( + DataRequirementItem, +) from .descriptor import ( Descriptor, @@ -110,8 +110,6 @@ def __init__( self.type_nlayer = type_nlayer self.type_one_side = type_one_side self.numb_aparam = numb_aparam - if self.numb_aparam > 0: - add_data_requirement("aparam", 3, atomic=True, must=True, high_prec=False) def build( self, @@ -600,3 +598,15 @@ def _ebd_filter( result = tf.reshape(result, [-1, outputs_size_2 * outputs_size]) return result, qmat + + @property + def input_requirement(self) -> List[DataRequirementItem]: + """Return data requirements needed for the model input.""" + data_requirement = super().input_requirement + if self.numb_aparam > 0: + data_requirement.append( + DataRequirementItem( + "aparam", 3, atomic=True, must=True, high_prec=False + ) + ) + return data_requirement diff --git a/deepmd/tf/descriptor/se_a_ef.py b/deepmd/tf/descriptor/se_a_ef.py index f1201d30fb..5a9020a6e6 100644 --- a/deepmd/tf/descriptor/se_a_ef.py +++ b/deepmd/tf/descriptor/se_a_ef.py @@ -7,9 +7,6 @@ import numpy as np -from deepmd.tf.common import ( - add_data_requirement, -) from deepmd.tf.env import ( GLOBAL_NP_FLOAT_PRECISION, GLOBAL_TF_FLOAT_PRECISION, @@ -20,6 +17,9 @@ from deepmd.tf.utils.sess import ( run_sess, ) +from deepmd.utils.data import ( + DataRequirementItem, +) from .descriptor import ( Descriptor, @@ -361,8 +361,6 
@@ def __init__( self.dstd = None self.davg = None - add_data_requirement("efield", 3, atomic=True, must=True, high_prec=False) - self.place_holders = {} avg_zero = np.zeros([self.ntypes, self.ndescrpt]).astype( GLOBAL_NP_FLOAT_PRECISION @@ -586,3 +584,12 @@ def _compute_dstats_sys_smth( sysr2.append(sumr2) sysa2.append(suma2) return sysr, sysr2, sysa, sysa2, sysn + + @property + def input_requirement(self) -> List[DataRequirementItem]: + """Return data requirements needed for the model input.""" + data_requirement = super().input_requirement + data_requirement.append( + DataRequirementItem("efield", 3, atomic=True, must=True, high_prec=False) + ) + return data_requirement diff --git a/deepmd/tf/entrypoints/train.py b/deepmd/tf/entrypoints/train.py index 2fef038f7d..dfe1f3db91 100755 --- a/deepmd/tf/entrypoints/train.py +++ b/deepmd/tf/entrypoints/train.py @@ -195,6 +195,7 @@ def _do_work(jdata: Dict[str, Any], run_opt: RunOptions, is_compress: bool = Fal train_data = get_data( jdata["training"]["training_data"], rcut, ipt_type_map, modifier ) + train_data.add_data_requirements(model.data_requirements) train_data.print_summary("training") if jdata["training"].get("validation_data", None) is not None: valid_data = get_data( @@ -203,6 +204,7 @@ def _do_work(jdata: Dict[str, Any], run_opt: RunOptions, is_compress: bool = Fal train_data.type_map, modifier, ) + valid_data.add_data_requirements(model.data_requirements) valid_data.print_summary("validation") else: if modifier is not None: diff --git a/deepmd/tf/fit/dos.py b/deepmd/tf/fit/dos.py index d967ce03d0..bc5180b60a 100644 --- a/deepmd/tf/fit/dos.py +++ b/deepmd/tf/fit/dos.py @@ -8,7 +8,6 @@ import numpy as np from deepmd.tf.common import ( - add_data_requirement, cast_precision, get_activation_func, get_precision, @@ -43,6 +42,9 @@ from deepmd.tf.utils.network import ( one_layer_rand_seed_shift, ) +from deepmd.utils.data import ( + DataRequirementItem, +) from deepmd.utils.out_stat import ( 
compute_stats_from_redu, ) @@ -151,18 +153,9 @@ def __init__( self.useBN = False self.bias_dos = np.zeros((self.ntypes, self.numb_dos), dtype=np.float64) - # data requirement - if self.numb_fparam > 0: - add_data_requirement( - "fparam", self.numb_fparam, atomic=False, must=True, high_prec=False - ) self.fparam_avg = None self.fparam_std = None self.fparam_inv_std = None - if self.numb_aparam > 0: - add_data_requirement( - "aparam", self.numb_aparam, atomic=True, must=True, high_prec=False - ) self.aparam_avg = None self.aparam_std = None self.aparam_inv_std = None @@ -738,3 +731,21 @@ def serialize(self, suffix: str = "") -> dict: }, } return data + + @property + def input_requirement(self) -> List[DataRequirementItem]: + """Return data requirements needed for the model input.""" + data_requirement = [] + if self.numb_fparam > 0: + data_requirement.append( + DataRequirementItem( + "fparam", self.numb_fparam, atomic=False, must=True, high_prec=False + ) + ) + if self.numb_aparam > 0: + data_requirement.append( + DataRequirementItem( + "aparam", self.numb_aparam, atomic=True, must=True, high_prec=False + ) + ) + return data_requirement diff --git a/deepmd/tf/fit/ener.py b/deepmd/tf/fit/ener.py index 873f7258db..a1eb916a1c 100644 --- a/deepmd/tf/fit/ener.py +++ b/deepmd/tf/fit/ener.py @@ -9,7 +9,6 @@ import numpy as np from deepmd.tf.common import ( - add_data_requirement, cast_precision, get_activation_func, get_precision, @@ -53,6 +52,9 @@ from deepmd.tf.utils.spin import ( Spin, ) +from deepmd.utils.data import ( + DataRequirementItem, +) from deepmd.utils.finetune import ( change_energy_bias_lower, ) @@ -218,18 +220,9 @@ def __init__( self.atom_ener.append(None) self.useBN = False self.bias_atom_e = np.zeros(self.ntypes, dtype=np.float64) - # data requirement - if self.numb_fparam > 0: - add_data_requirement( - "fparam", self.numb_fparam, atomic=False, must=True, high_prec=False - ) self.fparam_avg = None self.fparam_std = None self.fparam_inv_std = None - if 
self.numb_aparam > 0: - add_data_requirement( - "aparam", self.numb_aparam, atomic=True, must=True, high_prec=False - ) self.aparam_avg = None self.aparam_std = None self.aparam_inv_std = None @@ -939,3 +932,21 @@ def serialize(self, suffix: str = "") -> dict: }, } return data + + @property + def input_requirement(self) -> List[DataRequirementItem]: + """Return data requirements needed for the model input.""" + data_requirement = [] + if self.numb_fparam > 0: + data_requirement.append( + DataRequirementItem( + "fparam", self.numb_fparam, atomic=False, must=True, high_prec=False + ) + ) + if self.numb_aparam > 0: + data_requirement.append( + DataRequirementItem( + "aparam", self.numb_aparam, atomic=True, must=True, high_prec=False + ) + ) + return data_requirement diff --git a/deepmd/tf/fit/fitting.py b/deepmd/tf/fit/fitting.py index d2aebd8f97..9190261187 100644 --- a/deepmd/tf/fit/fitting.py +++ b/deepmd/tf/fit/fitting.py @@ -25,6 +25,9 @@ from deepmd.tf.utils import ( PluginVariant, ) +from deepmd.utils.data import ( + DataRequirementItem, +) from deepmd.utils.plugin import ( make_plugin_registry, ) @@ -252,3 +255,8 @@ def deserialize_network(cls, data: dict, suffix: str = "") -> dict: # prevent keyError fitting_net_variables[f"{layer_name}{key}{suffix}/idt"] = 0.0 return fitting_net_variables + + @property + def input_requirement(self) -> List[DataRequirementItem]: + """Return data requirements needed for the model input.""" + return [] diff --git a/deepmd/tf/loss/dos.py b/deepmd/tf/loss/dos.py index 763e75638f..385d2484a8 100644 --- a/deepmd/tf/loss/dos.py +++ b/deepmd/tf/loss/dos.py @@ -1,9 +1,10 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + List, +) + import numpy as np -from deepmd.tf.common import ( - add_data_requirement, -) from deepmd.tf.env import ( global_cvt_2_ener_float, global_cvt_2_tf_float, @@ -12,6 +13,9 @@ from deepmd.tf.utils.sess import ( run_sess, ) +from deepmd.utils.data import ( + DataRequirementItem, +) from 
.loss import ( Loss, @@ -56,13 +60,6 @@ def __init__( self.has_cdf = self.start_pref_cdf != 0.0 or self.limit_pref_cdf != 0.0 self.has_ados = self.start_pref_ados != 0.0 or self.limit_pref_ados != 0.0 self.has_acdf = self.start_pref_acdf != 0.0 or self.limit_pref_acdf != 0.0 - # data required - add_data_requirement( - "dos", self.numb_dos, atomic=False, must=True, high_prec=True - ) - add_data_requirement( - "atom_dos", self.numb_dos, atomic=True, must=False, high_prec=True - ) def build(self, learning_rate, natoms, model_dict, label_dict, suffix): dos = model_dict["dos"] @@ -212,3 +209,20 @@ def eval(self, sess, feed_dict, natoms): results["rmse_acdf"] = np.sqrt(error_acdf) return results + + @property + def label_requirement(self) -> List[DataRequirementItem]: + """Return data label requirements needed for this loss calculation.""" + data_requirements = [] + # data required + data_requirements.append( + DataRequirementItem( + "dos", self.numb_dos, atomic=False, must=True, high_prec=True + ) + ) + data_requirements.append( + DataRequirementItem( + "atom_dos", self.numb_dos, atomic=True, must=False, high_prec=True + ) + ) + return data_requirements diff --git a/deepmd/tf/loss/ener.py b/deepmd/tf/loss/ener.py index baa4aa3e02..7ecb185818 100644 --- a/deepmd/tf/loss/ener.py +++ b/deepmd/tf/loss/ener.py @@ -1,13 +1,11 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( + List, Optional, ) import numpy as np -from deepmd.tf.common import ( - add_data_requirement, -) from deepmd.tf.env import ( global_cvt_2_ener_float, global_cvt_2_tf_float, @@ -16,6 +14,9 @@ from deepmd.tf.utils.sess import ( run_sess, ) +from deepmd.utils.data import ( + DataRequirementItem, +) from .loss import ( Loss, @@ -111,32 +112,6 @@ def __init__( raise RuntimeError( "When generalized force loss is used, the dimension of generalized coordinates should be larger than 0" ) - # data required - add_data_requirement("energy", 1, atomic=False, must=False, high_prec=True) - 
add_data_requirement("force", 3, atomic=True, must=False, high_prec=False) - add_data_requirement("virial", 9, atomic=False, must=False, high_prec=False) - add_data_requirement("atom_ener", 1, atomic=True, must=False, high_prec=False) - add_data_requirement( - "atom_pref", 1, atomic=True, must=False, high_prec=False, repeat=3 - ) - # drdq: the partial derivative of atomic coordinates w.r.t. generalized coordinates - if self.has_gf > 0: - add_data_requirement( - "drdq", - self.numb_generalized_coord * 3, - atomic=True, - must=False, - high_prec=False, - ) - if self.enable_atom_ener_coeff: - add_data_requirement( - "atom_ener_coeff", - 1, - atomic=True, - must=False, - high_prec=False, - default=1.0, - ) def build(self, learning_rate, natoms, model_dict, label_dict, suffix): energy = model_dict["energy"] @@ -380,6 +355,54 @@ def eval(self, sess, feed_dict, natoms): results["rmse_gf"] = np.sqrt(error_gf) return results + @property + def label_requirement(self) -> List[DataRequirementItem]: + """Return data label requirements needed for this loss calculation.""" + data_requirements = [] + # data required + data_requirements.append( + DataRequirementItem("energy", 1, atomic=False, must=False, high_prec=True) + ) + data_requirements.append( + DataRequirementItem("force", 3, atomic=True, must=False, high_prec=False) + ) + data_requirements.append( + DataRequirementItem("virial", 9, atomic=False, must=False, high_prec=False) + ) + data_requirements.append( + DataRequirementItem( + "atom_ener", 1, atomic=True, must=False, high_prec=False + ) + ) + data_requirements.append( + DataRequirementItem( + "atom_pref", 1, atomic=True, must=False, high_prec=False, repeat=3 + ) + ) + # drdq: the partial derivative of atomic coordinates w.r.t. 
generalized coordinates + if self.has_gf > 0: + data_requirements.append( + DataRequirementItem( + "drdq", + self.numb_generalized_coord * 3, + atomic=True, + must=False, + high_prec=False, + ) + ) + if self.enable_atom_ener_coeff: + data_requirements.append( + DataRequirementItem( + "atom_ener_coeff", + 1, + atomic=True, + must=False, + high_prec=False, + default=1.0, + ) + ) + return data_requirements + class EnerSpinLoss(Loss): def __init__( @@ -422,23 +445,6 @@ def __init__( self.has_fm = self.start_pref_fm != 0.0 or self.limit_pref_fm != 0.0 self.has_v = self.start_pref_v != 0.0 or self.limit_pref_v != 0.0 self.has_ae = self.start_pref_ae != 0.0 or self.limit_pref_ae != 0.0 - # data required - add_data_requirement("energy", 1, atomic=False, must=False, high_prec=True) - add_data_requirement("force", 3, atomic=True, must=False, high_prec=False) - add_data_requirement("virial", 9, atomic=False, must=False, high_prec=False) - add_data_requirement("atom_ener", 1, atomic=True, must=False, high_prec=False) - add_data_requirement( - "atom_pref", 1, atomic=True, must=False, high_prec=False, repeat=3 - ) - if self.enable_atom_ener_coeff: - add_data_requirement( - "atom_ener_coeff", - 1, - atomic=True, - must=False, - high_prec=False, - default=1.0, - ) def build(self, learning_rate, natoms, model_dict, label_dict, suffix): energy_pred = model_dict["energy"] @@ -719,6 +725,43 @@ def print_on_training( return print_str + @property + def label_requirement(self) -> List[DataRequirementItem]: + """Return data label requirements needed for this loss calculation.""" + data_requirements = [] + # data required + data_requirements.append( + DataRequirementItem("energy", 1, atomic=False, must=False, high_prec=True) + ) + data_requirements.append( + DataRequirementItem("force", 3, atomic=True, must=False, high_prec=False) + ) + data_requirements.append( + DataRequirementItem("virial", 9, atomic=False, must=False, high_prec=False) + ) + data_requirements.append( + 
DataRequirementItem( + "atom_ener", 1, atomic=True, must=False, high_prec=False + ) + ) + data_requirements.append( + DataRequirementItem( + "atom_pref", 1, atomic=True, must=False, high_prec=False, repeat=3 + ) + ) + if self.enable_atom_ener_coeff: + data_requirements.append( + DataRequirementItem( + "atom_ener_coeff", + 1, + atomic=True, + must=False, + high_prec=False, + default=1.0, + ) + ) + return data_requirements + class EnerDipoleLoss(Loss): def __init__( @@ -734,11 +777,6 @@ def __init__( self.limit_pref_e = limit_pref_e self.start_pref_ed = start_pref_ed self.limit_pref_ed = limit_pref_ed - # data required - add_data_requirement("energy", 1, atomic=False, must=True, high_prec=True) - add_data_requirement( - "energy_dipole", 3, atomic=False, must=True, high_prec=False - ) def build(self, learning_rate, natoms, model_dict, label_dict, suffix): coord = model_dict["coord"] @@ -832,3 +870,18 @@ def eval(self, sess, feed_dict, natoms): "rmse_ed": np.sqrt(error_ed), } return results + + @property + def label_requirement(self) -> List[DataRequirementItem]: + """Return data label requirements needed for this loss calculation.""" + data_requirements = [] + # data required + data_requirements.append( + DataRequirementItem("energy", 1, atomic=False, must=True, high_prec=True) + ) + data_requirements.append( + DataRequirementItem( + "energy_dipole", 3, atomic=False, must=True, high_prec=False + ) + ) + return data_requirements diff --git a/deepmd/tf/loss/loss.py b/deepmd/tf/loss/loss.py index 327aea5230..ca90c2eb64 100644 --- a/deepmd/tf/loss/loss.py +++ b/deepmd/tf/loss/loss.py @@ -5,6 +5,7 @@ ) from typing import ( Dict, + List, Tuple, ) @@ -13,6 +14,9 @@ from deepmd.tf.env import ( tf, ) +from deepmd.utils.data import ( + DataRequirementItem, +) class Loss(metaclass=ABCMeta): @@ -91,3 +95,8 @@ def display_if_exist(loss: tf.Tensor, find_property: float) -> tf.Tensor: lambda: loss, lambda: tf.cast(np.nan, dtype=loss.dtype), ) + + @property + @abstractmethod + def 
label_requirement(self) -> List[DataRequirementItem]: + """Return data label requirements needed for this loss calculation.""" diff --git a/deepmd/tf/loss/tensor.py b/deepmd/tf/loss/tensor.py index 6a0eb30a44..d3a1b95369 100644 --- a/deepmd/tf/loss/tensor.py +++ b/deepmd/tf/loss/tensor.py @@ -1,9 +1,10 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + List, +) + import numpy as np -from deepmd.tf.common import ( - add_data_requirement, -) from deepmd.tf.env import ( global_cvt_2_tf_float, tf, @@ -11,6 +12,9 @@ from deepmd.tf.utils.sess import ( run_sess, ) +from deepmd.utils.data import ( + DataRequirementItem, +) from .loss import ( Loss, @@ -50,24 +54,6 @@ def __init__(self, jdata, **kwarg): "Can not assian zero weight both to `pref` and `pref_atomic`" ) - # data required - add_data_requirement( - "atomic_" + self.label_name, - self.tensor_size, - atomic=True, - must=False, - high_prec=False, - type_sel=self.type_sel, - ) - add_data_requirement( - self.label_name, - self.tensor_size, - atomic=False, - must=False, - high_prec=False, - type_sel=self.type_sel, - ) - def build(self, learning_rate, natoms, model_dict, label_dict, suffix): polar_hat = label_dict[self.label_name] atomic_polar_hat = label_dict["atom_" + self.label_name] @@ -154,3 +140,30 @@ def eval(self, sess, feed_dict, natoms): if self.global_weight > 0.0: results["rmse_gl"] = np.sqrt(error_gl) / atoms return results + + @property + def label_requirement(self) -> List[DataRequirementItem]: + """Return data label requirements needed for this loss calculation.""" + data_requirements = [] + # data required + data_requirements.append( + DataRequirementItem( + "atomic_" + self.label_name, + self.tensor_size, + atomic=True, + must=False, + high_prec=False, + type_sel=self.type_sel, + ) + ) + data_requirements.append( + DataRequirementItem( + self.label_name, + self.tensor_size, + atomic=False, + must=False, + high_prec=False, + type_sel=self.type_sel, + ) + ) + return data_requirements 
diff --git a/deepmd/tf/model/frozen.py b/deepmd/tf/model/frozen.py index 86676bfe0b..fa28b2cc58 100644 --- a/deepmd/tf/model/frozen.py +++ b/deepmd/tf/model/frozen.py @@ -6,6 +6,7 @@ Enum, ) from typing import ( + List, Optional, Union, ) @@ -34,6 +35,9 @@ get_tensor_by_name_from_graph, load_graph_def, ) +from deepmd.utils.data import ( + DataRequirementItem, +) from .model import ( Model, @@ -261,3 +265,23 @@ def serialize(self, suffix: str = "") -> dict: @classmethod def deserialize(cls, data: dict, suffix: str = ""): raise RuntimeError("Should not touch here.") + + @property + def input_requirement(self) -> List[DataRequirementItem]: + """Return data requirements needed for the model input.""" + data_requirement = [] + numb_fparam = self.model.get_dim_fparam() + numb_aparam = self.model.get_dim_aparam() + if numb_fparam > 0: + data_requirement.append( + DataRequirementItem( + "fparam", numb_fparam, atomic=False, must=True, high_prec=False + ) + ) + if numb_aparam > 0: + data_requirement.append( + DataRequirementItem( + "aparam", numb_aparam, atomic=True, must=True, high_prec=False + ) + ) + return data_requirement diff --git a/deepmd/tf/model/linear.py b/deepmd/tf/model/linear.py index ae1b0b5c78..26bc382569 100644 --- a/deepmd/tf/model/linear.py +++ b/deepmd/tf/model/linear.py @@ -1,9 +1,11 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import operator from enum import ( Enum, ) from functools import ( lru_cache, + reduce, ) from typing import ( List, @@ -22,6 +24,9 @@ from deepmd.tf.loss.loss import ( Loss, ) +from deepmd.utils.data import ( + DataRequirementItem, +) from .model import ( Model, @@ -145,6 +150,13 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict): ] return local_jdata_cpy + @property + def input_requirement(self) -> List[DataRequirementItem]: + """Return data requirements needed for the model input.""" + return reduce( + operator.iadd, [model.input_requirement for model in self.models], [] + ) + @Model.register("linear_ener") class 
LinearEnergyModel(LinearModel): diff --git a/deepmd/tf/model/model.py b/deepmd/tf/model/model.py index 5914dcf48d..af3ea3c4d5 100644 --- a/deepmd/tf/model/model.py +++ b/deepmd/tf/model/model.py @@ -62,6 +62,9 @@ from deepmd.tf.utils.type_embed import ( TypeEmbedNet, ) +from deepmd.utils.data import ( + DataRequirementItem, +) from deepmd.utils.plugin import ( make_plugin_registry, ) @@ -588,6 +591,11 @@ def serialize(self, suffix: str = "") -> dict: """ raise NotImplementedError(f"Not implemented in class {self.__name__}") + @property + @abstractmethod + def input_requirement(self) -> List[DataRequirementItem]: + """Return data requirements needed for the model input.""" + @Model.register("standard") class StandardModel(Model): @@ -842,3 +850,8 @@ def serialize(self, suffix: str = "") -> dict: "out_std": np.ones([1, ntypes, dict_fit["dim_out"]]), }, } + + @property + def input_requirement(self) -> List[DataRequirementItem]: + """Return data requirements needed for the model input.""" + return self.descrpt.input_requirement + self.fitting.input_requirement diff --git a/deepmd/tf/model/pairtab.py b/deepmd/tf/model/pairtab.py index 3cc1114f81..0979ae87de 100644 --- a/deepmd/tf/model/pairtab.py +++ b/deepmd/tf/model/pairtab.py @@ -32,6 +32,9 @@ from deepmd.tf.utils.update_sel import ( UpdateSel, ) +from deepmd.utils.data import ( + DataRequirementItem, +) @Model.register("pairtab") @@ -286,3 +289,8 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict) -> dict: """ local_jdata_cpy = local_jdata.copy() return UpdateSel().update_one_sel(global_jdata, local_jdata_cpy, True) + + @property + def input_requirement(self) -> List[DataRequirementItem]: + """Return data requirements needed for the model input.""" + return [] diff --git a/deepmd/tf/model/pairwise_dprc.py b/deepmd/tf/model/pairwise_dprc.py index 6b0e95e88b..3d61dfd339 100644 --- a/deepmd/tf/model/pairwise_dprc.py +++ b/deepmd/tf/model/pairwise_dprc.py @@ -7,7 +7,6 @@ ) from deepmd.tf.common import ( - 
add_data_requirement, make_default_mesh, ) from deepmd.tf.env import ( @@ -34,6 +33,9 @@ from deepmd.tf.utils.update_sel import ( UpdateSel, ) +from deepmd.utils.data import ( + DataRequirementItem, +) @Model.register("pairwise_dprc") @@ -103,7 +105,6 @@ def __init__( type_embedding=self.typeebd, compress=compress, ) - add_data_requirement("aparam", 1, atomic=True, must=True, high_prec=False) self.rcut = max(self.qm_model.get_rcut(), self.qmmm_model.get_rcut()) def build( @@ -424,6 +425,15 @@ def update_sel(cls, global_jdata: dict, local_jdata: dict): UpdateSel().get_min_nbor_dist(global_jdata, 6.0) return local_jdata + @property + def input_requirement(self) -> List[DataRequirementItem]: + """Return data requirements needed for the model input.""" + data_requirement = [] + data_requirement.append( + DataRequirementItem("aparam", 1, atomic=True, must=True, high_prec=False) + ) + return data_requirement + def gather_placeholder( params: tf.Tensor, indices: tf.Tensor, placeholder: float = 0.0, **kwargs diff --git a/deepmd/tf/train/trainer.py b/deepmd/tf/train/trainer.py index 855b2ee722..dda79afea6 100644 --- a/deepmd/tf/train/trainer.py +++ b/deepmd/tf/train/trainer.py @@ -28,7 +28,6 @@ format_training_message_per_task, ) from deepmd.tf.common import ( - data_requirement, get_precision, j_must_have, ) @@ -63,6 +62,9 @@ from deepmd.tf.utils.sess import ( run_sess, ) +from deepmd.utils.data import ( + DataRequirementItem, +) log = logging.getLogger(__name__) @@ -270,7 +272,7 @@ def _build_network(self, data, suffix=""): self.place_holders[kk] = tf.placeholder( GLOBAL_TF_FLOAT_PRECISION, [None], "t_" + kk ) - self._get_place_holders(data_requirement) + self._get_place_holders({rr.key: rr.dict for rr in self.data_requirements}) else: self._get_place_holders(data.get_data_dict()) @@ -860,6 +862,10 @@ def _change_energy_bias( bias_adjust_mode=bias_adjust_mode, ) + @property + def data_requirements(self) -> List[DataRequirementItem]: + return self.model.input_requirement + 
self.loss.label_requirement + class DatasetLoader: """Generate an OP that loads the training data from the given DeepmdDataSystem. diff --git a/deepmd/utils/data.py b/deepmd/utils/data.py index cd0e414b5f..91782d898f 100644 --- a/deepmd/utils/data.py +++ b/deepmd/utils/data.py @@ -787,3 +787,11 @@ def __getitem__(self, key: str): if key not in self.dict: raise KeyError(key) return self.dict[key] + + def __eq__(self, __value: object) -> bool: + if not isinstance(__value, DataRequirementItem): + return False + return self.dict == __value.dict + + def __repr__(self) -> str: + return f"DataRequirementItem({self.dict})" diff --git a/deepmd/utils/data_system.py b/deepmd/utils/data_system.py index 693845f8d0..b657781d3c 100644 --- a/deepmd/utils/data_system.py +++ b/deepmd/utils/data_system.py @@ -17,7 +17,6 @@ import deepmd.utils.random as dp_random from deepmd.common import ( - data_requirement, expand_sys_str, j_must_have, make_default_mesh, @@ -26,6 +25,7 @@ GLOBAL_NP_FLOAT_PRECISION, ) from deepmd.utils.data import ( + DataRequirementItem, DeepmdData, ) from deepmd.utils.out_stat import ( @@ -267,7 +267,7 @@ def compute_energy_shift(self, rcond=None, key="energy"): ) return energy_shift.ravel() - def add_dict(self, adict: dict) -> None: + def add_dict(self, adict: Dict[str, Dict[str, Any]]) -> None: """Add items to the data system by a `dict`. `adict` should have items like .. code-block:: python. 
@@ -299,6 +299,12 @@ def add_dict(self, adict: dict) -> None: ), ) + def add_data_requirements( + self, data_requirements: List[DataRequirementItem] + ) -> None: + """Add items to the data system by a list of `DataRequirementItem`.""" + self.add_dict({rr.key: rr.dict for rr in data_requirements}) + def add( self, key: str, @@ -807,6 +813,5 @@ def get_data( sys_probs=sys_probs, auto_prob_style=auto_prob, ) - data.add_dict(data_requirement) return data diff --git a/source/tests/pt/model/test_model.py b/source/tests/pt/model/test_model.py index 6eb460e808..8fdbdaf413 100644 --- a/source/tests/pt/model/test_model.py +++ b/source/tests/pt/model/test_model.py @@ -31,7 +31,6 @@ ) from deepmd.pt.utils.learning_rate import LearningRateExp as MyLRExp from deepmd.tf.common import ( - data_requirement, expand_sys_str, ) from deepmd.tf.descriptor import DescrptSeA as DescrptSeA_tf @@ -114,6 +113,8 @@ def get_intermediate_state(self, num_steps=1): dp_loss = self._get_dp_loss() dp_lr = self._get_dp_lr() dp_ds = self._get_dp_dataset() + dp_ds.add_data_requirements(dp_model.input_requirement) + dp_ds.add_data_requirements(dp_loss.label_requirement) dp_model.data_stat(dp_ds) # Build graph @@ -188,7 +189,6 @@ def _get_dp_dataset(self): type_map=self.type_map, trn_all_set=True, ) - data.add_dict(data_requirement) return data def _get_dp_model(self): diff --git a/source/tests/tf/test_data_modifier.py b/source/tests/tf/test_data_modifier.py index eae3155cb8..e618b8716d 100644 --- a/source/tests/tf/test_data_modifier.py +++ b/source/tests/tf/test_data_modifier.py @@ -4,7 +4,6 @@ import numpy as np from deepmd.tf.common import ( - data_requirement, j_must_have, ) from deepmd.tf.env import ( @@ -71,7 +70,7 @@ def _setUp(self): data = DeepmdDataSystem( systems, batch_size, test_size, rcut, set_prefix=set_pfx ) - data.add_dict(data_requirement) + data.add_data_requirements(model.data_requirements) # clear the default graph tf.reset_default_graph() diff --git 
a/source/tests/tf/test_data_modifier_shuffle.py b/source/tests/tf/test_data_modifier_shuffle.py index 1fdd8b4967..97e11685e9 100644 --- a/source/tests/tf/test_data_modifier_shuffle.py +++ b/source/tests/tf/test_data_modifier_shuffle.py @@ -5,7 +5,6 @@ import numpy as np from deepmd.tf.common import ( - data_requirement, j_must_have, ) from deepmd.tf.env import ( @@ -72,7 +71,7 @@ def _setUp(self): data = DeepmdDataSystem( systems, batch_size, test_size, rcut, set_prefix=set_pfx ) - data.add_dict(data_requirement) + data.add_data_requirements(model.data_requirements) # clear the default graph tf.reset_default_graph() diff --git a/source/tests/tf/test_data_requirement.py b/source/tests/tf/test_data_requirement.py deleted file mode 100644 index e825bc3f92..0000000000 --- a/source/tests/tf/test_data_requirement.py +++ /dev/null @@ -1,19 +0,0 @@ -# SPDX-License-Identifier: LGPL-3.0-or-later -import unittest - -from deepmd.tf.common import ( - add_data_requirement, - data_requirement, -) - - -class TestDataRequirement(unittest.TestCase): - def test_add(self): - add_data_requirement("test", 3) - self.assertEqual(data_requirement["test"]["ndof"], 3) - self.assertEqual(data_requirement["test"]["atomic"], False) - self.assertEqual(data_requirement["test"]["must"], False) - self.assertEqual(data_requirement["test"]["high_prec"], False) - self.assertEqual(data_requirement["test"]["repeat"], 1) - self.assertEqual(data_requirement["test"]["default"], 0.0) - self.assertEqual(data_requirement["test"]["output_natoms_for_type_sel"], False) diff --git a/source/tests/tf/test_loss_gf.py b/source/tests/tf/test_loss_gf.py index 78e5404e03..116b98b649 100644 --- a/source/tests/tf/test_loss_gf.py +++ b/source/tests/tf/test_loss_gf.py @@ -5,6 +5,9 @@ from deepmd.tf.loss import ( EnerStdLoss, ) +from deepmd.utils.data import ( + DataRequirementItem, +) class TestLossGf(tf.test.TestCase): @@ -26,6 +29,62 @@ def setUp(self): numb_generalized_coord=2, ) + def test_label_requirements(self): + 
"""Test label_requirements are expected.""" + self.assertCountEqual( + self.loss.label_requirement, + [ + DataRequirementItem( + "energy", + 1, + atomic=False, + must=False, + high_prec=True, + repeat=1, + ), + DataRequirementItem( + "force", + 3, + atomic=True, + must=False, + high_prec=False, + repeat=1, + ), + DataRequirementItem( + "virial", + 9, + atomic=False, + must=False, + high_prec=False, + repeat=1, + ), + DataRequirementItem( + "atom_pref", + 1, + atomic=True, + must=False, + high_prec=False, + repeat=3, + ), + DataRequirementItem( + "atom_ener", + 1, + atomic=True, + must=False, + high_prec=False, + repeat=1, + ), + DataRequirementItem( + "drdq", + 2 * 3, + atomic=True, + must=False, + high_prec=False, + repeat=1, + ), + ], + ) + def test_build_loss(self): natoms = tf.constant([6, 6]) model_dict = { diff --git a/source/tests/tf/test_model_se_a.py b/source/tests/tf/test_model_se_a.py index ad2c1b7ced..039ead3a09 100644 --- a/source/tests/tf/test_model_se_a.py +++ b/source/tests/tf/test_model_se_a.py @@ -265,6 +265,9 @@ def test_model(self): np.testing.assert_almost_equal(f, reff, places) np.testing.assert_almost_equal(v, refv, places) + # test input requirement for the model + self.assertCountEqual(model.input_requirement, []) + def test_model_atom_ener_type_embedding(self): """Test atom ener with type embedding.""" jfile = "water_se_a.json" diff --git a/source/tests/tf/test_model_se_a_aparam.py b/source/tests/tf/test_model_se_a_aparam.py index e44e1c8c9f..2485d1e674 100644 --- a/source/tests/tf/test_model_se_a_aparam.py +++ b/source/tests/tf/test_model_se_a_aparam.py @@ -16,6 +16,9 @@ from deepmd.tf.model import ( EnerModel, ) +from deepmd.utils.data import ( + DataRequirementItem, +) from .common import ( DataSystem, @@ -165,3 +168,9 @@ def test_model(self): np.testing.assert_almost_equal(e, refe, places) np.testing.assert_almost_equal(f, reff, places) np.testing.assert_almost_equal(v, refv, places) + + # test input requirement for the model + 
self.assertCountEqual( + model.input_requirement, + [DataRequirementItem("aparam", 2, atomic=True, must=True, high_prec=False)], + ) diff --git a/source/tests/tf/test_model_se_a_fparam.py b/source/tests/tf/test_model_se_a_fparam.py index ce31f94488..efcd3f44c8 100644 --- a/source/tests/tf/test_model_se_a_fparam.py +++ b/source/tests/tf/test_model_se_a_fparam.py @@ -16,6 +16,9 @@ from deepmd.tf.model import ( EnerModel, ) +from deepmd.utils.data import ( + DataRequirementItem, +) from .common import ( DataSystem, @@ -166,3 +169,13 @@ def test_model(self): np.testing.assert_almost_equal(e, refe, places) np.testing.assert_almost_equal(f, reff, places) np.testing.assert_almost_equal(v, refv, places) + + # test input requirement for the model + self.assertCountEqual( + model.input_requirement, + [ + DataRequirementItem( + "fparam", 2, atomic=False, must=True, high_prec=False + ) + ], + ) diff --git a/source/tests/tf/test_pairwise_dprc.py b/source/tests/tf/test_pairwise_dprc.py index 38b8d8b775..7a0f28b092 100644 --- a/source/tests/tf/test_pairwise_dprc.py +++ b/source/tests/tf/test_pairwise_dprc.py @@ -34,6 +34,9 @@ from deepmd.tf.utils.sess import ( run_sess, ) +from deepmd.utils.data import ( + DataRequirementItem, +) from .common import ( run_dp, @@ -523,6 +526,12 @@ def test_model_ener(self): self.assertAllClose(e[0], 0.189075, 1e-6) self.assertAllClose(f[0, 0], 0.060047, 1e-6) + # test input requirement for the model + self.assertCountEqual( + model.input_requirement, + [DataRequirementItem("aparam", 1, atomic=True, must=True, high_prec=False)], + ) + def test_nloc(self): jfile = tests_path / "pairwise_dprc.json" jdata = j_loader(jfile)