From 2d5add198dec9a6d7cc961bda1cb288c3ac9b0ee Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Thu, 11 Jul 2024 09:10:53 +0800
Subject: [PATCH] breaking(pt/tf/dp): disable bias in type embedding (#3958)
This PR addresses an issue observed when training DPA2 on complex
datasets such as `mptraj`. Specifically, the **learning curves of
energy** from the **2024Q1-based branch** and the **devel branch** differ
significantly at the very beginning of training when `tebd_dim` is set
to 256 (and thus descriptor `dim_out` = 128 + 256). The issue is
illustrated in the first image attached to the PR.
After removing the bias from the type embedding, which skews the
standard deviation of the descriptor when `tebd_dim` is very large, the
learning curve improves significantly (second attached image); a minimal
sketch of this effect appears below.
Notably, this behavior is not prominent when `tebd_dim` is relatively
small compared to the descriptor dimension itself, for example DPA2 with
`tebd_dim` = 8, or DPA1.
The same issue exists in the econf type embedding; it will be fixed in a
separate PR.
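A minimal numpy sketch of the effect (not the deepmd-kit implementation;
it assumes a single linear type-embedding layer with the usual
1/sqrt(fan) weight scaling and an O(1) random bias, mirroring the TF
initializers touched below):

```python
# Toy illustration, NOT deepmd-kit code: an O(1) random bias gives every
# type-embedding channel an O(1) offset, so at tebd_dim = 256 the tebd part
# dominates the statistics of the concatenated descriptor; without the bias
# the tebd std is set by the ~1/sqrt(fan) weights and stays small.
import numpy as np

rng = np.random.default_rng(0)
ntypes, dim_out = 4, 128
one_hot = np.eye(ntypes)

for tebd_dim in (8, 256):
    w = rng.normal(0.0, 1.0 / np.sqrt(ntypes + tebd_dim), (ntypes, tebd_dim))
    b = rng.normal(0.0, 1.0, tebd_dim)  # bias initializer has stddev ~ 1
    desc_part = rng.normal(0.0, 1.0, (ntypes, dim_out))  # stand-in for dim_out part
    for label, tebd in (("with bias", one_hot @ w + b), ("no bias", one_hot @ w)):
        desc = np.concatenate([desc_part, tebd], axis=-1)
        print(f"tebd_dim={tebd_dim:3d}  {label:9s}  descriptor std = {desc.std():.3f}")
```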
**NOTE**
**This PR disables the bias in the type embedding across all backends,
which is a breaking change.**
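Existing checkpoints keep their bias on deserialization (see the
compatibility sketch after the summary), but new training runs must now
request the old behavior explicitly. A hedged example, using the key
registered in `deepmd/utils/argcheck.py` below:

```python
# Re-enabling the bias for a new training run. The key name and its new
# default (False) come from type_embedding_args() in this patch.
type_embedding_block = {
    "neuron": [8],
    "use_tebd_bias": True,  # pre-patch networks always carried a bias
}
```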
## Summary by CodeRabbit
- **New Features**
- Introduced `use_tebd_bias` and `bias` parameters across various
components to control the use of bias in type embeddings and networks.
- **Updates**
- Updated serialization and deserialization methods to include the new
parameters and ensure version compatibility.
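The version-compatibility rule repeated across the `deserialize` methods
below boils down to the following sketch (`_default_use_tebd_bias` is an
illustrative helper name, not part of the patch):

```python
# v1 checkpoints were always trained with a bias, so the flag defaults to
# True when absent; v2 data serializes "use_tebd_bias" explicitly.
def _default_use_tebd_bias(data: dict) -> dict:
    data = data.copy()
    # compat with version 1
    if "use_tebd_bias" not in data:
        data["use_tebd_bias"] = True
    return data

assert _default_use_tebd_bias({"@version": 1})["use_tebd_bias"] is True
assert not _default_use_tebd_bias({"@version": 2, "use_tebd_bias": False})["use_tebd_bias"]
```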
---
deepmd/dpmodel/descriptor/dpa1.py | 13 ++++-
deepmd/dpmodel/descriptor/dpa2.py | 13 ++++-
deepmd/dpmodel/descriptor/se_atten_v2.py | 10 +++-
deepmd/dpmodel/utils/network.py | 12 +++--
deepmd/dpmodel/utils/type_embed.py | 13 ++++-
deepmd/pt/model/descriptor/dpa1.py | 13 ++++-
deepmd/pt/model/descriptor/dpa2.py | 13 ++++-
deepmd/pt/model/descriptor/se_atten_v2.py | 30 ++++++++++-
deepmd/pt/model/network/network.py | 16 +++++-
deepmd/tf/descriptor/se_atten.py | 12 ++++-
deepmd/tf/utils/network.py | 52 +++++++++++++------
deepmd/tf/utils/type_embed.py | 21 ++++++--
deepmd/utils/argcheck.py | 32 ++++++++++++
.../tests/consistent/descriptor/test_dpa1.py | 9 ++++
.../tests/consistent/descriptor/test_dpa2.py | 9 ++++
.../consistent/descriptor/test_se_atten_v2.py | 7 +++
.../tests/consistent/test_type_embedding.py | 6 +++
source/tests/pt/model/test_descriptor_dpa1.py | 3 +-
source/tests/pt/model/test_descriptor_dpa2.py | 1 +
source/tests/tf/pairwise_dprc.json | 1 +
source/tests/tf/test_data_large_batch.py | 3 ++
source/tests/tf/test_descrpt_hybrid.py | 1 +
source/tests/tf/test_descrpt_se_a_type.py | 2 +
source/tests/tf/test_descrpt_se_atten.py | 4 ++
source/tests/tf/test_dipole_se_a_tebd.py | 1 +
source/tests/tf/test_model_se_a.py | 1 +
source/tests/tf/test_model_se_a_ebd_v2.py | 1 +
source/tests/tf/test_model_se_a_type.py | 1 +
source/tests/tf/test_model_se_atten.py | 8 +++
source/tests/tf/test_nvnmd_entrypoints.py | 4 ++
source/tests/tf/test_polar_se_a_tebd.py | 1 +
source/tests/tf/test_type_embed.py | 4 +-
.../dpmodel/descriptor/test_descriptor.py | 6 +++
33 files changed, 280 insertions(+), 43 deletions(-)
diff --git a/deepmd/dpmodel/descriptor/dpa1.py b/deepmd/dpmodel/descriptor/dpa1.py
index a0227e13c9..4eae05560f 100644
--- a/deepmd/dpmodel/descriptor/dpa1.py
+++ b/deepmd/dpmodel/descriptor/dpa1.py
@@ -201,6 +201,8 @@ class DescrptDPA1(NativeOP, BaseDescriptor):
The default value is `None`, which means the `tebd_input_mode` setting will be used instead.
use_econf_tebd: bool, Optional
Whether to use electronic configuration type embedding.
+ use_tebd_bias : bool, Optional
+ Whether to use bias in the type embedding layer.
type_map: List[str], Optional
A list of strings. Give the name to each type of atoms.
spin
@@ -253,6 +255,7 @@ def __init__(
spin: Optional[Any] = None,
stripped_type_embedding: Optional[bool] = None,
use_econf_tebd: bool = False,
+ use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
# consistent with argcheck, not used though
seed: Optional[Union[int, List[int]]] = None,
@@ -301,6 +304,7 @@ def __init__(
seed=child_seed(seed, 0),
)
self.use_econf_tebd = use_econf_tebd
+ self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
self.type_embedding = TypeEmbedNet(
ntypes=ntypes,
@@ -309,6 +313,7 @@ def __init__(
activation_function="Linear",
precision=precision,
use_econf_tebd=use_econf_tebd,
+ use_tebd_bias=use_tebd_bias,
type_map=type_map,
seed=child_seed(seed, 1),
)
@@ -491,7 +496,7 @@ def serialize(self) -> dict:
data = {
"@class": "Descriptor",
"type": "dpa1",
- "@version": 1,
+ "@version": 2,
"rcut": obj.rcut,
"rcut_smth": obj.rcut_smth,
"sel": obj.sel,
@@ -516,6 +521,7 @@ def serialize(self) -> dict:
"type_one_side": obj.type_one_side,
"concat_output_tebd": self.concat_output_tebd,
"use_econf_tebd": self.use_econf_tebd,
+ "use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
# make deterministic
"precision": np.dtype(PRECISION_DICT[obj.precision]).name,
@@ -541,7 +547,7 @@ def serialize(self) -> dict:
def deserialize(cls, data: dict) -> "DescrptDPA1":
"""Deserialize from dict."""
data = data.copy()
- check_version_compatibility(data.pop("@version"), 1, 1)
+ check_version_compatibility(data.pop("@version"), 2, 1)
data.pop("@class")
data.pop("type")
variables = data.pop("@variables")
@@ -554,6 +560,9 @@ def deserialize(cls, data: dict) -> "DescrptDPA1":
embeddings_strip = data.pop("embeddings_strip")
else:
embeddings_strip = None
+ # compat with version 1
+ if "use_tebd_bias" not in data:
+ data["use_tebd_bias"] = True
obj = cls(**data)
obj.se_atten["davg"] = variables["davg"]
diff --git a/deepmd/dpmodel/descriptor/dpa2.py b/deepmd/dpmodel/descriptor/dpa2.py
index 86ed7ec40c..5fcf1e27b9 100644
--- a/deepmd/dpmodel/descriptor/dpa2.py
+++ b/deepmd/dpmodel/descriptor/dpa2.py
@@ -331,6 +331,7 @@ def __init__(
seed: Optional[Union[int, List[int]]] = None,
add_tebd_to_repinit_out: bool = False,
use_econf_tebd: bool = False,
+ use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
):
r"""The DPA-2 descriptor. see https://arxiv.org/abs/2312.15492.
@@ -361,6 +362,8 @@ def __init__(
Whether to add type embedding to the output representation from repinit before inputting it into repformer.
use_econf_tebd : bool, Optional
Whether to use electronic configuration type embedding.
+ use_tebd_bias : bool, Optional
+ Whether to use bias in the type embedding layer.
type_map : List[str], Optional
A list of strings. Give the name to each type of atoms.
@@ -449,6 +452,7 @@ def init_subclass_params(sub_data, sub_class):
seed=child_seed(seed, 1),
)
self.use_econf_tebd = use_econf_tebd
+ self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
self.type_embedding = TypeEmbedNet(
ntypes=ntypes,
@@ -457,6 +461,7 @@ def init_subclass_params(sub_data, sub_class):
activation_function="Linear",
precision=precision,
use_econf_tebd=use_econf_tebd,
+ use_tebd_bias=use_tebd_bias,
type_map=type_map,
seed=child_seed(seed, 2),
)
@@ -720,7 +725,7 @@ def serialize(self) -> dict:
data = {
"@class": "Descriptor",
"type": "dpa2",
- "@version": 1,
+ "@version": 2,
"ntypes": self.ntypes,
"repinit_args": self.repinit_args.serialize(),
"repformer_args": self.repformer_args.serialize(),
@@ -732,6 +737,7 @@ def serialize(self) -> dict:
"trainable": self.trainable,
"add_tebd_to_repinit_out": self.add_tebd_to_repinit_out,
"use_econf_tebd": self.use_econf_tebd,
+ "use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
"type_embedding": self.type_embedding.serialize(),
"g1_shape_tranform": self.g1_shape_tranform.serialize(),
@@ -774,7 +780,7 @@ def serialize(self) -> dict:
@classmethod
def deserialize(cls, data: dict) -> "DescrptDPA2":
data = data.copy()
- check_version_compatibility(data.pop("@version"), 1, 1)
+ check_version_compatibility(data.pop("@version"), 2, 1)
data.pop("@class")
data.pop("type")
repinit_variable = data.pop("repinit_variable").copy()
@@ -785,6 +791,9 @@ def deserialize(cls, data: dict) -> "DescrptDPA2":
add_tebd_to_repinit_out = data["add_tebd_to_repinit_out"]
data["repinit"] = RepinitArgs(**data.pop("repinit_args"))
data["repformer"] = RepformerArgs(**data.pop("repformer_args"))
+ # compat with version 1
+ if "use_tebd_bias" not in data:
+ data["use_tebd_bias"] = True
obj = cls(**data)
obj.type_embedding = TypeEmbedNet.deserialize(type_embedding)
if add_tebd_to_repinit_out:
diff --git a/deepmd/dpmodel/descriptor/se_atten_v2.py b/deepmd/dpmodel/descriptor/se_atten_v2.py
index 037222076e..d29580062c 100644
--- a/deepmd/dpmodel/descriptor/se_atten_v2.py
+++ b/deepmd/dpmodel/descriptor/se_atten_v2.py
@@ -64,6 +64,7 @@ def __init__(
spin: Optional[Any] = None,
stripped_type_embedding: Optional[bool] = None,
use_econf_tebd: bool = False,
+ use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
# consistent with argcheck, not used though
seed: Optional[Union[int, List[int]]] = None,
@@ -100,6 +101,7 @@ def __init__(
spin=spin,
stripped_type_embedding=stripped_type_embedding,
use_econf_tebd=use_econf_tebd,
+ use_tebd_bias=use_tebd_bias,
type_map=type_map,
# consistent with argcheck, not used though
seed=seed,
@@ -111,7 +113,7 @@ def serialize(self) -> dict:
data = {
"@class": "Descriptor",
"type": "se_atten_v2",
- "@version": 1,
+ "@version": 2,
"rcut": obj.rcut,
"rcut_smth": obj.rcut_smth,
"sel": obj.sel,
@@ -134,6 +136,7 @@ def serialize(self) -> dict:
"type_one_side": obj.type_one_side,
"concat_output_tebd": self.concat_output_tebd,
"use_econf_tebd": self.use_econf_tebd,
+ "use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
# make deterministic
"precision": np.dtype(PRECISION_DICT[obj.precision]).name,
@@ -158,7 +161,7 @@ def serialize(self) -> dict:
def deserialize(cls, data: dict) -> "DescrptSeAttenV2":
"""Deserialize from dict."""
data = data.copy()
- check_version_compatibility(data.pop("@version"), 1, 1)
+ check_version_compatibility(data.pop("@version"), 2, 1)
data.pop("@class")
data.pop("type")
variables = data.pop("@variables")
@@ -167,6 +170,9 @@ def deserialize(cls, data: dict) -> "DescrptSeAttenV2":
attention_layers = data.pop("attention_layers")
data.pop("env_mat")
embeddings_strip = data.pop("embeddings_strip")
+ # compat with version 1
+ if "use_tebd_bias" not in data:
+ data["use_tebd_bias"] = True
obj = cls(**data)
obj.se_atten["davg"] = variables["davg"]
diff --git a/deepmd/dpmodel/utils/network.py b/deepmd/dpmodel/utils/network.py
index 53a170ac4a..941e2cfc86 100644
--- a/deepmd/dpmodel/utils/network.py
+++ b/deepmd/dpmodel/utils/network.py
@@ -571,6 +571,8 @@ class EN(T_Network):
Floating point precision for the model paramters.
seed : int, optional
Random seed.
+ bias : bool, Optional
+ Whether to use bias in the embedding layer.
"""
def __init__(
@@ -581,6 +583,7 @@ def __init__(
resnet_dt: bool = False,
precision: str = DEFAULT_PRECISION,
seed: Optional[Union[int, List[int]]] = None,
+ bias: bool = True,
):
layers = []
i_in = in_dim
@@ -590,7 +593,7 @@ def __init__(
T_NetworkLayer(
i_in,
i_ot,
- bias=True,
+ bias=bias,
use_timestep=resnet_dt,
activation_function=activation_function,
resnet=True,
@@ -605,6 +608,7 @@ def __init__(
self.activation_function = activation_function
self.resnet_dt = resnet_dt
self.precision = precision
+ self.bias = bias
def serialize(self) -> dict:
"""Serialize the network to a dict.
@@ -616,11 +620,12 @@ def serialize(self) -> dict:
"""
return {
"@class": "EmbeddingNetwork",
- "@version": 1,
+ "@version": 2,
"in_dim": self.in_dim,
"neuron": self.neuron.copy(),
"activation_function": self.activation_function,
"resnet_dt": self.resnet_dt,
+ "bias": self.bias,
# make deterministic
"precision": np.dtype(PRECISION_DICT[self.precision]).name,
"layers": [layer.serialize() for layer in self.layers],
@@ -636,7 +641,7 @@ def deserialize(cls, data: dict) -> "EmbeddingNet":
The dict to deserialize from.
"""
data = copy.deepcopy(data)
- check_version_compatibility(data.pop("@version", 1), 1, 1)
+ check_version_compatibility(data.pop("@version", 1), 2, 1)
data.pop("@class", None)
layers = data.pop("layers")
obj = cls(**data)
@@ -691,6 +696,7 @@ def __init__(
activation_function=activation_function,
resnet_dt=resnet_dt,
precision=precision,
+ seed=seed,
)
i_in = neuron[-1] if len(neuron) > 0 else in_dim
i_ot = out_dim
diff --git a/deepmd/dpmodel/utils/type_embed.py b/deepmd/dpmodel/utils/type_embed.py
index 0db064fb5e..cf9b31e607 100644
--- a/deepmd/dpmodel/utils/type_embed.py
+++ b/deepmd/dpmodel/utils/type_embed.py
@@ -45,6 +45,8 @@ class TypeEmbedNet(NativeOP):
Concat the zero padding to the output, as the default embedding of empty type.
use_econf_tebd: bool, Optional
Whether to use electronic configuration type embedding.
+ use_tebd_bias : bool, Optional
+ Whether to use bias in the type embedding layer.
type_map: List[str], Optional
A list of strings. Give the name to each type of atoms.
"""
@@ -61,6 +63,7 @@ def __init__(
seed: Optional[Union[int, List[int]]] = None,
padding: bool = False,
use_econf_tebd: bool = False,
+ use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
) -> None:
self.ntypes = ntypes
@@ -72,6 +75,7 @@ def __init__(
self.trainable = trainable
self.padding = padding
self.use_econf_tebd = use_econf_tebd
+ self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
embed_input_dim = ntypes
if self.use_econf_tebd:
@@ -85,6 +89,7 @@ def __init__(
self.resnet_dt,
self.precision,
seed=self.seed,
+ bias=self.use_tebd_bias,
)
def call(self) -> np.ndarray:
@@ -114,11 +119,14 @@ def deserialize(cls, data: dict):
The deserialized model
"""
data = data.copy()
- check_version_compatibility(data.pop("@version", 1), 1, 1)
+ check_version_compatibility(data.pop("@version", 1), 2, 1)
data_cls = data.pop("@class")
assert data_cls == "TypeEmbedNet", f"Invalid class {data_cls}"
embedding_net = EmbeddingNet.deserialize(data.pop("embedding"))
+ # compat with version 1
+ if "use_tebd_bias" not in data:
+ data["use_tebd_bias"] = True
type_embedding_net = cls(**data)
type_embedding_net.embedding_net = embedding_net
return type_embedding_net
@@ -133,7 +141,7 @@ def serialize(self) -> dict:
"""
return {
"@class": "TypeEmbedNet",
- "@version": 1,
+ "@version": 2,
"ntypes": self.ntypes,
"neuron": self.neuron,
"resnet_dt": self.resnet_dt,
@@ -142,6 +150,7 @@ def serialize(self) -> dict:
"trainable": self.trainable,
"padding": self.padding,
"use_econf_tebd": self.use_econf_tebd,
+ "use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
"embedding": self.embedding_net.serialize(),
}
diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py
index e6dba7f88f..0bc4a03807 100644
--- a/deepmd/pt/model/descriptor/dpa1.py
+++ b/deepmd/pt/model/descriptor/dpa1.py
@@ -189,6 +189,8 @@ class DescrptDPA1(BaseDescriptor, torch.nn.Module):
Random seed for parameter initialization.
use_econf_tebd: bool, Optional
Whether to use electronic configuration type embedding.
+ use_tebd_bias : bool, Optional
+ Whether to use bias in the type embedding layer.
type_map: List[str], Optional
A list of strings. Give the name to each type of atoms.
spin
@@ -241,6 +243,7 @@ def __init__(
stripped_type_embedding: Optional[bool] = None,
seed: Optional[Union[int, List[int]]] = None,
use_econf_tebd: bool = False,
+ use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
# not implemented
spin=None,
@@ -293,6 +296,7 @@ def __init__(
old_impl=old_impl,
)
self.use_econf_tebd = use_econf_tebd
+ self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
self.type_embedding = TypeEmbedNet(
ntypes,
@@ -300,6 +304,7 @@ def __init__(
precision=precision,
seed=child_seed(seed, 2),
use_econf_tebd=use_econf_tebd,
+ use_tebd_bias=use_tebd_bias,
type_map=type_map,
)
self.tebd_dim = tebd_dim
@@ -462,7 +467,7 @@ def serialize(self) -> dict:
data = {
"@class": "Descriptor",
"type": "dpa1",
- "@version": 1,
+ "@version": 2,
"rcut": obj.rcut,
"rcut_smth": obj.rcut_smth,
"sel": obj.sel,
@@ -487,6 +492,7 @@ def serialize(self) -> dict:
"type_one_side": obj.type_one_side,
"concat_output_tebd": self.concat_output_tebd,
"use_econf_tebd": self.use_econf_tebd,
+ "use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
# make deterministic
"precision": RESERVED_PRECISON_DICT[obj.prec],
@@ -510,7 +516,7 @@ def serialize(self) -> dict:
@classmethod
def deserialize(cls, data: dict) -> "DescrptDPA1":
data = data.copy()
- check_version_compatibility(data.pop("@version"), 1, 1)
+ check_version_compatibility(data.pop("@version"), 2, 1)
data.pop("@class")
data.pop("type")
variables = data.pop("@variables")
@@ -523,6 +529,9 @@ def deserialize(cls, data: dict) -> "DescrptDPA1":
embeddings_strip = data.pop("embeddings_strip")
else:
embeddings_strip = None
+ # compat with version 1
+ if "use_tebd_bias" not in data:
+ data["use_tebd_bias"] = True
obj = cls(**data)
def t_cvt(xx):
diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py
index aa5aa11f67..4d830ace1b 100644
--- a/deepmd/pt/model/descriptor/dpa2.py
+++ b/deepmd/pt/model/descriptor/dpa2.py
@@ -90,6 +90,7 @@ def __init__(
seed: Optional[Union[int, List[int]]] = None,
add_tebd_to_repinit_out: bool = False,
use_econf_tebd: bool = False,
+ use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
old_impl: bool = False,
):
@@ -121,6 +122,8 @@ def __init__(
Whether to add type embedding to the output representation from repinit before inputting it into repformer.
use_econf_tebd : bool, Optional
Whether to use electronic configuration type embedding.
+ use_tebd_bias : bool, Optional
+ Whether to use bias in the type embedding layer.
type_map : List[str], Optional
A list of strings. Give the name to each type of atoms.
@@ -211,6 +214,7 @@ def init_subclass_params(sub_data, sub_class):
old_impl=old_impl,
)
self.use_econf_tebd = use_econf_tebd
+ self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
self.type_embedding = TypeEmbedNet(
ntypes,
@@ -218,6 +222,7 @@ def init_subclass_params(sub_data, sub_class):
precision=precision,
seed=child_seed(seed, 2),
use_econf_tebd=self.use_econf_tebd,
+ use_tebd_bias=use_tebd_bias,
type_map=type_map,
)
self.concat_output_tebd = concat_output_tebd
@@ -455,7 +460,7 @@ def serialize(self) -> dict:
data = {
"@class": "Descriptor",
"type": "dpa2",
- "@version": 1,
+ "@version": 2,
"ntypes": self.ntypes,
"repinit_args": self.repinit_args.serialize(),
"repformer_args": self.repformer_args.serialize(),
@@ -467,6 +472,7 @@ def serialize(self) -> dict:
"trainable": self.trainable,
"add_tebd_to_repinit_out": self.add_tebd_to_repinit_out,
"use_econf_tebd": self.use_econf_tebd,
+ "use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
"type_embedding": self.type_embedding.embedding.serialize(),
"g1_shape_tranform": self.g1_shape_tranform.serialize(),
@@ -509,7 +515,7 @@ def serialize(self) -> dict:
@classmethod
def deserialize(cls, data: dict) -> "DescrptDPA2":
data = data.copy()
- check_version_compatibility(data.pop("@version"), 1, 1)
+ check_version_compatibility(data.pop("@version"), 2, 1)
data.pop("@class")
data.pop("type")
repinit_variable = data.pop("repinit_variable").copy()
@@ -520,6 +526,9 @@ def deserialize(cls, data: dict) -> "DescrptDPA2":
add_tebd_to_repinit_out = data["add_tebd_to_repinit_out"]
data["repinit"] = RepinitArgs(**data.pop("repinit_args"))
data["repformer"] = RepformerArgs(**data.pop("repformer_args"))
+ # compat with version 1
+ if "use_tebd_bias" not in data:
+ data["use_tebd_bias"] = True
obj = cls(**data)
obj.type_embedding.embedding = TypeEmbedNetConsistent.deserialize(
type_embedding
diff --git a/deepmd/pt/model/descriptor/se_atten_v2.py b/deepmd/pt/model/descriptor/se_atten_v2.py
index 8fc6271665..41e37eb03c 100644
--- a/deepmd/pt/model/descriptor/se_atten_v2.py
+++ b/deepmd/pt/model/descriptor/se_atten_v2.py
@@ -68,6 +68,7 @@ def __init__(
stripped_type_embedding: Optional[bool] = None,
seed: Optional[Union[int, List[int]]] = None,
use_econf_tebd: bool = False,
+ use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
# not implemented
spin=None,
@@ -124,6 +125,10 @@ def __init__(
Whether to normalize the hidden vectors in attention weights calculation.
temperature : float
If not None, the scaling of attention weights is `temperature` itself.
+ concat_output_tebd : bool
+ Whether to concat type embedding at the output of the descriptor.
+ trainable : bool
+ If the weights of this descriptor are trainable.
trainable_ln : bool
Whether to use trainable shift and scale weights in layer normalization.
ln_eps : float, Optional
@@ -132,8 +137,24 @@ def __init__(
If 'False', type embeddings of both neighbor and central atoms are considered.
If 'True', only type embeddings of neighbor atoms are considered.
Default is 'False'.
+ stripped_type_embedding : bool, Optional
+ (Deprecated, kept only for compatibility.)
+ Whether to strip the type embedding into a separate embedding network.
+ Setting this parameter to `True` is equivalent to setting `tebd_input_mode` to 'strip'.
+ Setting it to `False` is equivalent to setting `tebd_input_mode` to 'concat'.
+ The default value is `None`, which means the `tebd_input_mode` setting will be used instead.
seed : int, Optional
Random seed for parameter initialization.
+ use_econf_tebd : bool, Optional
+ Whether to use electronic configuration type embedding.
+ use_tebd_bias : bool, Optional
+ Whether to use bias in the type embedding layer.
+ type_map : List[str], Optional
+ A list of strings. Give the name to each type of atoms.
+ spin
+ (Only `None` is supported, to stay consistent with other backends.)
+ (Not used in this version; a non-None option is not implemented.)
+ The old implementation of deepspin.
"""
DescrptDPA1.__init__(
self,
@@ -167,6 +188,7 @@ def __init__(
stripped_type_embedding=stripped_type_embedding,
seed=seed,
use_econf_tebd=use_econf_tebd,
+ use_tebd_bias=use_tebd_bias,
type_map=type_map,
# not implemented
spin=spin,
@@ -179,7 +201,7 @@ def serialize(self) -> dict:
data = {
"@class": "Descriptor",
"type": "se_atten_v2",
- "@version": 1,
+ "@version": 2,
"rcut": obj.rcut,
"rcut_smth": obj.rcut_smth,
"sel": obj.sel,
@@ -202,6 +224,7 @@ def serialize(self) -> dict:
"type_one_side": obj.type_one_side,
"concat_output_tebd": self.concat_output_tebd,
"use_econf_tebd": self.use_econf_tebd,
+ "use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
# make deterministic
"precision": RESERVED_PRECISON_DICT[obj.prec],
@@ -224,7 +247,7 @@ def serialize(self) -> dict:
@classmethod
def deserialize(cls, data: dict) -> "DescrptSeAttenV2":
data = data.copy()
- check_version_compatibility(data.pop("@version"), 1, 1)
+ check_version_compatibility(data.pop("@version"), 2, 1)
data.pop("@class")
data.pop("type")
variables = data.pop("@variables")
@@ -233,6 +256,9 @@ def deserialize(cls, data: dict) -> "DescrptSeAttenV2":
attention_layers = data.pop("attention_layers")
data.pop("env_mat")
embeddings_strip = data.pop("embeddings_strip")
+ # compat with version 1
+ if "use_tebd_bias" not in data:
+ data["use_tebd_bias"] = True
obj = cls(**data)
def t_cvt(xx):
diff --git a/deepmd/pt/model/network/network.py b/deepmd/pt/model/network/network.py
index 13599a77dd..0469462061 100644
--- a/deepmd/pt/model/network/network.py
+++ b/deepmd/pt/model/network/network.py
@@ -573,6 +573,7 @@ def __init__(
precision="default",
seed: Optional[Union[int, List[int]]] = None,
use_econf_tebd=False,
+ use_tebd_bias: bool = False,
type_map=None,
):
"""Construct a type embedding net."""
@@ -582,6 +583,7 @@ def __init__(
self.bavg = bavg
self.stddev = stddev
self.use_econf_tebd = use_econf_tebd
+ self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
self.embedding = TypeEmbedNetConsistent(
ntypes=self.type_nums,
@@ -589,6 +591,7 @@ def __init__(
padding=True,
activation_function="Linear",
use_econf_tebd=use_econf_tebd,
+ use_tebd_bias=use_tebd_bias,
type_map=type_map,
precision=precision,
seed=seed,
@@ -655,6 +658,8 @@ class TypeEmbedNetConsistent(nn.Module):
Concat the zero padding to the output, as the default embedding of empty type.
use_econf_tebd: bool, Optional
Whether to use electronic configuration type embedding.
+ use_tebd_bias : bool, Optional
+ Whether to use bias in the type embedding layer.
type_map: List[str], Optional
A list of strings. Give the name to each type of atoms.
"""
@@ -671,6 +676,7 @@ def __init__(
seed: Optional[Union[int, List[int]]] = None,
padding: bool = False,
use_econf_tebd: bool = False,
+ use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
):
"""Construct a type embedding net."""
@@ -685,6 +691,7 @@ def __init__(
self.trainable = trainable
self.padding = padding
self.use_econf_tebd = use_econf_tebd
+ self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
self.econf_tebd = None
embed_input_dim = ntypes
@@ -700,6 +707,7 @@ def __init__(
self.resnet_dt,
self.precision,
self.seed,
+ bias=self.use_tebd_bias,
)
for param in self.parameters():
param.requires_grad = trainable
@@ -802,11 +810,14 @@ def deserialize(cls, data: dict):
The deserialized model
"""
data = data.copy()
- check_version_compatibility(data.pop("@version", 1), 1, 1)
+ check_version_compatibility(data.pop("@version", 1), 2, 1)
data_cls = data.pop("@class")
assert data_cls == "TypeEmbedNet", f"Invalid class {data_cls}"
embedding_net = EmbeddingNet.deserialize(data.pop("embedding"))
+ # compat with version 1
+ if "use_tebd_bias" not in data:
+ data["use_tebd_bias"] = True
type_embedding_net = cls(**data)
type_embedding_net.embedding_net = embedding_net
return type_embedding_net
@@ -821,7 +832,7 @@ def serialize(self) -> dict:
"""
return {
"@class": "TypeEmbedNet",
- "@version": 1,
+ "@version": 2,
"ntypes": self.ntypes,
"neuron": self.neuron,
"resnet_dt": self.resnet_dt,
@@ -830,6 +841,7 @@ def serialize(self) -> dict:
"trainable": self.trainable,
"padding": self.padding,
"use_econf_tebd": self.use_econf_tebd,
+ "use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
"embedding": self.embedding_net.serialize(),
}
diff --git a/deepmd/tf/descriptor/se_atten.py b/deepmd/tf/descriptor/se_atten.py
index de652bc91d..13976a84e1 100644
--- a/deepmd/tf/descriptor/se_atten.py
+++ b/deepmd/tf/descriptor/se_atten.py
@@ -2079,6 +2079,8 @@ class DescrptDPA1Compat(DescrptSeAtten):
Whether to concat type embedding at the output of the descriptor.
use_econf_tebd: bool, Optional
Whether to use electronic configuration type embedding.
+ use_tebd_bias : bool, Optional
+ Whether to use bias in the type embedding layer.
type_map: List[str], Optional
A list of strings. Give the name to each type of atoms.
spin
@@ -2116,6 +2118,7 @@ def __init__(
smooth_type_embedding: bool = True,
concat_output_tebd: bool = True,
use_econf_tebd: bool = False,
+ use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
spin: Optional[Any] = None,
# consistent with argcheck, not used though
@@ -2167,6 +2170,7 @@ def __init__(
)
self.tebd_dim = tebd_dim
self.use_econf_tebd = use_econf_tebd
+ self.use_tebd_bias = use_tebd_bias
self.scaling_factor = scaling_factor
self.normalize = normalize
self.temperature = temperature
@@ -2176,6 +2180,7 @@ def __init__(
padding=True,
activation_function="Linear",
use_econf_tebd=use_econf_tebd,
+ use_tebd_bias=use_tebd_bias,
type_map=type_map,
# precision=precision,
seed=seed,
@@ -2303,7 +2308,7 @@ def deserialize(cls, data: dict, suffix: str = ""):
if cls is not DescrptDPA1Compat:
raise NotImplementedError(f"Not implemented in class {cls.__name__}")
data = data.copy()
- check_version_compatibility(data.pop("@version"), 1, 1)
+ check_version_compatibility(data.pop("@version"), 2, 1)
data.pop("@class")
data.pop("type")
embedding_net_variables = cls.deserialize_network(
@@ -2325,6 +2330,9 @@ def deserialize(cls, data: dict, suffix: str = ""):
)
else:
two_side_embeeding_net_variables = None
+ # compat with version 1
+ if "use_tebd_bias" not in data:
+ data["use_tebd_bias"] = True
descriptor = cls(**data)
descriptor.embedding_net_variables = embedding_net_variables
descriptor.attention_layer_variables = attention_layer_variables
@@ -2357,12 +2365,14 @@ def serialize(self, suffix: str = "") -> dict:
data.update(
{
"type": "dpa1",
+ "@version": 2,
"tebd_dim": self.tebd_dim,
"scaling_factor": self.scaling_factor,
"normalize": self.normalize,
"temperature": self.temperature,
"concat_output_tebd": self.concat_output_tebd,
"use_econf_tebd": self.use_econf_tebd,
+ "use_tebd_bias": self.use_tebd_bias,
"type_embedding": self.type_embedding.serialize(suffix),
}
)
diff --git a/deepmd/tf/utils/network.py b/deepmd/tf/utils/network.py
index 7918b58d0c..7941b451af 100644
--- a/deepmd/tf/utils/network.py
+++ b/deepmd/tf/utils/network.py
@@ -219,6 +219,7 @@ def embedding_net(
uniform_seed=False,
initial_variables=None,
mixed_prec=None,
+ bias=True,
):
r"""The embedding network.
@@ -274,6 +275,8 @@ def embedding_net(
The input dict which stores the embedding net variables
mixed_prec
The input dict which stores the mixed precision setting for the embedding net
+ bias : bool, Optional
+ Whether to use bias in the embedding layer.
References
----------
@@ -289,18 +292,27 @@ def embedding_net(
stddev=stddev / np.sqrt(outputs_size[ii] + outputs_size[ii - 1]),
seed=seed if (seed is None or uniform_seed) else seed + ii * 3 + 0,
)
- b_initializer = tf.random_normal_initializer(
- stddev=stddev,
- mean=bavg,
- seed=seed if (seed is None or uniform_seed) else seed + 3 * ii + 1,
+ b_initializer = (
+ tf.random_normal_initializer(
+ stddev=stddev,
+ mean=bavg,
+ seed=seed if (seed is None or uniform_seed) else seed + 3 * ii + 1,
+ )
+ if bias
+ else None
)
if initial_variables is not None:
scope = tf.get_variable_scope().name
w_initializer = tf.constant_initializer(
initial_variables[scope + "/matrix_" + str(ii) + name_suffix]
)
- b_initializer = tf.constant_initializer(
- initial_variables[scope + "/bias_" + str(ii) + name_suffix]
+ bias = (scope + "/bias_" + str(ii) + name_suffix) in initial_variables
+ b_initializer = (
+ tf.constant_initializer(
+ initial_variables[scope + "/bias_" + str(ii) + name_suffix]
+ )
+ if bias
+ else None
)
w = tf.get_variable(
"matrix_" + str(ii) + name_suffix,
@@ -311,27 +323,35 @@ def embedding_net(
)
variable_summaries(w, "matrix_" + str(ii) + name_suffix)
- b = tf.get_variable(
- "bias_" + str(ii) + name_suffix,
- [outputs_size[ii]],
- precision,
- b_initializer,
- trainable=trainable,
+ b = (
+ tf.get_variable(
+ "bias_" + str(ii) + name_suffix,
+ [outputs_size[ii]],
+ precision,
+ b_initializer,
+ trainable=trainable,
+ )
+ if bias
+ else None
)
- variable_summaries(b, "bias_" + str(ii) + name_suffix)
+ if bias:
+ variable_summaries(b, "bias_" + str(ii) + name_suffix)
if mixed_prec is not None:
xx = tf.cast(xx, get_precision(mixed_prec["compute_prec"]))
w = tf.cast(w, get_precision(mixed_prec["compute_prec"]))
- b = tf.cast(b, get_precision(mixed_prec["compute_prec"]))
+ b = tf.cast(b, get_precision(mixed_prec["compute_prec"])) if bias else None
if activation_fn is not None:
hidden = tf.reshape(
- activation_fn(tf.nn.bias_add(tf.matmul(xx, w), b)),
+ activation_fn(
+ tf.nn.bias_add(tf.matmul(xx, w), b) if bias else tf.matmul(xx, w)
+ ),
[-1, outputs_size[ii]],
)
else:
hidden = tf.reshape(
- tf.nn.bias_add(tf.matmul(xx, w), b), [-1, outputs_size[ii]]
+ tf.nn.bias_add(tf.matmul(xx, w), b) if bias else tf.matmul(xx, w),
+ [-1, outputs_size[ii]],
)
if resnet_dt:
idt_initializer = tf.random_normal_initializer(
diff --git a/deepmd/tf/utils/type_embed.py b/deepmd/tf/utils/type_embed.py
index 20beda9d3a..3f40a0cef1 100644
--- a/deepmd/tf/utils/type_embed.py
+++ b/deepmd/tf/utils/type_embed.py
@@ -100,6 +100,8 @@ class TypeEmbedNet:
Concat the zero padding to the output, as the default embedding of empty type.
use_econf_tebd: bool, Optional
Whether to use electronic configuration type embedding.
+ use_tebd_bias : bool, Optional
+ Whether to use bias in the type embedding layer.
type_map: List[str], Optional
A list of strings. Give the name to each type of atoms.
"""
@@ -117,6 +119,7 @@ def __init__(
uniform_seed: bool = False,
padding: bool = False,
use_econf_tebd: bool = False,
+ use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
**kwargs,
) -> None:
@@ -133,6 +136,7 @@ def __init__(
self.type_embedding_net_variables = None
self.padding = padding
self.use_econf_tebd = use_econf_tebd
+ self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
if self.use_econf_tebd:
self.econf_tebd, _ = get_econf_tebd(self.type_map, precision=precision)
@@ -191,6 +195,7 @@ def build(
trainable=self.trainable,
initial_variables=self.type_embedding_net_variables,
uniform_seed=self.uniform_seed,
+ bias=self.use_tebd_bias,
)
ebd_type = tf.reshape(ebd_type, [-1, self.neuron[-1]]) # ntypes * neuron[-1]
if self.padding:
@@ -241,7 +246,7 @@ def deserialize(cls, data: dict, suffix: str = ""):
The deserialized model
"""
data = data.copy()
- check_version_compatibility(data.pop("@version", 1), 1, 1)
+ check_version_compatibility(data.pop("@version", 1), 2, 1)
data_cls = data.pop("@class")
assert data_cls == "TypeEmbedNet", f"Invalid class {data_cls}"
@@ -251,9 +256,10 @@ def deserialize(cls, data: dict, suffix: str = ""):
embedding_net_variables[
f"type_embed_net{suffix}/matrix_{layer_idx + 1}"
] = layer.w
- embedding_net_variables[f"type_embed_net{suffix}/bias_{layer_idx + 1}"] = (
- layer.b
- )
+ if layer.b is not None:
+ embedding_net_variables[
+ f"type_embed_net{suffix}/bias_{layer_idx + 1}"
+ ] = layer.b
if layer.idt is not None:
embedding_net_variables[
f"type_embed_net{suffix}/idt_{layer_idx + 1}"
@@ -264,6 +270,9 @@ def deserialize(cls, data: dict, suffix: str = ""):
f"type_embed_net{suffix}/idt_{layer_idx + 1}"
] = 0.0
+ # compat with version 1
+ if "use_tebd_bias" not in data:
+ data["use_tebd_bias"] = True
type_embedding_net = cls(**data)
type_embedding_net.type_embedding_net_variables = embedding_net_variables
return type_embedding_net
@@ -303,6 +312,7 @@ def serialize(self, suffix: str = "") -> dict:
activation_function=self.filter_activation_fn_name,
resnet_dt=self.filter_resnet_dt,
precision=self.filter_precision.name,
+ bias=self.use_tebd_bias,
)
for key, value in self.type_embedding_net_variables.items():
m = re.search(type_embedding_pattern, key)
@@ -315,7 +325,7 @@ def serialize(self, suffix: str = "") -> dict:
return {
"@class": "TypeEmbedNet",
- "@version": 1,
+ "@version": 2,
"ntypes": self.ntypes,
"neuron": self.neuron,
"resnet_dt": self.filter_resnet_dt,
@@ -324,6 +334,7 @@ def serialize(self, suffix: str = "") -> dict:
"trainable": self.trainable,
"padding": self.padding,
"use_econf_tebd": self.use_econf_tebd,
+ "use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
"embedding": embedding_net.serialize(),
}
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index cb39ead6be..0bf50fd019 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -87,6 +87,7 @@ def type_embedding_args():
doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
doc_trainable = "If the parameters in the embedding net are trainable"
doc_use_econf_tebd = "Whether to use electronic configuration type embedding."
+ doc_use_tebd_bias = "Whether to use bias in the type embedding layer."
return [
Argument("neuron", List[int], optional=True, default=[8], doc=doc_neuron),
@@ -104,6 +105,13 @@ def type_embedding_args():
Argument(
"use_econf_tebd", bool, optional=True, default=False, doc=doc_use_econf_tebd
),
+ Argument(
+ "use_tebd_bias",
+ bool,
+ optional=True,
+ default=False,
+ doc=doc_use_tebd_bias,
+ ),
]
@@ -529,6 +537,7 @@ def descrpt_se_atten_args():
doc_ln_eps = "The epsilon value for layer normalization. The default value for TensorFlow is set to 1e-3 to keep consistent with keras while set to 1e-5 in PyTorch and DP implementation."
doc_tebd_dim = "The dimension of atom type embedding."
doc_use_econf_tebd = r"Whether to use electronic configuration type embedding. For TensorFlow backend, please set `use_econf_tebd` in `type_embedding` block instead."
+ doc_use_tebd_bias = "Whether to use bias in the type embedding layer."
doc_temperature = "The scaling factor of normalization in calculations of attention weights, which is used to scale the matmul(Q, K)."
doc_scaling_factor = (
"The scaling factor of normalization in calculations of attention weights, which is used to scale the matmul(Q, K). "
@@ -596,6 +605,13 @@ def descrpt_se_atten_args():
default=False,
doc=doc_only_pt_supported + doc_use_econf_tebd,
),
+ Argument(
+ "use_tebd_bias",
+ bool,
+ optional=True,
+ default=False,
+ doc=doc_use_tebd_bias,
+ ),
Argument(
"tebd_input_mode",
str,
@@ -642,6 +658,7 @@ def descrpt_se_atten_v2_args():
doc_ln_eps = "The epsilon value for layer normalization. The default value for TensorFlow is set to 1e-3 to keep consistent with keras while set to 1e-5 in PyTorch and DP implementation."
doc_tebd_dim = "The dimension of atom type embedding."
doc_use_econf_tebd = r"Whether to use electronic configuration type embedding. For TensorFlow backend, please set `use_econf_tebd` in `type_embedding` block instead."
+ doc_use_tebd_bias = "Whether to use bias in the type embedding layer."
doc_temperature = "The scaling factor of normalization in calculations of attention weights, which is used to scale the matmul(Q, K)."
doc_scaling_factor = (
"The scaling factor of normalization in calculations of attention weights, which is used to scale the matmul(Q, K). "
@@ -679,6 +696,13 @@ def descrpt_se_atten_v2_args():
default=False,
doc=doc_only_pt_supported + doc_use_econf_tebd,
),
+ Argument(
+ "use_tebd_bias",
+ bool,
+ optional=True,
+ default=False,
+ doc=doc_use_tebd_bias,
+ ),
Argument(
"scaling_factor",
float,
@@ -729,6 +753,7 @@ def descrpt_dpa2_args():
doc_seed = "Random seed for parameter initialization."
doc_add_tebd_to_repinit_out = "Add type embedding to the output representation from repinit before inputting it into repformer."
doc_use_econf_tebd = "Whether to use electronic configuration type embedding."
+ doc_use_tebd_bias = "Whether to use bias in the type embedding layer."
return [
# repinit args
Argument("repinit", dict, dpa2_repinit_args(), doc=doc_repinit),
@@ -775,6 +800,13 @@ def descrpt_dpa2_args():
default=False,
doc=doc_only_pt_supported + doc_use_econf_tebd,
),
+ Argument(
+ "use_tebd_bias",
+ bool,
+ optional=True,
+ default=False,
+ doc=doc_use_tebd_bias,
+ ),
]
diff --git a/source/tests/consistent/descriptor/test_dpa1.py b/source/tests/consistent/descriptor/test_dpa1.py
index 8bd9af6d5f..955b58932a 100644
--- a/source/tests/consistent/descriptor/test_dpa1.py
+++ b/source/tests/consistent/descriptor/test_dpa1.py
@@ -58,6 +58,7 @@
(True,), # concat_output_tebd
("float64",), # precision
(True, False), # use_econf_tebd
+ (False, True), # use_tebd_bias
)
class TestDPA1(CommonTest, DescriptorTest, unittest.TestCase):
@property
@@ -81,6 +82,7 @@ def data(self) -> dict:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return {
"sel": [10],
@@ -108,6 +110,7 @@ def data(self) -> dict:
"set_davg_zero": set_davg_zero,
"smooth_type_embedding": smooth_type_embedding,
"use_econf_tebd": use_econf_tebd,
+ "use_tebd_bias": use_tebd_bias,
"type_map": ["O", "H"] if use_econf_tebd else None,
"seed": 1145141919810,
}
@@ -142,6 +145,7 @@ def skip_pt(self) -> bool:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return CommonTest.skip_pt or self.is_meaningless_zero_attention_layer_tests(
attn_layer,
@@ -171,6 +175,7 @@ def skip_dp(self) -> bool:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return CommonTest.skip_pt or self.is_meaningless_zero_attention_layer_tests(
attn_layer,
@@ -200,6 +205,7 @@ def skip_tf(self) -> bool:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return (
CommonTest.skip_tf
@@ -275,6 +281,7 @@ def setUp(self):
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]:
@@ -332,6 +339,7 @@ def rtol(self) -> float:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
if precision == "float64":
return 1e-10
@@ -362,6 +370,7 @@ def atol(self) -> float:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
if precision == "float64":
return 1e-10
diff --git a/source/tests/consistent/descriptor/test_dpa2.py b/source/tests/consistent/descriptor/test_dpa2.py
index b24274c9d7..9b88b4238a 100644
--- a/source/tests/consistent/descriptor/test_dpa2.py
+++ b/source/tests/consistent/descriptor/test_dpa2.py
@@ -64,6 +64,7 @@
("float64",), # precision
(True, False), # add_tebd_to_repinit_out
(True, False), # use_econf_tebd
+ (False,), # use_tebd_bias
)
class TestDPA2(CommonTest, DescriptorTest, unittest.TestCase):
@property
@@ -91,6 +92,7 @@ def data(self) -> dict:
precision,
add_tebd_to_repinit_out,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return {
"ntypes": self.ntypes,
@@ -149,6 +151,7 @@ def data(self) -> dict:
"env_protection": 0.0,
"trainable": True,
"use_econf_tebd": use_econf_tebd,
+ "use_tebd_bias": use_tebd_bias,
"type_map": ["O", "H"] if use_econf_tebd else None,
"add_tebd_to_repinit_out": add_tebd_to_repinit_out,
}
@@ -178,6 +181,7 @@ def skip_pt(self) -> bool:
precision,
add_tebd_to_repinit_out,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return CommonTest.skip_pt
@@ -206,6 +210,7 @@ def skip_dp(self) -> bool:
precision,
add_tebd_to_repinit_out,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return CommonTest.skip_pt
@@ -234,6 +239,7 @@ def skip_tf(self) -> bool:
precision,
add_tebd_to_repinit_out,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return True
@@ -298,6 +304,7 @@ def setUp(self):
precision,
add_tebd_to_repinit_out,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]:
@@ -359,6 +366,7 @@ def rtol(self) -> float:
precision,
add_tebd_to_repinit_out,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
if precision == "float64":
return 1e-10
@@ -393,6 +401,7 @@ def atol(self) -> float:
precision,
add_tebd_to_repinit_out,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
if precision == "float64":
return 1e-6 # need to fix in the future, see issue https://github.com/deepmodeling/deepmd-kit/issues/3786
diff --git a/source/tests/consistent/descriptor/test_se_atten_v2.py b/source/tests/consistent/descriptor/test_se_atten_v2.py
index 54f3cb5826..9ae16b96fa 100644
--- a/source/tests/consistent/descriptor/test_se_atten_v2.py
+++ b/source/tests/consistent/descriptor/test_se_atten_v2.py
@@ -54,6 +54,7 @@
(True,), # concat_output_tebd
("float64",), # precision
(True, False), # use_econf_tebd
+ (False,), # use_tebd_bias
)
class TestSeAttenV2(CommonTest, DescriptorTest, unittest.TestCase):
@property
@@ -75,6 +76,7 @@ def data(self) -> dict:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return {
"sel": [10],
@@ -100,6 +102,7 @@ def data(self) -> dict:
"precision": precision,
"set_davg_zero": set_davg_zero,
"use_econf_tebd": use_econf_tebd,
+ "use_tebd_bias": use_tebd_bias,
"type_map": ["O", "H"] if use_econf_tebd else None,
"seed": 1145141919810,
}
@@ -132,6 +135,7 @@ def skip_pt(self) -> bool:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return CommonTest.skip_pt or self.is_meaningless_zero_attention_layer_tests(
attn_layer,
@@ -159,6 +163,7 @@ def skip_dp(self) -> bool:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return CommonTest.skip_pt or self.is_meaningless_zero_attention_layer_tests(
attn_layer,
@@ -263,6 +268,7 @@ def rtol(self) -> float:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
if precision == "float64":
return 1e-10
@@ -291,6 +297,7 @@ def atol(self) -> float:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
if precision == "float64":
return 1e-10
diff --git a/source/tests/consistent/test_type_embedding.py b/source/tests/consistent/test_type_embedding.py
index cf358771b3..6583dddb5f 100644
--- a/source/tests/consistent/test_type_embedding.py
+++ b/source/tests/consistent/test_type_embedding.py
@@ -37,6 +37,7 @@
("float32", "float64"), # precision
(True, False), # padding
(True, False), # use_econf_tebd
+ (True, False), # use_tebd_bias
)
class TestTypeEmbedding(CommonTest, unittest.TestCase):
"""Useful utilities for descriptor tests."""
@@ -48,12 +49,14 @@ def data(self) -> dict:
precision,
padding,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return {
"neuron": [2, 4, 4],
"resnet_dt": resnet_dt,
"precision": precision,
"use_econf_tebd": use_econf_tebd,
+ "use_tebd_bias": use_tebd_bias,
"seed": 20240327,
}
@@ -69,6 +72,7 @@ def addtional_data(self) -> dict:
precision,
padding,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
# implict argument not input by users
return {
@@ -110,6 +114,7 @@ def rtol(self) -> float:
precision,
padding,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
if precision == "float64":
return 1e-10
@@ -128,6 +133,7 @@ def atol(self) -> float:
precision,
padding,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
if precision == "float64":
return 1e-10
diff --git a/source/tests/pt/model/test_descriptor_dpa1.py b/source/tests/pt/model/test_descriptor_dpa1.py
index 6dadcdacb7..488cc2f7ff 100644
--- a/source/tests/pt/model/test_descriptor_dpa1.py
+++ b/source/tests/pt/model/test_descriptor_dpa1.py
@@ -250,7 +250,7 @@ def test_descriptor_block(self):
atype = self.atype
box = self.cell
# handel type_embedding
- type_embedding = TypeEmbedNet(ntypes, 8).to(env.DEVICE)
+ type_embedding = TypeEmbedNet(ntypes, 8, use_tebd_bias=True).to(env.DEVICE)
type_embedding.load_state_dict(torch.load(self.file_type_embed))
## to save model parameters
@@ -294,6 +294,7 @@ def test_descriptor(self):
dparams["ntypes"] = ntypes
assert dparams.pop("type") == "se_atten"
dparams["concat_output_tebd"] = False
+ dparams["use_tebd_bias"] = True
des = DescrptDPA1(
**dparams,
).to(env.DEVICE)
diff --git a/source/tests/pt/model/test_descriptor_dpa2.py b/source/tests/pt/model/test_descriptor_dpa2.py
index 240871f2d7..ac04bfc417 100644
--- a/source/tests/pt/model/test_descriptor_dpa2.py
+++ b/source/tests/pt/model/test_descriptor_dpa2.py
@@ -118,6 +118,7 @@ def test_descriptor(self):
dparams["ntypes"] = ntypes
assert dparams.pop("type") == "dpa2"
dparams["concat_output_tebd"] = False
+ dparams["use_tebd_bias"] = True
des = DescrptDPA2(
**dparams,
).to(env.DEVICE)
diff --git a/source/tests/tf/pairwise_dprc.json b/source/tests/tf/pairwise_dprc.json
index 7c735a8191..4924c23195 100644
--- a/source/tests/tf/pairwise_dprc.json
+++ b/source/tests/tf/pairwise_dprc.json
@@ -11,6 +11,7 @@
"HW"
],
"type_embedding": {
+ "use_tebd_bias": true,
"neuron": [
2
]
diff --git a/source/tests/tf/test_data_large_batch.py b/source/tests/tf/test_data_large_batch.py
index 1b19d664dd..d9bb00de40 100644
--- a/source/tests/tf/test_data_large_batch.py
+++ b/source/tests/tf/test_data_large_batch.py
@@ -121,6 +121,7 @@ def test_data_mixed_type(self):
activation_function=None,
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
model = EnerModel(descrpt, fitting, typeebd)
@@ -320,6 +321,7 @@ def test_stripped_data_mixed_type(self):
activation_function=None,
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
model = EnerModel(descrpt, fitting, typeebd)
@@ -519,6 +521,7 @@ def test_compressible_data_mixed_type(self):
activation_function=None,
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
model = EnerModel(descrpt, fitting, typeebd)
diff --git a/source/tests/tf/test_descrpt_hybrid.py b/source/tests/tf/test_descrpt_hybrid.py
index 76c0932316..cadf2f83ae 100644
--- a/source/tests/tf/test_descrpt_hybrid.py
+++ b/source/tests/tf/test_descrpt_hybrid.py
@@ -64,6 +64,7 @@ def test_descriptor_hybrid(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
diff --git a/source/tests/tf/test_descrpt_se_a_type.py b/source/tests/tf/test_descrpt_se_a_type.py
index 5bc4680d6d..ccb2ef96a6 100644
--- a/source/tests/tf/test_descrpt_se_a_type.py
+++ b/source/tests/tf/test_descrpt_se_a_type.py
@@ -59,6 +59,7 @@ def test_descriptor_two_sides(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
)
jdata["model"]["descriptor"].pop("type", None)
@@ -223,6 +224,7 @@ def test_descriptor_one_side(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
)
jdata["model"]["descriptor"].pop("type", None)
diff --git a/source/tests/tf/test_descrpt_se_atten.py b/source/tests/tf/test_descrpt_se_atten.py
index 84325cadc9..6393cc222b 100644
--- a/source/tests/tf/test_descrpt_se_atten.py
+++ b/source/tests/tf/test_descrpt_se_atten.py
@@ -71,6 +71,7 @@ def test_descriptor_two_sides(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
@@ -248,6 +249,7 @@ def test_descriptor_one_side(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
@@ -425,6 +427,7 @@ def test_stripped_type_embedding_descriptor_two_sides(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
@@ -596,6 +599,7 @@ def test_compressible_descriptor_two_sides(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
diff --git a/source/tests/tf/test_dipole_se_a_tebd.py b/source/tests/tf/test_dipole_se_a_tebd.py
index 66be544e78..ada0643dbf 100644
--- a/source/tests/tf/test_dipole_se_a_tebd.py
+++ b/source/tests/tf/test_dipole_se_a_tebd.py
@@ -73,6 +73,7 @@ def test_model(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
)
model = DipoleModel(descrpt, fitting, typeebd)
diff --git a/source/tests/tf/test_model_se_a.py b/source/tests/tf/test_model_se_a.py
index 1d67ef5fab..4d691834ae 100644
--- a/source/tests/tf/test_model_se_a.py
+++ b/source/tests/tf/test_model_se_a.py
@@ -299,6 +299,7 @@ def test_model_atom_ener_type_embedding(self):
typeebd = TypeEmbedNet(
ntypes=len(jdata["model"]["descriptor"]["sel"]),
**jdata["model"]["type_embeding"],
+ use_tebd_bias=True,
)
jdata["model"]["descriptor"].pop("type", None)
descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
diff --git a/source/tests/tf/test_model_se_a_ebd_v2.py b/source/tests/tf/test_model_se_a_ebd_v2.py
index ffa558da35..69d44b7c56 100644
--- a/source/tests/tf/test_model_se_a_ebd_v2.py
+++ b/source/tests/tf/test_model_se_a_ebd_v2.py
@@ -62,6 +62,7 @@ def test_model(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
descrpt = DescrptSeAEbdV2(
diff --git a/source/tests/tf/test_model_se_a_type.py b/source/tests/tf/test_model_se_a_type.py
index e38afc0fb4..e9fb8db466 100644
--- a/source/tests/tf/test_model_se_a_type.py
+++ b/source/tests/tf/test_model_se_a_type.py
@@ -62,6 +62,7 @@ def test_model(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
)
model = EnerModel(descrpt, fitting, typeebd)
diff --git a/source/tests/tf/test_model_se_atten.py b/source/tests/tf/test_model_se_atten.py
index 06945fab9c..4f661c2cc3 100644
--- a/source/tests/tf/test_model_se_atten.py
+++ b/source/tests/tf/test_model_se_atten.py
@@ -74,6 +74,7 @@ def test_model(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
model = EnerModel(descrpt, fitting, typeebd)
@@ -232,6 +233,7 @@ def test_exclude_types(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
type_embedding = typeebd.build(
@@ -298,6 +300,7 @@ def test_compressible_model(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
model = EnerModel(descrpt, fitting, typeebd)
@@ -459,6 +462,7 @@ def test_compressible_exclude_types(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
type_embedding = typeebd.build(
@@ -526,6 +530,7 @@ def test_stripped_type_embedding_model(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
model = EnerModel(descrpt, fitting, typeebd)
@@ -691,6 +696,7 @@ def test_stripped_type_embedding_exclude_types(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
type_embedding = typeebd.build(
@@ -765,6 +771,7 @@ def test_smoothness_of_stripped_type_embedding_smooth_model(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
model = EnerModel(descrpt, fitting, typeebd)
@@ -913,6 +920,7 @@ def test_smoothness_of_stripped_type_embedding_smooth_model_excluded_types(self)
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
model = EnerModel(descrpt, fitting, typeebd)
diff --git a/source/tests/tf/test_nvnmd_entrypoints.py b/source/tests/tf/test_nvnmd_entrypoints.py
index 17ad62b4bc..32e90d9d9b 100644
--- a/source/tests/tf/test_nvnmd_entrypoints.py
+++ b/source/tests/tf/test_nvnmd_entrypoints.py
@@ -727,7 +727,11 @@ def test_model_qnn_v1(self):
jdata = nvnmd_cfg.get_deepmd_jdata()
run_opt = RunOptions(log_path=None, log_level=20)
jdata = update_deepmd_input(jdata, warning=True, dump="input_v2_compat.json")
+ jdata["model"]["type_embedding"] = {}
jdata = normalize(jdata)
+ jdata["model"]["type_embedding"].update(
+ {"activation_function": None, "use_tebd_bias": True}
+ )
self.trainer = DPTrainer(jdata, run_opt, False)
self.model = self.trainer.model
# place holder
diff --git a/source/tests/tf/test_polar_se_a_tebd.py b/source/tests/tf/test_polar_se_a_tebd.py
index 2abaedd40a..7f8d304af9 100644
--- a/source/tests/tf/test_polar_se_a_tebd.py
+++ b/source/tests/tf/test_polar_se_a_tebd.py
@@ -71,6 +71,7 @@ def test_model(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
)
model = PolarModel(descrpt, fitting, typeebd)
diff --git a/source/tests/tf/test_type_embed.py b/source/tests/tf/test_type_embed.py
index c3f5077943..fd89f8fd10 100644
--- a/source/tests/tf/test_type_embed.py
+++ b/source/tests/tf/test_type_embed.py
@@ -28,7 +28,9 @@ def test_embed_atom_type(self):
np.testing.assert_almost_equal(atom_embed, expected_out, 10)
def test_type_embed_net(self):
- ten = TypeEmbedNet(ntypes=2, neuron=[2, 4, 8], seed=1, uniform_seed=True)
+ ten = TypeEmbedNet(
+ ntypes=2, neuron=[2, 4, 8], seed=1, uniform_seed=True, use_tebd_bias=True
+ )
type_embedding = ten.build(2)
sess = self.cached_session().__enter__()
sess.run(tf.global_variables_initializer())
diff --git a/source/tests/universal/dpmodel/descriptor/test_descriptor.py b/source/tests/universal/dpmodel/descriptor/test_descriptor.py
index 691e8312d4..41d915847e 100644
--- a/source/tests/universal/dpmodel/descriptor/test_descriptor.py
+++ b/source/tests/universal/dpmodel/descriptor/test_descriptor.py
@@ -187,6 +187,7 @@ def DescriptorParamDPA1(
set_davg_zero=True,
smooth_type_embedding=True,
use_econf_tebd=False,
+ use_tebd_bias=False,
precision="float64",
):
input_dict = {
@@ -214,6 +215,7 @@ def DescriptorParamDPA1(
"set_davg_zero": set_davg_zero,
"smooth_type_embedding": smooth_type_embedding,
"use_econf_tebd": use_econf_tebd,
+ "use_tebd_bias": use_tebd_bias,
"precision": precision,
}
return input_dict
@@ -240,6 +242,7 @@ def DescriptorParamDPA1(
"smooth_type_embedding": (True, False),
"concat_output_tebd": (True,),
"use_econf_tebd": (False, True),
+ "use_tebd_bias": (False,),
"precision": ("float64",),
}
),
@@ -274,6 +277,7 @@ def DescriptorParamDPA2(
smooth=True,
add_tebd_to_repinit_out=True,
use_econf_tebd=False,
+ use_tebd_bias=False,
env_protection=0.0,
exclude_types=[],
precision="float64",
@@ -335,6 +339,7 @@ def DescriptorParamDPA2(
"env_protection": env_protection,
"trainable": True,
"use_econf_tebd": use_econf_tebd,
+ "use_tebd_bias": use_tebd_bias,
"type_map": type_map,
"seed": GLOBAL_SEED,
"add_tebd_to_repinit_out": add_tebd_to_repinit_out,
@@ -368,6 +373,7 @@ def DescriptorParamDPA2(
"precision": ("float64",),
"add_tebd_to_repinit_out": (True, False),
"use_econf_tebd": (False,),
+ "use_tebd_bias": (False,),
}
),
)