Skip to content

Commit

Permalink
breaking(pt/tf/dp): disable bias in type embedding (deepmodeling#3958)
Browse files Browse the repository at this point in the history
This PR addresses an issue observed during training with DPA2 on complex
datasets, such as `mptraj`. Specifically, the **learning curves of
energy** from the **2024Q1-based branch** and the **devel branch** show
significant differences at the very beginning when setting `tebd_dim` =
256 (and thus descriptor `dim_out` = 128 + 256). The issue is
illustrated in the following image:

<img
src="https://github.com/deepmodeling/deepmd-kit/assets/50307526/701835a4-126f-4a93-91c7-f9e685c4dc9d"
alt="Example Image" width="500">


After removing the bias in the type embedding, which affects the
standard deviation of the descriptor when `tebd_dim` is very large, the
learning curve improves significantly:

<img
src="https://github.com/deepmodeling/deepmd-kit/assets/50307526/8915e7dd-1813-42bc-8617-fe8209bc6da1"
alt="Example Image" width="500">

Notably, this behavior is not prominent when `tebd_dim` is small
relative to the descriptor itself, such as when using DPA2 with
`tebd_dim` = 8 or when using DPA1.

The same issue exists in the econf type embedding, which will be
addressed in a separate PR.

**NOTE**
**This PR disables bias in type embedding in all backends, which is a
breaking change.**




<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
  - Introduced `use_tebd_bias` and `bias` parameters across various
    components to control the use of bias in type embeddings and networks.
  
- **Updates**
  - Updated serialization and deserialization methods to include the new
    parameters and ensure version compatibility.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
  • Loading branch information
iProzd authored and Mathieu Taillefumier committed Sep 18, 2024
1 parent 7e7436c commit 2d5add1
Show file tree
Hide file tree
Showing 33 changed files with 280 additions and 43 deletions.
13 changes: 11 additions & 2 deletions deepmd/dpmodel/descriptor/dpa1.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,8 @@ class DescrptDPA1(NativeOP, BaseDescriptor):
The default value is `None`, which means the `tebd_input_mode` setting will be used instead.
use_econf_tebd: bool, Optional
Whether to use electronic configuration type embedding.
use_tebd_bias : bool, Optional
Whether to use bias in the type embedding layer.
type_map: List[str], Optional
A list of strings. Give the name to each type of atoms.
spin
Expand Down Expand Up @@ -253,6 +255,7 @@ def __init__(
spin: Optional[Any] = None,
stripped_type_embedding: Optional[bool] = None,
use_econf_tebd: bool = False,
use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
# consistent with argcheck, not used though
seed: Optional[Union[int, List[int]]] = None,
Expand Down Expand Up @@ -301,6 +304,7 @@ def __init__(
seed=child_seed(seed, 0),
)
self.use_econf_tebd = use_econf_tebd
self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
self.type_embedding = TypeEmbedNet(
ntypes=ntypes,
Expand All @@ -309,6 +313,7 @@ def __init__(
activation_function="Linear",
precision=precision,
use_econf_tebd=use_econf_tebd,
use_tebd_bias=use_tebd_bias,
type_map=type_map,
seed=child_seed(seed, 1),
)
Expand Down Expand Up @@ -491,7 +496,7 @@ def serialize(self) -> dict:
data = {
"@class": "Descriptor",
"type": "dpa1",
"@version": 1,
"@version": 2,
"rcut": obj.rcut,
"rcut_smth": obj.rcut_smth,
"sel": obj.sel,
Expand All @@ -516,6 +521,7 @@ def serialize(self) -> dict:
"type_one_side": obj.type_one_side,
"concat_output_tebd": self.concat_output_tebd,
"use_econf_tebd": self.use_econf_tebd,
"use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
# make deterministic
"precision": np.dtype(PRECISION_DICT[obj.precision]).name,
Expand All @@ -541,7 +547,7 @@ def serialize(self) -> dict:
def deserialize(cls, data: dict) -> "DescrptDPA1":
"""Deserialize from dict."""
data = data.copy()
check_version_compatibility(data.pop("@version"), 1, 1)
check_version_compatibility(data.pop("@version"), 2, 1)
data.pop("@class")
data.pop("type")
variables = data.pop("@variables")
Expand All @@ -554,6 +560,9 @@ def deserialize(cls, data: dict) -> "DescrptDPA1":
embeddings_strip = data.pop("embeddings_strip")
else:
embeddings_strip = None
# compat with version 1
if "use_tebd_bias" not in data:
data["use_tebd_bias"] = True
obj = cls(**data)

obj.se_atten["davg"] = variables["davg"]
Expand Down
13 changes: 11 additions & 2 deletions deepmd/dpmodel/descriptor/dpa2.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,7 @@ def __init__(
seed: Optional[Union[int, List[int]]] = None,
add_tebd_to_repinit_out: bool = False,
use_econf_tebd: bool = False,
use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
):
r"""The DPA-2 descriptor. see https://arxiv.org/abs/2312.15492.
Expand Down Expand Up @@ -361,6 +362,8 @@ def __init__(
Whether to add type embedding to the output representation from repinit before inputting it into repformer.
use_econf_tebd : bool, Optional
Whether to use electronic configuration type embedding.
use_tebd_bias : bool, Optional
Whether to use bias in the type embedding layer.
type_map : List[str], Optional
A list of strings. Give the name to each type of atoms.
Expand Down Expand Up @@ -449,6 +452,7 @@ def init_subclass_params(sub_data, sub_class):
seed=child_seed(seed, 1),
)
self.use_econf_tebd = use_econf_tebd
self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
self.type_embedding = TypeEmbedNet(
ntypes=ntypes,
Expand All @@ -457,6 +461,7 @@ def init_subclass_params(sub_data, sub_class):
activation_function="Linear",
precision=precision,
use_econf_tebd=use_econf_tebd,
use_tebd_bias=use_tebd_bias,
type_map=type_map,
seed=child_seed(seed, 2),
)
Expand Down Expand Up @@ -720,7 +725,7 @@ def serialize(self) -> dict:
data = {
"@class": "Descriptor",
"type": "dpa2",
"@version": 1,
"@version": 2,
"ntypes": self.ntypes,
"repinit_args": self.repinit_args.serialize(),
"repformer_args": self.repformer_args.serialize(),
Expand All @@ -732,6 +737,7 @@ def serialize(self) -> dict:
"trainable": self.trainable,
"add_tebd_to_repinit_out": self.add_tebd_to_repinit_out,
"use_econf_tebd": self.use_econf_tebd,
"use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
"type_embedding": self.type_embedding.serialize(),
"g1_shape_tranform": self.g1_shape_tranform.serialize(),
Expand Down Expand Up @@ -774,7 +780,7 @@ def serialize(self) -> dict:
@classmethod
def deserialize(cls, data: dict) -> "DescrptDPA2":
data = data.copy()
check_version_compatibility(data.pop("@version"), 1, 1)
check_version_compatibility(data.pop("@version"), 2, 1)
data.pop("@class")
data.pop("type")
repinit_variable = data.pop("repinit_variable").copy()
Expand All @@ -785,6 +791,9 @@ def deserialize(cls, data: dict) -> "DescrptDPA2":
add_tebd_to_repinit_out = data["add_tebd_to_repinit_out"]
data["repinit"] = RepinitArgs(**data.pop("repinit_args"))
data["repformer"] = RepformerArgs(**data.pop("repformer_args"))
# compat with version 1
if "use_tebd_bias" not in data:
data["use_tebd_bias"] = True
obj = cls(**data)
obj.type_embedding = TypeEmbedNet.deserialize(type_embedding)
if add_tebd_to_repinit_out:
Expand Down
10 changes: 8 additions & 2 deletions deepmd/dpmodel/descriptor/se_atten_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def __init__(
spin: Optional[Any] = None,
stripped_type_embedding: Optional[bool] = None,
use_econf_tebd: bool = False,
use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
# consistent with argcheck, not used though
seed: Optional[Union[int, List[int]]] = None,
Expand Down Expand Up @@ -100,6 +101,7 @@ def __init__(
spin=spin,
stripped_type_embedding=stripped_type_embedding,
use_econf_tebd=use_econf_tebd,
use_tebd_bias=use_tebd_bias,
type_map=type_map,
# consistent with argcheck, not used though
seed=seed,
Expand All @@ -111,7 +113,7 @@ def serialize(self) -> dict:
data = {
"@class": "Descriptor",
"type": "se_atten_v2",
"@version": 1,
"@version": 2,
"rcut": obj.rcut,
"rcut_smth": obj.rcut_smth,
"sel": obj.sel,
Expand All @@ -134,6 +136,7 @@ def serialize(self) -> dict:
"type_one_side": obj.type_one_side,
"concat_output_tebd": self.concat_output_tebd,
"use_econf_tebd": self.use_econf_tebd,
"use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
# make deterministic
"precision": np.dtype(PRECISION_DICT[obj.precision]).name,
Expand All @@ -158,7 +161,7 @@ def serialize(self) -> dict:
def deserialize(cls, data: dict) -> "DescrptSeAttenV2":
"""Deserialize from dict."""
data = data.copy()
check_version_compatibility(data.pop("@version"), 1, 1)
check_version_compatibility(data.pop("@version"), 2, 1)
data.pop("@class")
data.pop("type")
variables = data.pop("@variables")
Expand All @@ -167,6 +170,9 @@ def deserialize(cls, data: dict) -> "DescrptSeAttenV2":
attention_layers = data.pop("attention_layers")
data.pop("env_mat")
embeddings_strip = data.pop("embeddings_strip")
# compat with version 1
if "use_tebd_bias" not in data:
data["use_tebd_bias"] = True
obj = cls(**data)

obj.se_atten["davg"] = variables["davg"]
Expand Down
12 changes: 9 additions & 3 deletions deepmd/dpmodel/utils/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,8 @@ class EN(T_Network):
Floating point precision for the model parameters.
seed : int, optional
Random seed.
bias : bool, Optional
Whether to use bias in the embedding layer.
"""

def __init__(
Expand All @@ -581,6 +583,7 @@ def __init__(
resnet_dt: bool = False,
precision: str = DEFAULT_PRECISION,
seed: Optional[Union[int, List[int]]] = None,
bias: bool = True,
):
layers = []
i_in = in_dim
Expand All @@ -590,7 +593,7 @@ def __init__(
T_NetworkLayer(
i_in,
i_ot,
bias=True,
bias=bias,
use_timestep=resnet_dt,
activation_function=activation_function,
resnet=True,
Expand All @@ -605,6 +608,7 @@ def __init__(
self.activation_function = activation_function
self.resnet_dt = resnet_dt
self.precision = precision
self.bias = bias

def serialize(self) -> dict:
"""Serialize the network to a dict.
Expand All @@ -616,11 +620,12 @@ def serialize(self) -> dict:
"""
return {
"@class": "EmbeddingNetwork",
"@version": 1,
"@version": 2,
"in_dim": self.in_dim,
"neuron": self.neuron.copy(),
"activation_function": self.activation_function,
"resnet_dt": self.resnet_dt,
"bias": self.bias,
# make deterministic
"precision": np.dtype(PRECISION_DICT[self.precision]).name,
"layers": [layer.serialize() for layer in self.layers],
Expand All @@ -636,7 +641,7 @@ def deserialize(cls, data: dict) -> "EmbeddingNet":
The dict to deserialize from.
"""
data = copy.deepcopy(data)
check_version_compatibility(data.pop("@version", 1), 1, 1)
check_version_compatibility(data.pop("@version", 1), 2, 1)
data.pop("@class", None)
layers = data.pop("layers")
obj = cls(**data)
Expand Down Expand Up @@ -691,6 +696,7 @@ def __init__(
activation_function=activation_function,
resnet_dt=resnet_dt,
precision=precision,
seed=seed,
)
i_in = neuron[-1] if len(neuron) > 0 else in_dim
i_ot = out_dim
Expand Down
13 changes: 11 additions & 2 deletions deepmd/dpmodel/utils/type_embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ class TypeEmbedNet(NativeOP):
Concat the zero padding to the output, as the default embedding of empty type.
use_econf_tebd: bool, Optional
Whether to use electronic configuration type embedding.
use_tebd_bias : bool, Optional
Whether to use bias in the type embedding layer.
type_map: List[str], Optional
A list of strings. Give the name to each type of atoms.
"""
Expand All @@ -61,6 +63,7 @@ def __init__(
seed: Optional[Union[int, List[int]]] = None,
padding: bool = False,
use_econf_tebd: bool = False,
use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
) -> None:
self.ntypes = ntypes
Expand All @@ -72,6 +75,7 @@ def __init__(
self.trainable = trainable
self.padding = padding
self.use_econf_tebd = use_econf_tebd
self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
embed_input_dim = ntypes
if self.use_econf_tebd:
Expand All @@ -85,6 +89,7 @@ def __init__(
self.resnet_dt,
self.precision,
seed=self.seed,
bias=self.use_tebd_bias,
)

def call(self) -> np.ndarray:
Expand Down Expand Up @@ -114,11 +119,14 @@ def deserialize(cls, data: dict):
The deserialized model
"""
data = data.copy()
check_version_compatibility(data.pop("@version", 1), 1, 1)
check_version_compatibility(data.pop("@version", 1), 2, 1)
data_cls = data.pop("@class")
assert data_cls == "TypeEmbedNet", f"Invalid class {data_cls}"

embedding_net = EmbeddingNet.deserialize(data.pop("embedding"))
# compat with version 1
if "use_tebd_bias" not in data:
data["use_tebd_bias"] = True
type_embedding_net = cls(**data)
type_embedding_net.embedding_net = embedding_net
return type_embedding_net
Expand All @@ -133,7 +141,7 @@ def serialize(self) -> dict:
"""
return {
"@class": "TypeEmbedNet",
"@version": 1,
"@version": 2,
"ntypes": self.ntypes,
"neuron": self.neuron,
"resnet_dt": self.resnet_dt,
Expand All @@ -142,6 +150,7 @@ def serialize(self) -> dict:
"trainable": self.trainable,
"padding": self.padding,
"use_econf_tebd": self.use_econf_tebd,
"use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
"embedding": self.embedding_net.serialize(),
}
Expand Down
13 changes: 11 additions & 2 deletions deepmd/pt/model/descriptor/dpa1.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ class DescrptDPA1(BaseDescriptor, torch.nn.Module):
Random seed for parameter initialization.
use_econf_tebd: bool, Optional
Whether to use electronic configuration type embedding.
use_tebd_bias : bool, Optional
Whether to use bias in the type embedding layer.
type_map: List[str], Optional
A list of strings. Give the name to each type of atoms.
spin
Expand Down Expand Up @@ -241,6 +243,7 @@ def __init__(
stripped_type_embedding: Optional[bool] = None,
seed: Optional[Union[int, List[int]]] = None,
use_econf_tebd: bool = False,
use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
# not implemented
spin=None,
Expand Down Expand Up @@ -293,13 +296,15 @@ def __init__(
old_impl=old_impl,
)
self.use_econf_tebd = use_econf_tebd
self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
self.type_embedding = TypeEmbedNet(
ntypes,
tebd_dim,
precision=precision,
seed=child_seed(seed, 2),
use_econf_tebd=use_econf_tebd,
use_tebd_bias=use_tebd_bias,
type_map=type_map,
)
self.tebd_dim = tebd_dim
Expand Down Expand Up @@ -462,7 +467,7 @@ def serialize(self) -> dict:
data = {
"@class": "Descriptor",
"type": "dpa1",
"@version": 1,
"@version": 2,
"rcut": obj.rcut,
"rcut_smth": obj.rcut_smth,
"sel": obj.sel,
Expand All @@ -487,6 +492,7 @@ def serialize(self) -> dict:
"type_one_side": obj.type_one_side,
"concat_output_tebd": self.concat_output_tebd,
"use_econf_tebd": self.use_econf_tebd,
"use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
# make deterministic
"precision": RESERVED_PRECISON_DICT[obj.prec],
Expand All @@ -510,7 +516,7 @@ def serialize(self) -> dict:
@classmethod
def deserialize(cls, data: dict) -> "DescrptDPA1":
data = data.copy()
check_version_compatibility(data.pop("@version"), 1, 1)
check_version_compatibility(data.pop("@version"), 2, 1)
data.pop("@class")
data.pop("type")
variables = data.pop("@variables")
Expand All @@ -523,6 +529,9 @@ def deserialize(cls, data: dict) -> "DescrptDPA1":
embeddings_strip = data.pop("embeddings_strip")
else:
embeddings_strip = None
# compat with version 1
if "use_tebd_bias" not in data:
data["use_tebd_bias"] = True
obj = cls(**data)

def t_cvt(xx):
Expand Down
Loading

0 comments on commit 2d5add1

Please sign in to comment.