From 2d5add198dec9a6d7cc961bda1cb288c3ac9b0ee Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Thu, 11 Jul 2024 09:10:53 +0800
Subject: [PATCH] breaking(pt/tf/dp): disable bias in type embedding (#3958)
This PR addresses an issue observed when training DPA2 on complex
datasets such as `mptraj`. Specifically, the **learning curves of
energy** from the **2024Q1-based branch** and the **devel branch** differ
significantly at the very beginning of training when `tebd_dim` is set
to 256 (and thus descriptor `dim_out` = 128 + 256). The issue is
illustrated in the first image attached to the PR.
After removing the bias from the type embedding, which skews the
standard deviation of the descriptor when `tebd_dim` is very large, the
learning curve improves significantly (second attached image); a minimal
sketch of this effect appears below.
Notably, this behavior is not prominent when `tebd_dim` is relatively
small compared to the descriptor dimension itself, for example DPA2 with
`tebd_dim` = 8, or DPA1.
The same issue exists in the econf type embedding; it will be fixed in a
separate PR.
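A minimal numpy sketch of the effect (not the deepmd-kit implementation;
it assumes a single linear type-embedding layer with the usual
1/sqrt(fan) weight scaling and an O(1) random bias, mirroring the TF
initializers touched below):

```python
# Toy illustration, NOT deepmd-kit code: an O(1) random bias gives every
# type-embedding channel an O(1) offset, so at tebd_dim = 256 the tebd part
# dominates the statistics of the concatenated descriptor; without the bias
# the tebd std is set by the ~1/sqrt(fan) weights and stays small.
import numpy as np

rng = np.random.default_rng(0)
ntypes, dim_out = 4, 128
one_hot = np.eye(ntypes)

for tebd_dim in (8, 256):
    w = rng.normal(0.0, 1.0 / np.sqrt(ntypes + tebd_dim), (ntypes, tebd_dim))
    b = rng.normal(0.0, 1.0, tebd_dim)  # bias initializer has stddev ~ 1
    desc_part = rng.normal(0.0, 1.0, (ntypes, dim_out))  # stand-in for dim_out part
    for label, tebd in (("with bias", one_hot @ w + b), ("no bias", one_hot @ w)):
        desc = np.concatenate([desc_part, tebd], axis=-1)
        print(f"tebd_dim={tebd_dim:3d}  {label:9s}  descriptor std = {desc.std():.3f}")
```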
**NOTE**
**This PR disables the bias in the type embedding across all backends,
which is a breaking change.**
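Existing checkpoints keep their bias on deserialization (see the
compatibility sketch after the summary), but new training runs must now
request the old behavior explicitly. A hedged example, using the key
registered in `deepmd/utils/argcheck.py` below:

```python
# Re-enabling the bias for a new training run. The key name and its new
# default (False) come from type_embedding_args() in this patch.
type_embedding_block = {
    "neuron": [8],
    "use_tebd_bias": True,  # pre-patch networks always carried a bias
}
```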
## Summary by CodeRabbit
- **New Features**
- Introduced `use_tebd_bias` and `bias` parameters across various
components to control the use of bias in type embeddings and networks.
- **Updates**
- Updated serialization and deserialization methods to include the new
parameters and ensure version compatibility.
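The version-compatibility rule repeated across the `deserialize` methods
below boils down to the following sketch (`_default_use_tebd_bias` is an
illustrative helper name, not part of the patch):

```python
# v1 checkpoints were always trained with a bias, so the flag defaults to
# True when absent; v2 data serializes "use_tebd_bias" explicitly.
def _default_use_tebd_bias(data: dict) -> dict:
    data = data.copy()
    # compat with version 1
    if "use_tebd_bias" not in data:
        data["use_tebd_bias"] = True
    return data

assert _default_use_tebd_bias({"@version": 1})["use_tebd_bias"] is True
assert not _default_use_tebd_bias({"@version": 2, "use_tebd_bias": False})["use_tebd_bias"]
```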
---
deepmd/dpmodel/descriptor/dpa1.py | 13 ++++-
deepmd/dpmodel/descriptor/dpa2.py | 13 ++++-
deepmd/dpmodel/descriptor/se_atten_v2.py | 10 +++-
deepmd/dpmodel/utils/network.py | 12 +++--
deepmd/dpmodel/utils/type_embed.py | 13 ++++-
deepmd/pt/model/descriptor/dpa1.py | 13 ++++-
deepmd/pt/model/descriptor/dpa2.py | 13 ++++-
deepmd/pt/model/descriptor/se_atten_v2.py | 30 ++++++++++-
deepmd/pt/model/network/network.py | 16 +++++-
deepmd/tf/descriptor/se_atten.py | 12 ++++-
deepmd/tf/utils/network.py | 52 +++++++++++++------
deepmd/tf/utils/type_embed.py | 21 ++++++--
deepmd/utils/argcheck.py | 32 ++++++++++++
.../tests/consistent/descriptor/test_dpa1.py | 9 ++++
.../tests/consistent/descriptor/test_dpa2.py | 9 ++++
.../consistent/descriptor/test_se_atten_v2.py | 7 +++
.../tests/consistent/test_type_embedding.py | 6 +++
source/tests/pt/model/test_descriptor_dpa1.py | 3 +-
source/tests/pt/model/test_descriptor_dpa2.py | 1 +
source/tests/tf/pairwise_dprc.json | 1 +
source/tests/tf/test_data_large_batch.py | 3 ++
source/tests/tf/test_descrpt_hybrid.py | 1 +
source/tests/tf/test_descrpt_se_a_type.py | 2 +
source/tests/tf/test_descrpt_se_atten.py | 4 ++
source/tests/tf/test_dipole_se_a_tebd.py | 1 +
source/tests/tf/test_model_se_a.py | 1 +
source/tests/tf/test_model_se_a_ebd_v2.py | 1 +
source/tests/tf/test_model_se_a_type.py | 1 +
source/tests/tf/test_model_se_atten.py | 8 +++
source/tests/tf/test_nvnmd_entrypoints.py | 4 ++
source/tests/tf/test_polar_se_a_tebd.py | 1 +
source/tests/tf/test_type_embed.py | 4 +-
.../dpmodel/descriptor/test_descriptor.py | 6 +++
33 files changed, 280 insertions(+), 43 deletions(-)
diff --git a/deepmd/dpmodel/descriptor/dpa1.py b/deepmd/dpmodel/descriptor/dpa1.py
index a0227e13c9..4eae05560f 100644
--- a/deepmd/dpmodel/descriptor/dpa1.py
+++ b/deepmd/dpmodel/descriptor/dpa1.py
@@ -201,6 +201,8 @@ class DescrptDPA1(NativeOP, BaseDescriptor):
The default value is `None`, which means the `tebd_input_mode` setting will be used instead.
use_econf_tebd: bool, Optional
Whether to use electronic configuration type embedding.
+ use_tebd_bias : bool, Optional
+ Whether to use bias in the type embedding layer.
type_map: List[str], Optional
A list of strings. Give the name to each type of atoms.
spin
@@ -253,6 +255,7 @@ def __init__(
spin: Optional[Any] = None,
stripped_type_embedding: Optional[bool] = None,
use_econf_tebd: bool = False,
+ use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
# consistent with argcheck, not used though
seed: Optional[Union[int, List[int]]] = None,
@@ -301,6 +304,7 @@ def __init__(
seed=child_seed(seed, 0),
)
self.use_econf_tebd = use_econf_tebd
+ self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
self.type_embedding = TypeEmbedNet(
ntypes=ntypes,
@@ -309,6 +313,7 @@ def __init__(
activation_function="Linear",
precision=precision,
use_econf_tebd=use_econf_tebd,
+ use_tebd_bias=use_tebd_bias,
type_map=type_map,
seed=child_seed(seed, 1),
)
@@ -491,7 +496,7 @@ def serialize(self) -> dict:
data = {
"@class": "Descriptor",
"type": "dpa1",
- "@version": 1,
+ "@version": 2,
"rcut": obj.rcut,
"rcut_smth": obj.rcut_smth,
"sel": obj.sel,
@@ -516,6 +521,7 @@ def serialize(self) -> dict:
"type_one_side": obj.type_one_side,
"concat_output_tebd": self.concat_output_tebd,
"use_econf_tebd": self.use_econf_tebd,
+ "use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
# make deterministic
"precision": np.dtype(PRECISION_DICT[obj.precision]).name,
@@ -541,7 +547,7 @@ def serialize(self) -> dict:
def deserialize(cls, data: dict) -> "DescrptDPA1":
"""Deserialize from dict."""
data = data.copy()
- check_version_compatibility(data.pop("@version"), 1, 1)
+ check_version_compatibility(data.pop("@version"), 2, 1)
data.pop("@class")
data.pop("type")
variables = data.pop("@variables")
@@ -554,6 +560,9 @@ def deserialize(cls, data: dict) -> "DescrptDPA1":
embeddings_strip = data.pop("embeddings_strip")
else:
embeddings_strip = None
+ # compat with version 1
+ if "use_tebd_bias" not in data:
+ data["use_tebd_bias"] = True
obj = cls(**data)
obj.se_atten["davg"] = variables["davg"]
diff --git a/deepmd/dpmodel/descriptor/dpa2.py b/deepmd/dpmodel/descriptor/dpa2.py
index 86ed7ec40c..5fcf1e27b9 100644
--- a/deepmd/dpmodel/descriptor/dpa2.py
+++ b/deepmd/dpmodel/descriptor/dpa2.py
@@ -331,6 +331,7 @@ def __init__(
seed: Optional[Union[int, List[int]]] = None,
add_tebd_to_repinit_out: bool = False,
use_econf_tebd: bool = False,
+ use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
):
r"""The DPA-2 descriptor. see https://arxiv.org/abs/2312.15492.
@@ -361,6 +362,8 @@ def __init__(
Whether to add type embedding to the output representation from repinit before inputting it into repformer.
use_econf_tebd : bool, Optional
Whether to use electronic configuration type embedding.
+ use_tebd_bias : bool, Optional
+ Whether to use bias in the type embedding layer.
type_map : List[str], Optional
A list of strings. Give the name to each type of atoms.
@@ -449,6 +452,7 @@ def init_subclass_params(sub_data, sub_class):
seed=child_seed(seed, 1),
)
self.use_econf_tebd = use_econf_tebd
+ self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
self.type_embedding = TypeEmbedNet(
ntypes=ntypes,
@@ -457,6 +461,7 @@ def init_subclass_params(sub_data, sub_class):
activation_function="Linear",
precision=precision,
use_econf_tebd=use_econf_tebd,
+ use_tebd_bias=use_tebd_bias,
type_map=type_map,
seed=child_seed(seed, 2),
)
@@ -720,7 +725,7 @@ def serialize(self) -> dict:
data = {
"@class": "Descriptor",
"type": "dpa2",
- "@version": 1,
+ "@version": 2,
"ntypes": self.ntypes,
"repinit_args": self.repinit_args.serialize(),
"repformer_args": self.repformer_args.serialize(),
@@ -732,6 +737,7 @@ def serialize(self) -> dict:
"trainable": self.trainable,
"add_tebd_to_repinit_out": self.add_tebd_to_repinit_out,
"use_econf_tebd": self.use_econf_tebd,
+ "use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
"type_embedding": self.type_embedding.serialize(),
"g1_shape_tranform": self.g1_shape_tranform.serialize(),
@@ -774,7 +780,7 @@ def serialize(self) -> dict:
@classmethod
def deserialize(cls, data: dict) -> "DescrptDPA2":
data = data.copy()
- check_version_compatibility(data.pop("@version"), 1, 1)
+ check_version_compatibility(data.pop("@version"), 2, 1)
data.pop("@class")
data.pop("type")
repinit_variable = data.pop("repinit_variable").copy()
@@ -785,6 +791,9 @@ def deserialize(cls, data: dict) -> "DescrptDPA2":
add_tebd_to_repinit_out = data["add_tebd_to_repinit_out"]
data["repinit"] = RepinitArgs(**data.pop("repinit_args"))
data["repformer"] = RepformerArgs(**data.pop("repformer_args"))
+ # compat with version 1
+ if "use_tebd_bias" not in data:
+ data["use_tebd_bias"] = True
obj = cls(**data)
obj.type_embedding = TypeEmbedNet.deserialize(type_embedding)
if add_tebd_to_repinit_out:
diff --git a/deepmd/dpmodel/descriptor/se_atten_v2.py b/deepmd/dpmodel/descriptor/se_atten_v2.py
index 037222076e..d29580062c 100644
--- a/deepmd/dpmodel/descriptor/se_atten_v2.py
+++ b/deepmd/dpmodel/descriptor/se_atten_v2.py
@@ -64,6 +64,7 @@ def __init__(
spin: Optional[Any] = None,
stripped_type_embedding: Optional[bool] = None,
use_econf_tebd: bool = False,
+ use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
# consistent with argcheck, not used though
seed: Optional[Union[int, List[int]]] = None,
@@ -100,6 +101,7 @@ def __init__(
spin=spin,
stripped_type_embedding=stripped_type_embedding,
use_econf_tebd=use_econf_tebd,
+ use_tebd_bias=use_tebd_bias,
type_map=type_map,
# consistent with argcheck, not used though
seed=seed,
@@ -111,7 +113,7 @@ def serialize(self) -> dict:
data = {
"@class": "Descriptor",
"type": "se_atten_v2",
- "@version": 1,
+ "@version": 2,
"rcut": obj.rcut,
"rcut_smth": obj.rcut_smth,
"sel": obj.sel,
@@ -134,6 +136,7 @@ def serialize(self) -> dict:
"type_one_side": obj.type_one_side,
"concat_output_tebd": self.concat_output_tebd,
"use_econf_tebd": self.use_econf_tebd,
+ "use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
# make deterministic
"precision": np.dtype(PRECISION_DICT[obj.precision]).name,
@@ -158,7 +161,7 @@ def serialize(self) -> dict:
def deserialize(cls, data: dict) -> "DescrptSeAttenV2":
"""Deserialize from dict."""
data = data.copy()
- check_version_compatibility(data.pop("@version"), 1, 1)
+ check_version_compatibility(data.pop("@version"), 2, 1)
data.pop("@class")
data.pop("type")
variables = data.pop("@variables")
@@ -167,6 +170,9 @@ def deserialize(cls, data: dict) -> "DescrptSeAttenV2":
attention_layers = data.pop("attention_layers")
data.pop("env_mat")
embeddings_strip = data.pop("embeddings_strip")
+ # compat with version 1
+ if "use_tebd_bias" not in data:
+ data["use_tebd_bias"] = True
obj = cls(**data)
obj.se_atten["davg"] = variables["davg"]
diff --git a/deepmd/dpmodel/utils/network.py b/deepmd/dpmodel/utils/network.py
index 53a170ac4a..941e2cfc86 100644
--- a/deepmd/dpmodel/utils/network.py
+++ b/deepmd/dpmodel/utils/network.py
@@ -571,6 +571,8 @@ class EN(T_Network):
Floating point precision for the model paramters.
seed : int, optional
Random seed.
+ bias : bool, Optional
+ Whether to use bias in the embedding layer.
"""
def __init__(
@@ -581,6 +583,7 @@ def __init__(
resnet_dt: bool = False,
precision: str = DEFAULT_PRECISION,
seed: Optional[Union[int, List[int]]] = None,
+ bias: bool = True,
):
layers = []
i_in = in_dim
@@ -590,7 +593,7 @@ def __init__(
T_NetworkLayer(
i_in,
i_ot,
- bias=True,
+ bias=bias,
use_timestep=resnet_dt,
activation_function=activation_function,
resnet=True,
@@ -605,6 +608,7 @@ def __init__(
self.activation_function = activation_function
self.resnet_dt = resnet_dt
self.precision = precision
+ self.bias = bias
def serialize(self) -> dict:
"""Serialize the network to a dict.
@@ -616,11 +620,12 @@ def serialize(self) -> dict:
"""
return {
"@class": "EmbeddingNetwork",
- "@version": 1,
+ "@version": 2,
"in_dim": self.in_dim,
"neuron": self.neuron.copy(),
"activation_function": self.activation_function,
"resnet_dt": self.resnet_dt,
+ "bias": self.bias,
# make deterministic
"precision": np.dtype(PRECISION_DICT[self.precision]).name,
"layers": [layer.serialize() for layer in self.layers],
@@ -636,7 +641,7 @@ def deserialize(cls, data: dict) -> "EmbeddingNet":
The dict to deserialize from.
"""
data = copy.deepcopy(data)
- check_version_compatibility(data.pop("@version", 1), 1, 1)
+ check_version_compatibility(data.pop("@version", 1), 2, 1)
data.pop("@class", None)
layers = data.pop("layers")
obj = cls(**data)
@@ -691,6 +696,7 @@ def __init__(
activation_function=activation_function,
resnet_dt=resnet_dt,
precision=precision,
+ seed=seed,
)
i_in = neuron[-1] if len(neuron) > 0 else in_dim
i_ot = out_dim
diff --git a/deepmd/dpmodel/utils/type_embed.py b/deepmd/dpmodel/utils/type_embed.py
index 0db064fb5e..cf9b31e607 100644
--- a/deepmd/dpmodel/utils/type_embed.py
+++ b/deepmd/dpmodel/utils/type_embed.py
@@ -45,6 +45,8 @@ class TypeEmbedNet(NativeOP):
Concat the zero padding to the output, as the default embedding of empty type.
use_econf_tebd: bool, Optional
Whether to use electronic configuration type embedding.
+ use_tebd_bias : bool, Optional
+ Whether to use bias in the type embedding layer.
type_map: List[str], Optional
A list of strings. Give the name to each type of atoms.
"""
@@ -61,6 +63,7 @@ def __init__(
seed: Optional[Union[int, List[int]]] = None,
padding: bool = False,
use_econf_tebd: bool = False,
+ use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
) -> None:
self.ntypes = ntypes
@@ -72,6 +75,7 @@ def __init__(
self.trainable = trainable
self.padding = padding
self.use_econf_tebd = use_econf_tebd
+ self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
embed_input_dim = ntypes
if self.use_econf_tebd:
@@ -85,6 +89,7 @@ def __init__(
self.resnet_dt,
self.precision,
seed=self.seed,
+ bias=self.use_tebd_bias,
)
def call(self) -> np.ndarray:
@@ -114,11 +119,14 @@ def deserialize(cls, data: dict):
The deserialized model
"""
data = data.copy()
- check_version_compatibility(data.pop("@version", 1), 1, 1)
+ check_version_compatibility(data.pop("@version", 1), 2, 1)
data_cls = data.pop("@class")
assert data_cls == "TypeEmbedNet", f"Invalid class {data_cls}"
embedding_net = EmbeddingNet.deserialize(data.pop("embedding"))
+ # compat with version 1
+ if "use_tebd_bias" not in data:
+ data["use_tebd_bias"] = True
type_embedding_net = cls(**data)
type_embedding_net.embedding_net = embedding_net
return type_embedding_net
@@ -133,7 +141,7 @@ def serialize(self) -> dict:
"""
return {
"@class": "TypeEmbedNet",
- "@version": 1,
+ "@version": 2,
"ntypes": self.ntypes,
"neuron": self.neuron,
"resnet_dt": self.resnet_dt,
@@ -142,6 +150,7 @@ def serialize(self) -> dict:
"trainable": self.trainable,
"padding": self.padding,
"use_econf_tebd": self.use_econf_tebd,
+ "use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
"embedding": self.embedding_net.serialize(),
}
diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py
index e6dba7f88f..0bc4a03807 100644
--- a/deepmd/pt/model/descriptor/dpa1.py
+++ b/deepmd/pt/model/descriptor/dpa1.py
@@ -189,6 +189,8 @@ class DescrptDPA1(BaseDescriptor, torch.nn.Module):
Random seed for parameter initialization.
use_econf_tebd: bool, Optional
Whether to use electronic configuration type embedding.
+ use_tebd_bias : bool, Optional
+ Whether to use bias in the type embedding layer.
type_map: List[str], Optional
A list of strings. Give the name to each type of atoms.
spin
@@ -241,6 +243,7 @@ def __init__(
stripped_type_embedding: Optional[bool] = None,
seed: Optional[Union[int, List[int]]] = None,
use_econf_tebd: bool = False,
+ use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
# not implemented
spin=None,
@@ -293,6 +296,7 @@ def __init__(
old_impl=old_impl,
)
self.use_econf_tebd = use_econf_tebd
+ self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
self.type_embedding = TypeEmbedNet(
ntypes,
@@ -300,6 +304,7 @@ def __init__(
precision=precision,
seed=child_seed(seed, 2),
use_econf_tebd=use_econf_tebd,
+ use_tebd_bias=use_tebd_bias,
type_map=type_map,
)
self.tebd_dim = tebd_dim
@@ -462,7 +467,7 @@ def serialize(self) -> dict:
data = {
"@class": "Descriptor",
"type": "dpa1",
- "@version": 1,
+ "@version": 2,
"rcut": obj.rcut,
"rcut_smth": obj.rcut_smth,
"sel": obj.sel,
@@ -487,6 +492,7 @@ def serialize(self) -> dict:
"type_one_side": obj.type_one_side,
"concat_output_tebd": self.concat_output_tebd,
"use_econf_tebd": self.use_econf_tebd,
+ "use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
# make deterministic
"precision": RESERVED_PRECISON_DICT[obj.prec],
@@ -510,7 +516,7 @@ def serialize(self) -> dict:
@classmethod
def deserialize(cls, data: dict) -> "DescrptDPA1":
data = data.copy()
- check_version_compatibility(data.pop("@version"), 1, 1)
+ check_version_compatibility(data.pop("@version"), 2, 1)
data.pop("@class")
data.pop("type")
variables = data.pop("@variables")
@@ -523,6 +529,9 @@ def deserialize(cls, data: dict) -> "DescrptDPA1":
embeddings_strip = data.pop("embeddings_strip")
else:
embeddings_strip = None
+ # compat with version 1
+ if "use_tebd_bias" not in data:
+ data["use_tebd_bias"] = True
obj = cls(**data)
def t_cvt(xx):
diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py
index aa5aa11f67..4d830ace1b 100644
--- a/deepmd/pt/model/descriptor/dpa2.py
+++ b/deepmd/pt/model/descriptor/dpa2.py
@@ -90,6 +90,7 @@ def __init__(
seed: Optional[Union[int, List[int]]] = None,
add_tebd_to_repinit_out: bool = False,
use_econf_tebd: bool = False,
+ use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
old_impl: bool = False,
):
@@ -121,6 +122,8 @@ def __init__(
Whether to add type embedding to the output representation from repinit before inputting it into repformer.
use_econf_tebd : bool, Optional
Whether to use electronic configuration type embedding.
+ use_tebd_bias : bool, Optional
+ Whether to use bias in the type embedding layer.
type_map : List[str], Optional
A list of strings. Give the name to each type of atoms.
@@ -211,6 +214,7 @@ def init_subclass_params(sub_data, sub_class):
old_impl=old_impl,
)
self.use_econf_tebd = use_econf_tebd
+ self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
self.type_embedding = TypeEmbedNet(
ntypes,
@@ -218,6 +222,7 @@ def init_subclass_params(sub_data, sub_class):
precision=precision,
seed=child_seed(seed, 2),
use_econf_tebd=self.use_econf_tebd,
+ use_tebd_bias=use_tebd_bias,
type_map=type_map,
)
self.concat_output_tebd = concat_output_tebd
@@ -455,7 +460,7 @@ def serialize(self) -> dict:
data = {
"@class": "Descriptor",
"type": "dpa2",
- "@version": 1,
+ "@version": 2,
"ntypes": self.ntypes,
"repinit_args": self.repinit_args.serialize(),
"repformer_args": self.repformer_args.serialize(),
@@ -467,6 +472,7 @@ def serialize(self) -> dict:
"trainable": self.trainable,
"add_tebd_to_repinit_out": self.add_tebd_to_repinit_out,
"use_econf_tebd": self.use_econf_tebd,
+ "use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
"type_embedding": self.type_embedding.embedding.serialize(),
"g1_shape_tranform": self.g1_shape_tranform.serialize(),
@@ -509,7 +515,7 @@ def serialize(self) -> dict:
@classmethod
def deserialize(cls, data: dict) -> "DescrptDPA2":
data = data.copy()
- check_version_compatibility(data.pop("@version"), 1, 1)
+ check_version_compatibility(data.pop("@version"), 2, 1)
data.pop("@class")
data.pop("type")
repinit_variable = data.pop("repinit_variable").copy()
@@ -520,6 +526,9 @@ def deserialize(cls, data: dict) -> "DescrptDPA2":
add_tebd_to_repinit_out = data["add_tebd_to_repinit_out"]
data["repinit"] = RepinitArgs(**data.pop("repinit_args"))
data["repformer"] = RepformerArgs(**data.pop("repformer_args"))
+ # compat with version 1
+ if "use_tebd_bias" not in data:
+ data["use_tebd_bias"] = True
obj = cls(**data)
obj.type_embedding.embedding = TypeEmbedNetConsistent.deserialize(
type_embedding
diff --git a/deepmd/pt/model/descriptor/se_atten_v2.py b/deepmd/pt/model/descriptor/se_atten_v2.py
index 8fc6271665..41e37eb03c 100644
--- a/deepmd/pt/model/descriptor/se_atten_v2.py
+++ b/deepmd/pt/model/descriptor/se_atten_v2.py
@@ -68,6 +68,7 @@ def __init__(
stripped_type_embedding: Optional[bool] = None,
seed: Optional[Union[int, List[int]]] = None,
use_econf_tebd: bool = False,
+ use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
# not implemented
spin=None,
@@ -124,6 +125,10 @@ def __init__(
Whether to normalize the hidden vectors in attention weights calculation.
temperature : float
If not None, the scaling of attention weights is `temperature` itself.
+ concat_output_tebd : bool
+ Whether to concat type embedding at the output of the descriptor.
+ trainable : bool
+ If the weights of this descriptor are trainable.
trainable_ln : bool
Whether to use trainable shift and scale weights in layer normalization.
ln_eps : float, Optional
@@ -132,8 +137,24 @@ def __init__(
If 'False', type embeddings of both neighbor and central atoms are considered.
If 'True', only type embeddings of neighbor atoms are considered.
Default is 'False'.
+ stripped_type_embedding : bool, Optional
+ (Deprecated, kept only for compatibility.)
+ Whether to strip the type embedding into a separate embedding network.
+ Setting this parameter to `True` is equivalent to setting `tebd_input_mode` to 'strip'.
+ Setting it to `False` is equivalent to setting `tebd_input_mode` to 'concat'.
+ The default value is `None`, which means the `tebd_input_mode` setting will be used instead.
seed : int, Optional
Random seed for parameter initialization.
+ use_econf_tebd : bool, Optional
+ Whether to use electronic configuration type embedding.
+ use_tebd_bias : bool, Optional
+ Whether to use bias in the type embedding layer.
+ type_map : List[str], Optional
+ A list of strings. Give the name to each type of atoms.
+ spin
+ (Only `None` is supported, to stay consistent with other backends.)
+ (Not used in this version; a non-None option is not implemented.)
+ The old implementation of deepspin.
"""
DescrptDPA1.__init__(
self,
@@ -167,6 +188,7 @@ def __init__(
stripped_type_embedding=stripped_type_embedding,
seed=seed,
use_econf_tebd=use_econf_tebd,
+ use_tebd_bias=use_tebd_bias,
type_map=type_map,
# not implemented
spin=spin,
@@ -179,7 +201,7 @@ def serialize(self) -> dict:
data = {
"@class": "Descriptor",
"type": "se_atten_v2",
- "@version": 1,
+ "@version": 2,
"rcut": obj.rcut,
"rcut_smth": obj.rcut_smth,
"sel": obj.sel,
@@ -202,6 +224,7 @@ def serialize(self) -> dict:
"type_one_side": obj.type_one_side,
"concat_output_tebd": self.concat_output_tebd,
"use_econf_tebd": self.use_econf_tebd,
+ "use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
# make deterministic
"precision": RESERVED_PRECISON_DICT[obj.prec],
@@ -224,7 +247,7 @@ def serialize(self) -> dict:
@classmethod
def deserialize(cls, data: dict) -> "DescrptSeAttenV2":
data = data.copy()
- check_version_compatibility(data.pop("@version"), 1, 1)
+ check_version_compatibility(data.pop("@version"), 2, 1)
data.pop("@class")
data.pop("type")
variables = data.pop("@variables")
@@ -233,6 +256,9 @@ def deserialize(cls, data: dict) -> "DescrptSeAttenV2":
attention_layers = data.pop("attention_layers")
data.pop("env_mat")
embeddings_strip = data.pop("embeddings_strip")
+ # compat with version 1
+ if "use_tebd_bias" not in data:
+ data["use_tebd_bias"] = True
obj = cls(**data)
def t_cvt(xx):
diff --git a/deepmd/pt/model/network/network.py b/deepmd/pt/model/network/network.py
index 13599a77dd..0469462061 100644
--- a/deepmd/pt/model/network/network.py
+++ b/deepmd/pt/model/network/network.py
@@ -573,6 +573,7 @@ def __init__(
precision="default",
seed: Optional[Union[int, List[int]]] = None,
use_econf_tebd=False,
+ use_tebd_bias: bool = False,
type_map=None,
):
"""Construct a type embedding net."""
@@ -582,6 +583,7 @@ def __init__(
self.bavg = bavg
self.stddev = stddev
self.use_econf_tebd = use_econf_tebd
+ self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
self.embedding = TypeEmbedNetConsistent(
ntypes=self.type_nums,
@@ -589,6 +591,7 @@ def __init__(
padding=True,
activation_function="Linear",
use_econf_tebd=use_econf_tebd,
+ use_tebd_bias=use_tebd_bias,
type_map=type_map,
precision=precision,
seed=seed,
@@ -655,6 +658,8 @@ class TypeEmbedNetConsistent(nn.Module):
Concat the zero padding to the output, as the default embedding of empty type.
use_econf_tebd: bool, Optional
Whether to use electronic configuration type embedding.
+ use_tebd_bias : bool, Optional
+ Whether to use bias in the type embedding layer.
type_map: List[str], Optional
A list of strings. Give the name to each type of atoms.
"""
@@ -671,6 +676,7 @@ def __init__(
seed: Optional[Union[int, List[int]]] = None,
padding: bool = False,
use_econf_tebd: bool = False,
+ use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
):
"""Construct a type embedding net."""
@@ -685,6 +691,7 @@ def __init__(
self.trainable = trainable
self.padding = padding
self.use_econf_tebd = use_econf_tebd
+ self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
self.econf_tebd = None
embed_input_dim = ntypes
@@ -700,6 +707,7 @@ def __init__(
self.resnet_dt,
self.precision,
self.seed,
+ bias=self.use_tebd_bias,
)
for param in self.parameters():
param.requires_grad = trainable
@@ -802,11 +810,14 @@ def deserialize(cls, data: dict):
The deserialized model
"""
data = data.copy()
- check_version_compatibility(data.pop("@version", 1), 1, 1)
+ check_version_compatibility(data.pop("@version", 1), 2, 1)
data_cls = data.pop("@class")
assert data_cls == "TypeEmbedNet", f"Invalid class {data_cls}"
embedding_net = EmbeddingNet.deserialize(data.pop("embedding"))
+ # compat with version 1
+ if "use_tebd_bias" not in data:
+ data["use_tebd_bias"] = True
type_embedding_net = cls(**data)
type_embedding_net.embedding_net = embedding_net
return type_embedding_net
@@ -821,7 +832,7 @@ def serialize(self) -> dict:
"""
return {
"@class": "TypeEmbedNet",
- "@version": 1,
+ "@version": 2,
"ntypes": self.ntypes,
"neuron": self.neuron,
"resnet_dt": self.resnet_dt,
@@ -830,6 +841,7 @@ def serialize(self) -> dict:
"trainable": self.trainable,
"padding": self.padding,
"use_econf_tebd": self.use_econf_tebd,
+ "use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
"embedding": self.embedding_net.serialize(),
}
diff --git a/deepmd/tf/descriptor/se_atten.py b/deepmd/tf/descriptor/se_atten.py
index de652bc91d..13976a84e1 100644
--- a/deepmd/tf/descriptor/se_atten.py
+++ b/deepmd/tf/descriptor/se_atten.py
@@ -2079,6 +2079,8 @@ class DescrptDPA1Compat(DescrptSeAtten):
Whether to concat type embedding at the output of the descriptor.
use_econf_tebd: bool, Optional
Whether to use electronic configuration type embedding.
+ use_tebd_bias : bool, Optional
+ Whether to use bias in the type embedding layer.
type_map: List[str], Optional
A list of strings. Give the name to each type of atoms.
spin
@@ -2116,6 +2118,7 @@ def __init__(
smooth_type_embedding: bool = True,
concat_output_tebd: bool = True,
use_econf_tebd: bool = False,
+ use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
spin: Optional[Any] = None,
# consistent with argcheck, not used though
@@ -2167,6 +2170,7 @@ def __init__(
)
self.tebd_dim = tebd_dim
self.use_econf_tebd = use_econf_tebd
+ self.use_tebd_bias = use_tebd_bias
self.scaling_factor = scaling_factor
self.normalize = normalize
self.temperature = temperature
@@ -2176,6 +2180,7 @@ def __init__(
padding=True,
activation_function="Linear",
use_econf_tebd=use_econf_tebd,
+ use_tebd_bias=use_tebd_bias,
type_map=type_map,
# precision=precision,
seed=seed,
@@ -2303,7 +2308,7 @@ def deserialize(cls, data: dict, suffix: str = ""):
if cls is not DescrptDPA1Compat:
raise NotImplementedError(f"Not implemented in class {cls.__name__}")
data = data.copy()
- check_version_compatibility(data.pop("@version"), 1, 1)
+ check_version_compatibility(data.pop("@version"), 2, 1)
data.pop("@class")
data.pop("type")
embedding_net_variables = cls.deserialize_network(
@@ -2325,6 +2330,9 @@ def deserialize(cls, data: dict, suffix: str = ""):
)
else:
two_side_embeeding_net_variables = None
+ # compat with version 1
+ if "use_tebd_bias" not in data:
+ data["use_tebd_bias"] = True
descriptor = cls(**data)
descriptor.embedding_net_variables = embedding_net_variables
descriptor.attention_layer_variables = attention_layer_variables
@@ -2357,12 +2365,14 @@ def serialize(self, suffix: str = "") -> dict:
data.update(
{
"type": "dpa1",
+ "@version": 2,
"tebd_dim": self.tebd_dim,
"scaling_factor": self.scaling_factor,
"normalize": self.normalize,
"temperature": self.temperature,
"concat_output_tebd": self.concat_output_tebd,
"use_econf_tebd": self.use_econf_tebd,
+ "use_tebd_bias": self.use_tebd_bias,
"type_embedding": self.type_embedding.serialize(suffix),
}
)
diff --git a/deepmd/tf/utils/network.py b/deepmd/tf/utils/network.py
index 7918b58d0c..7941b451af 100644
--- a/deepmd/tf/utils/network.py
+++ b/deepmd/tf/utils/network.py
@@ -219,6 +219,7 @@ def embedding_net(
uniform_seed=False,
initial_variables=None,
mixed_prec=None,
+ bias=True,
):
r"""The embedding network.
@@ -274,6 +275,8 @@ def embedding_net(
The input dict which stores the embedding net variables
mixed_prec
The input dict which stores the mixed precision setting for the embedding net
+ bias : bool, Optional
+ Whether to use bias in the embedding layer.
References
----------
@@ -289,18 +292,27 @@ def embedding_net(
stddev=stddev / np.sqrt(outputs_size[ii] + outputs_size[ii - 1]),
seed=seed if (seed is None or uniform_seed) else seed + ii * 3 + 0,
)
- b_initializer = tf.random_normal_initializer(
- stddev=stddev,
- mean=bavg,
- seed=seed if (seed is None or uniform_seed) else seed + 3 * ii + 1,
+ b_initializer = (
+ tf.random_normal_initializer(
+ stddev=stddev,
+ mean=bavg,
+ seed=seed if (seed is None or uniform_seed) else seed + 3 * ii + 1,
+ )
+ if bias
+ else None
)
if initial_variables is not None:
scope = tf.get_variable_scope().name
w_initializer = tf.constant_initializer(
initial_variables[scope + "/matrix_" + str(ii) + name_suffix]
)
- b_initializer = tf.constant_initializer(
- initial_variables[scope + "/bias_" + str(ii) + name_suffix]
+ bias = (scope + "/bias_" + str(ii) + name_suffix) in initial_variables
+ b_initializer = (
+ tf.constant_initializer(
+ initial_variables[scope + "/bias_" + str(ii) + name_suffix]
+ )
+ if bias
+ else None
)
w = tf.get_variable(
"matrix_" + str(ii) + name_suffix,
@@ -311,27 +323,35 @@ def embedding_net(
)
variable_summaries(w, "matrix_" + str(ii) + name_suffix)
- b = tf.get_variable(
- "bias_" + str(ii) + name_suffix,
- [outputs_size[ii]],
- precision,
- b_initializer,
- trainable=trainable,
+ b = (
+ tf.get_variable(
+ "bias_" + str(ii) + name_suffix,
+ [outputs_size[ii]],
+ precision,
+ b_initializer,
+ trainable=trainable,
+ )
+ if bias
+ else None
)
- variable_summaries(b, "bias_" + str(ii) + name_suffix)
+ if bias:
+ variable_summaries(b, "bias_" + str(ii) + name_suffix)
if mixed_prec is not None:
xx = tf.cast(xx, get_precision(mixed_prec["compute_prec"]))
w = tf.cast(w, get_precision(mixed_prec["compute_prec"]))
- b = tf.cast(b, get_precision(mixed_prec["compute_prec"]))
+ b = tf.cast(b, get_precision(mixed_prec["compute_prec"])) if bias else None
if activation_fn is not None:
hidden = tf.reshape(
- activation_fn(tf.nn.bias_add(tf.matmul(xx, w), b)),
+ activation_fn(
+ tf.nn.bias_add(tf.matmul(xx, w), b) if bias else tf.matmul(xx, w)
+ ),
[-1, outputs_size[ii]],
)
else:
hidden = tf.reshape(
- tf.nn.bias_add(tf.matmul(xx, w), b), [-1, outputs_size[ii]]
+ tf.nn.bias_add(tf.matmul(xx, w), b) if bias else tf.matmul(xx, w),
+ [-1, outputs_size[ii]],
)
if resnet_dt:
idt_initializer = tf.random_normal_initializer(
diff --git a/deepmd/tf/utils/type_embed.py b/deepmd/tf/utils/type_embed.py
index 20beda9d3a..3f40a0cef1 100644
--- a/deepmd/tf/utils/type_embed.py
+++ b/deepmd/tf/utils/type_embed.py
@@ -100,6 +100,8 @@ class TypeEmbedNet:
Concat the zero padding to the output, as the default embedding of empty type.
use_econf_tebd: bool, Optional
Whether to use electronic configuration type embedding.
+ use_tebd_bias : bool, Optional
+ Whether to use bias in the type embedding layer.
type_map: List[str], Optional
A list of strings. Give the name to each type of atoms.
"""
@@ -117,6 +119,7 @@ def __init__(
uniform_seed: bool = False,
padding: bool = False,
use_econf_tebd: bool = False,
+ use_tebd_bias: bool = False,
type_map: Optional[List[str]] = None,
**kwargs,
) -> None:
@@ -133,6 +136,7 @@ def __init__(
self.type_embedding_net_variables = None
self.padding = padding
self.use_econf_tebd = use_econf_tebd
+ self.use_tebd_bias = use_tebd_bias
self.type_map = type_map
if self.use_econf_tebd:
self.econf_tebd, _ = get_econf_tebd(self.type_map, precision=precision)
@@ -191,6 +195,7 @@ def build(
trainable=self.trainable,
initial_variables=self.type_embedding_net_variables,
uniform_seed=self.uniform_seed,
+ bias=self.use_tebd_bias,
)
ebd_type = tf.reshape(ebd_type, [-1, self.neuron[-1]]) # ntypes * neuron[-1]
if self.padding:
@@ -241,7 +246,7 @@ def deserialize(cls, data: dict, suffix: str = ""):
The deserialized model
"""
data = data.copy()
- check_version_compatibility(data.pop("@version", 1), 1, 1)
+ check_version_compatibility(data.pop("@version", 1), 2, 1)
data_cls = data.pop("@class")
assert data_cls == "TypeEmbedNet", f"Invalid class {data_cls}"
@@ -251,9 +256,10 @@ def deserialize(cls, data: dict, suffix: str = ""):
embedding_net_variables[
f"type_embed_net{suffix}/matrix_{layer_idx + 1}"
] = layer.w
- embedding_net_variables[f"type_embed_net{suffix}/bias_{layer_idx + 1}"] = (
- layer.b
- )
+ if layer.b is not None:
+ embedding_net_variables[
+ f"type_embed_net{suffix}/bias_{layer_idx + 1}"
+ ] = layer.b
if layer.idt is not None:
embedding_net_variables[
f"type_embed_net{suffix}/idt_{layer_idx + 1}"
@@ -264,6 +270,9 @@ def deserialize(cls, data: dict, suffix: str = ""):
f"type_embed_net{suffix}/idt_{layer_idx + 1}"
] = 0.0
+ # compat with version 1
+ if "use_tebd_bias" not in data:
+ data["use_tebd_bias"] = True
type_embedding_net = cls(**data)
type_embedding_net.type_embedding_net_variables = embedding_net_variables
return type_embedding_net
@@ -303,6 +312,7 @@ def serialize(self, suffix: str = "") -> dict:
activation_function=self.filter_activation_fn_name,
resnet_dt=self.filter_resnet_dt,
precision=self.filter_precision.name,
+ bias=self.use_tebd_bias,
)
for key, value in self.type_embedding_net_variables.items():
m = re.search(type_embedding_pattern, key)
@@ -315,7 +325,7 @@ def serialize(self, suffix: str = "") -> dict:
return {
"@class": "TypeEmbedNet",
- "@version": 1,
+ "@version": 2,
"ntypes": self.ntypes,
"neuron": self.neuron,
"resnet_dt": self.filter_resnet_dt,
@@ -324,6 +334,7 @@ def serialize(self, suffix: str = "") -> dict:
"trainable": self.trainable,
"padding": self.padding,
"use_econf_tebd": self.use_econf_tebd,
+ "use_tebd_bias": self.use_tebd_bias,
"type_map": self.type_map,
"embedding": embedding_net.serialize(),
}
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index cb39ead6be..0bf50fd019 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -87,6 +87,7 @@ def type_embedding_args():
doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
doc_trainable = "If the parameters in the embedding net are trainable"
doc_use_econf_tebd = "Whether to use electronic configuration type embedding."
+ doc_use_tebd_bias = "Whether to use bias in the type embedding layer."
return [
Argument("neuron", List[int], optional=True, default=[8], doc=doc_neuron),
@@ -104,6 +105,13 @@ def type_embedding_args():
Argument(
"use_econf_tebd", bool, optional=True, default=False, doc=doc_use_econf_tebd
),
+ Argument(
+ "use_tebd_bias",
+ bool,
+ optional=True,
+ default=False,
+ doc=doc_use_tebd_bias,
+ ),
]
@@ -529,6 +537,7 @@ def descrpt_se_atten_args():
doc_ln_eps = "The epsilon value for layer normalization. The default value for TensorFlow is set to 1e-3 to keep consistent with keras while set to 1e-5 in PyTorch and DP implementation."
doc_tebd_dim = "The dimension of atom type embedding."
doc_use_econf_tebd = r"Whether to use electronic configuration type embedding. For TensorFlow backend, please set `use_econf_tebd` in `type_embedding` block instead."
+ doc_use_tebd_bias = "Whether to use bias in the type embedding layer."
doc_temperature = "The scaling factor of normalization in calculations of attention weights, which is used to scale the matmul(Q, K)."
doc_scaling_factor = (
"The scaling factor of normalization in calculations of attention weights, which is used to scale the matmul(Q, K). "
@@ -596,6 +605,13 @@ def descrpt_se_atten_args():
default=False,
doc=doc_only_pt_supported + doc_use_econf_tebd,
),
+ Argument(
+ "use_tebd_bias",
+ bool,
+ optional=True,
+ default=False,
+ doc=doc_use_tebd_bias,
+ ),
Argument(
"tebd_input_mode",
str,
@@ -642,6 +658,7 @@ def descrpt_se_atten_v2_args():
doc_ln_eps = "The epsilon value for layer normalization. The default value for TensorFlow is set to 1e-3 to keep consistent with keras while set to 1e-5 in PyTorch and DP implementation."
doc_tebd_dim = "The dimension of atom type embedding."
doc_use_econf_tebd = r"Whether to use electronic configuration type embedding. For TensorFlow backend, please set `use_econf_tebd` in `type_embedding` block instead."
+ doc_use_tebd_bias = "Whether to use bias in the type embedding layer."
doc_temperature = "The scaling factor of normalization in calculations of attention weights, which is used to scale the matmul(Q, K)."
doc_scaling_factor = (
"The scaling factor of normalization in calculations of attention weights, which is used to scale the matmul(Q, K). "
@@ -679,6 +696,13 @@ def descrpt_se_atten_v2_args():
default=False,
doc=doc_only_pt_supported + doc_use_econf_tebd,
),
+ Argument(
+ "use_tebd_bias",
+ bool,
+ optional=True,
+ default=False,
+ doc=doc_use_tebd_bias,
+ ),
Argument(
"scaling_factor",
float,
@@ -729,6 +753,7 @@ def descrpt_dpa2_args():
doc_seed = "Random seed for parameter initialization."
doc_add_tebd_to_repinit_out = "Add type embedding to the output representation from repinit before inputting it into repformer."
doc_use_econf_tebd = "Whether to use electronic configuration type embedding."
+ doc_use_tebd_bias = "Whether to use bias in the type embedding layer."
return [
# repinit args
Argument("repinit", dict, dpa2_repinit_args(), doc=doc_repinit),
@@ -775,6 +800,13 @@ def descrpt_dpa2_args():
default=False,
doc=doc_only_pt_supported + doc_use_econf_tebd,
),
+ Argument(
+ "use_tebd_bias",
+ bool,
+ optional=True,
+ default=False,
+ doc=doc_use_tebd_bias,
+ ),
]
diff --git a/source/tests/consistent/descriptor/test_dpa1.py b/source/tests/consistent/descriptor/test_dpa1.py
index 8bd9af6d5f..955b58932a 100644
--- a/source/tests/consistent/descriptor/test_dpa1.py
+++ b/source/tests/consistent/descriptor/test_dpa1.py
@@ -58,6 +58,7 @@
(True,), # concat_output_tebd
("float64",), # precision
(True, False), # use_econf_tebd
+ (False, True), # use_tebd_bias
)
class TestDPA1(CommonTest, DescriptorTest, unittest.TestCase):
@property
@@ -81,6 +82,7 @@ def data(self) -> dict:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return {
"sel": [10],
@@ -108,6 +110,7 @@ def data(self) -> dict:
"set_davg_zero": set_davg_zero,
"smooth_type_embedding": smooth_type_embedding,
"use_econf_tebd": use_econf_tebd,
+ "use_tebd_bias": use_tebd_bias,
"type_map": ["O", "H"] if use_econf_tebd else None,
"seed": 1145141919810,
}
@@ -142,6 +145,7 @@ def skip_pt(self) -> bool:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return CommonTest.skip_pt or self.is_meaningless_zero_attention_layer_tests(
attn_layer,
@@ -171,6 +175,7 @@ def skip_dp(self) -> bool:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return CommonTest.skip_pt or self.is_meaningless_zero_attention_layer_tests(
attn_layer,
@@ -200,6 +205,7 @@ def skip_tf(self) -> bool:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return (
CommonTest.skip_tf
@@ -275,6 +281,7 @@ def setUp(self):
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]:
@@ -332,6 +339,7 @@ def rtol(self) -> float:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
if precision == "float64":
return 1e-10
@@ -362,6 +370,7 @@ def atol(self) -> float:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
if precision == "float64":
return 1e-10
diff --git a/source/tests/consistent/descriptor/test_dpa2.py b/source/tests/consistent/descriptor/test_dpa2.py
index b24274c9d7..9b88b4238a 100644
--- a/source/tests/consistent/descriptor/test_dpa2.py
+++ b/source/tests/consistent/descriptor/test_dpa2.py
@@ -64,6 +64,7 @@
("float64",), # precision
(True, False), # add_tebd_to_repinit_out
(True, False), # use_econf_tebd
+ (False,), # use_tebd_bias
)
class TestDPA2(CommonTest, DescriptorTest, unittest.TestCase):
@property
@@ -91,6 +92,7 @@ def data(self) -> dict:
precision,
add_tebd_to_repinit_out,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return {
"ntypes": self.ntypes,
@@ -149,6 +151,7 @@ def data(self) -> dict:
"env_protection": 0.0,
"trainable": True,
"use_econf_tebd": use_econf_tebd,
+ "use_tebd_bias": use_tebd_bias,
"type_map": ["O", "H"] if use_econf_tebd else None,
"add_tebd_to_repinit_out": add_tebd_to_repinit_out,
}
@@ -178,6 +181,7 @@ def skip_pt(self) -> bool:
precision,
add_tebd_to_repinit_out,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return CommonTest.skip_pt
@@ -206,6 +210,7 @@ def skip_dp(self) -> bool:
precision,
add_tebd_to_repinit_out,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return CommonTest.skip_pt
@@ -234,6 +239,7 @@ def skip_tf(self) -> bool:
precision,
add_tebd_to_repinit_out,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return True
@@ -298,6 +304,7 @@ def setUp(self):
precision,
add_tebd_to_repinit_out,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
def build_tf(self, obj: Any, suffix: str) -> Tuple[list, dict]:
@@ -359,6 +366,7 @@ def rtol(self) -> float:
precision,
add_tebd_to_repinit_out,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
if precision == "float64":
return 1e-10
@@ -393,6 +401,7 @@ def atol(self) -> float:
precision,
add_tebd_to_repinit_out,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
if precision == "float64":
return 1e-6 # need to fix in the future, see issue https://github.com/deepmodeling/deepmd-kit/issues/3786
diff --git a/source/tests/consistent/descriptor/test_se_atten_v2.py b/source/tests/consistent/descriptor/test_se_atten_v2.py
index 54f3cb5826..9ae16b96fa 100644
--- a/source/tests/consistent/descriptor/test_se_atten_v2.py
+++ b/source/tests/consistent/descriptor/test_se_atten_v2.py
@@ -54,6 +54,7 @@
(True,), # concat_output_tebd
("float64",), # precision
(True, False), # use_econf_tebd
+ (False,), # use_tebd_bias
)
class TestSeAttenV2(CommonTest, DescriptorTest, unittest.TestCase):
@property
@@ -75,6 +76,7 @@ def data(self) -> dict:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return {
"sel": [10],
@@ -100,6 +102,7 @@ def data(self) -> dict:
"precision": precision,
"set_davg_zero": set_davg_zero,
"use_econf_tebd": use_econf_tebd,
+ "use_tebd_bias": use_tebd_bias,
"type_map": ["O", "H"] if use_econf_tebd else None,
"seed": 1145141919810,
}
@@ -132,6 +135,7 @@ def skip_pt(self) -> bool:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return CommonTest.skip_pt or self.is_meaningless_zero_attention_layer_tests(
attn_layer,
@@ -159,6 +163,7 @@ def skip_dp(self) -> bool:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return CommonTest.skip_pt or self.is_meaningless_zero_attention_layer_tests(
attn_layer,
@@ -263,6 +268,7 @@ def rtol(self) -> float:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
if precision == "float64":
return 1e-10
@@ -291,6 +297,7 @@ def atol(self) -> float:
concat_output_tebd,
precision,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
if precision == "float64":
return 1e-10
diff --git a/source/tests/consistent/test_type_embedding.py b/source/tests/consistent/test_type_embedding.py
index cf358771b3..6583dddb5f 100644
--- a/source/tests/consistent/test_type_embedding.py
+++ b/source/tests/consistent/test_type_embedding.py
@@ -37,6 +37,7 @@
("float32", "float64"), # precision
(True, False), # padding
(True, False), # use_econf_tebd
+ (True, False), # use_tebd_bias
)
class TestTypeEmbedding(CommonTest, unittest.TestCase):
"""Useful utilities for descriptor tests."""
@@ -48,12 +49,14 @@ def data(self) -> dict:
precision,
padding,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
return {
"neuron": [2, 4, 4],
"resnet_dt": resnet_dt,
"precision": precision,
"use_econf_tebd": use_econf_tebd,
+ "use_tebd_bias": use_tebd_bias,
"seed": 20240327,
}
@@ -69,6 +72,7 @@ def addtional_data(self) -> dict:
precision,
padding,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
# implict argument not input by users
return {
@@ -110,6 +114,7 @@ def rtol(self) -> float:
precision,
padding,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
if precision == "float64":
return 1e-10
@@ -128,6 +133,7 @@ def atol(self) -> float:
precision,
padding,
use_econf_tebd,
+ use_tebd_bias,
) = self.param
if precision == "float64":
return 1e-10
diff --git a/source/tests/pt/model/test_descriptor_dpa1.py b/source/tests/pt/model/test_descriptor_dpa1.py
index 6dadcdacb7..488cc2f7ff 100644
--- a/source/tests/pt/model/test_descriptor_dpa1.py
+++ b/source/tests/pt/model/test_descriptor_dpa1.py
@@ -250,7 +250,7 @@ def test_descriptor_block(self):
atype = self.atype
box = self.cell
# handel type_embedding
- type_embedding = TypeEmbedNet(ntypes, 8).to(env.DEVICE)
+ type_embedding = TypeEmbedNet(ntypes, 8, use_tebd_bias=True).to(env.DEVICE)
type_embedding.load_state_dict(torch.load(self.file_type_embed))
## to save model parameters
@@ -294,6 +294,7 @@ def test_descriptor(self):
dparams["ntypes"] = ntypes
assert dparams.pop("type") == "se_atten"
dparams["concat_output_tebd"] = False
+ dparams["use_tebd_bias"] = True
des = DescrptDPA1(
**dparams,
).to(env.DEVICE)
diff --git a/source/tests/pt/model/test_descriptor_dpa2.py b/source/tests/pt/model/test_descriptor_dpa2.py
index 240871f2d7..ac04bfc417 100644
--- a/source/tests/pt/model/test_descriptor_dpa2.py
+++ b/source/tests/pt/model/test_descriptor_dpa2.py
@@ -118,6 +118,7 @@ def test_descriptor(self):
dparams["ntypes"] = ntypes
assert dparams.pop("type") == "dpa2"
dparams["concat_output_tebd"] = False
+ dparams["use_tebd_bias"] = True
des = DescrptDPA2(
**dparams,
).to(env.DEVICE)
diff --git a/source/tests/tf/pairwise_dprc.json b/source/tests/tf/pairwise_dprc.json
index 7c735a8191..4924c23195 100644
--- a/source/tests/tf/pairwise_dprc.json
+++ b/source/tests/tf/pairwise_dprc.json
@@ -11,6 +11,7 @@
"HW"
],
"type_embedding": {
+ "use_tebd_bias": true,
"neuron": [
2
]
diff --git a/source/tests/tf/test_data_large_batch.py b/source/tests/tf/test_data_large_batch.py
index 1b19d664dd..d9bb00de40 100644
--- a/source/tests/tf/test_data_large_batch.py
+++ b/source/tests/tf/test_data_large_batch.py
@@ -121,6 +121,7 @@ def test_data_mixed_type(self):
activation_function=None,
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
model = EnerModel(descrpt, fitting, typeebd)
@@ -320,6 +321,7 @@ def test_stripped_data_mixed_type(self):
activation_function=None,
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
model = EnerModel(descrpt, fitting, typeebd)
@@ -519,6 +521,7 @@ def test_compressible_data_mixed_type(self):
activation_function=None,
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
model = EnerModel(descrpt, fitting, typeebd)
diff --git a/source/tests/tf/test_descrpt_hybrid.py b/source/tests/tf/test_descrpt_hybrid.py
index 76c0932316..cadf2f83ae 100644
--- a/source/tests/tf/test_descrpt_hybrid.py
+++ b/source/tests/tf/test_descrpt_hybrid.py
@@ -64,6 +64,7 @@ def test_descriptor_hybrid(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
diff --git a/source/tests/tf/test_descrpt_se_a_type.py b/source/tests/tf/test_descrpt_se_a_type.py
index 5bc4680d6d..ccb2ef96a6 100644
--- a/source/tests/tf/test_descrpt_se_a_type.py
+++ b/source/tests/tf/test_descrpt_se_a_type.py
@@ -59,6 +59,7 @@ def test_descriptor_two_sides(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
)
jdata["model"]["descriptor"].pop("type", None)
@@ -223,6 +224,7 @@ def test_descriptor_one_side(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
)
jdata["model"]["descriptor"].pop("type", None)
diff --git a/source/tests/tf/test_descrpt_se_atten.py b/source/tests/tf/test_descrpt_se_atten.py
index 84325cadc9..6393cc222b 100644
--- a/source/tests/tf/test_descrpt_se_atten.py
+++ b/source/tests/tf/test_descrpt_se_atten.py
@@ -71,6 +71,7 @@ def test_descriptor_two_sides(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
@@ -248,6 +249,7 @@ def test_descriptor_one_side(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
@@ -425,6 +427,7 @@ def test_stripped_type_embedding_descriptor_two_sides(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
@@ -596,6 +599,7 @@ def test_compressible_descriptor_two_sides(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
diff --git a/source/tests/tf/test_dipole_se_a_tebd.py b/source/tests/tf/test_dipole_se_a_tebd.py
index 66be544e78..ada0643dbf 100644
--- a/source/tests/tf/test_dipole_se_a_tebd.py
+++ b/source/tests/tf/test_dipole_se_a_tebd.py
@@ -73,6 +73,7 @@ def test_model(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
)
model = DipoleModel(descrpt, fitting, typeebd)
diff --git a/source/tests/tf/test_model_se_a.py b/source/tests/tf/test_model_se_a.py
index 1d67ef5fab..4d691834ae 100644
--- a/source/tests/tf/test_model_se_a.py
+++ b/source/tests/tf/test_model_se_a.py
@@ -299,6 +299,7 @@ def test_model_atom_ener_type_embedding(self):
typeebd = TypeEmbedNet(
ntypes=len(jdata["model"]["descriptor"]["sel"]),
**jdata["model"]["type_embeding"],
+ use_tebd_bias=True,
)
jdata["model"]["descriptor"].pop("type", None)
descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
diff --git a/source/tests/tf/test_model_se_a_ebd_v2.py b/source/tests/tf/test_model_se_a_ebd_v2.py
index ffa558da35..69d44b7c56 100644
--- a/source/tests/tf/test_model_se_a_ebd_v2.py
+++ b/source/tests/tf/test_model_se_a_ebd_v2.py
@@ -62,6 +62,7 @@ def test_model(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
descrpt = DescrptSeAEbdV2(
diff --git a/source/tests/tf/test_model_se_a_type.py b/source/tests/tf/test_model_se_a_type.py
index e38afc0fb4..e9fb8db466 100644
--- a/source/tests/tf/test_model_se_a_type.py
+++ b/source/tests/tf/test_model_se_a_type.py
@@ -62,6 +62,7 @@ def test_model(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
)
model = EnerModel(descrpt, fitting, typeebd)
diff --git a/source/tests/tf/test_model_se_atten.py b/source/tests/tf/test_model_se_atten.py
index 06945fab9c..4f661c2cc3 100644
--- a/source/tests/tf/test_model_se_atten.py
+++ b/source/tests/tf/test_model_se_atten.py
@@ -74,6 +74,7 @@ def test_model(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
model = EnerModel(descrpt, fitting, typeebd)
@@ -232,6 +233,7 @@ def test_exclude_types(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
type_embedding = typeebd.build(
@@ -298,6 +300,7 @@ def test_compressible_model(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
model = EnerModel(descrpt, fitting, typeebd)
@@ -459,6 +462,7 @@ def test_compressible_exclude_types(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
type_embedding = typeebd.build(
@@ -526,6 +530,7 @@ def test_stripped_type_embedding_model(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
model = EnerModel(descrpt, fitting, typeebd)
@@ -691,6 +696,7 @@ def test_stripped_type_embedding_exclude_types(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
type_embedding = typeebd.build(
@@ -765,6 +771,7 @@ def test_smoothness_of_stripped_type_embedding_smooth_model(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
model = EnerModel(descrpt, fitting, typeebd)
@@ -913,6 +920,7 @@ def test_smoothness_of_stripped_type_embedding_smooth_model_excluded_types(self)
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
padding=True,
)
model = EnerModel(descrpt, fitting, typeebd)
diff --git a/source/tests/tf/test_nvnmd_entrypoints.py b/source/tests/tf/test_nvnmd_entrypoints.py
index 17ad62b4bc..32e90d9d9b 100644
--- a/source/tests/tf/test_nvnmd_entrypoints.py
+++ b/source/tests/tf/test_nvnmd_entrypoints.py
@@ -727,7 +727,11 @@ def test_model_qnn_v1(self):
jdata = nvnmd_cfg.get_deepmd_jdata()
run_opt = RunOptions(log_path=None, log_level=20)
jdata = update_deepmd_input(jdata, warning=True, dump="input_v2_compat.json")
+ jdata["model"]["type_embedding"] = {}
jdata = normalize(jdata)
+ jdata["model"]["type_embedding"].update(
+ {"activation_function": None, "use_tebd_bias": True}
+ )
self.trainer = DPTrainer(jdata, run_opt, False)
self.model = self.trainer.model
# place holder
diff --git a/source/tests/tf/test_polar_se_a_tebd.py b/source/tests/tf/test_polar_se_a_tebd.py
index 2abaedd40a..7f8d304af9 100644
--- a/source/tests/tf/test_polar_se_a_tebd.py
+++ b/source/tests/tf/test_polar_se_a_tebd.py
@@ -71,6 +71,7 @@ def test_model(self):
resnet_dt=typeebd_param["resnet_dt"],
seed=typeebd_param["seed"],
uniform_seed=True,
+ use_tebd_bias=True,
)
model = PolarModel(descrpt, fitting, typeebd)
diff --git a/source/tests/tf/test_type_embed.py b/source/tests/tf/test_type_embed.py
index c3f5077943..fd89f8fd10 100644
--- a/source/tests/tf/test_type_embed.py
+++ b/source/tests/tf/test_type_embed.py
@@ -28,7 +28,9 @@ def test_embed_atom_type(self):
np.testing.assert_almost_equal(atom_embed, expected_out, 10)
def test_type_embed_net(self):
- ten = TypeEmbedNet(ntypes=2, neuron=[2, 4, 8], seed=1, uniform_seed=True)
+ ten = TypeEmbedNet(
+ ntypes=2, neuron=[2, 4, 8], seed=1, uniform_seed=True, use_tebd_bias=True
+ )
type_embedding = ten.build(2)
sess = self.cached_session().__enter__()
sess.run(tf.global_variables_initializer())
diff --git a/source/tests/universal/dpmodel/descriptor/test_descriptor.py b/source/tests/universal/dpmodel/descriptor/test_descriptor.py
index 691e8312d4..41d915847e 100644
--- a/source/tests/universal/dpmodel/descriptor/test_descriptor.py
+++ b/source/tests/universal/dpmodel/descriptor/test_descriptor.py
@@ -187,6 +187,7 @@ def DescriptorParamDPA1(
set_davg_zero=True,
smooth_type_embedding=True,
use_econf_tebd=False,
+ use_tebd_bias=False,
precision="float64",
):
input_dict = {
@@ -214,6 +215,7 @@ def DescriptorParamDPA1(
"set_davg_zero": set_davg_zero,
"smooth_type_embedding": smooth_type_embedding,
"use_econf_tebd": use_econf_tebd,
+ "use_tebd_bias": use_tebd_bias,
"precision": precision,
}
return input_dict
@@ -240,6 +242,7 @@ def DescriptorParamDPA1(
"smooth_type_embedding": (True, False),
"concat_output_tebd": (True,),
"use_econf_tebd": (False, True),
+ "use_tebd_bias": (False,),
"precision": ("float64",),
}
),
@@ -274,6 +277,7 @@ def DescriptorParamDPA2(
smooth=True,
add_tebd_to_repinit_out=True,
use_econf_tebd=False,
+ use_tebd_bias=False,
env_protection=0.0,
exclude_types=[],
precision="float64",
@@ -335,6 +339,7 @@ def DescriptorParamDPA2(
"env_protection": env_protection,
"trainable": True,
"use_econf_tebd": use_econf_tebd,
+ "use_tebd_bias": use_tebd_bias,
"type_map": type_map,
"seed": GLOBAL_SEED,
"add_tebd_to_repinit_out": add_tebd_to_repinit_out,
@@ -368,6 +373,7 @@ def DescriptorParamDPA2(
"precision": ("float64",),
"add_tebd_to_repinit_out": (True, False),
"use_econf_tebd": (False,),
+ "use_tebd_bias": (False,),
}
),
)