From 061c296fff58cfa70b5ca9e0c5f6db07791ded92 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 26 May 2023 14:56:41 -0400 Subject: [PATCH 01/48] init pairwise dprc Signed-off-by: Jinzhe Zeng --- deepmd/entrypoints/train.py | 2 + deepmd/model/model.py | 5 + deepmd/model/pairwise_dprc.py | 229 ++++++++++++++++++++++++++++++++++ deepmd/utils/argcheck.py | 30 ++++- source/lib/include/pairwise.h | 43 +++++++ source/lib/src/pairwise.cc | 152 ++++++++++++++++++++++ source/op/CMakeLists.txt | 3 +- source/op/pairwise.cc | 207 ++++++++++++++++++++++++++++++ 8 files changed, 669 insertions(+), 2 deletions(-) create mode 100644 deepmd/model/pairwise_dprc.py create mode 100644 source/lib/include/pairwise.h create mode 100644 source/lib/src/pairwise.cc create mode 100644 source/op/pairwise.cc diff --git a/deepmd/entrypoints/train.py b/deepmd/entrypoints/train.py index c806fb3804..2d961199b0 100755 --- a/deepmd/entrypoints/train.py +++ b/deepmd/entrypoints/train.py @@ -492,6 +492,8 @@ def update_one_sel(jdata, descriptor): def update_sel(jdata): + if "descriptor" not in jdata["model"]: + return jdata log.info( "Calculate neighbor statistics... (add --skip-neighbor-stat to skip this step)" ) diff --git a/deepmd/model/model.py b/deepmd/model/model.py index 9a1f72368b..132bb75216 100644 --- a/deepmd/model/model.py +++ b/deepmd/model/model.py @@ -81,12 +81,17 @@ def __new__(cls, *args, **kwargs): from deepmd.model.multi import ( MultiModel, ) + from deepmd.model.pairwise_dprc import ( + PairwiseDPRc, + ) model_type = kwargs.get("type", "standard") if model_type == "standard": cls = StandardModel elif model_type == "multi": cls = MultiModel + elif model_type == "pairwise_dprc": + cls = PairwiseDPRc else: raise ValueError(f"unknown model type: {model_type}") return cls.__new__(cls, *args, **kwargs) diff --git a/deepmd/model/pairwise_dprc.py b/deepmd/model/pairwise_dprc.py new file mode 100644 index 0000000000..73b092f4a4 --- /dev/null +++ b/deepmd/model/pairwise_dprc.py @@ -0,0 +1,229 @@ +from typing import ( + List, + Optional, + Union, +) + +from deepmd.common import ( + add_data_requirement, +) +from deepmd.env import ( + op_module, + tf, +) +from deepmd.loss.loss import ( + Loss, +) +from deepmd.model.model import ( + Model, +) +from deepmd.utils.spin import ( + Spin, +) +from deepmd.utils.type_embed import ( + TypeEmbedNet, +) + +from .ener import ( + EnerModel, +) + + +class PairwiseDPRc(Model): + """Pairwise Deep Potential - Range Correction.""" + + model_type = "pairwise_dprc" + + def __init__( + self, + qm_model: dict, + qmmm_model: dict, + type_embedding: Union[dict, TypeEmbedNet], + type_map: List[str], + data_stat_nbatch: int = 10, + data_stat_nsample: int = 10, + data_stat_protect: float = 1e-2, + use_srtab: Optional[str] = None, + smin_alpha: Optional[float] = None, + sw_rmin: Optional[float] = None, + sw_rmax: Optional[float] = None, + spin: Optional[Spin] = None, + compress: Optional[dict] = None, + **kwargs, + ) -> None: + super().__init__( + type_embedding=type_embedding, + type_map=type_map, + data_stat_nbatch=data_stat_nbatch, + data_stat_nsample=data_stat_nsample, + data_stat_protect=data_stat_protect, + use_srtab=use_srtab, + smin_alpha=smin_alpha, + sw_rmin=sw_rmin, + sw_rmax=sw_rmax, + spin=spin, + compress=compress, + **kwargs, + ) + # type embedding + if isinstance(type_embedding, TypeEmbedNet): + self.typeebd = type_embedding + else: + self.typeebd = TypeEmbedNet( + **type_embedding, + # must use se_atten, so it must be True + padding=True, + ) + + self.qm_model = EnerModel( + 
**qm_model, type_map=type_map, type_embedding=self.typeebd + ) + self.qmmm_model = EnerModel( + **qmmm_model, type_map=type_map, type_embedding=self.typeebd + ) + add_data_requirement("aparam", 1, atomic=True, must=True, high_prec=False) + + def build( + self, + coord_: tf.Tensor, + atype_: tf.Tensor, + natoms: tf.Tensor, + box: tf.Tensor, + mesh: tf.Tensor, + input_dict: dict, + frz_model=None, + ckpt_meta: Optional[str] = None, + suffix: str = "", + reuse: Optional[bool] = None, + ): + with tf.variable_scope("fitting_attr" + suffix, reuse=reuse): + t_dfparam = tf.constant(0, name="dfparam", dtype=tf.int32) + t_daparam = tf.constant(1, name="daparam", dtype=tf.int32) + # convert X-frame to X-Y-frame coordinates + idxs = input_dict["aparam"].astype(tf.int32) + + ( + forward_qm_map, + backward_qm_map, + forward_qmmm_map, + backward_qmmm_map, + natoms_qm, + natoms_qmmm, + qmmm_frame_idx, + ) = op_module.dprc_pairwise_idx(idxs, natoms) + + coord_qm = gather_placeholder(coord_, forward_qm_map) + atype_qm = gather_placeholder(atype_, forward_qm_map, placeholder=-1) + coord_qmmm = gather_placeholder(coord_, forward_qmmm_map) + atype_qmmm = gather_placeholder(atype_, forward_qmmm_map, placeholder=-1) + box_qm = tf.gather(box, forward_qm_map) + box_qmmm = tf.gather(box, forward_qmmm_map) + + # TODO: after #2481 is merged, change the mesh to mixed_type specific + + qm_dict = self.qm_model.build( + coord_qm, + atype_qm, + natoms_qm, + box_qm, + mesh, + input_dict, + frz_model=frz_model, + ckpt_meta=ckpt_meta, + suffix="_qm" + suffix, + reuse=reuse, + ) + qmmm_dict = self.qmmm_model.build( + coord_qmmm, + atype_qmmm, + natoms_qmmm, + box_qmmm, + mesh, + input_dict, + frz_model=frz_model, + ckpt_meta=ckpt_meta, + suffix="_qmmm" + suffix, + reuse=reuse, + ) + + energy_qm = qm_dict["energy"] + energy_qmmm = tf.math.segment_sum(qmmm_dict["energy"], qmmm_frame_idx) + energy = energy_qm + energy_qmmm + + force_qm = gather_placeholder( + qm_dict["force"], backward_qm_map, placeholder=0.0 + ) + force_qmmm = tf.math.segment_sum( + gather_placeholder(qmmm_dict["force"], backward_qmmm_map, placeholder=0.0), + qmmm_frame_idx, + ) + force = force_qm + force_qmmm + + virial_qm = qm_dict["virial"] + virial_qmmm = tf.math.segment_sum(qmmm_dict["virial"], qmmm_frame_idx) + virial = virial_qm + virial_qmmm + + atom_ener_qm = gather_placeholder( + qm_dict["atom_ener"], backward_qm_map, placeholder=0.0 + ) + atom_ener_qmmm = tf.math.segment_sum( + gather_placeholder( + qmmm_dict["atom_ener"], backward_qmmm_map, placeholder=0.0 + ), + qmmm_frame_idx, + ) + atom_ener = atom_ener_qm + atom_ener_qmmm + + atom_virial_qm = gather_placeholder( + qm_dict["atom_virial"], backward_qm_map, placeholder=0.0 + ) + atom_virial_qmmm = tf.math.segment_sum( + gather_placeholder( + qmmm_dict["atom_virial"], backward_qmmm_map, placeholder=0.0 + ), + qmmm_frame_idx, + ) + atom_virial = atom_virial_qm + atom_virial_qmmm + + model_dict = {} + model_dict["energy"] = energy + model_dict["force"] = force + model_dict["virial"] = virial + model_dict["atom_ener"] = atom_ener + model_dict["atom_virial"] = atom_virial + model_dict["coord"] = coord_ + model_dict["atype"] = atype_ + return model_dict + + def get_fitting(self) -> Union[str, dict]: + """Get the fitting(s).""" + return { + "qm": self.qm_model.get_fitting(), + "qmmm": self.qmmm_model.get_fitting(), + } + + def get_loss(self, loss: dict, lr) -> Union[Loss, dict]: + """Get the loss function(s).""" + return self.qm_model.get_loss(loss, lr) + + def get_rcut(self): + return 
max(self.qm_model.get_rcut(), self.qmmm_model.get_rcut()) + + def get_ntypes(self) -> int: + return self.qm_model.get_ntypes() + + def data_stat(self, data): + self.qm_model.data_stat(data) + self.qmmm_model.data_stat(data) + + +def gather_placeholder( + params: tf.Tensor, indices: tf.Tensor, placeholder: float = 0.0, **kwargs +) -> tf.Tensor: + """Call tf.gather but allow indices to contain placeholders (-1).""" + # (x, 2, 3) -> (1, 2, 3) + placeholder_shape = tf.concat([[1], tf.shape(params)[1:]], axis=0) + params = tf.concat( + [tf.cast(tf.fill(placeholder_shape, placeholder), params.dtype), params], axis=0 + ) + return tf.gather(params, indices + 1, **kwargs) diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 5748ad9bf7..826c1c9f36 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -738,7 +738,7 @@ def model_compression_type_args(): ) -def model_args(): +def model_args(exclude_hybrid=False): doc_type_map = "A list of strings. Give the name to each type of atoms. It is noted that the number of atom type of training system must be less than 128 in a GPU environment. If not given, type.raw in each system should use the same type indexes, and type_map.raw will take no effect." doc_data_stat_nbatch = "The model determines the normalization from the statistics of the data. This key specifies the number of `frames` in each `system` used for statistics." doc_data_stat_protect = "Protect parameter for atomic energy regression." @@ -751,6 +751,13 @@ def model_args(): doc_sw_rmax = "The upper boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided." doc_compress_config = "Model compression configurations" doc_spin = "The settings for systems with spin." + hybrid_models = [] + if not exclude_hybrid: + hybrid_models.extend( + [ + pairwise_dprc(), + ] + ) return Argument( "model", dict, @@ -814,6 +821,7 @@ def model_args(): [ standard_model_args(), multi_model_args(), + *hybrid_models, ], optional=True, default_tag="standard", @@ -869,6 +877,24 @@ def multi_model_args() -> Argument: return ca +def pairwise_dprc() -> Argument: + qm_model_args = model_args(exclude_hybrid=True) + qm_model_args.name = "qm_model" + qm_model_args.fold_subdoc = True + qmmm_model_args = model_args(exclude_hybrid=True) + qmmm_model_args.name = "qmmm_model" + qmmm_model_args.fold_subdoc = True + ca = Argument( + "pairwise_dprc", + dict, + [ + qm_model_args, + qmmm_model_args, + ], + ) + return ca + + # --- Learning rate configurations: --- # def learning_rate_exp(): doc_start_lr = "The learning rate the start of the training." @@ -1580,6 +1606,8 @@ def gen_args(**kwargs) -> List[Argument]: def normalize_multi_task(data): # single-task or multi-task mode + if data["model"].get("type", "standard") not in ("standard", "multi"): + return data single_fitting_net = "fitting_net" in data["model"].keys() single_training_data = "training_data" in data["training"].keys() single_valid_data = "validation_data" in data["training"].keys() diff --git a/source/lib/include/pairwise.h b/source/lib/include/pairwise.h new file mode 100644 index 0000000000..027d80cf7e --- /dev/null +++ b/source/lib/include/pairwise.h @@ -0,0 +1,43 @@ +#include + +namespace deepmd { +/** + * Group atoms into different fragments according to indexes. + * + * @param[out] fragments The indexes of atoms that each fragment contains. + * Fragment has been sorted. + * @param[in] idxs The indexes of the fragment that each atom belongs to. 
-1 + * will be ignored. + */ +void group_atoms_cpu(std::vector> &fragments, + const std::vector &idxs); +/** + * DPRc pairwise map. + * + * @param[out] forward_qm_map Forward map for QM atoms. + * @param[out] backward_qm_map Backward map for QM atoms. + * @param[out] forward_qmmm_map Forward map for QM/MM atoms. + * @param[out] backward_qmmm_map Backward map for QM/MM atoms. + * @param[out] nloc_qm The number of local QM atoms. + * @param[out] nloc_qmmm The number of local QM/MM atoms. + * @param[out] nall_qm The number of all QM atoms, including local and ghost + * atoms. + * @param[out] nall_qmmm The number of all QM/MM atoms, including local and + * ghost atoms. + * @param[in] fragments The indexes of atoms that each fragment contains. + * Assume that only the first fragment consists of QM atoms. + * @param[in] nloc The number of local atoms. + * @param[in] nall The number of all atoms, including local and ghost atoms. + */ +void dprc_pairwise_map_cpu(std::vector &forward_qm_map, + std::vector &backward_qm_map, + std::vector &forward_qmmm_map, + std::vector &backward_qmmm_map, + int &nloc_qm, + int &nloc_qmmm, + int &nall_qm, + int &nall_qmmm, + const std::vector> &fragments, + const int nloc, + const int nall); +} // namespace deepmd diff --git a/source/lib/src/pairwise.cc b/source/lib/src/pairwise.cc new file mode 100644 index 0000000000..2ecc6b8519 --- /dev/null +++ b/source/lib/src/pairwise.cc @@ -0,0 +1,152 @@ +#include "pairwise.h" + +#include +#include +#include + +#include "errors.h" + +template +std::vector sort_indexes(const std::vector &v) { + // https://stackoverflow.com/a/12399290/9567349 + // by Lukasz Wiklendt under CC BY-SA 4.0 + std::vector idx(v.size()); + std::iota(idx.begin(), idx.end(), 0); + std::stable_sort(idx.begin(), idx.end(), + [&v](size_t i1, size_t i2) { return v[i1] < v[i2]; }); + return idx; +} + +void deepmd::group_atoms_cpu(std::vector> &fragments, + const std::vector &idxs) { + int natoms = idxs.size(); + // sort idxs + std::vector idxs_idx = sort_indexes(idxs); + // now idxs_idx is sorted index, so we can easily group atoms in only one loop + int last_frag = -1; + for (size_t ii = 0; ii < idxs.size(); ii++) { + int frag = idxs[idxs_idx[ii]]; + if (frag == -1) { + // -1 is the place holder + continue; + } + if (frag != last_frag) { + last_frag = frag; + fragments.emplace_back(); + } + // push to the last fragment + fragments.back().push_back(idxs_idx[ii]); + } +} + +void deepmd::dprc_pairwise_map_cpu( + std::vector &forward_qm_map, + std::vector &backward_qm_map, + std::vector &forward_qmmm_map, + std::vector &backward_qmmm_map, + int &nloc_qm, + int &nloc_qmmm, + int &nall_qm, + int &nall_qmmm, + const std::vector> &fragments, + const int nloc, + const int nall) { + int nfragments = fragments.size(); + if (nfragments == 0) { + throw deepmd::deepmd_exception("fragments is empty"); + } + int nqm = fragments[0].size(); + // assume fragments = ((3,4,10), (0,1,2,11), (5,6,7), (8,9)) + // 10, 11 is ghost atoms + // (3, 4, 10) + forward_qm_map = fragments[0]; + // (-1, -1, -1, 0, 1, -1, -1, -1, -1, -1, 0, -1) + backward_qm_map.resize(nall); + std::fill(backward_qm_map.begin(), backward_qm_map.end(), -1); + for (int ii = 0; ii < forward_qm_map.size(); ++ii) { + backward_qm_map[forward_qm_map[ii]] = ii; + } + + // get max size of fragments + int max_fragment_real_size = 0; + int max_fragment_ghost_size = 0; + for (int ii = 1; ii < nfragments; ++ii) { + int fragment_real_size = 0; + int fragment_ghost_size = 0; + for (int jj = 0; jj < fragments[ii].size(); 
++jj) { + if (fragments[ii][jj] >= nloc) { + fragment_ghost_size += 1; + } else { + fragment_real_size += 1; + } + } + if (fragment_real_size > max_fragment_real_size) { + max_fragment_real_size = fragment_real_size; + } + if (fragment_ghost_size > max_fragment_ghost_size) { + max_fragment_ghost_size = fragment_ghost_size; + } + } + int max_fragment_size = max_fragment_real_size + max_fragment_ghost_size; + int map_size = nqm + max_fragment_real_size + max_fragment_ghost_size; + // (3, 4, 0, 1, 2, 10, 11), + // (3, 4, 5, 6, 7, 10, -1), + // (3, 4, 8, 9, -1, 10, -1) + forward_qmmm_map.resize((nfragments - 1) * map_size); + std::fill(forward_qmmm_map.begin(), forward_qmmm_map.end(), -1); + int nqm_real; + for (int ii = 0; ii < nfragments - 1; ++ii) { + // real + int kk = 0; + for (int jj = 0; jj < nqm; ++jj) { + if (fragments[0][kk] < nloc) { + forward_qmmm_map[ii * map_size + kk] = fragments[0][kk]; + kk++; + } + } + nqm_real = kk; + kk = 0; + for (int jj = nqm; jj < fragments[ii + 1].size(); ++jj) { + if (fragments[ii + 1][kk] < nloc) { + forward_qmmm_map[ii * map_size + nqm_real + kk] = fragments[ii + 1][kk]; + kk++; + } + } + // ghost + kk = 0; + for (int jj = 0; jj < nqm; ++jj) { + if (fragments[0][kk] >= nloc) { + forward_qmmm_map[ii * map_size + nqm_real + max_fragment_real_size + + kk] = fragments[0][kk]; + kk++; + } + } + kk = 0; + for (int jj = nqm; jj < fragments[ii + 1].size(); ++jj) { + if (fragments[ii + 1][kk] >= nloc) { + forward_qmmm_map[ii * map_size + nqm + max_fragment_real_size + kk] = + fragments[ii + 1][kk]; + kk++; + } + } + } + + // (2, 3, 4, 0, 1, -1, -1, -1, -1, -1, 5, 6) + // (-1, -1, -1, 0, 1, 2, 3, 4, -1, -1, 5, -1) + // (-1, -1, -1, 0, 1, -1, -1, -1, 2, 3, 5, -1) + backward_qmmm_map.resize((nfragments - 1) * nall); + std::fill(backward_qmmm_map.begin(), backward_qmmm_map.end(), -1); + for (int ii = 0; ii < nfragments - 1; ++ii) { + for (int jj = 0; jj < fragments[ii + 1].size(); ++jj) { + if (forward_qmmm_map[ii * map_size + jj] != -1) { + backward_qmmm_map[ii * nall + forward_qmmm_map[ii * map_size + jj]] = + jj; + } + } + } + // natoms + nloc_qm = nqm_real; + nloc_qmmm = nqm_real + max_fragment_real_size; + nall_qm = nqm; + nall_qmmm = nqm + max_fragment_size; +} diff --git a/source/op/CMakeLists.txt b/source/op/CMakeLists.txt index 059e72b9a8..167c6c5396 100644 --- a/source/op/CMakeLists.txt +++ b/source/op/CMakeLists.txt @@ -37,7 +37,8 @@ file( neighbor_stat.cc unaggregated_grad.cc tabulate_multi_device.cc - prod_env_mat_multi_device.cc) + prod_env_mat_multi_device.cc + pairwise.cc) file( GLOB OP_GRADS_SRC diff --git a/source/op/pairwise.cc b/source/op/pairwise.cc new file mode 100644 index 0000000000..e65610f3dd --- /dev/null +++ b/source/op/pairwise.cc @@ -0,0 +1,207 @@ +#include "pairwise.h" + +#include "custom_op.h" + +REGISTER_OP("DprcPairwiseIdx") + .Input("idxs: int32") + .Input("natoms: int32") + .Output("forward_qm_map: int32") + .Output("backward_qm_map: int32") + .Output("forward_qmmm_map: int32") + .Output("backward_qmmm_map: int32") + .Output("natoms_qm: int32") + .Output("natoms_qmmm: int32") + .Output("qmmm_frame_idx: int32"); + +using namespace tensorflow; + +using CPUDevice = Eigen::ThreadPoolDevice; + +template +class PairwiseIdxOp : public OpKernel { + public: + explicit PairwiseIdxOp(OpKernelConstruction* context) : OpKernel(context) {} + + void Compute(OpKernelContext* context) override { + deepmd::safe_compute( + context, [this](OpKernelContext* context) { this->_Compute(context); }); + } + + void _Compute(OpKernelContext* context) { + 
// Grab the input tensor + int tmp_idx = 0; + const Tensor& idxs_tensor = context->input(tmp_idx++); + const Tensor& natoms_tensor = context->input(tmp_idx++); + + // set size of the sample + OP_REQUIRES(context, (idxs_tensor.shape().dims() == 2), + errors::InvalidArgument("Dim of idxs should be 2")); + OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1), + errors::InvalidArgument("Dim of natoms should be 1")); + + auto idxs = idxs_tensor.matrix(); + int nframes = idxs_tensor.shape().dim_size(0); + auto natoms = natoms_tensor.vec(); + int nloc = natoms(0); + int nall = natoms(1); + OP_REQUIRES(context, nframes > 0, + errors::InvalidArgument("nframes should be > 0")); + + std::vector> forward_qm_maps, backward_qm_maps, + forward_qmmm_maps, backward_qmmm_maps; + std::vector nframes_qmmm, nloc_qm, nloc_qmmm, nghost_qm, nghost_qmmm; + for (int ii = 0; ii < nframes; ++ii) { + std::vector v_idxs(nall); + for (int jj = 0; jj < nall; ++jj) { + v_idxs[jj] = idxs(ii, jj); + } + std::vector> fragments; + std::vector forward_qm_map, backward_qm_map, forward_qmmm_map, + backward_qmmm_map; + int nloc_qm_ii, nloc_qmmm_ii, nall_qm_ii, nall_qmmm_ii; + deepmd::group_atoms_cpu(fragments, v_idxs); + deepmd::dprc_pairwise_map_cpu(forward_qm_map, backward_qm_map, + forward_qmmm_map, backward_qmmm_map, + nloc_qm_ii, nloc_qmmm_ii, nall_qm_ii, + nall_qmmm_ii, fragments, nloc, nall); + forward_qm_maps.push_back(forward_qm_map); + backward_qm_maps.push_back(backward_qm_map); + forward_qmmm_maps.push_back(forward_qmmm_map); + backward_qmmm_maps.push_back(backward_qmmm_map); + // get the maximun + int nghost_qm_ii = nall_qm_ii - nloc_qm_ii, + nghost_qmmm_ii = nall_qmmm_ii - nloc_qmmm_ii; + nloc_qm.push_back(nloc_qm_ii); + nloc_qmmm.push_back(nloc_qmmm_ii); + nghost_qm.push_back(nghost_qm_ii); + nghost_qmmm.push_back(nghost_qmmm_ii); + nframes_qmmm.push_back(backward_qmmm_map.size() / nall); + } + int max_nloc_qm = *std::max_element(nloc_qm.begin(), nloc_qm.end()); + int max_nloc_qmmm = *std::max_element(nloc_qmmm.begin(), nloc_qmmm.end()); + int max_nghost_qm = *std::max_element(nghost_qm.begin(), nghost_qm.end()); + int max_nghost_qmmm = + *std::max_element(nghost_qmmm.begin(), nghost_qmmm.end()); + int nframes_qmmm_tot = + std::accumulate(nframes_qmmm.begin(), nframes_qmmm.end(), 0); + // Create an output tensor + TensorShape forward_qm_map_shape; + forward_qm_map_shape.AddDim(nframes); + forward_qm_map_shape.AddDim(max_nloc_qm + max_nghost_qm); + TensorShape backward_qm_map_shape; + backward_qm_map_shape.AddDim(nframes); + backward_qm_map_shape.AddDim(nall); + TensorShape forward_qmmm_map_shape; + forward_qmmm_map_shape.AddDim(nframes_qmmm_tot); + forward_qmmm_map_shape.AddDim(max_nloc_qmmm + max_nghost_qmmm); + TensorShape backward_qmmm_map_shape; + backward_qmmm_map_shape.AddDim(nframes_qmmm_tot); + backward_qmmm_map_shape.AddDim(nall); + TensorShape qmmm_frame_idx_shape; + qmmm_frame_idx_shape.AddDim(nframes_qmmm_tot); + + Tensor* forward_qm_map_tensor = NULL; + Tensor* backward_qm_map_tensor = NULL; + Tensor* forward_qmmm_map_tensor = NULL; + Tensor* backward_qmmm_map_tensor = NULL; + Tensor* natoms_qm_tensor = NULL; + Tensor* natoms_qmmm_tensor = NULL; + Tensor* qmmm_frame_idx_tensor = NULL; + + tmp_idx = 0; + OP_REQUIRES_OK(context, + context->allocate_output(tmp_idx++, forward_qm_map_shape, + &forward_qm_map_tensor)); + OP_REQUIRES_OK(context, + context->allocate_output(tmp_idx++, backward_qm_map_shape, + &backward_qm_map_tensor)); + OP_REQUIRES_OK(context, + context->allocate_output(tmp_idx++, 
forward_qmmm_map_shape, + &forward_qmmm_map_tensor)); + OP_REQUIRES_OK(context, + context->allocate_output(tmp_idx++, backward_qmmm_map_shape, + &backward_qmmm_map_tensor)); + OP_REQUIRES_OK(context, + context->allocate_output(tmp_idx++, natoms_tensor.shape(), + &natoms_qm_tensor)); + OP_REQUIRES_OK(context, + context->allocate_output(tmp_idx++, natoms_tensor.shape(), + &natoms_qmmm_tensor)); + OP_REQUIRES_OK(context, + context->allocate_output(tmp_idx++, qmmm_frame_idx_shape, + &qmmm_frame_idx_tensor)); + // copy from vector to tensor + auto m_forward_qm_map = forward_qm_map_tensor->matrix(); + auto m_backward_qm_map = backward_qm_map_tensor->matrix(); + auto m_forward_qmmm_map = forward_qmmm_map_tensor->matrix(); + auto m_backward_qmmm_map = backward_qmmm_map_tensor->matrix(); + auto m_natoms_qm = natoms_qm_tensor->vec(); + auto m_natoms_qmmm = natoms_qmmm_tensor->vec(); + auto m_qmmm_frame_idx = qmmm_frame_idx_tensor->vec(); + for (int ii = 0; ii < nframes; ++ii) { + for (int jj = 0; jj < nloc_qm[ii]; ++jj) { + m_forward_qm_map(ii, jj) = forward_qm_maps[ii][jj]; + } + for (int jj = nloc_qm[ii]; jj < max_nloc_qm; ++jj) { + m_forward_qm_map(ii, jj) = -1; + } + for (int jj = max_nloc_qm; jj < max_nloc_qm + nghost_qm[ii]; ++jj) { + m_forward_qm_map(ii, jj) = + forward_qm_maps[ii][jj - (max_nloc_qm - nloc_qm[ii])]; + } + for (int jj = max_nloc_qm + nghost_qm[ii]; jj < nall; ++jj) { + m_forward_qm_map(ii, jj) = -1; + } + for (int jj = 0; jj < nall; ++jj) { + m_backward_qm_map(ii, jj) = backward_qm_maps[ii][jj]; + } + } + int nn = 0; + for (int ii = 0; ii < nframes; ++ii) { + for (int kk = 0; kk < nframes_qmmm[ii]; ++kk) { + for (int jj = 0; jj < nloc_qmmm[ii]; ++jj) { + m_forward_qmmm_map(nn, jj) = + forward_qmmm_maps[ii] + [kk * (nloc_qmmm[ii] + nghost_qmmm[ii]) + jj]; + } + for (int jj = nloc_qmmm[ii]; jj < max_nloc_qmmm; ++jj) { + m_forward_qmmm_map(nn, jj) = -1; + } + for (int jj = max_nloc_qmmm; jj < max_nloc_qmmm + nghost_qmmm[ii]; + ++jj) { + m_forward_qmmm_map(nn, jj) = + forward_qmmm_maps[ii][kk * (nloc_qmmm[ii] + nghost_qmmm[ii]) + + jj - (max_nloc_qmmm - nloc_qmmm[ii])]; + } + for (int jj = max_nloc_qmmm + nghost_qmmm[ii]; + jj < max_nloc_qmmm + max_nghost_qmmm; ++jj) { + m_forward_qmmm_map(nn, jj) = -1; + } + + // max_nloc_qmmm + max_nghost_qmmm + for (int jj = 0; jj < nall; ++jj) { + m_backward_qmmm_map(nn, jj) = backward_qmmm_maps[ii][kk * nall + jj]; + } + nn++; + } + } + m_natoms_qm(0) = max_nloc_qm; + m_natoms_qm(1) = max_nloc_qm + max_nghost_qm; + m_natoms_qm(2) = max_nloc_qm; + for (int ii = 3; ii < m_natoms_qm.size(); ++ii) { + m_natoms_qm(ii) = 0; + } + m_natoms_qmmm(0) = max_nloc_qmmm; + m_natoms_qmmm(1) = max_nloc_qmmm + max_nghost_qmmm; + m_natoms_qmmm(2) = max_nloc_qmmm; + for (int ii = 3; ii < m_natoms_qmmm.size(); ++ii) { + m_natoms_qmmm(ii) = 0; + } + } +}; + +// Register the CPU kernels. 
+#define REGISTER_CPU(T) \ + REGISTER_KERNEL_BUILDER(Name("DprcPairwiseIdx").Device(DEVICE_CPU), \ + PairwiseIdxOp); +REGISTER_CPU(); From a3634308534cf5d34dc5e5ddb7e58c2447c7bdc9 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 30 May 2023 02:37:10 -0400 Subject: [PATCH 02/48] add tests for group_atoms Signed-off-by: Jinzhe Zeng --- source/lib/tests/CMakeLists.txt | 4 ++-- source/lib/tests/test_pairwise.cc | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 source/lib/tests/test_pairwise.cc diff --git a/source/lib/tests/CMakeLists.txt b/source/lib/tests/CMakeLists.txt index 542588535d..f2fd8969d9 100644 --- a/source/lib/tests/CMakeLists.txt +++ b/source/lib/tests/CMakeLists.txt @@ -4,8 +4,8 @@ project(libdeepmd_test) file(GLOB TEST_SRC test_*.cc) add_executable(runUnitTests_lib ${TEST_SRC}) -target_link_libraries(runUnitTests_lib GTest::gtest_main ${LIB_DEEPMD} - coverage_config) +target_link_libraries(runUnitTests_lib GTest::gtest_main GTest::gmock_main + ${LIB_DEEPMD} coverage_config) add_test(runUnitTests_lib runUnitTests_lib) set_target_properties(runUnitTests_lib PROPERTIES INSTALL_RPATH diff --git a/source/lib/tests/test_pairwise.cc b/source/lib/tests/test_pairwise.cc new file mode 100644 index 0000000000..d6c37b688b --- /dev/null +++ b/source/lib/tests/test_pairwise.cc @@ -0,0 +1,16 @@ +#include +#include + +#include "pairwise.h" + +TEST(TestGroupAtoms, group_atoms) { + std::vector idxs = {1, 1, 1, 0, 0, 2, 2, 2, 3, 3, 0, 1}; + // ((3,4,10), (0,1,2,11), (5,6,7), (8,9)) + std::vector> fragments; + deepmd::group_atoms_cpu(fragments, idxs); + EXPECT_EQ(fragments.size(), 4); + ASSERT_THAT(fragments[0], testing::ElementsAre(3, 4, 10)); + ASSERT_THAT(fragments[1], testing::ElementsAre(0, 1, 2, 11)); + ASSERT_THAT(fragments[2], testing::ElementsAre(5, 6, 7)); + ASSERT_THAT(fragments[3], testing::ElementsAre(8, 9)); +} From edf282ae732113e69651886e91c17c20785b67e8 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 2 Jun 2023 01:59:01 -0400 Subject: [PATCH 03/48] add tests and fix bugs Signed-off-by: Jinzhe Zeng --- source/lib/src/pairwise.cc | 24 ++++++++++++------------ source/lib/tests/test_pairwise.cc | 26 ++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/source/lib/src/pairwise.cc b/source/lib/src/pairwise.cc index 2ecc6b8519..157e84fcc7 100644 --- a/source/lib/src/pairwise.cc +++ b/source/lib/src/pairwise.cc @@ -60,7 +60,7 @@ void deepmd::dprc_pairwise_map_cpu( // 10, 11 is ghost atoms // (3, 4, 10) forward_qm_map = fragments[0]; - // (-1, -1, -1, 0, 1, -1, -1, -1, -1, -1, 0, -1) + // (-1, -1, -1, 0, 1, -1, -1, -1, -1, -1, 2, -1) backward_qm_map.resize(nall); std::fill(backward_qm_map.begin(), backward_qm_map.end(), -1); for (int ii = 0; ii < forward_qm_map.size(); ++ii) { @@ -99,33 +99,33 @@ void deepmd::dprc_pairwise_map_cpu( // real int kk = 0; for (int jj = 0; jj < nqm; ++jj) { - if (fragments[0][kk] < nloc) { - forward_qmmm_map[ii * map_size + kk] = fragments[0][kk]; + if (fragments[0][jj] < nloc) { + forward_qmmm_map[ii * map_size + kk] = fragments[0][jj]; kk++; } } nqm_real = kk; kk = 0; - for (int jj = nqm; jj < fragments[ii + 1].size(); ++jj) { - if (fragments[ii + 1][kk] < nloc) { - forward_qmmm_map[ii * map_size + nqm_real + kk] = fragments[ii + 1][kk]; + for (int jj = 0; jj < fragments[ii + 1].size(); ++jj) { + if (fragments[ii + 1][jj] < nloc) { + forward_qmmm_map[ii * map_size + nqm_real + kk] = fragments[ii + 1][jj]; kk++; } } // ghost kk = 0; for (int jj = 0; jj < nqm; ++jj) { - if 
(fragments[0][kk] >= nloc) { + if (fragments[0][jj] >= nloc) { forward_qmmm_map[ii * map_size + nqm_real + max_fragment_real_size + - kk] = fragments[0][kk]; + kk] = fragments[0][jj]; kk++; } } kk = 0; - for (int jj = nqm; jj < fragments[ii + 1].size(); ++jj) { - if (fragments[ii + 1][kk] >= nloc) { + for (int jj = 0; jj < fragments[ii + 1].size(); ++jj) { + if (fragments[ii + 1][jj] >= nloc) { forward_qmmm_map[ii * map_size + nqm + max_fragment_real_size + kk] = - fragments[ii + 1][kk]; + fragments[ii + 1][jj]; kk++; } } @@ -137,7 +137,7 @@ void deepmd::dprc_pairwise_map_cpu( backward_qmmm_map.resize((nfragments - 1) * nall); std::fill(backward_qmmm_map.begin(), backward_qmmm_map.end(), -1); for (int ii = 0; ii < nfragments - 1; ++ii) { - for (int jj = 0; jj < fragments[ii + 1].size(); ++jj) { + for (int jj = 0; jj < map_size; ++jj) { if (forward_qmmm_map[ii * map_size + jj] != -1) { backward_qmmm_map[ii * nall + forward_qmmm_map[ii * map_size + jj]] = jj; diff --git a/source/lib/tests/test_pairwise.cc b/source/lib/tests/test_pairwise.cc index d6c37b688b..4c298863bf 100644 --- a/source/lib/tests/test_pairwise.cc +++ b/source/lib/tests/test_pairwise.cc @@ -14,3 +14,29 @@ TEST(TestGroupAtoms, group_atoms) { ASSERT_THAT(fragments[2], testing::ElementsAre(5, 6, 7)); ASSERT_THAT(fragments[3], testing::ElementsAre(8, 9)); } + +TEST(TestPairwiseMap, pairwise_map) { + std::vector idxs = {1, 1, 1, 0, 0, 2, 2, 2, 3, 3, 0, 1}; + std::vector> fragments; + deepmd::group_atoms_cpu(fragments, idxs); + std::vector forward_qm_map, backward_qm_map, forward_qmmm_map, + backward_qmmm_map; + int nloc_qm, nloc_qmmm, nall_qm, nall_qmmm; + deepmd::dprc_pairwise_map_cpu( + forward_qm_map, backward_qm_map, forward_qmmm_map, backward_qmmm_map, + nloc_qm, nloc_qmmm, nall_qm, nall_qmmm, fragments, 10, 12); + ASSERT_THAT(forward_qm_map, testing::ElementsAre(3, 4, 10)); + ASSERT_THAT(backward_qm_map, testing::ElementsAre(-1, -1, -1, 0, 1, -1, -1, + -1, -1, -1, 2, -1)); + ASSERT_THAT(forward_qmmm_map, + testing::ElementsAre(3, 4, 0, 1, 2, 10, 11, 3, 4, 5, 6, 7, 10, -1, + 3, 4, 8, 9, -1, 10, -1)); + ASSERT_THAT(backward_qmmm_map, + testing::ElementsAre(2, 3, 4, 0, 1, -1, -1, -1, -1, -1, 5, 6, -1, + -1, -1, 0, 1, 2, 3, 4, -1, -1, 5, -1, -1, -1, + -1, 0, 1, -1, -1, -1, 2, 3, 5, -1)); + EXPECT_EQ(nloc_qm, 2); + EXPECT_EQ(nloc_qmmm, 5); + EXPECT_EQ(nall_qm, 3); + EXPECT_EQ(nall_qmmm, 7); +} From dcb3feda65714d3adb32b1ff6370c5242808ac51 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 2 Jun 2023 02:25:22 -0400 Subject: [PATCH 04/48] add tests to test the OP Signed-off-by: Jinzhe Zeng --- source/tests/test_pairwise_dprc.py | 60 ++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 source/tests/test_pairwise_dprc.py diff --git a/source/tests/test_pairwise_dprc.py b/source/tests/test_pairwise_dprc.py new file mode 100644 index 0000000000..8fb62c5ae1 --- /dev/null +++ b/source/tests/test_pairwise_dprc.py @@ -0,0 +1,60 @@ +"""Test pairwise DPRc features.""" +import numpy as np + +from deepmd.env import ( + op_module, + tf, +) +from deepmd.utils.sess import ( + run_sess, +) + + +class TestPairwiseOP(tf.test.TestCase): + """Test dprc_pairwise_idx OP.""" + + def test_op_single_frame(self): + """Test dprc_pairwise_idx OP with a single frame.""" + # same as C++ tests + idxs = np.array([[1, 1, 1, 0, 0, 2, 2, 2, 3, 3, 0, 1]], dtype=int) + natoms = np.array([10, 12, 10], dtype=int) + with self.cached_session() as sess: + t_idxs = tf.convert_to_tensor(idxs, dtype=tf.int32) + t_natoms = 
tf.convert_to_tensor(natoms, dtype=tf.int32) + t_outputs = op_module.dprc_pairwise_idx(t_idxs, t_natoms) + ( + forward_qm_map, + backward_qm_map, + forward_qmmm_map, + backward_qmmm_map, + natoms_qm, + natoms_qmmm, + qmmm_frame_idx, + ) = run_sess(sess, t_outputs) + np.testing.assert_array_equal(forward_qm_map, np.array([[3, 4, 10]])) + np.testing.assert_array_equal( + backward_qm_map, np.array([[-1, -1, -1, 0, 1, -1, -1, -1, -1, -1, 2, -1]]) + ) + np.testing.assert_array_equal( + forward_qmmm_map, + np.array( + [ + [3, 4, 0, 1, 2, 10, 11], + [3, 4, 5, 6, 7, 10, -1], + [3, 4, 8, 9, -1, 10, -1], + ] + ), + ) + np.testing.assert_array_equal( + backward_qmmm_map, + np.array( + [ + [2, 3, 4, 0, 1, -1, -1, -1, -1, -1, 5, 6], + [-1, -1, -1, 0, 1, 2, 3, 4, -1, -1, 5, -1], + [-1, -1, -1, 0, 1, -1, -1, -1, 2, 3, 5, -1], + ] + ), + ) + np.testing.assert_array_equal(natoms_qm, [2, 3, 2]) + np.testing.assert_array_equal(natoms_qmmm, [5, 7, 5]) + np.testing.assert_array_equal(qmmm_frame_idx, np.array([0, 0, 0])) From 9c64194042bbeec7b27e45f0335d923798d4d840 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 2 Jun 2023 03:18:26 -0400 Subject: [PATCH 05/48] fix m_qmmm_frame_idx Signed-off-by: Jinzhe Zeng --- source/op/pairwise.cc | 1 + source/tests/test_pairwise_dprc.py | 17 ++++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/source/op/pairwise.cc b/source/op/pairwise.cc index e65610f3dd..599f0c896d 100644 --- a/source/op/pairwise.cc +++ b/source/op/pairwise.cc @@ -182,6 +182,7 @@ class PairwiseIdxOp : public OpKernel { for (int jj = 0; jj < nall; ++jj) { m_backward_qmmm_map(nn, jj) = backward_qmmm_maps[ii][kk * nall + jj]; } + m_qmmm_frame_idx(nn) = ii; nn++; } } diff --git a/source/tests/test_pairwise_dprc.py b/source/tests/test_pairwise_dprc.py index 8fb62c5ae1..bedb772a74 100644 --- a/source/tests/test_pairwise_dprc.py +++ b/source/tests/test_pairwise_dprc.py @@ -31,9 +31,10 @@ def test_op_single_frame(self): natoms_qmmm, qmmm_frame_idx, ) = run_sess(sess, t_outputs) - np.testing.assert_array_equal(forward_qm_map, np.array([[3, 4, 10]])) + np.testing.assert_array_equal(forward_qm_map, np.array([[3, 4, 10]], dtype=int)) np.testing.assert_array_equal( - backward_qm_map, np.array([[-1, -1, -1, 0, 1, -1, -1, -1, -1, -1, 2, -1]]) + backward_qm_map, + np.array([[-1, -1, -1, 0, 1, -1, -1, -1, -1, -1, 2, -1]], dtype=int), ) np.testing.assert_array_equal( forward_qmmm_map, @@ -42,7 +43,8 @@ def test_op_single_frame(self): [3, 4, 0, 1, 2, 10, 11], [3, 4, 5, 6, 7, 10, -1], [3, 4, 8, 9, -1, 10, -1], - ] + ], + dtype=int, ), ) np.testing.assert_array_equal( @@ -52,9 +54,10 @@ def test_op_single_frame(self): [2, 3, 4, 0, 1, -1, -1, -1, -1, -1, 5, 6], [-1, -1, -1, 0, 1, 2, 3, 4, -1, -1, 5, -1], [-1, -1, -1, 0, 1, -1, -1, -1, 2, 3, 5, -1], - ] + ], + dtype=int, ), ) - np.testing.assert_array_equal(natoms_qm, [2, 3, 2]) - np.testing.assert_array_equal(natoms_qmmm, [5, 7, 5]) - np.testing.assert_array_equal(qmmm_frame_idx, np.array([0, 0, 0])) + np.testing.assert_array_equal(natoms_qm, np.array([2, 3, 2], dtype=int)) + np.testing.assert_array_equal(natoms_qmmm, np.array([5, 7, 5], dtype=int)) + np.testing.assert_array_equal(qmmm_frame_idx, np.array([0, 0, 0], dtype=int)) From 1fad763bdda6fd2507792876e7a7a66ae1bf4766 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 6 Jun 2023 17:52:02 -0400 Subject: [PATCH 06/48] fix index out of range Signed-off-by: Jinzhe Zeng --- source/lib/src/pairwise.cc | 20 ++++----- source/op/pairwise.cc | 86 +++++++++++++++++++------------------- 2 files 
changed, 51 insertions(+), 55 deletions(-) diff --git a/source/lib/src/pairwise.cc b/source/lib/src/pairwise.cc index 157e84fcc7..297279fa91 100644 --- a/source/lib/src/pairwise.cc +++ b/source/lib/src/pairwise.cc @@ -75,9 +75,9 @@ void deepmd::dprc_pairwise_map_cpu( int fragment_ghost_size = 0; for (int jj = 0; jj < fragments[ii].size(); ++jj) { if (fragments[ii][jj] >= nloc) { - fragment_ghost_size += 1; + fragment_ghost_size++; } else { - fragment_real_size += 1; + fragment_real_size++; } } if (fragment_real_size > max_fragment_real_size) { @@ -97,32 +97,30 @@ void deepmd::dprc_pairwise_map_cpu( int nqm_real; for (int ii = 0; ii < nfragments - 1; ++ii) { // real - int kk = 0; - for (int jj = 0; jj < nqm; ++jj) { + for (int jj = 0, kk = 0; jj < nqm; ++jj) { if (fragments[0][jj] < nloc) { forward_qmmm_map[ii * map_size + kk] = fragments[0][jj]; kk++; } + if (jj == nqm - 1) { + nqm_real = kk; + } } - nqm_real = kk; - kk = 0; - for (int jj = 0; jj < fragments[ii + 1].size(); ++jj) { + for (int jj = 0, kk = 0; jj < fragments[ii + 1].size(); ++jj) { if (fragments[ii + 1][jj] < nloc) { forward_qmmm_map[ii * map_size + nqm_real + kk] = fragments[ii + 1][jj]; kk++; } } // ghost - kk = 0; - for (int jj = 0; jj < nqm; ++jj) { + for (int jj = 0, kk = 0; jj < nqm; ++jj) { if (fragments[0][jj] >= nloc) { forward_qmmm_map[ii * map_size + nqm_real + max_fragment_real_size + kk] = fragments[0][jj]; kk++; } } - kk = 0; - for (int jj = 0; jj < fragments[ii + 1].size(); ++jj) { + for (int jj = 0, kk = 0; jj < fragments[ii + 1].size(); ++jj) { if (fragments[ii + 1][jj] >= nloc) { forward_qmmm_map[ii * map_size + nqm + max_fragment_real_size + kk] = fragments[ii + 1][jj]; diff --git a/source/op/pairwise.cc b/source/op/pairwise.cc index 599f0c896d..cf88ac13a8 100644 --- a/source/op/pairwise.cc +++ b/source/op/pairwise.cc @@ -77,11 +77,14 @@ class PairwiseIdxOp : public OpKernel { nghost_qmmm.push_back(nghost_qmmm_ii); nframes_qmmm.push_back(backward_qmmm_map.size() / nall); } - int max_nloc_qm = *std::max_element(nloc_qm.begin(), nloc_qm.end()); - int max_nloc_qmmm = *std::max_element(nloc_qmmm.begin(), nloc_qmmm.end()); - int max_nghost_qm = *std::max_element(nghost_qm.begin(), nghost_qm.end()); - int max_nghost_qmmm = - *std::max_element(nghost_qmmm.begin(), nghost_qmmm.end()); + int max_nloc_qm = 0, max_nloc_qmmm = 0, max_nghost_qm = 0, + max_nghost_qmmm = 0; + for (int ii = 0; ii < nframes; ++ii) { + max_nloc_qm = std::max(max_nloc_qm, nloc_qm[ii]); + max_nloc_qmmm = std::max(max_nloc_qmmm, nloc_qmmm[ii]); + max_nghost_qm = std::max(max_nghost_qm, nghost_qm[ii]); + max_nghost_qmmm = std::max(max_nghost_qmmm, nghost_qmmm[ii]); + } int nframes_qmmm_tot = std::accumulate(nframes_qmmm.begin(), nframes_qmmm.end(), 0); // Create an output tensor @@ -97,6 +100,10 @@ class PairwiseIdxOp : public OpKernel { TensorShape backward_qmmm_map_shape; backward_qmmm_map_shape.AddDim(nframes_qmmm_tot); backward_qmmm_map_shape.AddDim(nall); + TensorShape natoms_qm_shape; + natoms_qm_shape.AddDim(natoms_tensor.shape().dim_size(0)); + TensorShape natoms_qmmm_shape; + natoms_qmmm_shape.AddDim(natoms_tensor.shape().dim_size(0)); TensorShape qmmm_frame_idx_shape; qmmm_frame_idx_shape.AddDim(nframes_qmmm_tot); @@ -121,11 +128,10 @@ class PairwiseIdxOp : public OpKernel { OP_REQUIRES_OK(context, context->allocate_output(tmp_idx++, backward_qmmm_map_shape, &backward_qmmm_map_tensor)); + OP_REQUIRES_OK(context, context->allocate_output(tmp_idx++, natoms_qm_shape, + &natoms_qm_tensor)); OP_REQUIRES_OK(context, - 
context->allocate_output(tmp_idx++, natoms_tensor.shape(), - &natoms_qm_tensor)); - OP_REQUIRES_OK(context, - context->allocate_output(tmp_idx++, natoms_tensor.shape(), + context->allocate_output(tmp_idx++, natoms_qmmm_shape, &natoms_qmmm_tensor)); OP_REQUIRES_OK(context, context->allocate_output(tmp_idx++, qmmm_frame_idx_shape, @@ -138,48 +144,40 @@ class PairwiseIdxOp : public OpKernel { auto m_natoms_qm = natoms_qm_tensor->vec(); auto m_natoms_qmmm = natoms_qmmm_tensor->vec(); auto m_qmmm_frame_idx = qmmm_frame_idx_tensor->vec(); - for (int ii = 0; ii < nframes; ++ii) { - for (int jj = 0; jj < nloc_qm[ii]; ++jj) { - m_forward_qm_map(ii, jj) = forward_qm_maps[ii][jj]; - } - for (int jj = nloc_qm[ii]; jj < max_nloc_qm; ++jj) { - m_forward_qm_map(ii, jj) = -1; - } - for (int jj = max_nloc_qm; jj < max_nloc_qm + nghost_qm[ii]; ++jj) { - m_forward_qm_map(ii, jj) = - forward_qm_maps[ii][jj - (max_nloc_qm - nloc_qm[ii])]; - } - for (int jj = max_nloc_qm + nghost_qm[ii]; jj < nall; ++jj) { - m_forward_qm_map(ii, jj) = -1; + for (int ii = 0, nn = 0; ii < nframes; ++ii) { + for (int jj = 0; jj < max_nloc_qm + max_nghost_qm; ++jj) { + if (jj < nloc_qm[ii]) { + m_forward_qm_map(ii, jj) = forward_qm_maps[ii][jj]; + } else if (jj < max_nloc_qm) { + m_forward_qm_map(ii, jj) = -1; + } else if (jj < max_nloc_qm + nghost_qm[ii]) { + m_forward_qm_map(ii, jj) = + forward_qm_maps[ii][jj - (max_nloc_qm - nloc_qm[ii])]; + } else { + m_forward_qm_map(ii, jj) = -1; + } } for (int jj = 0; jj < nall; ++jj) { m_backward_qm_map(ii, jj) = backward_qm_maps[ii][jj]; } - } - int nn = 0; - for (int ii = 0; ii < nframes; ++ii) { for (int kk = 0; kk < nframes_qmmm[ii]; ++kk) { - for (int jj = 0; jj < nloc_qmmm[ii]; ++jj) { - m_forward_qmmm_map(nn, jj) = - forward_qmmm_maps[ii] - [kk * (nloc_qmmm[ii] + nghost_qmmm[ii]) + jj]; - } - for (int jj = nloc_qmmm[ii]; jj < max_nloc_qmmm; ++jj) { - m_forward_qmmm_map(nn, jj) = -1; + for (int jj = 0; jj < max_nloc_qmmm + max_nghost_qmmm; ++jj) { + if (jj < nloc_qmmm[ii]) { + m_forward_qmmm_map(nn, jj) = + forward_qmmm_maps[ii] + [kk * (nloc_qmmm[ii] + nghost_qmmm[ii]) + jj]; + } else if (jj < max_nloc_qmmm) { + m_forward_qmmm_map(nn, jj) = -1; + } else if (jj < max_nloc_qmmm + nghost_qmmm[ii]) { + m_forward_qmmm_map(nn, jj) = + forward_qmmm_maps[ii][kk * (nloc_qmmm[ii] + nghost_qmmm[ii]) + + jj - (max_nloc_qmmm - nloc_qmmm[ii])]; + } else { + m_forward_qmmm_map(nn, jj) = -1; + } } - for (int jj = max_nloc_qmmm; jj < max_nloc_qmmm + nghost_qmmm[ii]; - ++jj) { - m_forward_qmmm_map(nn, jj) = - forward_qmmm_maps[ii][kk * (nloc_qmmm[ii] + nghost_qmmm[ii]) + - jj - (max_nloc_qmmm - nloc_qmmm[ii])]; - } - for (int jj = max_nloc_qmmm + nghost_qmmm[ii]; - jj < max_nloc_qmmm + max_nghost_qmmm; ++jj) { - m_forward_qmmm_map(nn, jj) = -1; - } - - // max_nloc_qmmm + max_nghost_qmmm for (int jj = 0; jj < nall; ++jj) { + // max_nloc_qmmm + max_nghost_qmmm m_backward_qmmm_map(nn, jj) = backward_qmmm_maps[ii][kk * nall + jj]; } m_qmmm_frame_idx(nn) = ii; From 30b224cc4fc128d2d74bb332c70a8e25c92ba161 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Wed, 7 Jun 2023 04:48:33 -0400 Subject: [PATCH 07/48] fix model and add tests Signed-off-by: Jinzhe Zeng --- deepmd/model/pairwise_dprc.py | 36 ++-- source/tests/pairwise_dprc.json | 168 ++++++++++++++++++ source/tests/test_pairwise_dprc.py | 272 +++++++++++++++++++++++++++++ 3 files changed, 464 insertions(+), 12 deletions(-) create mode 100644 source/tests/pairwise_dprc.json diff --git a/deepmd/model/pairwise_dprc.py b/deepmd/model/pairwise_dprc.py index 
73b092f4a4..5c854e4935 100644 --- a/deepmd/model/pairwise_dprc.py +++ b/deepmd/model/pairwise_dprc.py @@ -88,7 +88,7 @@ def build( coord_: tf.Tensor, atype_: tf.Tensor, natoms: tf.Tensor, - box: tf.Tensor, + box_: tf.Tensor, mesh: tf.Tensor, input_dict: dict, frz_model=None, @@ -100,7 +100,10 @@ def build( t_dfparam = tf.constant(0, name="dfparam", dtype=tf.int32) t_daparam = tf.constant(1, name="daparam", dtype=tf.int32) # convert X-frame to X-Y-frame coordinates - idxs = input_dict["aparam"].astype(tf.int32) + box = tf.reshape(box_, [-1, 9]) + nframes = tf.shape(box)[0] + idxs = tf.cast(input_dict["aparam"], tf.int32) + idxs = tf.reshape(idxs, (nframes, natoms[1])) ( forward_qm_map, @@ -112,12 +115,19 @@ def build( qmmm_frame_idx, ) = op_module.dprc_pairwise_idx(idxs, natoms) - coord_qm = gather_placeholder(coord_, forward_qm_map) - atype_qm = gather_placeholder(atype_, forward_qm_map, placeholder=-1) - coord_qmmm = gather_placeholder(coord_, forward_qmmm_map) - atype_qmmm = gather_placeholder(atype_, forward_qmmm_map, placeholder=-1) - box_qm = tf.gather(box, forward_qm_map) - box_qmmm = tf.gather(box, forward_qmmm_map) + coord = tf.reshape(coord_, [nframes, natoms[1], 3]) + atype = tf.reshape(atype_, [nframes, natoms[1], 1]) + + coord_qm = gather_placeholder(coord, forward_qm_map) + atype_qm = gather_placeholder(atype, forward_qm_map, placeholder=-1) + coord_qmmm = gather_placeholder( + tf.gather(coord, qmmm_frame_idx), forward_qmmm_map + ) + atype_qmmm = gather_placeholder( + tf.gather(atype, qmmm_frame_idx), forward_qmmm_map, placeholder=-1 + ) + box_qm = box + box_qmmm = tf.gather(box, qmmm_frame_idx) # TODO: after #2481 is merged, change the mesh to mixed_type specific @@ -221,9 +231,11 @@ def gather_placeholder( params: tf.Tensor, indices: tf.Tensor, placeholder: float = 0.0, **kwargs ) -> tf.Tensor: """Call tf.gather but allow indices to contain placeholders (-1).""" - # (x, 2, 3) -> (1, 2, 3) - placeholder_shape = tf.concat([[1], tf.shape(params)[1:]], axis=0) + # (nframes, x, 2, 3) -> (nframes, 1, 2, 3) + placeholder_shape = tf.concat( + [[tf.shape(params)[0], 1], tf.shape(params)[2:]], axis=0 + ) params = tf.concat( - [tf.cast(tf.fill(placeholder_shape, placeholder), params.dtype), params], axis=0 + [tf.cast(tf.fill(placeholder_shape, placeholder), params.dtype), params], axis=1 ) - return tf.gather(params, indices + 1, **kwargs) + return tf.gather(params, indices + 1, batch_dims=1, **kwargs) diff --git a/source/tests/pairwise_dprc.json b/source/tests/pairwise_dprc.json new file mode 100644 index 0000000000..c1491c9d11 --- /dev/null +++ b/source/tests/pairwise_dprc.json @@ -0,0 +1,168 @@ +{ + "_comment": " model parameters", + "model": { + "type": "pairwise_dprc", + "type_map": [ + "C", + "N", + "O", + "H", + "OW", + "HW" + ], + "type_embedding": { + "neuron": [ + 2 + ] + }, + "qm_model": { + "descriptor": { + "type": "se_atten", + "sel": 100, + "rcut_smth": 5.80, + "rcut": 6.00, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 16, + "seed": 1 + }, + "fitting_net": { + "type": "ener", + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "seed": 1 + } + }, + "qmmm_model": { + "descriptor": { + "type": "se_atten", + "sel": 100, + "rcut_smth": 5.80, + "rcut": 6.00, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 16, + "set_davg": true, + "exclude_types": [ + [ + 0, + 0 + ], + [ + 0, + 1 + ], + [ + 0, + 2 + ], + [ + 0, + 3 + ], + [ + 1, + 1 + ], + [ + 1, + 2 + ], + [ + 1, + 3 + ], + [ + 2, + 2 + ], + [ + 2, + 3 + ], + 
[ + 3, + 3 + ], + [ + 4, + 4 + ], + [ + 4, + 5 + ], + [ + 5, + 5 + ] + ], + "seed": 1 + }, + "fitting_net": { + "type": "ener", + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "seed": 1, + "atom_ener": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + } + } + }, + + "systems": [ + "system" + ], + "set_prefix": "set", + "stop_batch": 1000000, + "batch_size": 1, + "start_lr": 0.005, + "decay_steps": 5000, + "decay_rate": 0.95, + + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + + "seed": 1, + + "_comment": " display and restart", + "_comment": " frequencies counted in batch", + "disp_file": "lcurve.out", + "disp_freq": 100, + "numb_test": 1, + "save_freq": 1000, + "save_ckpt": "model.ckpt", + "load_ckpt": "model.ckpt", + "disp_training": true, + "time_training": true, + "profiling": false, + "profiling_file": "timeline.json", + + "_comment": "that's all" +} diff --git a/source/tests/test_pairwise_dprc.py b/source/tests/test_pairwise_dprc.py index bedb772a74..8d003330cc 100644 --- a/source/tests/test_pairwise_dprc.py +++ b/source/tests/test_pairwise_dprc.py @@ -1,10 +1,26 @@ """Test pairwise DPRc features.""" +import dpdata import numpy as np +from deepmd.common import ( + j_loader, + j_must_have, +) from deepmd.env import ( + GLOBAL_ENER_FLOAT_PRECISION, + GLOBAL_TF_FLOAT_PRECISION, op_module, tf, ) +from deepmd.model.model import ( + Model, +) +from deepmd.model.pairwise_dprc import ( + gather_placeholder, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) from deepmd.utils.sess import ( run_sess, ) @@ -61,3 +77,259 @@ def test_op_single_frame(self): np.testing.assert_array_equal(natoms_qm, np.array([2, 3, 2], dtype=int)) np.testing.assert_array_equal(natoms_qmmm, np.array([5, 7, 5], dtype=int)) np.testing.assert_array_equal(qmmm_frame_idx, np.array([0, 0, 0], dtype=int)) + + +class TestPairwiseModel(tf.test.TestCase): + def test_gather_placeholder(self): + coord = np.arange(12 * 3, dtype=np.float64).reshape(1, 12, 3) + idxs = np.array([[1, 1, 1, 0, 0, 2, 2, 2, 3, 3, 0, 1]], dtype=int) + natoms = np.array([10, 12, 10], dtype=int) + with self.cached_session() as sess: + t_idxs = tf.convert_to_tensor(idxs, dtype=tf.int32) + t_natoms = tf.convert_to_tensor(natoms, dtype=tf.int32) + t_coord = tf.convert_to_tensor(coord, dtype=tf.float32) + ( + t_forward_qm_map, + t_backward_qm_map, + t_forward_qmmm_map, + t_backward_qmmm_map, + t_natoms_qm, + t_natoms_qmmm, + t_qmmm_frame_idx, + ) = op_module.dprc_pairwise_idx(t_idxs, t_natoms) + + t_coord_qm = gather_placeholder(t_coord, t_forward_qm_map) + t_coord_qmmm = gather_placeholder( + tf.gather(t_coord, t_qmmm_frame_idx), t_forward_qmmm_map + ) + + coord_qm, coord_qmmm = run_sess(sess, [t_coord_qm, t_coord_qmmm]) + + np.testing.assert_array_equal( + coord_qm, + np.array( + [ + [ + [9, 10, 11], + [12, 13, 14], + [30, 31, 32], + ] + ], + dtype=np.float64, + ), + ) + np.testing.assert_array_equal( + coord_qmmm, + np.array( + [ + [ + [9, 10, 11], + [12, 13, 14], + [0, 1, 2], + [3, 4, 5], + [6, 7, 8], + [30, 31, 32], + [33, 34, 35], + ], + [ + [9, 10, 11], + [12, 13, 14], + [15, 16, 17], + [18, 19, 20], + [21, 22, 23], + [30, 31, 32], + [0, 0, 0], + ], + [ + [9, 10, 11], + [12, 13, 14], + [24, 25, 26], + [27, 28, 29], + [0, 0, 0], + [30, 31, 32], + [0, 0, 0], + ], + ], + dtype=np.float64, + ), + ) + + def test_model_ener(self): + jfile = "pairwise_dprc.json" + jdata = j_loader(jfile) + model = Model(**jdata["model"]) + + sys = 
dpdata.LabeledSystem() + sys.data["atom_names"] = ["C", "N", "O", "H", "OW", "HW"] + sys.data["coords"] = np.array( + [ + 2.48693, + -0.12642, + 0.45320, + 3.86292, + -0.00082, + 0.07286, + 4.19135, + 0.35148, + -1.21253, + 3.35886, + 0.58875, + -2.08423, + 5.67422, + 0.44076, + -1.45160, + 2.40712, + -0.32538, + 1.52137, + 2.04219, + -0.93912, + -0.12445, + 1.98680, + 0.81574, + 0.21261, + 4.57186, + -0.33026, + 0.71127, + 6.24532, + 0.18814, + -0.55212, + 5.92647, + 1.46447, + -1.74069, + 5.95030, + -0.25321, + -2.24804, + -0.32794, + 1.50468, + 0.83176, + 0.23662, + 2.24068, + 1.13166, + -0.24528, + 1.59132, + -0.14907, + -0.50371, + -1.24800, + -0.05601, + -0.28305, + -1.84629, + 0.67555, + -0.68673, + -0.40535, + 0.41384, + 0.38397, + 0.80987, + -1.90358, + 1.30191, + 0.68503, + -2.22909, + 0.11626, + -0.11276, + -1.70506, + ] + ).reshape(1, 21, 3) + sys.data["atom_types"] = np.array( + [0, 1, 0, 2, 0, 3, 3, 3, 3, 3, 3, 3, 4, 5, 5, 4, 5, 5, 4, 5, 5] + ) + sys.data["cells"] = np.array([np.eye(3) * 30]) + nframes = 1 + natoms = 21 + sys.data["coords"] = sys.data["coords"].reshape([nframes, natoms, 3]) + sys.data["cells"] = sys.data["cells"].reshape([nframes, 3, 3]) + sys.data["energies"] = np.ones( + [ + nframes, + ] + ) + sys.data["forces"] = np.zeros([nframes, natoms, 3]) + sys.data["nopbc"] = True + sys.to_deepmd_npy("system", prec=np.float64) + idxs = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]) + np.save("system/set.000/aparam.npy", idxs) + + systems = j_must_have(jdata, "systems") + set_pfx = j_must_have(jdata, "set_prefix") + batch_size = 1 + test_size = 1 + rcut = model.get_rcut() + + data = DeepmdDataSystem(systems, batch_size, test_size, rcut) + data.add("energy", 1, atomic=False, must=True, high_prec=True) + data.add("aparam", 1, atomic=True, must=True, high_prec=True) + test_data = data.get_test() + + t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy") + t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord") + t_type = tf.placeholder(tf.int32, [None], name="i_type") + t_natoms = tf.placeholder(tf.int32, [model.get_ntypes() + 2], name="i_natoms") + t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box") + t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh") + is_training = tf.placeholder(tf.bool) + t_aparam = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_aparam") + input_dict = {} + input_dict["aparam"] = t_aparam + + model.data_stat(data) + model_pred = model.build( + t_coord, + t_type, + t_natoms, + t_box, + t_mesh, + input_dict, + suffix="se_a_atom_ener_0", + reuse=False, + ) + energy = model_pred["energy"] + force = model_pred["force"] + virial = model_pred["virial"] + + test_types = np.array( + [ + [ + 0, + 0, + 0, + 1, + 2, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + ], + [0, 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, -1, -1, 5, 5, -1, -1, -1, -1], + [0, 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, -1, 4, -1, -1, -1, 5, 5, -1, -1], + [0, 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, -1, -1, 4, -1, -1, -1, -1, 5, 5], + [0, 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5], + ] + ) + # aparam: [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 2. 3. 1. 1. 2. 2. 3. 
3.]] + feed_dict_test = { + t_energy: np.reshape(np.tile(test_data["energy"], 5), [-1]), + t_coord: np.reshape(np.tile(test_data["coord"], 5), [-1]), + t_box: np.reshape(np.tile(test_data["box"], 5), (5, 9)), + t_type: np.reshape(test_types, [-1]), + t_natoms: [21, 21, 21, 0, 0, 0, 0, 0], + t_mesh: test_data["default_mesh"], + t_aparam: np.reshape(np.tile(test_data["aparam"], 5), [-1]), + is_training: False, + } + sess = self.test_session().__enter__() + sess.run(tf.global_variables_initializer()) + [e, f, v] = sess.run([energy, force, virial], feed_dict=feed_dict_test) + + # the model is pairwise! + self.assertAllClose(e[1] + e[2] + e[3] - 3 * e[0], e[4] - e[0]) + self.assertAllClose(f[1] + f[2] + f[3] - 3 * f[0], f[4] - f[0]) From 0b3be1f7079944f2421ad6bbc75bcc2d48be3b16 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Wed, 7 Jun 2023 19:48:25 -0400 Subject: [PATCH 08/48] skip ut if tf<1.15 Signed-off-by: Jinzhe Zeng --- source/tests/test_pairwise_dprc.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/source/tests/test_pairwise_dprc.py b/source/tests/test_pairwise_dprc.py index 8d003330cc..51dbc79fd4 100644 --- a/source/tests/test_pairwise_dprc.py +++ b/source/tests/test_pairwise_dprc.py @@ -1,6 +1,11 @@ """Test pairwise DPRc features.""" +import unittest + import dpdata import numpy as np +from pkg_resources import ( + parse_version, +) from deepmd.common import ( j_loader, @@ -79,6 +84,10 @@ def test_op_single_frame(self): np.testing.assert_array_equal(qmmm_frame_idx, np.array([0, 0, 0], dtype=int)) +@unittest.skipIf( + parse_version(tf.__version__) < parse_version("1.15"), + f"The current tf version {tf.__version__} is too low to run the new testing model.", +) class TestPairwiseModel(tf.test.TestCase): def test_gather_placeholder(self): coord = np.arange(12 * 3, dtype=np.float64).reshape(1, 12, 3) From a81f86b0076cb3b86d4b73343adfc375d6193ca4 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Wed, 7 Jun 2023 20:02:58 -0400 Subject: [PATCH 09/48] fix test paths Signed-off-by: Jinzhe Zeng --- source/tests/test_pairwise_dprc.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/tests/test_pairwise_dprc.py b/source/tests/test_pairwise_dprc.py index 51dbc79fd4..ea4ec98ae7 100644 --- a/source/tests/test_pairwise_dprc.py +++ b/source/tests/test_pairwise_dprc.py @@ -3,6 +3,9 @@ import dpdata import numpy as np +from common import ( + tests_path, +) from pkg_resources import ( parse_version, ) @@ -164,7 +167,7 @@ def test_gather_placeholder(self): ) def test_model_ener(self): - jfile = "pairwise_dprc.json" + jfile = tests_path / "pairwise_dprc.json" jdata = j_loader(jfile) model = Model(**jdata["model"]) From 9d0814952a80b30a4d9622c1b5342a8c8f2331da Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Wed, 7 Jun 2023 22:03:22 -0400 Subject: [PATCH 10/48] fix training errors; add examples Signed-off-by: Jinzhe Zeng --- deepmd/model/pairwise_dprc.py | 36 +++- examples/dprc/data/nopbc | 0 examples/dprc/data/set.000/aparam.npy | Bin 0 -> 2328 bytes examples/dprc/data/set.000/box.npy | Bin 0 -> 200 bytes examples/dprc/data/set.000/coord.npy | Bin 0 -> 6728 bytes examples/dprc/data/set.000/energy.npy | Bin 0 -> 136 bytes examples/dprc/data/set.000/force.npy | Bin 0 -> 6728 bytes examples/dprc/data/type.raw | 275 ++++++++++++++++++++++++++ examples/dprc/data/type_map.raw | 6 + examples/dprc/pairwise/input.json | 177 +++++++++++++++++ source/tests/pairwise_dprc.json | 4 +- source/tests/test_examples.py | 1 + 12 files changed, 493 insertions(+), 6 deletions(-) create 
mode 100644 examples/dprc/data/nopbc create mode 100644 examples/dprc/data/set.000/aparam.npy create mode 100644 examples/dprc/data/set.000/box.npy create mode 100644 examples/dprc/data/set.000/coord.npy create mode 100644 examples/dprc/data/set.000/energy.npy create mode 100644 examples/dprc/data/set.000/force.npy create mode 100644 examples/dprc/data/type.raw create mode 100644 examples/dprc/data/type_map.raw create mode 100644 examples/dprc/pairwise/input.json diff --git a/deepmd/model/pairwise_dprc.py b/deepmd/model/pairwise_dprc.py index 5c854e4935..8d1aafea36 100644 --- a/deepmd/model/pairwise_dprc.py +++ b/deepmd/model/pairwise_dprc.py @@ -8,6 +8,7 @@ add_data_requirement, ) from deepmd.env import ( + MODEL_VERSION, op_module, tf, ) @@ -32,7 +33,7 @@ class PairwiseDPRc(Model): """Pairwise Deep Potential - Range Correction.""" - model_type = "pairwise_dprc" + model_type = "ener" def __init__( self, @@ -96,6 +97,13 @@ def build( suffix: str = "", reuse: Optional[bool] = None, ): + if input_dict is None: + input_dict = {} + with tf.variable_scope("model_attr" + suffix, reuse=reuse): + t_tmap = tf.constant(" ".join(self.type_map), name="tmap", dtype=tf.string) + t_mt = tf.constant(self.model_type, name="model_type", dtype=tf.string) + t_ver = tf.constant(MODEL_VERSION, name="model_version", dtype=tf.string) + with tf.variable_scope("fitting_attr" + suffix, reuse=reuse): t_dfparam = tf.constant(0, name="dfparam", dtype=tf.int32) t_daparam = tf.constant(1, name="daparam", dtype=tf.int32) @@ -117,6 +125,7 @@ def build( coord = tf.reshape(coord_, [nframes, natoms[1], 3]) atype = tf.reshape(atype_, [nframes, natoms[1], 1]) + nframes_qmmm = tf.shape(qmmm_frame_idx)[0] coord_qm = gather_placeholder(coord, forward_qm_map) atype_qm = gather_placeholder(atype, forward_qm_map, placeholder=-1) @@ -159,19 +168,28 @@ def build( energy_qm = qm_dict["energy"] energy_qmmm = tf.math.segment_sum(qmmm_dict["energy"], qmmm_frame_idx) energy = energy_qm + energy_qmmm + energy = tf.identity(energy, name="o_energy" + suffix) force_qm = gather_placeholder( - qm_dict["force"], backward_qm_map, placeholder=0.0 + tf.reshape(qm_dict["force"], (nframes, natoms_qm[1], 3)), + backward_qm_map, + placeholder=0.0, ) force_qmmm = tf.math.segment_sum( - gather_placeholder(qmmm_dict["force"], backward_qmmm_map, placeholder=0.0), + gather_placeholder( + tf.reshape(qmmm_dict["force"], (nframes_qmmm, natoms_qmmm[1], 3)), + backward_qmmm_map, + placeholder=0.0, + ), qmmm_frame_idx, ) force = force_qm + force_qmmm + force = tf.reshape(force, (nframes, 3 * natoms[1]), name="o_force" + suffix) virial_qm = qm_dict["virial"] virial_qmmm = tf.math.segment_sum(qmmm_dict["virial"], qmmm_frame_idx) virial = virial_qm + virial_qmmm + virial = tf.identity(virial, name="o_virial" + suffix) atom_ener_qm = gather_placeholder( qm_dict["atom_ener"], backward_qm_map, placeholder=0.0 @@ -183,17 +201,25 @@ def build( qmmm_frame_idx, ) atom_ener = atom_ener_qm + atom_ener_qmmm + atom_ener = tf.identity(atom_ener, name="o_atom_ener" + suffix) atom_virial_qm = gather_placeholder( - qm_dict["atom_virial"], backward_qm_map, placeholder=0.0 + tf.reshape(qm_dict["atom_virial"], (nframes, natoms_qm[1], 9)), + backward_qm_map, + placeholder=0.0, ) atom_virial_qmmm = tf.math.segment_sum( gather_placeholder( - qmmm_dict["atom_virial"], backward_qmmm_map, placeholder=0.0 + tf.reshape(qmmm_dict["atom_virial"], (nframes, natoms_qm[1], 9)), + backward_qmmm_map, + placeholder=0.0, ), qmmm_frame_idx, ) atom_virial = atom_virial_qm + atom_virial_qmmm + atom_virial 
= tf.reshape( + atom_virial, (nframes, 9 * natoms[1]), name="o_atom_virial" + suffix + ) model_dict = {} model_dict["energy"] = energy diff --git a/examples/dprc/data/nopbc b/examples/dprc/data/nopbc new file mode 100644 index 0000000000..e69de29bb2 diff --git a/examples/dprc/data/set.000/aparam.npy b/examples/dprc/data/set.000/aparam.npy new file mode 100644 index 0000000000000000000000000000000000000000..b2171d23db0ee45ca2bef306d856987b50700079 GIT binary patch literal 2328 zcmdtfze^i&7{~GJCQcn3I~-Ct3qfc>Jc_~%wwrB+2yya)NwBn-BMEJ^NK=25Xj_fH zYQ_v596Ds|;E>Vr#*PjSjt>3-D$nPZcX~_5-tyvk^W69DxWiX-v)TCv&U@#hS1i>Q ztKM_hn=1QW#`VgTYQ0)mUZ_-yrThM}@UB+6>$RmqRJxnTo@U%_Cxwc z9XfAyJ`6Ndr=oqBjjYp9own8K!jSQ+!=7e(v8w07Xw5op zs?+e3?bEnvo%)}xpUcj=&9t#$o%YmetWN)I+Br1ZvQDqmY2%CD2cBw9{H-~$-Lm^= zw5{huf5$q_s?)qW?Woi6tDR4;)oK5m?bBF2G5BtKy%hajeU}XOY^Jd~ooU-X{i#l` z)U*3`AN{3HM-FVCo~qx#;-T$t%JURsb^7SY?oWKIIY=>7r$zO|mgY{1UG>DFV|x!e nrB3tew5v|f)M;P+Iz{(~y%$|lr*(CDrcN)_Y3@Y-^(43jG3-Z` literal 0 HcmV?d00001 diff --git a/examples/dprc/data/set.000/box.npy b/examples/dprc/data/set.000/box.npy new file mode 100644 index 0000000000000000000000000000000000000000..da234ee8cad58b4f37fcf2fd7049ff9c92cb17c0 GIT binary patch literal 200 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= zXCxM+0{I$-ItrGWItsN4WCJc|0mFRx3P%S99ALKpp^u9FPB`U{8uRMwZg+G505^Ce A=l}o! literal 0 HcmV?d00001 diff --git a/examples/dprc/data/set.000/coord.npy b/examples/dprc/data/set.000/coord.npy new file mode 100644 index 0000000000000000000000000000000000000000..103c14284a52b426c08ab25cb28b1747ee2415d8 GIT binary patch literal 6728 zcmbVR`G1ec79P7$drECJ5^aRkcB_2FSLpTKfmc zPv!slS3wA&H)gk1&qp_6YctPiH+|f~Gm3rxebY0#!(-mS82uEuI$9aH^-qGlm~V+c z`;K_~Gfg00@F8#$Ox)e68+&kF6w=Z&Iwv##|ijsBPE$J?n0EI&k&*Cg>59FT)G?yEZwXKLFfT{e|*7 zW@5ix5D(lOnhyPLVhrVu*2F&dvY?*$T+C(EHTb8JX24GUaWUlv_rYlQM?;So6HfcK zwQlZ-tF50-9%L;>UUa%bK5Y9!hTElaPZ3iFprr&A*wwA#4?f~r9ZQg^wI&>uE@BK=? zt$oP3PNW~PF8C%B@4!yGuMYKm;YI#ut)^dR83z$M8S>VQ0xp{du?~|`kq;qD!XD?| zN#LqrBlETMW$0C%)9HWd1<1Qx*q?EpI<`IHQzVH$*?@hz>uB#Fz^ zaQW6F;?=9P!TNy~l>g$2k7sUo?q_`d>WqEv!d%!PuTNoo5^H0hTFJU}UMovH54q-CLNjIN#8@S zs^>+0v6p~56TNv}&Eb$Y2j`MU^WFgN4jxANsxztYkT>${{8R(>x??U9&uGJb@^(k& zdtMUuxl3=7555B-Z#F%HU#jggjP8mDv}-u$HvSCu*{NZS*V4(Xi)C*^-mYs4dEvbs z>(0nojXd!e+YeHH!90xO^jP3(TR#2xYzuI+)p^|`uU0?i{)gufe;HO9_KR+1>4!6o z=afzdKb;Pd^!rG2=4-7F=dDjse!xBWr(;fF6i3>VH`@z|H+salB%~9+-y68|&W~LG zJsy5Khl)_AxPF;2-tK+Gx-Pd2xH^6Sxb|%VK09~X02gI;Q127oXEKO=S>4Sd zKTGaKU5J9>tk=c8h)+#}o%)aQtjmkpz*WABQHO>B zH?2Y;C;ZNHtQJc?XSRTSHm?ifFTF}(T@8A1&zwETbA5k>yuLdSdd*dD$~RxnbL$1L zPvxgzT?U2#SEaIn%bUBX_rYG^I=DOY+nsjWzKszt`EDWdYppN)ng2oH`p5|I#BK8? 
z@AC);@i7z5K#$7Y40$tk}!CDc2h3$;RwMPSFG63(gS_@`1cQu$;X3#0R)L z*#~j7KQ$!Zj;;Z}+@xo~P3mmok-xz&Ik6Ard+lN08+L>J^~OTzx9N9@H^>4%Ma3$t zi}TNb%l1Q&N0m|<@~+Qt@JF?>lz(~=@s@R`GY_32kOw#F#k=%zKI6YV1-S0$OZkuo zz(q)P;A+gb@Xu`6!*@%~mcVWHMT{aM1NOKPx0&BAVUX7=uJN8NcolMT?*-QFfm?{D zobm_Gu@!rQU!qqk;v;MEo#f2&Mts~)hhfxt{*2%IjWL%;v*}mk4_#M`a zgQ^~le3>36fU7c1G0IMdQ7`I~P@M1P_M|@N`v#smax0gkUDsdSH(tNt9Or&H*l(iZ zzzbWjAGm4|4P4}p#6C521aK1)1%BviFL=?P?@7_+1ma>V9pt&EcY_BeCysG^FNr+L zn9q9K8^kyU`;m_;$J9o>H;CZ*4`<+9q0ZFx;5w~3?H%WZ``3MCBJK4)hB!IlkHH6V zV>NJ9F`xF9=DS5r?1j1T^FLhCeI4rgS=?{8~a1fMDo7*YuK;8H~^kIrwSI4H#Kq@+8t7vdX6r~ z`Fd_W$O|VmlGUs$iY9Q&JW=*zyki1j3I@;z!pg28*;b0J12_B7)ZRS19F z{O?#7Uw;ptt3rR&i&-B-KfhSVeieNS@^*MNzKg>(`%fb81KGHk_jT8az;&M;)Ze!Q z@9P$az$-WN4CD1?HR?;`ccE!2TM4*M-p2FmUZy?v zE9NWzH00DszSG?Y@yzq>62N6#G2$n`90Ykcyb;b*)he(a{bry}+~a4N-}rXGP2f-L zcd=85C-WU`8@)pNr@!cnCC<~|>n$N?*HlG6GE;X_zfY}to;s2f=3`y%ZH#)CK^K6F zqVcfT{b?leZb86xh1RSGuSkr>{}%kPMTI!e3EoT|Zuy7neX1}Y(U~|$oUTT>tVJ*K zn87%t+(jH!uMpnL9nS(cS6FX$>nikbXJ#C5r_2%Nw`~C5KPQvXU+mDQeBbQr#eOrb z2EUJbyhA%CYc#*{p<FM=x4-b2rj zPkU@7dEYIXc}lKGp4`j^E)wShxA6hgHzNZ1wSS~wE-qDK{i~YLZ@(Ic_=!4m;g1lV5m#*~fA0g>W%6c`PhPw?RoLtBQ>9AaPVzpSBlCI!cZc6XUc{YD zzSl+vQcrpl=+#5K!2?nME#S^ywHe3k%k(Sn3G~R;d@ty#U4V;s-@<;kOBi@ArkA9A zgVVIX!8+iw;}zg0V>)ox&4%Buox=FsuFQJKDd7DvhyLnw*ND6GneT4pah^F(e%gos z;r$iN?|ajrKKw8%`T=)4@jFU*=K&z@l zX8n}f!G6AJH0%CIBygF|{%^EDJ8JvHfjJdoL zM1QH9GtO5z*Q1| zkCRL0;C&*l#uC4<7V;)x1@}Grob^<6nQ@qT8F`d9KVsgy^PQ_taG&~{zu$?4ztgXm z`Tr27^H;D}Y??v)%UkMsH3zu+?G)y_;26$vuGPc2dd4ou+m5;PCr#sAF=PpGwSjfI vb`x-C8GkQuhYY7bP2cA|dFT%G$`4r&W?%>8Q%(*?A9i=uqP}bA{MG*eKbksa literal 0 HcmV?d00001 diff --git a/examples/dprc/data/set.000/energy.npy b/examples/dprc/data/set.000/energy.npy new file mode 100644 index 0000000000000000000000000000000000000000..1facec2739b98caa2108550ec9252142cf89fe78 GIT binary patch literal 136 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= dXCxM+0{I$-I+{8PwF(pft^fWt9g2bd4lh6aA;*(8wJ$X@-*<|PL z-ky^0xHn$)l(MA%^WPw5|7Z07++YX(@X`+!JIZ?}?-R(I**Rcqt+Qt5(%+X16v#hq zx+HXE=DhC&YRLny3zQeNUNs2*Mx9NdlyiNMY)4yt_g5m${(9me;^UrqLKFPit}leH zMLL}pD4QpKFHkKXeo*+>%+iB`O9%JT`u#WBg)SG(?-eL@eLhgOqZTykjPUWC+nPZ6 z&-=KKlsfxq&CZ#ewpjRW_w!t2!QZbv1b@J5hlI|0T}uR4okE7lc9cp--4wdEdy|W1 zM;3k<=T?%34-i~)UGQ(L4?p1~INS2_N$eBh(<4xw+RS7-YJH!c#XcRmzakzWRat~y zfAj`WKk|V*=dyb=Sr#tXE6!6@RjYa2ev*}2vp<@#Lc$~fro-NK-a<) zaO=Vnia!+#RJJ7prQ~Fb*q3|G5S(95-U#lxq!MwS6(P7bbNU+al%Vy3%R_%r1v2JE z-?dIs5w9D)9w?tNAucgGZ`YM|(3QR<=-PPH15@54l$OYXUgCvj}LrUkVIsrTeqb$AkBsEHH?DbjuWQEq5WfwRHo~ zy=x<|^J!Y&evR^xIh4NFbR`XO$&?6GE~Qw+eoW2;Z@rf-kPq_61$SaGKz=ira0{JB zdb}3CyMt;iV!2v08*yzf?bp|n5AOU)EaK+%`M~rS*Fx{uoC0K(CLoVZLtHDI1MVEX z6sY`|1)Mc~4dTZ3l|WvHI%7RUtH5QK`2zX*oqNFpe%=PGvC(&}Sw-NFUpfHfeZ|1Q zjpab8G0GtB!H$0bu1lhKR%`sBYad1eUvmltaw!l#mi{Er_D`zINA;f|9(f@hX#ADF zdqvs;pLH|clV{Moxw}%m^c~6+aUT6gsaU7&Piugm?V#@!`g|;OW_F={Z!V)gac@{G zbUxM82LH^G!-8uitG4m2@7Zjp~Qb2*NsT2)(=857cM! 
z)?ixyRVw1rgsnh%F`cjdHtln#dn0`2pm?C~$aTc4sh|H`MSV7_qYmrUOH@BQ&Qg9@ zG4|IQOEw{%ewOz0(ox+jt{*|KJ<*JP7u_fUDnHRXTDO-WJ}^BAINwTr_)7j}aJdBe zVqrB!;Kk)sS5p0IaPua|`@HEv{)ljk$b0ARCE%SuQ{Ad>B|w)m=0U%kLEoQtT7~X@ zqZ+uSeI4S)r>HM=Ugh8&yK{iDqrUxWD9*>N#5#Ep^`H6b`mU~m&eKtBwup1P*U|g3W6|I}k;|~2 zH%exwsipr;(A zyy`d8T*!Zc`jM;ivA(J^mevnV0jl~Bptp^nI{MqHHPHQc?t@o6C<@0wc(d}g6aJd65A@(hK~^B;6ic8lIa z4$XzGJ)-@&C)K%BM(@Wf6~xu+6n8$8fqmHsdLKS~G~z|O9p@QB=e0#)-Z8h@dieJ> zr2@@WbiS}w%nf!kmF{mmS%&zeD#}CGH*I2H^|ilt3bg%oybPTKK$j;| z{mVxelJ6DDqpZsiJ~?%H0sLP6p8}P&^iFJ59QfebR3EJjb;WK*((hlHO!wh2YvC)- zE(A`y=%}~GQgHPazQZoA+iVc`XFnMUuX!WR@@5iue->O#MjbNsA^Eho>F-*W;Dflf zAO^^T=K(8UdmkuYrTN0r=$_0HX0U&a$I0aang1H9tCo8q;CUnHUIE<=;Oz1`;AtK8 z6?ZQNw^dVp=%44{E#a)SoD-7bET*G@Xt%~l&y&|Aryu4E&A2yu&&e(lG?8D#Iz7)tNojC&B znA{1&hCsh_oZflX*YV)m13I76pm_Luwws$W5iK7CTDk_Me5Wbl_a_LN}VH-}usgao^SoAFGL~ zg-_M525&EH1aHw(9X?VwLchPJ9s04r9pH~=?*kg$6zE~bL?C~MQ1+p|QLE>|r};)A zE}52q`_)tZ*#F-N-CohauWwUb@VZIhKHsVEv$IqO=A3H5c}Clh0@cT#{89L{^Cv$S z$fTv^LT9$^G-qX(3h3G}f9Ud^RPdhj>xutPb;F|QoMETUh^wDbU74@N!lzZFK)>T$ z3@#0z^Qdj~_knp&9^x${R|3n9uYiwbP+e%Z$ZvbzQGeqD;p1^s$MQ45;OWL}(#L*) z^*csWeb{pI!P)Dy-<^-Pg4+&KeOX_pI=?z?6W0HJivFI^7S4yS&hhtoS8zSz%;_!Y z@_*C8bpvw2wSIBn>YudVtjB7gbI0G!?T+~-CCq@XsL{Z%>R7DfYrTQeE7S)aos>`J zM}4mSm+mE}WWbl$y9}uHUuY5U#_mUh&-j`8o%K@QbUVmr?M@axIr}%-x6RA(_u1|! ztZ&JuxpyvcEjTw)p8b5Nzxda4pey$hfKt{fgZQjDg8JhWJSd?3 zj$_{O=`kVTtZpjS*Kj(Iz6o)rrkg0Ki1(bLKJ5Ck%^>d6w0}L&uaxG^bIqIJ%gi!Cf7F3_ zBER%~yWlp%SPS%`abE!WFnWJ>f$C1)m2MIHvhvZ0D{hJ4Ge(lmv;D!<2~^K)#u&ln zglY~we{wBQar{2d3Z#7N3{?iPtz1a&!<10?)RuhcVM__if}5egwfGQpTQ=%nIy&}( z(3L>P9B8|F5x#BvKNhI^_1_Qw_|cBJFx}CAne_X>=S)VN`vd^X6q*l`H|2BR=18#* z7MMQH~9yTCA2a4rdQh$pV)rQypl5E5OYoQ-O;@Xs)Vf9Ci7g8}tJw z%7FTz^!r-+?OO02zcmJNUluS|aCJ)mM(9n)Nl) Date: Thu, 8 Jun 2023 03:58:45 -0400 Subject: [PATCH 11/48] build type embedding only once Signed-off-by: Jinzhe Zeng --- deepmd/model/ener.py | 2 +- deepmd/model/pairwise_dprc.py | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/deepmd/model/ener.py b/deepmd/model/ener.py index 2491091dac..8100439c9e 100644 --- a/deepmd/model/ener.py +++ b/deepmd/model/ener.py @@ -197,7 +197,7 @@ def build( input_dict["nframes"] = tf.shape(coord)[0] # type embedding if any - if self.typeebd is not None: + if self.typeebd is not None and "type_embedding" not in input_dict: type_embedding = self.typeebd.build( self.ntypes, reuse=reuse, diff --git a/deepmd/model/pairwise_dprc.py b/deepmd/model/pairwise_dprc.py index 8d1aafea36..8c40725204 100644 --- a/deepmd/model/pairwise_dprc.py +++ b/deepmd/model/pairwise_dprc.py @@ -97,8 +97,8 @@ def build( suffix: str = "", reuse: Optional[bool] = None, ): - if input_dict is None: - input_dict = {} + input_dict_qm = {} + input_dict_qmmm = {} with tf.variable_scope("model_attr" + suffix, reuse=reuse): t_tmap = tf.constant(" ".join(self.type_map), name="tmap", dtype=tf.string) t_mt = tf.constant(self.model_type, name="model_type", dtype=tf.string) @@ -138,6 +138,14 @@ def build( box_qm = box box_qmmm = tf.gather(box, qmmm_frame_idx) + type_embedding = self.typeebd.build( + self.ntypes, + reuse=reuse, + suffix=suffix, + ) + input_dict_qm["type_embedding"] = type_embedding + input_dict_qmmm["type_embedding"] = type_embedding + # TODO: after #2481 is merged, change the mesh to mixed_type specific qm_dict = self.qm_model.build( @@ -146,7 +154,7 @@ def build( natoms_qm, box_qm, mesh, - input_dict, + input_dict_qm, frz_model=frz_model, ckpt_meta=ckpt_meta, suffix="_qm" + suffix, @@ -158,7 +166,7 @@ def build( natoms_qmmm, box_qmmm, mesh, - 
input_dict, + input_dict_qmmm, frz_model=frz_model, ckpt_meta=ckpt_meta, suffix="_qmmm" + suffix, From b7d33f0ea7253f9388bf9470f5cdf093965c2d4e Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 8 Jun 2023 04:05:48 -0400 Subject: [PATCH 12/48] fix self.ntypes Signed-off-by: Jinzhe Zeng --- deepmd/model/pairwise_dprc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/deepmd/model/pairwise_dprc.py b/deepmd/model/pairwise_dprc.py index 8c40725204..c53d2406b0 100644 --- a/deepmd/model/pairwise_dprc.py +++ b/deepmd/model/pairwise_dprc.py @@ -83,6 +83,7 @@ def __init__( **qmmm_model, type_map=type_map, type_embedding=self.typeebd ) add_data_requirement("aparam", 1, atomic=True, must=True, high_prec=False) + self.ntypes = len(type_map) def build( self, From d1b51421c82b371154c1ecd43bd1d4c79f9f6a7c Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 12 Jun 2023 17:56:39 -0400 Subject: [PATCH 13/48] add examples for the normal model Signed-off-by: Jinzhe Zeng (cherry picked from commit e40c94c0019f0835dc9182c7a6348ed77beb0eef) --- examples/dprc/normal/input.json | 218 ++++++++++++++++++++++++++++++ examples/dprc/pairwise/input.json | 12 +- 2 files changed, 224 insertions(+), 6 deletions(-) create mode 100644 examples/dprc/normal/input.json diff --git a/examples/dprc/normal/input.json b/examples/dprc/normal/input.json new file mode 100644 index 0000000000..66f025f768 --- /dev/null +++ b/examples/dprc/normal/input.json @@ -0,0 +1,218 @@ +{ + "_comment": " model parameters", + "model": { + "type_map": [ + "C", + "P", + "O", + "H", + "OW", + "HW" + ], + "type_embedding": { + "neuron": [ + 8 + ], + "precision": "float32" + }, + "descriptor": { + "type": "hybrid", + "list": [ + { + "type": "se_e2_a", + "sel": [ + 6, + 1, + 6, + 11, + 0, + 0 + ], + "rcut_smth": 0.50, + "rcut": 6.00, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 16, + "precision": "float32", + "exclude_types": [ + [ + 0, + 4 + ], + [ + 0, + 5 + ], + [ + 1, + 4 + ], + [ + 1, + 5 + ], + [ + 2, + 4 + ], + [ + 2, + 5 + ], + [ + 3, + 4 + ], + [ + 3, + 5 + ], + [ + 4, + 4 + ], + [ + 4, + 5 + ], + [ + 5, + 5 + ] + ], + "seed": 1 + }, + { + "type": "se_atten", + "sel": 150, + "rcut_smth": 5.80, + "rcut": 6.00, + "attn_layer": 0, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 12, + "set_davg_zero": true, + "exclude_types": [ + [ + 0, + 0 + ], + [ + 0, + 1 + ], + [ + 0, + 2 + ], + [ + 0, + 3 + ], + [ + 1, + 1 + ], + [ + 1, + 2 + ], + [ + 1, + 3 + ], + [ + 2, + 2 + ], + [ + 2, + 3 + ], + [ + 3, + 3 + ], + [ + 4, + 4 + ], + [ + 4, + 5 + ], + [ + 5, + 5 + ] + ], + "precision": "float32", + "seed": 1 + } + ] + }, + "fitting_net": { + "type": "ener", + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "precision": "float32", + "atom_ener": [ + null, + null, + null, + null, + 0.0, + 0.0 + ], + "seed": 1 + } + }, + "learning_rate": { + "type": "exp", + "decay_steps": 5000, + "start_lr": 0.001, + "stop_lr": 3.51e-8, + "_comment": "that's all" + }, + + "loss": { + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + "_comment": " that's all" + }, + + "training": { + "training_data": { + "systems": [ + "../data" + ], + "batch_size": "auto", + "_comment": "that's all" + }, + "numb_steps": 1000, + "seed": 10, + "disp_file": "lcurve.out", + "disp_freq": 100, + "save_freq": 1000, + "_comment": "that's all" + }, + + "_comment": "that's all" +} diff --git a/examples/dprc/pairwise/input.json 
b/examples/dprc/pairwise/input.json index 2bb8345dc6..bae2a8dfe5 100644 --- a/examples/dprc/pairwise/input.json +++ b/examples/dprc/pairwise/input.json @@ -20,8 +20,8 @@ "descriptor": { "type": "se_atten", "sel": 24, - "rcut_smth": 5.80, - "rcut": 6.00, + "rcut_smth": 0.50, + "rcut": 9.00, "attn_layer": 0, "neuron": [ 25, @@ -29,7 +29,7 @@ 100 ], "resnet_dt": false, - "axis_neuron": 16, + "axis_neuron": 12, "precision": "float32", "seed": 1 }, @@ -49,7 +49,7 @@ "descriptor": { "type": "se_atten", "sel": 27, - "rcut_smth": 5.80, + "rcut_smth": 0.50, "rcut": 6.00, "attn_layer": 0, "neuron": [ @@ -58,7 +58,7 @@ 100 ], "resnet_dt": false, - "axis_neuron": 16, + "axis_neuron": 12, "set_davg_zero": true, "exclude_types": [ [ @@ -165,7 +165,7 @@ "batch_size": "auto", "_comment": "that's all" }, - "numb_steps": 100, + "numb_steps": 1000, "seed": 10, "disp_file": "lcurve.out", "disp_freq": 100, From d9af6ba4763c7036d98b8d8326c89524232b78df Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 22 Jun 2023 23:49:07 -0400 Subject: [PATCH 14/48] make the example models compressible --- examples/dprc/normal/input.json | 1 + examples/dprc/pairwise/input.json | 2 ++ 2 files changed, 3 insertions(+) diff --git a/examples/dprc/normal/input.json b/examples/dprc/normal/input.json index 66f025f768..d35b48d12b 100644 --- a/examples/dprc/normal/input.json +++ b/examples/dprc/normal/input.json @@ -88,6 +88,7 @@ }, { "type": "se_atten", + "stripped_type_embedding": true, "sel": 150, "rcut_smth": 5.80, "rcut": 6.00, diff --git a/examples/dprc/pairwise/input.json b/examples/dprc/pairwise/input.json index bae2a8dfe5..25be3fdbca 100644 --- a/examples/dprc/pairwise/input.json +++ b/examples/dprc/pairwise/input.json @@ -19,6 +19,7 @@ "qm_model": { "descriptor": { "type": "se_atten", + "stripped_type_embedding": true, "sel": 24, "rcut_smth": 0.50, "rcut": 9.00, @@ -48,6 +49,7 @@ "qmmm_model": { "descriptor": { "type": "se_atten", + "stripped_type_embedding": true, "sel": 27, "rcut_smth": 0.50, "rcut": 6.00, From 35797eb598da3f5905be06178c87286ce4e51304 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 23 Jun 2023 18:23:29 -0400 Subject: [PATCH 15/48] fix se_atten variable names when suffix is given It seems that #1891 wrongly added a suffix to the variable names to make them like `filter_type_all_suffix/matrix_1_suffix`. However, it is expected to be `filter_type_all_suffix/matrix_1` which is consistent in other classes and methods. 
Signed-off-by: Jinzhe Zeng --- deepmd/descriptor/se_atten.py | 1 - 1 file changed, 1 deletion(-) diff --git a/deepmd/descriptor/se_atten.py b/deepmd/descriptor/se_atten.py index 61012cea7e..99cf62a644 100644 --- a/deepmd/descriptor/se_atten.py +++ b/deepmd/descriptor/se_atten.py @@ -649,7 +649,6 @@ def _pass_filter( type_i, natoms, name="filter_type_all" + suffix, - suffix=suffix, reuse=reuse, trainable=trainable, activation_fn=self.filter_activation_fn, From 837614fa3b47bca269f4f2f62b3cb337b9e07c1b Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 23 Jun 2023 19:09:53 -0400 Subject: [PATCH 16/48] Update se_atten.py Signed-off-by: Jinzhe Zeng --- deepmd/descriptor/se_atten.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/deepmd/descriptor/se_atten.py b/deepmd/descriptor/se_atten.py index 99cf62a644..45b913fb4d 100644 --- a/deepmd/descriptor/se_atten.py +++ b/deepmd/descriptor/se_atten.py @@ -649,6 +649,7 @@ def _pass_filter( type_i, natoms, name="filter_type_all" + suffix, + suffix=suffix reuse=reuse, trainable=trainable, activation_fn=self.filter_activation_fn, @@ -1015,7 +1016,7 @@ def _filter_lower( self.filter_precision, activation_fn=activation_fn, resnet_dt=self.filter_resnet_dt, - name_suffix=suffix, + name_suffix="", stddev=stddev, bavg=bavg, seed=self.seed, @@ -1040,7 +1041,7 @@ def _filter_lower( self.filter_precision, activation_fn=activation_fn, resnet_dt=self.filter_resnet_dt, - name_suffix=suffix, + name_suffix="", stddev=stddev, bavg=bavg, seed=self.seed, @@ -1089,7 +1090,7 @@ def _filter_lower( two_side_type_embedding, [-1, two_side_type_embedding.shape[-1]], ) - two_side_type_embedding_suffix = suffix + "_two_side_ebd" + two_side_type_embedding_suffix = "_two_side_ebd" embedding_of_two_side_type_embedding = embedding_net( two_side_type_embedding, self.filter_neuron, From 33adbb2139423321a9dbee5ae8cda2c577320b69 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 23 Jun 2023 19:11:07 -0400 Subject: [PATCH 17/48] Update se_atten.py Signed-off-by: Jinzhe Zeng --- deepmd/descriptor/se_atten.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepmd/descriptor/se_atten.py b/deepmd/descriptor/se_atten.py index 45b913fb4d..8db4502fa8 100644 --- a/deepmd/descriptor/se_atten.py +++ b/deepmd/descriptor/se_atten.py @@ -649,7 +649,7 @@ def _pass_filter( type_i, natoms, name="filter_type_all" + suffix, - suffix=suffix + suffix=suffix, reuse=reuse, trainable=trainable, activation_fn=self.filter_activation_fn, From a25d57f1b4ec9f96cb1a01fee772814714b78996 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 24 Jun 2023 02:45:22 -0400 Subject: [PATCH 18/48] fix output and init_variables Signed-off-by: Jinzhe Zeng --- deepmd/model/ener.py | 5 ++++- deepmd/model/pairwise_dprc.py | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/deepmd/model/ener.py b/deepmd/model/ener.py index e23908ab69..62c681760a 100644 --- a/deepmd/model/ener.py +++ b/deepmd/model/ener.py @@ -367,7 +367,10 @@ def init_variables( tf.constant("compressed_model", name="model_type", dtype=tf.string) else: raise RuntimeError("Unknown model type %s" % model_type) - if self.typeebd is not None: + if ( + self.typeebd is not None + and self.typeebd.type_embedding_net_variables is None + ): self.typeebd.init_variables( graph, graph_def, suffix=suffix, model_type=model_type ) diff --git a/deepmd/model/pairwise_dprc.py b/deepmd/model/pairwise_dprc.py index c53d2406b0..24016dfd27 100644 --- a/deepmd/model/pairwise_dprc.py +++ 
b/deepmd/model/pairwise_dprc.py @@ -84,6 +84,7 @@ def __init__( ) add_data_requirement("aparam", 1, atomic=True, must=True, high_prec=False) self.ntypes = len(type_map) + self.rcut = max(self.qm_model.get_rcut(), self.qmmm_model.get_rcut()) def build( self, @@ -108,6 +109,9 @@ def build( with tf.variable_scope("fitting_attr" + suffix, reuse=reuse): t_dfparam = tf.constant(0, name="dfparam", dtype=tf.int32) t_daparam = tf.constant(1, name="daparam", dtype=tf.int32) + with tf.variable_scope("descrpt_attr" + suffix, reuse=reuse): + t_ntypes = tf.constant(self.ntypes, name="ntypes", dtype=tf.int32) + t_rcut = tf.constant(self.rcut, name="rcut", dtype=tf.float32) # convert X-frame to X-Y-frame coordinates box = tf.reshape(box_, [-1, 9]) nframes = tf.shape(box)[0] @@ -210,7 +214,7 @@ def build( qmmm_frame_idx, ) atom_ener = atom_ener_qm + atom_ener_qmmm - atom_ener = tf.identity(atom_ener, name="o_atom_ener" + suffix) + atom_ener = tf.identity(atom_ener, name="o_atom_energy" + suffix) atom_virial_qm = gather_placeholder( tf.reshape(qm_dict["atom_virial"], (nframes, natoms_qm[1], 9)), @@ -261,6 +265,34 @@ def data_stat(self, data): self.qm_model.data_stat(data) self.qmmm_model.data_stat(data) + def init_variables( + self, + graph: tf.Graph, + graph_def: tf.GraphDef, + model_type: str = "original_model", + suffix: str = "", + ) -> None: + """Init the embedding net variables with the given frozen model. + + Parameters + ---------- + graph : tf.Graph + The input frozen model graph + graph_def : tf.GraphDef + The input frozen model graph_def + model_type : str + the type of the model + suffix : str + suffix to name scope + """ + self.typeebd.init_variables(graph, graph_def, model_type=model_type) + self.qm_model.init_variables( + graph, graph_def, model_type=model_type, suffix="_qm" + suffix + ) + self.qmmm_model.init_variables( + graph, graph_def, model_type=model_type, suffix="_qmmm" + suffix + ) + def gather_placeholder( params: tf.Tensor, indices: tf.Tensor, placeholder: float = 0.0, **kwargs From 089b1b1190cdfdc514851bb159adcec67fbbc64d Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 25 Jun 2023 18:04:57 -0400 Subject: [PATCH 19/48] support compression Signed-off-by: Jinzhe Zeng --- deepmd/entrypoints/train.py | 5 +++++ deepmd/model/model.py | 30 +++++++++++++++++++++++------- deepmd/model/pairwise_dprc.py | 26 ++++++++++++++++++++++++-- 3 files changed, 52 insertions(+), 9 deletions(-) diff --git a/deepmd/entrypoints/train.py b/deepmd/entrypoints/train.py index 32d23d1662..ee911f314d 100755 --- a/deepmd/entrypoints/train.py +++ b/deepmd/entrypoints/train.py @@ -353,6 +353,11 @@ def get_modifier(modi_data=None): def get_rcut(jdata): + if jdata["model"].get("type") == "pairwise_dprc": + return max( + jdata["model"]["qm_model"]["descriptor"]["rcut"], + jdata["model"]["qmmm_model"]["descriptor"]["rcut"], + ) descrpt_data = jdata["model"]["descriptor"] rcut_list = [] if descrpt_data["type"] == "hybrid": diff --git a/deepmd/model/model.py b/deepmd/model/model.py index 132bb75216..4e81741eab 100644 --- a/deepmd/model/model.py +++ b/deepmd/model/model.py @@ -347,8 +347,14 @@ def change_energy_bias( """ raise RuntimeError("Not supported") - def enable_compression(self): - """Enable compression.""" + def enable_compression(self, suffix: str = ""): + """Enable compression. 
+ + Parameters + ---------- + suffix : str + suffix to name scope + """ raise RuntimeError("Not supported") def get_numb_fparam(self) -> Union[int, dict]: @@ -483,8 +489,14 @@ def enable_mixed_precision(self, mixed_prec: dict): self.descrpt.enable_mixed_precision(mixed_prec) self.fitting.enable_mixed_precision(mixed_prec) - def enable_compression(self): - """Enable compression.""" + def enable_compression(self, suffix: str = ""): + """Enable compression. + + Parameters + ---------- + suffix : str + suffix to name scope + """ graph, graph_def = load_graph_def(self.compress["model_file"]) self.descrpt.enable_compression( self.compress["min_nbor_dist"], @@ -494,11 +506,15 @@ def enable_compression(self): self.compress["table_config"][1], self.compress["table_config"][2], self.compress["table_config"][3], + suffix=suffix, ) # for fparam or aparam settings in 'ener' type fitting net - self.fitting.init_variables(graph, graph_def) - if self.typeebd is not None: - self.typeebd.init_variables(graph, graph_def) + self.fitting.init_variables(graph, graph_def, suffix=suffix) + if ( + self.typeebd is not None + and self.typeebd.type_embedding_net_variables is None + ): + self.typeebd.init_variables(graph, graph_def, suffix=suffix) def get_fitting(self) -> Union[Fitting, dict]: """Get the fitting(s).""" diff --git a/deepmd/model/pairwise_dprc.py b/deepmd/model/pairwise_dprc.py index 24016dfd27..2245df1fe8 100644 --- a/deepmd/model/pairwise_dprc.py +++ b/deepmd/model/pairwise_dprc.py @@ -18,6 +18,9 @@ from deepmd.model.model import ( Model, ) +from deepmd.utils.graph import ( + load_graph_def, +) from deepmd.utils.spin import ( Spin, ) @@ -77,10 +80,16 @@ def __init__( ) self.qm_model = EnerModel( - **qm_model, type_map=type_map, type_embedding=self.typeebd + **qm_model, + type_map=type_map, + type_embedding=self.typeebd, + compress=compress, ) self.qmmm_model = EnerModel( - **qmmm_model, type_map=type_map, type_embedding=self.typeebd + **qmmm_model, + type_map=type_map, + type_embedding=self.typeebd, + compress=compress, ) add_data_requirement("aparam", 1, atomic=True, must=True, high_prec=False) self.ntypes = len(type_map) @@ -293,6 +302,19 @@ def init_variables( graph, graph_def, model_type=model_type, suffix="_qmmm" + suffix ) + def enable_compression(self, suffix: str = "") -> None: + """Enable compression. 
+ + Parameters + ---------- + suffix : str + suffix to name scope + """ + graph, graph_def = load_graph_def(self.compress["model_file"]) + self.typeebd.init_variables(graph, graph_def) + self.qm_model.enable_compression(suffix="_qm" + suffix) + self.qmmm_model.enable_compression(suffix="_qmmm" + suffix) + def gather_placeholder( params: tf.Tensor, indices: tf.Tensor, placeholder: float = 0.0, **kwargs From 105086c774541dd39abe3bb2f4f887ab5cf8c5c0 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 25 Jun 2023 18:05:32 -0400 Subject: [PATCH 20/48] fix se_atten compression when suffix is given Signed-off-by: Jinzhe Zeng --- deepmd/descriptor/se_atten.py | 6 ++---- deepmd/utils/tabulate.py | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/deepmd/descriptor/se_atten.py b/deepmd/descriptor/se_atten.py index 8db4502fa8..2a9b70fdfb 100644 --- a/deepmd/descriptor/se_atten.py +++ b/deepmd/descriptor/se_atten.py @@ -429,11 +429,9 @@ def _get_two_side_embedding_net_variable(self, graph_def, varialbe_name, suffix) for i in range(1, self.layer_size + 1): target = get_pattern_nodes_from_graph_def( graph_def, - f"filter_type_all{suffix}/{varialbe_name}_{i}{suffix}_two_side_ebd", + f"filter_type_all{suffix}/{varialbe_name}_{i}_two_side_ebd", ) - node = target[ - f"filter_type_all{suffix}/{varialbe_name}_{i}{suffix}_two_side_ebd" - ] + node = target[f"filter_type_all{suffix}/{varialbe_name}_{i}_two_side_ebd"] ret["layer_" + str(i)] = node return ret diff --git a/deepmd/utils/tabulate.py b/deepmd/utils/tabulate.py index 6895fcbf13..d30d8bcf38 100644 --- a/deepmd/utils/tabulate.py +++ b/deepmd/utils/tabulate.py @@ -420,7 +420,7 @@ def _get_bias(self): bias["layer_" + str(layer)] = [] if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten): node = self.embedding_net_nodes[ - f"filter_type_all{self.suffix}/bias_{layer}{self.suffix}" + f"filter_type_all{self.suffix}/bias_{layer}" ] bias["layer_" + str(layer)].append(tf.make_ndarray(node)) elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeA): @@ -484,7 +484,7 @@ def _get_matrix(self): matrix["layer_" + str(layer)] = [] if isinstance(self.descrpt, deepmd.descriptor.DescrptSeAtten): node = self.embedding_net_nodes[ - f"filter_type_all{self.suffix}/matrix_{layer}{self.suffix}" + f"filter_type_all{self.suffix}/matrix_{layer}" ] matrix["layer_" + str(layer)].append(tf.make_ndarray(node)) elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeA): From dba4fde55b2adc782917817daccad5c5f77365a3 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 26 Jun 2023 15:20:24 -0400 Subject: [PATCH 21/48] change the mesh Signed-off-by: Jinzhe Zeng --- deepmd/model/pairwise_dprc.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/deepmd/model/pairwise_dprc.py b/deepmd/model/pairwise_dprc.py index 2245df1fe8..47b73b680e 100644 --- a/deepmd/model/pairwise_dprc.py +++ b/deepmd/model/pairwise_dprc.py @@ -6,6 +6,7 @@ from deepmd.common import ( add_data_requirement, + make_default_mesh, ) from deepmd.env import ( MODEL_VERSION, @@ -160,14 +161,14 @@ def build( input_dict_qm["type_embedding"] = type_embedding input_dict_qmmm["type_embedding"] = type_embedding - # TODO: after #2481 is merged, change the mesh to mixed_type specific + mesh_mixed_type = make_default_mesh(False, True) qm_dict = self.qm_model.build( coord_qm, atype_qm, natoms_qm, box_qm, - mesh, + mesh_mixed_type, input_dict_qm, frz_model=frz_model, ckpt_meta=ckpt_meta, @@ -179,7 +180,7 @@ def build( atype_qmmm, natoms_qmmm, box_qmmm, - mesh, + mesh_mixed_type, 
input_dict_qmmm, frz_model=frz_model, ckpt_meta=ckpt_meta, From 3973b5b891bf94bbc21244d71d6c49c64d6919fb Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 3 Jul 2023 16:24:21 -0400 Subject: [PATCH 22/48] add docs; improve example Signed-off-by: Jinzhe Zeng --- README.md | 1 + doc/model/index.md | 1 + doc/model/index.rst | 1 + doc/model/pairwise-dprc.md | 168 ++++++++++++++++++++++++++++++ examples/dprc/pairwise/input.json | 8 ++ 5 files changed, 179 insertions(+) create mode 100644 doc/model/pairwise-dprc.md diff --git a/README.md b/README.md index 4cb4fbb24a..f94a3c9c99 100644 --- a/README.md +++ b/README.md @@ -109,6 +109,7 @@ A full [document](doc/train/train-input-auto.rst) on options in the training inp - [Train a Deep Potential model using `type embedding` approach](doc/model/train-se-e2-a-tebd.md) - [Deep potential long-range](doc/model/dplr.md) - [Deep Potential - Range Correction (DPRc)](doc/model/dprc.md) + - [Pairwise DPRc](pairwise-dprc.md) - [Training](doc/train/index.md) - [Training a model](doc/train/training.md) - [Advanced options](doc/train/training-advanced.md) diff --git a/doc/model/index.md b/doc/model/index.md index c9c25c9812..bf0ee21822 100644 --- a/doc/model/index.md +++ b/doc/model/index.md @@ -15,3 +15,4 @@ - [Train a Deep Potential model using `type embedding` approach](train-se-e2-a-tebd.md) - [Deep potential long-range](dplr.md) - [Deep Potential - Range Correction (DPRc)](dprc.md) +- [Pairwise DPRc](pairwise-dprc.md) diff --git a/doc/model/index.rst b/doc/model/index.rst index 6a01a3b015..6a8c3f2d49 100644 --- a/doc/model/index.rst +++ b/doc/model/index.rst @@ -19,3 +19,4 @@ Model train-se-a-mask dplr dprc + pairwise-dprc diff --git a/doc/model/pairwise-dprc.md b/doc/model/pairwise-dprc.md new file mode 100644 index 0000000000..6b8c055bee --- /dev/null +++ b/doc/model/pairwise-dprc.md @@ -0,0 +1,168 @@ +# Pairwise DPRc + +In a pairwise DPRc model, the total energy is divided into QM internal energy and the sum of QM/MM energy for each MM residue: + +$$ E = E_\text{QM} + \sum_{l} E_\text{QM/MM,l} $$ + +Thus, the pairwise DPRc model is divided into two sub-[DPRc models](./dprc.md). +`qm_model` is for the QM internal interaction and `qmmm_model` is for the QM/MM interaction. +The configuration for these two models is similar to [the non-pairwise DPRc model](./dprc.md). +It is noted that the [`se_atten` descriptor](./train-se-atten.md) should be used, as it is the only descriptor to support the mixed type. 
+ +```json +{ + "model": { + "type": "pairwise_dprc", + "type_map": [ + "C", + "P", + "O", + "H", + "OW", + "HW" + ], + "type_embedding": { + "neuron": [ + 8 + ], + "precision": "float32" + }, + "qm_model": { + "descriptor": { + "type": "se_atten", + "stripped_type_embedding": true, + "sel": 24, + "rcut_smth": 0.50, + "rcut": 9.00, + "attn_layer": 0, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 12, + "precision": "float32", + "seed": 1 + }, + "fitting_net": { + "type": "ener", + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "precision": "float32", + "atom_ener": [ + null, + null, + null, + null, + 0.0, + 0.0 + ], + "seed": 1 + } + }, + "qmmm_model": { + "descriptor": { + "type": "se_atten", + "stripped_type_embedding": true, + "sel": 27, + "rcut_smth": 0.50, + "rcut": 6.00, + "attn_layer": 0, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 12, + "set_davg_zero": true, + "exclude_types": [ + [ + 0, + 0 + ], + [ + 0, + 1 + ], + [ + 0, + 2 + ], + [ + 0, + 3 + ], + [ + 1, + 1 + ], + [ + 1, + 2 + ], + [ + 1, + 3 + ], + [ + 2, + 2 + ], + [ + 2, + 3 + ], + [ + 3, + 3 + ], + [ + 4, + 4 + ], + [ + 4, + 5 + ], + [ + 5, + 5 + ] + ], + "precision": "float32", + "seed": 1 + }, + "fitting_net": { + "type": "ener", + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "seed": 1, + "precision": "float32", + "atom_ener": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + } + } + } +} +``` + +The pairwise model needs information for MM residues. +The model uses [`aparam`](../data/system.md) with the shape of `nframes x natoms` to get the residue index. +The QM residue should always use `0` as the index. +For example, `0 0 0 1 1 1 2 2 2` means these 9 atoms are grouped into one QM residue and two MM residues. 
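As a rough illustration of the residue-index convention described above (not part of this patch), the sketch below shows how such an `aparam` array could be assembled with NumPy for a 9-atom system with one QM residue and two MM residues; the frame count and the output path are assumptions made only for the example.

```python
# Hypothetical sketch: build the residue-index aparam with shape nframes x natoms.
# Index 0 marks the QM residue; 1, 2, ... mark the MM residues.
import numpy as np

nframes = 50  # assumed number of frames in the system
residue_index = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2], dtype=np.float64)

# Every frame reuses the same grouping in this example.
aparam = np.tile(residue_index, (nframes, 1))

# Saved alongside the other per-frame data (directory assumed to exist).
np.save("data/set.000/aparam.npy", aparam)
```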
diff --git a/examples/dprc/pairwise/input.json b/examples/dprc/pairwise/input.json index 25be3fdbca..675418f7d5 100644 --- a/examples/dprc/pairwise/input.json +++ b/examples/dprc/pairwise/input.json @@ -43,6 +43,14 @@ ], "resnet_dt": true, "precision": "float32", + "atom_ener": [ + null, + null, + null, + null, + 0.0, + 0.0 + ], "seed": 1 } }, From 294fb64a865d843e4c3a214840be3ae4e08ac00a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Jul 2023 20:24:50 +0000 Subject: [PATCH 23/48] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deepmd/model/pairwise_dprc.py | 1 + source/lib/src/pairwise.cc | 1 + source/lib/tests/test_pairwise.cc | 1 + source/op/pairwise.cc | 1 + source/tests/test_pairwise_dprc.py | 1 + 5 files changed, 5 insertions(+) diff --git a/deepmd/model/pairwise_dprc.py b/deepmd/model/pairwise_dprc.py index 47b73b680e..254b8817e5 100644 --- a/deepmd/model/pairwise_dprc.py +++ b/deepmd/model/pairwise_dprc.py @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( List, Optional, diff --git a/source/lib/src/pairwise.cc b/source/lib/src/pairwise.cc index 297279fa91..428e92baa4 100644 --- a/source/lib/src/pairwise.cc +++ b/source/lib/src/pairwise.cc @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later #include "pairwise.h" #include diff --git a/source/lib/tests/test_pairwise.cc b/source/lib/tests/test_pairwise.cc index 4c298863bf..5f5d39d4fa 100644 --- a/source/lib/tests/test_pairwise.cc +++ b/source/lib/tests/test_pairwise.cc @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later #include #include diff --git a/source/op/pairwise.cc b/source/op/pairwise.cc index cf88ac13a8..dfcfce6736 100644 --- a/source/op/pairwise.cc +++ b/source/op/pairwise.cc @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later #include "pairwise.h" #include "custom_op.h" diff --git a/source/tests/test_pairwise_dprc.py b/source/tests/test_pairwise_dprc.py index ea4ec98ae7..1432f0f4c2 100644 --- a/source/tests/test_pairwise_dprc.py +++ b/source/tests/test_pairwise_dprc.py @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later """Test pairwise DPRc features.""" import unittest From 56c10d7290793a994b6c9474edff4765242fb419 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Wed, 5 Jul 2023 17:40:27 -0400 Subject: [PATCH 24/48] docs: update equations Signed-off-by: Jinzhe Zeng --- doc/model/pairwise-dprc.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/model/pairwise-dprc.md b/doc/model/pairwise-dprc.md index 6b8c055bee..3ba2ec91fc 100644 --- a/doc/model/pairwise-dprc.md +++ b/doc/model/pairwise-dprc.md @@ -1,8 +1,8 @@ # Pairwise DPRc -In a pairwise DPRc model, the total energy is divided into QM internal energy and the sum of QM/MM energy for each MM residue: +In a pairwise DPRc model, the total energy is divided into QM internal energy and the sum of QM/MM energy for each MM residue $l$: -$$ E = E_\text{QM} + \sum_{l} E_\text{QM/MM,l} $$ +$$ E = E_\text{QM} + \sum_{l} E_{\text{QM/MM},l} $$ Thus, the pairwise DPRc model is divided into two sub-[DPRc models](./dprc.md). `qm_model` is for the QM internal interaction and `qmmm_model` is for the QM/MM interaction. 
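To make the updated decomposition concrete, here is a minimal sketch (with invented numbers, not taken from this patch) of how the per-residue QM/MM energies are reduced back to a per-frame total, mirroring the `tf.math.segment_sum` aggregation used in `pairwise_dprc.py`:

```python
# Assumed example values for one frame with three MM residues.
import numpy as np

e_qm = np.array([-10.0])                # QM internal energy per frame
e_qmmm = np.array([-0.3, -0.1, -0.2])   # QM/MM energy of each MM residue l
qmmm_frame_idx = np.array([0, 0, 0])    # frame that each QM/MM sub-system belongs to

# Sum the residue contributions of each frame (a segment sum), then add E_QM,
# i.e. E = E_QM + sum_l E_QM/MM,l.
e_total = e_qm + np.bincount(qmmm_frame_idx, weights=e_qmmm)
print(e_total)  # [-10.6]
```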
From fdb4c5781b0d0b6236b8f9c447cdd873a8add8fe Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 5 Jul 2023 21:41:45 +0000 Subject: [PATCH 25/48] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- source/lib/include/pairwise.h | 1 + 1 file changed, 1 insertion(+) diff --git a/source/lib/include/pairwise.h b/source/lib/include/pairwise.h index 027d80cf7e..bbb4119e59 100644 --- a/source/lib/include/pairwise.h +++ b/source/lib/include/pairwise.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later #include namespace deepmd { From 5e2a5a4ca8299edb406e315fb00ded931fc10f98 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 7 Jul 2023 03:09:42 -0400 Subject: [PATCH 26/48] add compress information --- deepmd/entrypoints/train.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/deepmd/entrypoints/train.py b/deepmd/entrypoints/train.py index ee911f314d..d4fbdefef2 100755 --- a/deepmd/entrypoints/train.py +++ b/deepmd/entrypoints/train.py @@ -500,11 +500,14 @@ def update_one_sel(jdata, descriptor): def update_sel(jdata): - if "descriptor" not in jdata["model"]: - return jdata log.info( "Calculate neighbor statistics... (add --skip-neighbor-stat to skip this step)" ) + if jdata["model"].get("type") == "pairwise_dprc": + # do not update sel; only find min distance + rcut = get_rcut(jdata) + get_min_nbor_dist(jdata, rcut) + return jdata descrpt_data = jdata["model"]["descriptor"] if descrpt_data["type"] == "hybrid": for ii in range(len(descrpt_data["list"])): From 3b2b5918a678ca5d79eb87e2b30a39b46e3b03a2 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 7 Jul 2023 04:23:24 -0400 Subject: [PATCH 27/48] add t_mesh to nodes Signed-off-by: Jinzhe Zeng --- deepmd/entrypoints/freeze.py | 1 + 1 file changed, 1 insertion(+) diff --git a/deepmd/entrypoints/freeze.py b/deepmd/entrypoints/freeze.py index 9f6547998f..678831dc5c 100755 --- a/deepmd/entrypoints/freeze.py +++ b/deepmd/entrypoints/freeze.py @@ -210,6 +210,7 @@ def _make_node_names( "model_attr/model_version", "train_attr/min_nbor_dist", "train_attr/training_script", + "t_mesh", ] if model_type == "ener": From d39ea83f45b658ed60f77778ad3c6082f71a5f40 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 7 Jul 2023 14:05:03 -0400 Subject: [PATCH 28/48] fix the dtype of rcut Signed-off-by: Jinzhe Zeng --- deepmd/model/pairwise_dprc.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/deepmd/model/pairwise_dprc.py b/deepmd/model/pairwise_dprc.py index 254b8817e5..e8ae4e99a3 100644 --- a/deepmd/model/pairwise_dprc.py +++ b/deepmd/model/pairwise_dprc.py @@ -10,6 +10,7 @@ make_default_mesh, ) from deepmd.env import ( + GLOBAL_TF_FLOAT_PRECISION, MODEL_VERSION, op_module, tf, @@ -122,7 +123,9 @@ def build( t_daparam = tf.constant(1, name="daparam", dtype=tf.int32) with tf.variable_scope("descrpt_attr" + suffix, reuse=reuse): t_ntypes = tf.constant(self.ntypes, name="ntypes", dtype=tf.int32) - t_rcut = tf.constant(self.rcut, name="rcut", dtype=tf.float32) + t_rcut = tf.constant( + self.rcut, name="rcut", dtype=GLOBAL_TF_FLOAT_PRECISION + ) # convert X-frame to X-Y-frame coordinates box = tf.reshape(box_, [-1, 9]) nframes = tf.shape(box)[0] From 6d83ef709572f0a501657e53422653974d37437f Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 7 Jul 2023 15:44:30 -0400 Subject: [PATCH 29/48] fix a typo in reshape Signed-off-by: Jinzhe Zeng --- deepmd/model/pairwise_dprc.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/deepmd/model/pairwise_dprc.py b/deepmd/model/pairwise_dprc.py index e8ae4e99a3..1e2a90f5be 100644 --- a/deepmd/model/pairwise_dprc.py +++ b/deepmd/model/pairwise_dprc.py @@ -237,7 +237,7 @@ def build( ) atom_virial_qmmm = tf.math.segment_sum( gather_placeholder( - tf.reshape(qmmm_dict["atom_virial"], (nframes, natoms_qm[1], 9)), + tf.reshape(qmmm_dict["atom_virial"], (nframes_qmmm, natoms_qm[1], 9)), backward_qmmm_map, placeholder=0.0, ), From 4a6b891fbb7c5ebf3d0639ff86308aa16cec248a Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 7 Jul 2023 16:24:32 -0400 Subject: [PATCH 30/48] another bug fixed Signed-off-by: Jinzhe Zeng --- deepmd/model/pairwise_dprc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepmd/model/pairwise_dprc.py b/deepmd/model/pairwise_dprc.py index 1e2a90f5be..b51c022554 100644 --- a/deepmd/model/pairwise_dprc.py +++ b/deepmd/model/pairwise_dprc.py @@ -237,7 +237,7 @@ def build( ) atom_virial_qmmm = tf.math.segment_sum( gather_placeholder( - tf.reshape(qmmm_dict["atom_virial"], (nframes_qmmm, natoms_qm[1], 9)), + tf.reshape(qmmm_dict["atom_virial"], (nframes_qmmm, natoms_qmmm[1], 9)), backward_qmmm_map, placeholder=0.0, ), From f9663566b7bb2021aaf76dead30549aa84e4f076 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 9 Jul 2023 19:06:50 -0400 Subject: [PATCH 31/48] support fparam/aparam in dp model-devi --- deepmd/infer/model_devi.py | 28 ++++++- deepmd/utils/data.py | 2 +- .../system_fparam_aparam/set.000/aparam.npy | Bin 0 -> 176 bytes .../system_fparam_aparam/set.000/box.npy | Bin 0 -> 200 bytes .../system_fparam_aparam/set.000/coord.npy | Bin 0 -> 272 bytes .../system_fparam_aparam/set.000/energy.npy | Bin 0 -> 136 bytes .../system_fparam_aparam/set.000/force.npy | Bin 0 -> 272 bytes .../system_fparam_aparam/set.000/fparam.npy | Bin 0 -> 136 bytes source/tests/system_fparam_aparam/type.raw | 6 ++ .../tests/system_fparam_aparam/type_map.raw | 1 + source/tests/test_model_devi.py | 69 ++++++++++++++++++ 11 files changed, 104 insertions(+), 2 deletions(-) create mode 100644 source/tests/system_fparam_aparam/set.000/aparam.npy create mode 100644 source/tests/system_fparam_aparam/set.000/box.npy create mode 100644 source/tests/system_fparam_aparam/set.000/coord.npy create mode 100644 source/tests/system_fparam_aparam/set.000/energy.npy create mode 100644 source/tests/system_fparam_aparam/set.000/force.npy create mode 100644 source/tests/system_fparam_aparam/set.000/fparam.npy create mode 100644 source/tests/system_fparam_aparam/type.raw create mode 100644 source/tests/system_fparam_aparam/type_map.raw diff --git a/deepmd/infer/model_devi.py b/deepmd/infer/model_devi.py index bc80ac78f6..2f5fc02e05 100644 --- a/deepmd/infer/model_devi.py +++ b/deepmd/infer/model_devi.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( Tuple, + Optional, ) import numpy as np @@ -145,6 +146,8 @@ def calc_model_devi( fname=None, frequency=1, mixed_type=False, + fparam : Optional[np.ndarray] = None, + aparam : Optional[np.ndarray] = None, ): """Python interface to calculate model deviation. 
@@ -164,6 +167,10 @@ def calc_model_devi( Steps between frames (if the system is given by molecular dynamics engine), default 1 mixed_type : bool Whether the input atype is in mixed_type format or not + fparam : numpy.ndarray + frame specific parameters + aparam : numpy.ndarray + atomic specific parameters Returns ------- @@ -191,6 +198,8 @@ def calc_model_devi( coord, box, atype, + fparam=fparam, + aparam=aparam, mixed_type=mixed_type, ) energies.append(ret[0] / natom) @@ -248,9 +257,18 @@ def make_model_devi( if len(all_sys) == 0: raise RuntimeError("Did not find valid system") devis_coll = [] + + first_dp = dp_models[0] + for system in all_sys: # create data-system dp_data = DeepmdData(system, set_prefix, shuffle_test=False, type_map=tmap) + if first_dp.get_dim_fparam() > 0: + dp_data.add( + "fparam", first_dp.get_dim_fparam(), atomic=False, must=True, high_prec=False + ) + if first_dp.get_dim_aparam() > 0: + dp_data.add("aparam", first_dp.get_dim_aparam(), atomic=True, must=True, high_prec=False) mixed_type = dp_data.mixed_type data_sets = [dp_data._load_set(set_name) for set_name in dp_data.dirs] @@ -265,7 +283,15 @@ def make_model_devi( atype = data["type"][0] if not dp_data.pbc: box = None - devi = calc_model_devi(coord, box, atype, dp_models, mixed_type=mixed_type) + if first_dp.get_dim_fparam() > 0: + fparam = data["fparam"] + else: + fparam = None + if first_dp.get_dim_aparam() > 0: + aparam = data["aparam"] + else: + aparam = None + devi = calc_model_devi(coord, box, atype, dp_models, mixed_type=mixed_type, fparam=fparam, aparam=aparam) nframes_tot += coord.shape[0] devis.append(devi) devis = np.vstack(devis) diff --git a/deepmd/utils/data.py b/deepmd/utils/data.py index 24042444c8..8442f84156 100644 --- a/deepmd/utils/data.py +++ b/deepmd/utils/data.py @@ -82,7 +82,7 @@ def __init__( self.pbc = self._check_pbc(root) # enforce type_map if necessary self.enforce_type_map = False - if type_map is not None and self.type_map is not None: + if type_map is not None and self.type_map is not None and len(type_map): if not self.mixed_type: atom_type_ = [ type_map.index(self.type_map[ii]) for ii in self.atom_type diff --git a/source/tests/system_fparam_aparam/set.000/aparam.npy b/source/tests/system_fparam_aparam/set.000/aparam.npy new file mode 100644 index 0000000000000000000000000000000000000000..da59918dad40fcc05a13c7a781855e2780b5b9da GIT binary patch literal 176 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= fXCxM+0{I$-Itpf*ItsN4WCN~QGeKadJ+U+ZA|5u( literal 0 HcmV?d00001 diff --git a/source/tests/system_fparam_aparam/set.000/box.npy b/source/tests/system_fparam_aparam/set.000/box.npy new file mode 100644 index 0000000000000000000000000000000000000000..f70e95b6e772550e087d1aba097f26b3b380704e GIT binary patch literal 200 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= hXCxM+0{I$-ItrGWItsN4WCJb+Ffeg|(6~s#@&G_89MJ#( literal 0 HcmV?d00001 diff --git a/source/tests/system_fparam_aparam/set.000/coord.npy b/source/tests/system_fparam_aparam/set.000/coord.npy new file mode 100644 index 0000000000000000000000000000000000000000..27a06e944251c351ad110c8c69cc764c2fcd84db GIT binary patch literal 272 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= zXCxM+0{I$-ItqpsnmP)#3giN=`6m;X{f%5|zg$iK+xCJ@_C<$GYa?gvv_GBq Date: Sun, 9 Jul 2023 23:07:37 +0000 Subject: [PATCH 32/48] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deepmd/infer/model_devi.py | 32 +++++++++++++++++++++++++------- 
source/tests/test_model_devi.py | 4 +--- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/deepmd/infer/model_devi.py b/deepmd/infer/model_devi.py index 2f5fc02e05..0274384188 100644 --- a/deepmd/infer/model_devi.py +++ b/deepmd/infer/model_devi.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( - Tuple, Optional, + Tuple, ) import numpy as np @@ -146,8 +146,8 @@ def calc_model_devi( fname=None, frequency=1, mixed_type=False, - fparam : Optional[np.ndarray] = None, - aparam : Optional[np.ndarray] = None, + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, ): """Python interface to calculate model deviation. @@ -265,10 +265,20 @@ def make_model_devi( dp_data = DeepmdData(system, set_prefix, shuffle_test=False, type_map=tmap) if first_dp.get_dim_fparam() > 0: dp_data.add( - "fparam", first_dp.get_dim_fparam(), atomic=False, must=True, high_prec=False - ) + "fparam", + first_dp.get_dim_fparam(), + atomic=False, + must=True, + high_prec=False, + ) if first_dp.get_dim_aparam() > 0: - dp_data.add("aparam", first_dp.get_dim_aparam(), atomic=True, must=True, high_prec=False) + dp_data.add( + "aparam", + first_dp.get_dim_aparam(), + atomic=True, + must=True, + high_prec=False, + ) mixed_type = dp_data.mixed_type data_sets = [dp_data._load_set(set_name) for set_name in dp_data.dirs] @@ -291,7 +301,15 @@ def make_model_devi( aparam = data["aparam"] else: aparam = None - devi = calc_model_devi(coord, box, atype, dp_models, mixed_type=mixed_type, fparam=fparam, aparam=aparam) + devi = calc_model_devi( + coord, + box, + atype, + dp_models, + mixed_type=mixed_type, + fparam=fparam, + aparam=aparam, + ) nframes_tot += coord.shape[0] devis.append(devi) devis = np.vstack(devis) diff --git a/source/tests/test_model_devi.py b/source/tests/test_model_devi.py index e25449b1f8..5249365187 100644 --- a/source/tests/test_model_devi.py +++ b/source/tests/test_model_devi.py @@ -12,7 +12,6 @@ from deepmd.infer.model_devi import ( make_model_devi, ) -from deepmd.common import data_requirement sys.path.insert(0, os.path.abspath(os.path.dirname(__file__))) from common import ( @@ -96,6 +95,7 @@ def tearDown(self): class TestMakeModelDeviFparamAparam(unittest.TestCase): """Ensure dp model_devi accepts fparam and aparam.""" + @classmethod def setUpClass(cls): cls.pbtxts = [ @@ -112,7 +112,6 @@ def tearDownClass(cls): os.remove(pb) cls.graphs = None - def setUp(self): gen_data() self.data_dir = "system_fparam_aparam" @@ -128,7 +127,6 @@ def setUp(self): self.fparam = np.repeat([0.25852028], self.box.size / 9) self.aparam = np.repeat(self.fparam, self.atype.size) - def test_calc_model_devi(self): model_devi = calc_model_devi( self.coord, From 42603095d5e44e4efdbc7d3f310ecb3488660502 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 9 Jul 2023 19:56:50 -0400 Subject: [PATCH 33/48] fix tests --- source/tests/common.py | 32 +++++++++--------- .../system_fparam_aparam/set.000/aparam.npy | Bin 176 -> 0 bytes .../system_fparam_aparam/set.000/box.npy | Bin 200 -> 0 bytes .../system_fparam_aparam/set.000/coord.npy | Bin 272 -> 0 bytes .../system_fparam_aparam/set.000/energy.npy | Bin 136 -> 0 bytes .../system_fparam_aparam/set.000/force.npy | Bin 272 -> 0 bytes .../system_fparam_aparam/set.000/fparam.npy | Bin 136 -> 0 bytes source/tests/system_fparam_aparam/type.raw | 6 ---- .../tests/system_fparam_aparam/type_map.raw | 1 - source/tests/test_model_devi.py | 13 ++++--- 10 files changed, 24 insertions(+), 28 deletions(-) delete mode 100644 
source/tests/system_fparam_aparam/set.000/aparam.npy delete mode 100644 source/tests/system_fparam_aparam/set.000/box.npy delete mode 100644 source/tests/system_fparam_aparam/set.000/coord.npy delete mode 100644 source/tests/system_fparam_aparam/set.000/energy.npy delete mode 100644 source/tests/system_fparam_aparam/set.000/force.npy delete mode 100644 source/tests/system_fparam_aparam/set.000/fparam.npy delete mode 100644 source/tests/system_fparam_aparam/type.raw delete mode 100644 source/tests/system_fparam_aparam/type_map.raw diff --git a/source/tests/common.py b/source/tests/common.py index e1788bb942..6680c936d2 100644 --- a/source/tests/common.py +++ b/source/tests/common.py @@ -41,8 +41,8 @@ def del_data(): shutil.rmtree("system_mixed_type") -def gen_data_type_specific(nframes=1): - tmpdata = Data(rand_pert=0.1, seed=1, nframes=nframes) +def gen_data_type_specific(nframes=1, dim_fparam=2): + tmpdata = Data(rand_pert=0.1, seed=1, nframes=nframes, dim_fparam=dim_fparam) sys = dpdata.LabeledSystem() sys.data["atom_names"] = ["foo", "bar"] sys.data["coords"] = tmpdata.coord @@ -56,11 +56,11 @@ def gen_data_type_specific(nframes=1): sys.data["forces"] = np.zeros([nframes, natoms, 3]) sys.to_deepmd_npy("system", prec=np.float64) np.save("system/set.000/fparam.npy", tmpdata.fparam) - np.save("system/set.000/aparam.npy", tmpdata.aparam.reshape([nframes, natoms, 2])) + np.save("system/set.000/aparam.npy", tmpdata.aparam.reshape([nframes, natoms, dim_fparam])) -def gen_data_mixed_type(nframes=1): - tmpdata = Data(rand_pert=0.1, seed=1, nframes=nframes) +def gen_data_mixed_type(nframes=1, dim_fparam=2): + tmpdata = Data(rand_pert=0.1, seed=1, nframes=nframes, dim_fparam=dim_fparam) sys = dpdata.LabeledSystem() real_type_map = ["foo", "bar"] sys.data["atom_names"] = ["X"] @@ -82,12 +82,12 @@ def gen_data_mixed_type(nframes=1): np.save("system_mixed_type/set.000/fparam.npy", tmpdata.fparam) np.save( "system_mixed_type/set.000/aparam.npy", - tmpdata.aparam.reshape([nframes, natoms, 2]), + tmpdata.aparam.reshape([nframes, natoms, dim_fparam]), ) -def gen_data_virtual_type(nframes=1, nghost=4): - tmpdata = Data(rand_pert=0.1, seed=1, nframes=nframes) +def gen_data_virtual_type(nframes=1, nghost=4, dim_fparam=2): + tmpdata = Data(rand_pert=0.1, seed=1, nframes=nframes, dim_fparam=dim_fparam) sys = dpdata.LabeledSystem() real_type_map = ["foo", "bar"] sys.data["atom_names"] = ["X"] @@ -129,25 +129,25 @@ def gen_data_virtual_type(nframes=1, nghost=4): "system_mixed_type/set.000/aparam.npy", np.concatenate( [ - tmpdata.aparam.reshape([nframes, natoms, 2]), - np.zeros([nframes, nghost, 2]), + tmpdata.aparam.reshape([nframes, natoms, dim_fparam]), + np.zeros([nframes, nghost, dim_fparam]), ], axis=1, ), ) -def gen_data(nframes=1, mixed_type=False, virtual_type=False): +def gen_data(nframes=1, mixed_type=False, virtual_type=False, dim_fparam=2): if not mixed_type: - gen_data_type_specific(nframes) + gen_data_type_specific(nframes, dim_fparam=dim_fparam) elif virtual_type: - gen_data_virtual_type(nframes) + gen_data_virtual_type(nframes, dim_fparam=dim_fparam) else: - gen_data_mixed_type(nframes) + gen_data_mixed_type(nframes, dim_fparam=dim_fparam) class Data: - def __init__(self, rand_pert=0.1, seed=1, box_scale=20, nframes=1): + def __init__(self, rand_pert=0.1, seed=1, box_scale=20, nframes=1, dim_fparam=2): coord = [ [0.0, 0.0, 0.1], [1.1, 0.0, 0.1], @@ -161,7 +161,7 @@ def __init__(self, rand_pert=0.1, seed=1, box_scale=20, nframes=1): self.coord = self._copy_nframes(self.coord) dp_random.seed(seed) 
self.coord += rand_pert * dp_random.random(self.coord.shape) - self.fparam = np.array([[0.1, 0.2]]) + self.fparam = ((np.arange(dim_fparam) + 1) * 0.1).reshape(1, dim_fparam) self.aparam = np.tile(self.fparam, [1, 6]) self.fparam = self._copy_nframes(self.fparam) self.aparam = self._copy_nframes(self.aparam) diff --git a/source/tests/system_fparam_aparam/set.000/aparam.npy b/source/tests/system_fparam_aparam/set.000/aparam.npy deleted file mode 100644 index da59918dad40fcc05a13c7a781855e2780b5b9da..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 176 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= fXCxM+0{I$-Itpf*ItsN4WCN~QGeKadJ+U+ZA|5u( diff --git a/source/tests/system_fparam_aparam/set.000/box.npy b/source/tests/system_fparam_aparam/set.000/box.npy deleted file mode 100644 index f70e95b6e772550e087d1aba097f26b3b380704e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 200 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= hXCxM+0{I$-ItrGWItsN4WCJb+Ffeg|(6~s#@&G_89MJ#( diff --git a/source/tests/system_fparam_aparam/set.000/coord.npy b/source/tests/system_fparam_aparam/set.000/coord.npy deleted file mode 100644 index 27a06e944251c351ad110c8c69cc764c2fcd84db..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 272 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= zXCxM+0{I$-ItqpsnmP)#3giN=`6m;X{f%5|zg$iK+xCJ@_C<$GYa?gvv_GBq Date: Sun, 9 Jul 2023 23:57:22 +0000 Subject: [PATCH 34/48] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- source/tests/common.py | 5 ++++- source/tests/test_model_devi.py | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/source/tests/common.py b/source/tests/common.py index 6680c936d2..12723e52fc 100644 --- a/source/tests/common.py +++ b/source/tests/common.py @@ -56,7 +56,10 @@ def gen_data_type_specific(nframes=1, dim_fparam=2): sys.data["forces"] = np.zeros([nframes, natoms, 3]) sys.to_deepmd_npy("system", prec=np.float64) np.save("system/set.000/fparam.npy", tmpdata.fparam) - np.save("system/set.000/aparam.npy", tmpdata.aparam.reshape([nframes, natoms, dim_fparam])) + np.save( + "system/set.000/aparam.npy", + tmpdata.aparam.reshape([nframes, natoms, dim_fparam]), + ) def gen_data_mixed_type(nframes=1, dim_fparam=2): diff --git a/source/tests/test_model_devi.py b/source/tests/test_model_devi.py index 3abd25f45a..b1c5ec8ead 100644 --- a/source/tests/test_model_devi.py +++ b/source/tests/test_model_devi.py @@ -128,7 +128,9 @@ def setUp(self): self.expect = np.zeros(8) nframes = self.box.size // 9 self.fparam = np.repeat([0.25852028], nframes).reshape((nframes, 1)) - self.aparam = np.repeat(self.fparam, self.atype.size).reshape((nframes, self.atype.size, 1)) + self.aparam = np.repeat(self.fparam, self.atype.size).reshape( + (nframes, self.atype.size, 1) + ) def test_calc_model_devi(self): model_devi = calc_model_devi( From 067addfa71237799f4792d590475ed77630957a5 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 10 Jul 2023 01:00:41 -0400 Subject: [PATCH 35/48] sort aparam in the C++ interface Signed-off-by: Jinzhe Zeng --- source/api_cc/src/common.cc | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/source/api_cc/src/common.cc b/source/api_cc/src/common.cc index 65fd3a7085..b54abd2267 100644 --- a/source/api_cc/src/common.cc +++ b/source/api_cc/src/common.cc @@ -372,7 +372,7 @@ int 
deepmd::session_input_tensors( const std::vector& dbox, const double& cell_size, const std::vector& fparam_, - const std::vector& aparam_, + const std::vector& aparam__, const deepmd::AtomMap& atommap, const std::string scope) { int nframes = dcoord_.size() / 3 / datype_.size(); @@ -410,7 +410,7 @@ int deepmd::session_input_tensors( fparam_shape.AddDim(fparam_.size() / nframes); TensorShape aparam_shape; aparam_shape.AddDim(nframes); - aparam_shape.AddDim(aparam_.size() / nframes); + aparam_shape.AddDim(aparam__.size() / nframes); tensorflow::DataType model_type; if (std::is_same::value) { @@ -439,6 +439,9 @@ int deepmd::session_input_tensors( std::vector dcoord(dcoord_); atommap.forward(dcoord.begin(), dcoord_.begin(), 3, nframes, nall); + std::vector aparam_(aparam__); + atommap.forward(aparam_.begin(), aparam_.begin(), + aparam__.size() / nframes / nloc, nframes, nloc); for (int ii = 0; ii < nframes; ++ii) { for (int jj = 0; jj < nall * 3; ++jj) { @@ -504,7 +507,7 @@ int deepmd::session_input_tensors( const std::vector& dbox, InputNlist& dlist, const std::vector& fparam_, - const std::vector& aparam_, + const std::vector& aparam__, const deepmd::AtomMap& atommap, const int nghost, const int ago, @@ -540,7 +543,7 @@ int deepmd::session_input_tensors( fparam_shape.AddDim(fparam_.size() / nframes); TensorShape aparam_shape; aparam_shape.AddDim(nframes); - aparam_shape.AddDim(aparam_.size() / nframes); + aparam_shape.AddDim(aparam__.size() / nframes); tensorflow::DataType model_type; if (std::is_same::value) { @@ -569,6 +572,9 @@ int deepmd::session_input_tensors( std::vector dcoord(dcoord_); atommap.forward(dcoord.begin(), dcoord_.begin(), 3, nframes, nall); + std::vector aparam_(aparam__); + atommap.forward(aparam_.begin(), aparam_.begin(), + aparam__.size() / nframes / nloc, nframes, nloc); for (int ii = 0; ii < nframes; ++ii) { for (int jj = 0; jj < nall * 3; ++jj) { @@ -637,7 +643,7 @@ int deepmd::session_input_tensors_mixed_type( const std::vector& dbox, const double& cell_size, const std::vector& fparam_, - const std::vector& aparam_, + const std::vector& aparam__, const deepmd::AtomMap& atommap, const std::string scope) { int nall = datype_.size() / nframes; @@ -670,7 +676,7 @@ int deepmd::session_input_tensors_mixed_type( fparam_shape.AddDim(fparam_.size() / nframes); TensorShape aparam_shape; aparam_shape.AddDim(nframes); - aparam_shape.AddDim(aparam_.size() / nframes); + aparam_shape.AddDim(aparam__.size() / nframes); tensorflow::DataType model_type; if (std::is_same::value) { @@ -699,6 +705,9 @@ int deepmd::session_input_tensors_mixed_type( std::vector dcoord(dcoord_); atommap.forward(dcoord.begin(), dcoord_.begin(), 3, nframes, nall); + std::vector aparam_(aparam__); + atommap.forward(aparam_.begin(), aparam_.begin(), + aparam__.size() / nframes / nloc, nframes, nloc); for (int ii = 0; ii < nframes; ++ii) { for (int jj = 0; jj < nall * 3; ++jj) { From 1e49f6d1c056cecebc161bd5d1d9fa4c382b8992 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 10 Jul 2023 02:03:45 -0400 Subject: [PATCH 36/48] fix typo Signed-off-by: Jinzhe Zeng --- source/api_cc/src/common.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/api_cc/src/common.cc b/source/api_cc/src/common.cc index b54abd2267..43412c4c43 100644 --- a/source/api_cc/src/common.cc +++ b/source/api_cc/src/common.cc @@ -440,7 +440,7 @@ int deepmd::session_input_tensors( std::vector dcoord(dcoord_); atommap.forward(dcoord.begin(), dcoord_.begin(), 3, nframes, nall); std::vector aparam_(aparam__); - 
atommap.forward(aparam_.begin(), aparam_.begin(), + atommap.forward(aparam_.begin(), aparam__.begin(), aparam__.size() / nframes / nloc, nframes, nloc); for (int ii = 0; ii < nframes; ++ii) { @@ -573,7 +573,7 @@ int deepmd::session_input_tensors( std::vector dcoord(dcoord_); atommap.forward(dcoord.begin(), dcoord_.begin(), 3, nframes, nall); std::vector aparam_(aparam__); - atommap.forward(aparam_.begin(), aparam_.begin(), + atommap.forward(aparam_.begin(), aparam__.begin(), aparam__.size() / nframes / nloc, nframes, nloc); for (int ii = 0; ii < nframes; ++ii) { @@ -706,7 +706,7 @@ int deepmd::session_input_tensors_mixed_type( std::vector dcoord(dcoord_); atommap.forward(dcoord.begin(), dcoord_.begin(), 3, nframes, nall); std::vector aparam_(aparam__); - atommap.forward(aparam_.begin(), aparam_.begin(), + atommap.forward(aparam_.begin(), aparam__.begin(), aparam__.size() / nframes / nloc, nframes, nloc); for (int ii = 0; ii < nframes; ++ii) { From a2aa14e79ee6bbaaa759e0754676fbf43428c35b Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 10 Jul 2023 02:24:53 -0400 Subject: [PATCH 37/48] sort aparam in the Python API Signed-off-by: Jinzhe Zeng --- deepmd/infer/deep_pot.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/deepmd/infer/deep_pot.py b/deepmd/infer/deep_pot.py index 10240ceb56..122dfd7442 100644 --- a/deepmd/infer/deep_pot.py +++ b/deepmd/infer/deep_pot.py @@ -466,6 +466,10 @@ def _prepare_feed_dict( efield = np.reshape(efield, [nframes, natoms, 3]) efield = efield[:, imap, :] efield = np.reshape(efield, [nframes, natoms * 3]) + if self.has_aparam: + aparam = np.reshape(aparam, [nframes, natoms, fdim]) + aparam = aparam[:, imap, :] + aparam = np.reshape(aparam, [nframes, natoms * fdim]) # make natoms_vec and default_mesh natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type) From f4f7ec1a26a11c05c3bcb6352d422941bc90acb5 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 11 Jul 2023 00:34:38 -0400 Subject: [PATCH 38/48] update the link in README Signed-off-by: Jinzhe Zeng --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 83e6470b5b..6fafff6979 100644 --- a/README.md +++ b/README.md @@ -109,7 +109,7 @@ A full [document](doc/train/train-input-auto.rst) on options in the training inp - [Train a Deep Potential model using `type embedding` approach](doc/model/train-se-e2-a-tebd.md) - [Deep potential long-range](doc/model/dplr.md) - [Deep Potential - Range Correction (DPRc)](doc/model/dprc.md) - - [Pairwise DPRc](pairwise-dprc.md) + - [Pairwise DPRc](doc/model/pairwise-dprc.md) - [Training](doc/train/index.md) - [Training a model](doc/train/training.md) - [Advanced options](doc/train/training-advanced.md) From f2b0fa603c08690289f0522d0a8092b6a81e14ac Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 14 Jul 2023 18:31:25 -0400 Subject: [PATCH 39/48] fix se_atten tabulate when exclude_types is given Signed-off-by: Jinzhe Zeng --- deepmd/descriptor/se_atten.py | 1 + source/lib/include/tabulate.h | 27 ++++++++++----- source/lib/src/cuda/tabulate.cu | 53 ++++++++++++++++++----------- source/lib/src/rocm/tabulate.hip.cu | 50 +++++++++++++++++---------- source/lib/src/tabulate.cc | 50 +++++++++++++++------------ source/op/_tabulate_grad.py | 19 +++++++++-- source/op/tabulate_multi_device.cc | 23 ++++++++----- 7 files changed, 145 insertions(+), 78 deletions(-) diff --git a/deepmd/descriptor/se_atten.py b/deepmd/descriptor/se_atten.py index d0c1b38134..d1097f3fcf 100644 --- a/deepmd/descriptor/se_atten.py +++ 
b/deepmd/descriptor/se_atten.py @@ -1122,6 +1122,7 @@ def _filter_lower( tf.reshape(inputs_i, [natom, shape_i[1] // 4, 4]), two_embd, last_layer_size=outputs_size[-1], + is_sorted=len(self.exclude_types) == 0, ) if (not self.uniform_seed) and (self.seed is not None): diff --git a/source/lib/include/tabulate.h b/source/lib/include/tabulate.h index 395621d97e..2e2c021d9c 100644 --- a/source/lib/include/tabulate.h +++ b/source/lib/include/tabulate.h @@ -12,7 +12,8 @@ void tabulate_fusion_se_a_cpu(FPTYPE* out, const FPTYPE* two_embed, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted = true); template void tabulate_fusion_se_a_grad_cpu(FPTYPE* dy_dem_x, @@ -25,7 +26,8 @@ void tabulate_fusion_se_a_grad_cpu(FPTYPE* dy_dem_x, const FPTYPE* dy, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted = true); template void tabulate_fusion_se_a_grad_grad_cpu(FPTYPE* dz_dy, @@ -37,7 +39,8 @@ void tabulate_fusion_se_a_grad_grad_cpu(FPTYPE* dz_dy, const FPTYPE* dz_dy_dem, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted = true); template void tabulate_fusion_se_t_cpu(FPTYPE* out, @@ -115,7 +118,8 @@ void tabulate_fusion_se_a_gpu_cuda(FPTYPE* out, const FPTYPE* two_embed, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted = true); template void tabulate_fusion_se_a_grad_gpu_cuda(FPTYPE* dy_dem_x, @@ -128,7 +132,8 @@ void tabulate_fusion_se_a_grad_gpu_cuda(FPTYPE* dy_dem_x, const FPTYPE* dy, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted = true); template void tabulate_fusion_se_a_grad_grad_gpu_cuda(FPTYPE* dz_dy, @@ -140,7 +145,8 @@ void tabulate_fusion_se_a_grad_grad_gpu_cuda(FPTYPE* dz_dy, const FPTYPE* dz_dy_dem, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted = true); template void tabulate_fusion_se_t_gpu_cuda(FPTYPE* out, @@ -219,7 +225,8 @@ void tabulate_fusion_se_a_gpu_rocm(FPTYPE* out, const FPTYPE* two_embed, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted = true); template void tabulate_fusion_se_a_grad_gpu_rocm(FPTYPE* dy_dem_x, @@ -232,7 +239,8 @@ void tabulate_fusion_se_a_grad_gpu_rocm(FPTYPE* dy_dem_x, const FPTYPE* dy, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted = true); template void tabulate_fusion_se_a_grad_grad_gpu_rocm(FPTYPE* dz_dy, @@ -244,7 +252,8 @@ void tabulate_fusion_se_a_grad_grad_gpu_rocm(FPTYPE* dz_dy, const FPTYPE* dz_dy_dem, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted = true); template void tabulate_fusion_se_t_gpu_rocm(FPTYPE* out, diff --git a/source/lib/src/cuda/tabulate.cu b/source/lib/src/cuda/tabulate.cu index 06d1d49057..bb05fd59a4 100644 --- a/source/lib/src/cuda/tabulate.cu +++ b/source/lib/src/cuda/tabulate.cu @@ -128,7 +128,8 @@ __global__ void tabulate_fusion_se_a_fifth_order_polynomial( const FPTYPE stride0, const FPTYPE stride1, const int nnei, - const int last_layer_size) { + const int last_layer_size, + const bool is_sorted) { bool enable_se_atten = two_embed != nullptr; const int_64 block_idx = blockIdx.x; // nloc const int thread_idx = threadIdx.x; // last_layer_size @@ 
-141,7 +142,7 @@ __global__ void tabulate_fusion_se_a_fifth_order_polynomial( FPTYPE var[6]; for (int ii = 0; ii < nnei; ii++) { FPTYPE xx = em_x[block_idx * nnei + ii]; - if (xx == ago) { + if (xx == ago && is_sorted) { unloop = true; breakpoint = ii; } @@ -191,7 +192,8 @@ __global__ void tabulate_fusion_se_a_grad_fifth_order_polynomial( const FPTYPE stride0, const FPTYPE stride1, const int nnei, - const int last_layer_size) { + const int last_layer_size, + const bool is_sorted) { bool enable_se_atten = two_embed != nullptr; extern __shared__ int _data[]; const int_64 block_idx = blockIdx.x; // nloc @@ -211,7 +213,7 @@ __global__ void tabulate_fusion_se_a_grad_fifth_order_polynomial( FPTYPE ago = __shfl_sync(0xffffffff, em_x[block_idx * nnei + nnei - 1], 0); for (int ii = warp_idx; ii < nnei; ii += KTILE) { FPTYPE xx = em_x[block_idx * nnei + ii]; - if (ago == xx) { + if (ago == xx && is_sorted) { unloop = true; breakpoint = ii; } @@ -286,7 +288,8 @@ __global__ void tabulate_fusion_se_a_grad_grad_fifth_order_polynomial( const FPTYPE stride0, const FPTYPE stride1, const int nnei, - const int last_layer_size) { + const int last_layer_size, + const bool is_sorted) { extern __shared__ int _data[]; const int_64 block_idx = blockIdx.x; // nloc const int thread_idx = threadIdx.x; // last_layer_size @@ -304,7 +307,7 @@ __global__ void tabulate_fusion_se_a_grad_grad_fifth_order_polynomial( for (int ii = 0; ii < nnei; ii++) { FPTYPE xx = em_x[block_idx * nnei + ii]; FPTYPE dz_xx = dz_dy_dem_x[block_idx * nnei + ii]; - if (xx == ago) { + if (xx == ago && is_sorted) { unloop = true; breakpoint = ii; } @@ -626,16 +629,18 @@ void tabulate_fusion_se_a_gpu_cuda(FPTYPE* out, const FPTYPE* two_embed, const int nloc, const int nnei, - const int last_layer_size) { + const int last_layer_size, + const bool is_sorted) { if (nloc <= 0) { return; } DPErrcheck(cudaGetLastError()); DPErrcheck(cudaDeviceSynchronize()); tabulate_fusion_se_a_fifth_order_polynomial - <<>>( - out, table, em_x, em, two_embed, table_info[0], table_info[1], - table_info[2], table_info[3], table_info[4], nnei, last_layer_size); + <<>>(out, table, em_x, em, two_embed, + table_info[0], table_info[1], table_info[2], + table_info[3], table_info[4], nnei, + last_layer_size, is_sorted); DPErrcheck(cudaGetLastError()); DPErrcheck(cudaDeviceSynchronize()); } @@ -651,7 +656,8 @@ void tabulate_fusion_se_a_grad_gpu_cuda(FPTYPE* dy_dem_x, const FPTYPE* dy, const int nloc, const int nnei, - const int last_layer_size) { + const int last_layer_size, + const bool is_sorted) { if (nloc <= 0) { return; } @@ -664,7 +670,7 @@ void tabulate_fusion_se_a_grad_gpu_cuda(FPTYPE* dy_dem_x, <<>>( dy_dem_x, dy_dem, table, em_x, em, two_embed, dy, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, - last_layer_size); + last_layer_size, is_sorted); DPErrcheck(cudaGetLastError()); DPErrcheck(cudaDeviceSynchronize()); } @@ -679,7 +685,8 @@ void tabulate_fusion_se_a_grad_grad_gpu_cuda(FPTYPE* dz_dy, const FPTYPE* dz_dy_dem, const int nloc, const int nnei, - const int last_layer_size) { + const int last_layer_size, + const bool is_sorted) { if (nloc <= 0) { return; } @@ -690,7 +697,7 @@ void tabulate_fusion_se_a_grad_grad_gpu_cuda(FPTYPE* dz_dy, <<>>( dz_dy, table, em_x, em, dz_dy_dem_x, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, - last_layer_size); + last_layer_size, is_sorted); DPErrcheck(cudaGetLastError()); DPErrcheck(cudaDeviceSynchronize()); } @@ -852,7 +859,8 @@ template void 
tabulate_fusion_se_a_gpu_cuda(float* out, const float* two_embed, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted); template void tabulate_fusion_se_a_gpu_cuda(double* out, const double* table, const double* table_info, @@ -861,7 +869,8 @@ template void tabulate_fusion_se_a_gpu_cuda(double* out, const double* two_embed, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted); template void tabulate_fusion_se_a_grad_gpu_cuda( float* dy_dem_x, float* dy_dem, @@ -873,7 +882,8 @@ template void tabulate_fusion_se_a_grad_gpu_cuda( const float* dy, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted); template void tabulate_fusion_se_a_grad_gpu_cuda( double* dy_dem_x, double* dy_dem, @@ -885,7 +895,8 @@ template void tabulate_fusion_se_a_grad_gpu_cuda( const double* dy, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted); template void tabulate_fusion_se_a_grad_grad_gpu_cuda( float* dz_dy, const float* table, @@ -896,7 +907,8 @@ template void tabulate_fusion_se_a_grad_grad_gpu_cuda( const float* dz_dy_dem, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted); template void tabulate_fusion_se_a_grad_grad_gpu_cuda( double* dz_dy, const double* table, @@ -907,7 +919,8 @@ template void tabulate_fusion_se_a_grad_grad_gpu_cuda( const double* dz_dy_dem, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted); template void tabulate_fusion_se_t_gpu_cuda(float* out, const float* table, diff --git a/source/lib/src/rocm/tabulate.hip.cu b/source/lib/src/rocm/tabulate.hip.cu index 5aaf023262..6ae21d4e0b 100644 --- a/source/lib/src/rocm/tabulate.hip.cu +++ b/source/lib/src/rocm/tabulate.hip.cu @@ -88,7 +88,8 @@ __global__ void tabulate_fusion_se_a_fifth_order_polynomial( const FPTYPE stride0, const FPTYPE stride1, const int nnei, - const int last_layer_size) { + const int last_layer_size, + const bool is_sorted) { bool enable_se_atten = two_embed != nullptr; HIP_DYNAMIC_SHARED(int, _data) const int_64 block_idx = blockIdx.x; // nloc @@ -105,7 +106,7 @@ __global__ void tabulate_fusion_se_a_fifth_order_polynomial( for (int ii = 0; ii < nnei; ii++) { FPTYPE var[6]; FPTYPE xx = em_x[block_idx * nnei + ii]; - if (xx == ago) { + if (xx == ago && is_sorted) { unloop = true; breakpoint = ii; } @@ -157,7 +158,8 @@ __global__ void tabulate_fusion_se_a_grad_fifth_order_polynomial( const FPTYPE stride0, const FPTYPE stride1, const int nnei, - const int last_layer_size) { + const int last_layer_size, + const bool is_sorted) { bool enable_se_atten = two_embed != nullptr; HIP_DYNAMIC_SHARED(int, _data) const int_64 block_idx = blockIdx.x; // nloc @@ -177,7 +179,7 @@ __global__ void tabulate_fusion_se_a_grad_fifth_order_polynomial( FPTYPE ago = __shfl(em_x[block_idx * nnei + nnei - 1], 0); for (int ii = 0; ii < nnei - warp_idx; ii += KTILE) { FPTYPE xx = em_x[block_idx * nnei + ii + warp_idx]; - if (ago == xx) { + if (ago == xx && is_sorted) { unloop = true; breakpoint = ii + warp_idx; } @@ -260,7 +262,8 @@ __global__ void tabulate_fusion_se_a_grad_grad_fifth_order_polynomial( const FPTYPE stride0, const FPTYPE stride1, const int nnei, - const int last_layer_size) { + const int last_layer_size, + const bool is_sorted) { extern __shared__ int _data[]; const 
int_64 block_idx = blockIdx.x; // nloc const int thread_idx = threadIdx.x; // last_layer_size @@ -277,7 +280,7 @@ __global__ void tabulate_fusion_se_a_grad_grad_fifth_order_polynomial( FPTYPE var[6]; FPTYPE xx = em_x[block_idx * nnei + ii]; FPTYPE dz_xx = dz_dy_dem_x[block_idx * nnei + ii]; - if (xx == ago) { + if (xx == ago && is_sorted) { unloop = true; breakpoint = ii; } @@ -626,7 +629,8 @@ void tabulate_fusion_se_a_gpu_rocm(FPTYPE* out, const FPTYPE* two_embed, const int nloc, const int nnei, - const int last_layer_size) { + const int last_layer_size, + const bool is_sorted) { if (nloc <= 0) { return; } @@ -635,7 +639,7 @@ void tabulate_fusion_se_a_gpu_rocm(FPTYPE* out, tabulate_fusion_se_a_fifth_order_polynomial), nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size, 0, out, table, em_x, em, two_embed, table_info[0], table_info[1], table_info[2], - table_info[3], table_info[4], nnei, last_layer_size); + table_info[3], table_info[4], nnei, last_layer_size, is_sorted); DPErrcheck(hipGetLastError()); DPErrcheck(hipDeviceSynchronize()); } @@ -651,7 +655,8 @@ void tabulate_fusion_se_a_grad_gpu_rocm(FPTYPE* dy_dem_x, const FPTYPE* dy, const int nloc, const int nnei, - const int last_layer_size) { + const int last_layer_size, + const bool is_sorted) { if (nloc <= 0) { return; } @@ -663,7 +668,8 @@ void tabulate_fusion_se_a_grad_gpu_rocm(FPTYPE* dy_dem_x, tabulate_fusion_se_a_grad_fifth_order_polynomial), nloc, KK * WARP_SIZE, sizeof(FPTYPE) * MM * last_layer_size, 0, dy_dem_x, dy_dem, table, em_x, em, two_embed, dy, table_info[0], table_info[1], - table_info[2], table_info[3], table_info[4], nnei, last_layer_size); + table_info[2], table_info[3], table_info[4], nnei, last_layer_size, + is_sorted); DPErrcheck(hipGetLastError()); DPErrcheck(hipDeviceSynchronize()); } @@ -678,7 +684,8 @@ void tabulate_fusion_se_a_grad_grad_gpu_rocm(FPTYPE* dz_dy, const FPTYPE* dz_dy_dem, const int nloc, const int nnei, - const int last_layer_size) { + const int last_layer_size, + const bool is_sorted) { if (nloc <= 0) { return; } @@ -689,7 +696,8 @@ void tabulate_fusion_se_a_grad_grad_gpu_rocm(FPTYPE* dz_dy, KK>), nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size, 0, dz_dy, table, em_x, em, dz_dy_dem_x, dz_dy_dem, table_info[0], table_info[1], - table_info[2], table_info[3], table_info[4], nnei, last_layer_size); + table_info[2], table_info[3], table_info[4], nnei, last_layer_size, + is_sorted); DPErrcheck(hipGetLastError()); DPErrcheck(hipDeviceSynchronize()); } @@ -850,7 +858,8 @@ template void tabulate_fusion_se_a_gpu_rocm(float* out, const float* two_embed, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted); template void tabulate_fusion_se_a_gpu_rocm(double* out, const double* table, const double* table_info, @@ -859,7 +868,8 @@ template void tabulate_fusion_se_a_gpu_rocm(double* out, const double* two_embed, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted); template void tabulate_fusion_se_a_grad_gpu_rocm( float* dy_dem_x, float* dy_dem, @@ -871,7 +881,8 @@ template void tabulate_fusion_se_a_grad_gpu_rocm( const float* dy, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted); template void tabulate_fusion_se_a_grad_gpu_rocm( double* dy_dem_x, double* dy_dem, @@ -883,7 +894,8 @@ template void tabulate_fusion_se_a_grad_gpu_rocm( const double* dy, const int nloc, const int nnei, - const int 
last_layer_size); + const int last_layer_size, + const bool is_sorted); template void tabulate_fusion_se_a_grad_grad_gpu_rocm( float* dz_dy, const float* table, @@ -894,7 +906,8 @@ template void tabulate_fusion_se_a_grad_grad_gpu_rocm( const float* dz_dy_dem, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted); template void tabulate_fusion_se_a_grad_grad_gpu_rocm( double* dz_dy, const double* table, @@ -905,7 +918,8 @@ template void tabulate_fusion_se_a_grad_grad_gpu_rocm( const double* dz_dy_dem, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted); template void tabulate_fusion_se_t_gpu_rocm(float* out, const float* table, diff --git a/source/lib/src/tabulate.cc b/source/lib/src/tabulate.cc index cd23991cf8..377e6d06db 100644 --- a/source/lib/src/tabulate.cc +++ b/source/lib/src/tabulate.cc @@ -86,7 +86,8 @@ void deepmd::tabulate_fusion_se_a_cpu(FPTYPE* out, const FPTYPE* two_embed, const int nloc, const int nnei, - const int last_layer_size) { + const int last_layer_size, + const bool is_sorted) { bool enable_se_atten = two_embed != nullptr; memset(out, 0, sizeof(FPTYPE) * nloc * 4 * last_layer_size); const FPTYPE lower = table_info[0]; @@ -107,7 +108,7 @@ void deepmd::tabulate_fusion_se_a_cpu(FPTYPE* out, ll[2] = em[ii * nnei * 4 + jj * 4 + 2]; ll[3] = em[ii * nnei * 4 + jj * 4 + 3]; FPTYPE xx = em_x[ii * nnei + jj]; - if (ago == xx) { + if (ago == xx && is_sorted) { unloop = true; } int table_idx = 0; @@ -165,7 +166,8 @@ void deepmd::tabulate_fusion_se_a_grad_cpu(FPTYPE* dy_dem_x, const FPTYPE* dy, const int nloc, const int nnei, - const int last_layer_size) { + const int last_layer_size, + const bool is_sorted) { bool enable_se_atten = two_embed != nullptr; memset(dy_dem_x, 0, sizeof(FPTYPE) * nloc * nnei); memset(dy_dem, 0, sizeof(FPTYPE) * nloc * nnei * 4); @@ -189,7 +191,7 @@ void deepmd::tabulate_fusion_se_a_grad_cpu(FPTYPE* dy_dem_x, ll[2] = em[ii * nnei * 4 + jj * 4 + 2]; ll[3] = em[ii * nnei * 4 + jj * 4 + 3]; FPTYPE xx = em_x[ii * nnei + jj]; - if (ago == xx) { + if (ago == xx && is_sorted) { unloop = true; } int table_idx = 0; @@ -249,7 +251,8 @@ void deepmd::tabulate_fusion_se_a_grad_grad_cpu(FPTYPE* dz_dy, const FPTYPE* dz_dy_dem, const int nloc, const int nnei, - const int last_layer_size) { + const int last_layer_size, + const bool is_sorted) { memset(dz_dy, 0, sizeof(FPTYPE) * nloc * 4 * last_layer_size); const FPTYPE lower = table_info[0]; const FPTYPE upper = table_info[1]; @@ -275,7 +278,7 @@ void deepmd::tabulate_fusion_se_a_grad_grad_cpu(FPTYPE* dz_dy, hh[3] = dz_dy_dem[ii * nnei * 4 + jj * 4 + 3]; FPTYPE xx = em_x[ii * nnei + jj]; FPTYPE dz_xx = dz_dy_dem_x[ii * nnei + jj]; - if (ago == xx) { + if (ago == xx && is_sorted) { unloop = true; } int table_idx = 0; @@ -604,16 +607,16 @@ void deepmd::tabulate_fusion_se_r_grad_grad_cpu(FPTYPE* dz_dy, } } -template void deepmd::tabulate_fusion_se_a_cpu( - float* out, - const float* table, - const float* table_info, - const float* em_x, - const float* em, - const float* two_embed, - const int nloc, - const int nnei, - const int last_layer_size); +template void deepmd::tabulate_fusion_se_a_cpu(float* out, + const float* table, + const float* table_info, + const float* em_x, + const float* em, + const float* two_embed, + const int nloc, + const int nnei, + const int last_layer_size, + const bool is_sorted); template void deepmd::tabulate_fusion_se_a_cpu( double* out, const double* table, @@ -623,7 
+626,8 @@ template void deepmd::tabulate_fusion_se_a_cpu( const double* two_embed, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted); template void deepmd::tabulate_fusion_se_a_grad_cpu( float* dy_dem_x, float* dy_dem, @@ -635,7 +639,8 @@ template void deepmd::tabulate_fusion_se_a_grad_cpu( const float* dy, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted); template void deepmd::tabulate_fusion_se_a_grad_cpu( double* dy_dem_x, double* dy_dem, @@ -647,7 +652,8 @@ template void deepmd::tabulate_fusion_se_a_grad_cpu( const double* dy, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted); template void deepmd::tabulate_fusion_se_a_grad_grad_cpu( float* dz_dy, const float* table, @@ -658,7 +664,8 @@ template void deepmd::tabulate_fusion_se_a_grad_grad_cpu( const float* dz_dy_dem, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted); template void deepmd::tabulate_fusion_se_a_grad_grad_cpu( double* dz_dy, const double* table, @@ -669,7 +676,8 @@ template void deepmd::tabulate_fusion_se_a_grad_grad_cpu( const double* dz_dy_dem, const int nloc, const int nnei, - const int last_layer_size); + const int last_layer_size, + const bool is_sorted); template void deepmd::tabulate_fusion_se_t_cpu( float* out, diff --git a/source/op/_tabulate_grad.py b/source/op/_tabulate_grad.py index ac4d9df731..e91aa5fd2f 100644 --- a/source/op/_tabulate_grad.py +++ b/source/op/_tabulate_grad.py @@ -31,7 +31,14 @@ def _tabulate_fusion_se_a_grad_cc(op, dy): @ops.RegisterGradient("TabulateFusionSeAGrad") def _tabulate_fusion_se_a_grad_grad_cc(op, dy, dy_): dz_dy = op_module.tabulate_fusion_se_a_grad_grad( - op.inputs[0], op.inputs[1], op.inputs[2], op.inputs[3], dy, dy_, op.inputs[5] + op.inputs[0], + op.inputs[1], + op.inputs[2], + op.inputs[3], + dy, + dy_, + op.inputs[5], + is_sorted=True, ) return [None, None, None, None, dz_dy, None] @@ -46,6 +53,7 @@ def _tabulate_fusion_se_atten_grad_cc(op, dy): op.inputs[4], dy, op.outputs[0], + is_sorted=op.get_attr("is_sorted"), ) return [None, None, dy_dx, dy_df, dy_dtwo] @@ -53,7 +61,14 @@ def _tabulate_fusion_se_atten_grad_cc(op, dy): @ops.RegisterGradient("TabulateFusionSeAttenGrad") def _tabulate_fusion_se_atten_grad_grad_cc(op, dy, dy_, dy_dtwo): dz_dy = op_module.tabulate_fusion_se_a_grad_grad( - op.inputs[0], op.inputs[1], op.inputs[2], op.inputs[3], dy, dy_, op.inputs[6] + op.inputs[0], + op.inputs[1], + op.inputs[2], + op.inputs[3], + dy, + dy_, + op.inputs[6], + is_sorted=op.get_attr("is_sorted"), ) return [None, None, None, None, None, dz_dy, None] diff --git a/source/op/tabulate_multi_device.cc b/source/op/tabulate_multi_device.cc index a3ec2dbd82..2f66c8c53e 100644 --- a/source/op/tabulate_multi_device.cc +++ b/source/op/tabulate_multi_device.cc @@ -63,7 +63,8 @@ REGISTER_OP("TabulateFusionSeAGradGrad") .Input("dz_dy_dem_x: T") .Input("dz_dy_dem: T") .Input("descriptor: T") - .Output("dz_dy: T"); + .Output("dz_dy: T") + .Attr("is_sorted: bool = true"); REGISTER_OP("TabulateFusionSeAtten") .Attr("T: {float, double} = DT_DOUBLE") @@ -73,6 +74,7 @@ REGISTER_OP("TabulateFusionSeAtten") .Input("em: T") .Input("two_embed: T") .Attr("last_layer_size: int") + .Attr("is_sorted: bool = true") .Output("descriptor: T"); REGISTER_OP("TabulateFusionSeAttenGrad") @@ -86,7 +88,8 @@ REGISTER_OP("TabulateFusionSeAttenGrad") 
.Input("descriptor: T") .Output("dy_dem_x: T") .Output("dy_dem: T") - .Output("dy_dtwo: T"); + .Output("dy_dtwo: T") + .Attr("is_sorted: bool = true"); REGISTER_OP("TabulateFusionSeT") .Attr("T: {float, double} = DT_DOUBLE") @@ -349,6 +352,7 @@ class TabulateFusionSeAGradGradOp : public OpKernel { } private: + bool is_sorted; std::string device; }; @@ -406,22 +410,24 @@ class TabulateFusionSeAttenOp : public OpKernel { #if GOOGLE_CUDA deepmd::tabulate_fusion_se_a_gpu_cuda(descriptor, table, table_info, em_x, em, two_embed, nloc, nnei, - last_layer_size); + last_layer_size, is_sorted); #endif // GOOGLE_CUDA #if TENSORFLOW_USE_ROCM deepmd::tabulate_fusion_se_a_gpu_rocm(descriptor, table, table_info, em_x, em, two_embed, nloc, nnei, - last_layer_size); + last_layer_size, is_sorted); #endif // TENSORFLOW_USE_ROCM } else if (device == "CPU") { deepmd::tabulate_fusion_se_a_cpu(descriptor, table, table_info, em_x, em, - two_embed, nloc, nnei, last_layer_size); + two_embed, nloc, nnei, last_layer_size, + is_sorted); } } private: int last_layer_size; + bool is_sorted; std::string device; }; @@ -483,22 +489,23 @@ class TabulateFusionSeAttenGradOp : public OpKernel { #if GOOGLE_CUDA deepmd::tabulate_fusion_se_a_grad_gpu_cuda( dy_dem_x, dy_dem, table, table_info, em_x, em, two_embed, dy, nloc, - nnei, last_layer_size); + nnei, last_layer_size, is_sorted); #endif // GOOGLE_CUDA #if TENSORFLOW_USE_ROCM deepmd::tabulate_fusion_se_a_grad_gpu_rocm( dy_dem_x, dy_dem, table, table_info, em_x, em, two_embed, dy, nloc, - nnei, last_layer_size); + nnei, last_layer_size, is_sorted); #endif // TENSORFLOW_USE_ROCM } else if (device == "CPU") { deepmd::tabulate_fusion_se_a_grad_cpu(dy_dem_x, dy_dem, table, table_info, em_x, em, two_embed, dy, nloc, nnei, - last_layer_size); + last_layer_size, is_sorted); } } private: + bool is_sorted; std::string device; }; From d3df0720a269821502c7bc3c216244f351222d06 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 14 Jul 2023 18:32:20 -0400 Subject: [PATCH 40/48] add tests for compression Signed-off-by: Jinzhe Zeng --- source/tests/pairwise_dprc.json | 65 +++--- source/tests/test_pairwise_dprc.py | 322 ++++++++++++++++++++++++++++- 2 files changed, 356 insertions(+), 31 deletions(-) diff --git a/source/tests/pairwise_dprc.json b/source/tests/pairwise_dprc.json index 95f4de5cda..60fb75e775 100644 --- a/source/tests/pairwise_dprc.json +++ b/source/tests/pairwise_dprc.json @@ -18,6 +18,7 @@ "qm_model": { "descriptor": { "type": "se_atten", + "stripped_type_embedding": true, "sel": 100, "rcut_smth": 5.80, "rcut": 6.00, @@ -45,6 +46,7 @@ "qmmm_model": { "descriptor": { "type": "se_atten", + "stripped_type_embedding": true, "sel": 100, "rcut_smth": 5.80, "rcut": 6.00, @@ -133,38 +135,43 @@ } } }, + "loss": { + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0 + }, + "learning_rate": { + "start_lr": 0.005, + "decay_steps": 5000, + "stop_lr": 1e-8 + }, - "systems": [ - "system" - ], - "set_prefix": "set", - "stop_batch": 1000000, - "batch_size": 1, - "start_lr": 0.005, - "decay_steps": 5000, - "decay_rate": 0.95, - - "start_pref_e": 0.02, - "limit_pref_e": 1, - "start_pref_f": 1000, - "limit_pref_f": 1, - "start_pref_v": 0, - "limit_pref_v": 0, + "training": { + "training_data": { + "systems": [ + "system" + ], + "batch_size": 1 + }, + "stop_batch": 0, - "seed": 1, + "seed": 1, - "_comment": " display and restart", - "_comment": " frequencies counted in batch", - "disp_file": "lcurve.out", - "disp_freq": 
100, - "numb_test": 1, - "save_freq": 1000, - "save_ckpt": "model.ckpt", - "load_ckpt": "model.ckpt", - "disp_training": true, - "time_training": true, - "profiling": false, - "profiling_file": "timeline.json", + "_comment": " display and restart", + "_comment": " frequencies counted in batch", + "disp_file": "lcurve.out", + "disp_freq": 100, + "numb_test": 1, + "save_freq": 1000, + "save_ckpt": "model.ckpt", + "disp_training": true, + "time_training": true, + "profiling": false, + "profiling_file": "timeline.json" + }, "_comment": "that's all" } diff --git a/source/tests/test_pairwise_dprc.py b/source/tests/test_pairwise_dprc.py index 1432f0f4c2..60b3fbb7df 100644 --- a/source/tests/test_pairwise_dprc.py +++ b/source/tests/test_pairwise_dprc.py @@ -1,22 +1,28 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Test pairwise DPRc features.""" +import json import unittest import dpdata import numpy as np from common import ( + run_dp, tests_path, ) from pkg_resources import ( parse_version, ) +from deepmd import ( + DeepPotential, +) from deepmd.common import ( j_loader, j_must_have, ) from deepmd.env import ( GLOBAL_ENER_FLOAT_PRECISION, + GLOBAL_NP_FLOAT_PRECISION, GLOBAL_TF_FLOAT_PRECISION, op_module, tf, @@ -34,6 +40,11 @@ run_sess, ) +if GLOBAL_NP_FLOAT_PRECISION == np.float32: + default_places = 4 +else: + default_places = 10 + class TestPairwiseOP(tf.test.TestCase): """Test dprc_pairwise_idx OP.""" @@ -260,8 +271,7 @@ def test_model_ener(self): idxs = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]) np.save("system/set.000/aparam.npy", idxs) - systems = j_must_have(jdata, "systems") - set_pfx = j_must_have(jdata, "set_prefix") + systems = j_must_have(jdata["training"]["training_data"], "systems") batch_size = 1 test_size = 1 rcut = model.get_rcut() @@ -346,3 +356,311 @@ def test_model_ener(self): # the model is pairwise! 
self.assertAllClose(e[1] + e[2] + e[3] - 3 * e[0], e[4] - e[0]) self.assertAllClose(f[1] + f[2] + f[3] - 3 * f[0], f[4] - f[0]) + + +def _init_models(): + system = dpdata.LabeledSystem() + system.data["atom_names"] = ["C", "N", "O", "H", "OW", "HW"] + system.data["coords"] = np.array( + [ + 2.48693, + -0.12642, + 0.45320, + 3.86292, + -0.00082, + 0.07286, + 4.19135, + 0.35148, + -1.21253, + 3.35886, + 0.58875, + -2.08423, + 5.67422, + 0.44076, + -1.45160, + 2.40712, + -0.32538, + 1.52137, + 2.04219, + -0.93912, + -0.12445, + 1.98680, + 0.81574, + 0.21261, + 4.57186, + -0.33026, + 0.71127, + 6.24532, + 0.18814, + -0.55212, + 5.92647, + 1.46447, + -1.74069, + 5.95030, + -0.25321, + -2.24804, + -0.32794, + 1.50468, + 0.83176, + 0.23662, + 2.24068, + 1.13166, + -0.24528, + 1.59132, + -0.14907, + -0.50371, + -1.24800, + -0.05601, + -0.28305, + -1.84629, + 0.67555, + -0.68673, + -0.40535, + 0.41384, + 0.38397, + 0.80987, + -1.90358, + 1.30191, + 0.68503, + -2.22909, + 0.11626, + -0.11276, + -1.70506, + ] + ).reshape(1, 21, 3) + system.data["atom_types"] = np.array( + [0, 1, 0, 2, 0, 3, 3, 3, 3, 3, 3, 3, 4, 5, 5, 4, 5, 5, 4, 5, 5] + ) + system.data["cells"] = np.array([np.eye(3) * 30]) + nframes = 1 + natoms = 21 + system.data["coords"] = system.data["coords"].reshape([nframes, natoms, 3]) + system.data["cells"] = system.data["cells"].reshape([nframes, 3, 3]) + system.data["energies"] = np.ones( + [ + nframes, + ] + ) + system.data["forces"] = np.zeros([nframes, natoms, 3]) + system.data["nopbc"] = True + system.to_deepmd_npy("pairwise_system", prec=np.float64) + idxs = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]) + np.save("pairwise_system/set.000/aparam.npy", idxs) + + data_file = str(tests_path / "pairwise_system") + frozen_model = str(tests_path / "dp-original-se-t.pb") + compressed_model = str(tests_path / "dp-compressed-se-t.pb") + INPUT = str(tests_path / "input.json") + jdata = j_loader(str(tests_path / "pairwise_dprc.json")) + jdata["training"]["training_data"]["systems"] = data_file + with open(INPUT, "w") as fp: + json.dump(jdata, fp, indent=4) + + ret = run_dp("dp train " + INPUT) + np.testing.assert_equal(ret, 0, "DP train failed!") + ret = run_dp("dp freeze -o " + frozen_model) + np.testing.assert_equal(ret, 0, "DP freeze failed!") + ret = run_dp("dp compress " + " -i " + frozen_model + " -o " + compressed_model) + np.testing.assert_equal(ret, 0, "DP model compression failed!") + return INPUT, frozen_model, compressed_model + + +class TestPairwiseCompress(unittest.TestCase): + @classmethod + def setUpClass(cls): + INPUT, FROZEN_MODEL, COMPRESSED_MODEL = _init_models() + cls.dp_original = DeepPotential(FROZEN_MODEL) + cls.dp_compressed = DeepPotential(COMPRESSED_MODEL) + + def setUp(self) -> None: + self.coords = np.array( + [ + 2.48693, + -0.12642, + 0.45320, + 3.86292, + -0.00082, + 0.07286, + 4.19135, + 0.35148, + -1.21253, + 3.35886, + 0.58875, + -2.08423, + 5.67422, + 0.44076, + -1.45160, + 2.40712, + -0.32538, + 1.52137, + 2.04219, + -0.93912, + -0.12445, + 1.98680, + 0.81574, + 0.21261, + 4.57186, + -0.33026, + 0.71127, + 6.24532, + 0.18814, + -0.55212, + 5.92647, + 1.46447, + -1.74069, + 5.95030, + -0.25321, + -2.24804, + -0.32794, + 1.50468, + 0.83176, + 0.23662, + 2.24068, + 1.13166, + -0.24528, + 1.59132, + -0.14907, + -0.50371, + -1.24800, + -0.05601, + -0.28305, + -1.84629, + 0.67555, + -0.68673, + -0.40535, + 0.41384, + 0.38397, + 0.80987, + -1.90358, + 1.30191, + 0.68503, + -2.22909, + 0.11626, + -0.11276, + -1.70506, + ] + ).reshape(1, 21, 
3) + self.atype = [0, 1, 0, 2, 0, 3, 3, 3, 3, 3, 3, 3, 4, 5, 5, 4, 5, 5, 4, 5, 5] + self.box = None + self.idxs = np.array( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3] + ).astype(np.float64) + # self.idxs = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0]).astype(np.float64) + self.type_map = ["C", "N", "O", "H", "OW", "HW"] + + def test_attrs(self): + self.assertEqual(self.dp_original.get_ntypes(), len(self.type_map)) + self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places=default_places) + self.assertEqual(self.dp_original.get_type_map(), self.type_map) + self.assertEqual(self.dp_original.get_dim_fparam(), 0) + self.assertEqual(self.dp_original.get_dim_aparam(), 1) + + self.assertEqual(self.dp_compressed.get_ntypes(), len(self.type_map)) + self.assertAlmostEqual( + self.dp_compressed.get_rcut(), 6.0, places=default_places + ) + self.assertEqual(self.dp_compressed.get_type_map(), self.type_map) + self.assertEqual(self.dp_compressed.get_dim_fparam(), 0) + self.assertEqual(self.dp_compressed.get_dim_aparam(), 1) + + def test_1frame(self): + ee0, ff0, vv0 = self.dp_original.eval( + self.coords, + self.box, + self.atype, + atomic=False, + aparam=self.idxs, + ) + ee1, ff1, vv1 = self.dp_compressed.eval( + self.coords, + self.box, + self.atype, + atomic=False, + aparam=self.idxs, + ) + # check shape of the returns + nframes = 1 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + def test_1frame_atm(self): + ee0, ff0, vv0, ae0, av0 = self.dp_original.eval( + self.coords, + self.box, + self.atype, + atomic=True, + aparam=self.idxs, + ) + ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval( + self.coords, + self.box, + self.atype, + atomic=True, + aparam=self.idxs, + ) + # check shape of the returns + nframes = 1 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, (nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ae0.shape, (nframes, natoms, 1)) + self.assertEqual(av0.shape, (nframes, natoms, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + self.assertEqual(ae1.shape, (nframes, natoms, 1)) + self.assertEqual(av1.shape, (nframes, natoms, 9)) + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ae0, ae1, default_places) + np.testing.assert_almost_equal(av0, av1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) + + def test_2frame_atm(self): + coords2 = np.concatenate((self.coords, self.coords)) + box2 = None + ee0, ff0, vv0, ae0, av0 = self.dp_original.eval( + coords2, + box2, + self.atype, + atomic=True, + aparam=self.idxs, + ) + ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval( + coords2, + box2, + self.atype, + atomic=True, + aparam=self.idxs, + ) + # check shape of the returns + nframes = 2 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes, 1)) + self.assertEqual(ff0.shape, 
(nframes, natoms, 3)) + self.assertEqual(vv0.shape, (nframes, 9)) + self.assertEqual(ae0.shape, (nframes, natoms, 1)) + self.assertEqual(av0.shape, (nframes, natoms, 9)) + self.assertEqual(ee1.shape, (nframes, 1)) + self.assertEqual(ff1.shape, (nframes, natoms, 3)) + self.assertEqual(vv1.shape, (nframes, 9)) + self.assertEqual(ae1.shape, (nframes, natoms, 1)) + self.assertEqual(av1.shape, (nframes, natoms, 9)) + + # check values + np.testing.assert_almost_equal(ff0, ff1, default_places) + np.testing.assert_almost_equal(ae0, ae1, default_places) + np.testing.assert_almost_equal(av0, av1, default_places) + np.testing.assert_almost_equal(ee0, ee1, default_places) + np.testing.assert_almost_equal(vv0, vv1, default_places) From f11c5ea9ad71fdfe55cb5b88232dcadeb2a94ee4 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 14 Jul 2023 19:07:19 -0400 Subject: [PATCH 41/48] fix path to data Signed-off-by: Jinzhe Zeng --- source/tests/test_pairwise_dprc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/tests/test_pairwise_dprc.py b/source/tests/test_pairwise_dprc.py index 60b3fbb7df..8073d128a6 100644 --- a/source/tests/test_pairwise_dprc.py +++ b/source/tests/test_pairwise_dprc.py @@ -443,9 +443,9 @@ def _init_models(): ) system.data["forces"] = np.zeros([nframes, natoms, 3]) system.data["nopbc"] = True - system.to_deepmd_npy("pairwise_system", prec=np.float64) + system.to_deepmd_npy(str(tests_path / "pairwise_system"), prec=np.float64) idxs = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]) - np.save("pairwise_system/set.000/aparam.npy", idxs) + np.save(str(tests_path / "pairwise_system/set.000/aparam.npy"), idxs) data_file = str(tests_path / "pairwise_system") frozen_model = str(tests_path / "dp-original-se-t.pb") From 4121e8a0b19be8b8ca27d05ae6678912dbec0bb0 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 14 Jul 2023 19:43:24 -0400 Subject: [PATCH 42/48] skip tf 1.14 Signed-off-by: Jinzhe Zeng --- source/tests/test_pairwise_dprc.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/tests/test_pairwise_dprc.py b/source/tests/test_pairwise_dprc.py index 8073d128a6..c8f35e8603 100644 --- a/source/tests/test_pairwise_dprc.py +++ b/source/tests/test_pairwise_dprc.py @@ -465,6 +465,10 @@ def _init_models(): return INPUT, frozen_model, compressed_model +@unittest.skipIf( + parse_version(tf.__version__) < parse_version("1.15"), + f"The current tf version {tf.__version__} is too low to run the new testing model.", +) class TestPairwiseCompress(unittest.TestCase): @classmethod def setUpClass(cls): From 514d25aef1125cf58128b9b63cdb502e3451c1ba Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 14 Jul 2023 20:10:52 -0400 Subject: [PATCH 43/48] fix model name Signed-off-by: Jinzhe Zeng --- source/tests/test_pairwise_dprc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/tests/test_pairwise_dprc.py b/source/tests/test_pairwise_dprc.py index c8f35e8603..2ea5888b60 100644 --- a/source/tests/test_pairwise_dprc.py +++ b/source/tests/test_pairwise_dprc.py @@ -448,8 +448,8 @@ def _init_models(): np.save(str(tests_path / "pairwise_system/set.000/aparam.npy"), idxs) data_file = str(tests_path / "pairwise_system") - frozen_model = str(tests_path / "dp-original-se-t.pb") - compressed_model = str(tests_path / "dp-compressed-se-t.pb") + frozen_model = str(tests_path / "dp-original-pairwise-dprc.pb") + compressed_model = str(tests_path / "dp-compressed-pairwise-dprc.pb") INPUT = str(tests_path / "input.json") jdata = 
j_loader(str(tests_path / "pairwise_dprc.json")) jdata["training"]["training_data"]["systems"] = data_file From 1a4eb5d97e128a1b83484192b86b334e21a284c4 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 14 Jul 2023 20:47:21 -0400 Subject: [PATCH 44/48] fix TabulateFusionSeAGradGradOp Signed-off-by: Jinzhe Zeng (cherry picked from commit 1cbe1a8d26849ea11ad56838fbbc619023a97131) --- source/op/tabulate_multi_device.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/op/tabulate_multi_device.cc b/source/op/tabulate_multi_device.cc index 2f66c8c53e..f9492fdba6 100644 --- a/source/op/tabulate_multi_device.cc +++ b/source/op/tabulate_multi_device.cc @@ -333,12 +333,12 @@ class TabulateFusionSeAGradGradOp : public OpKernel { #if GOOGLE_CUDA deepmd::tabulate_fusion_se_a_grad_grad_gpu_cuda( dz_dy, table, table_info, em_x, em, dz_dy_dem_x, dz_dy_dem, nloc, - nnei, last_layer_size); + nnei, last_layer_size, is_sorted); #endif // GOOGLE_CUDA #if TENSORFLOW_USE_ROCM deepmd::tabulate_fusion_se_a_grad_grad_gpu_rocm( dz_dy, table, table_info, em_x, em, dz_dy_dem_x, dz_dy_dem, nloc, - nnei, last_layer_size); + nnei, last_layer_size, is_sorted); #endif // TENSORFLOW_USE_ROCM OP_REQUIRES(context, (last_layer_size <= 1024), errors::InvalidArgument( @@ -347,7 +347,7 @@ class TabulateFusionSeAGradGradOp : public OpKernel { } else if (device == "CPU") { deepmd::tabulate_fusion_se_a_grad_grad_cpu(dz_dy, table, table_info, em_x, em, dz_dy_dem_x, dz_dy_dem, - nloc, nnei, last_layer_size); + nloc, nnei, last_layer_size, is_sorted); } } From b881afc11333ce57b4a8b13590b10c1cfaf88152 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 15 Jul 2023 03:12:22 +0000 Subject: [PATCH 45/48] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- source/op/tabulate_multi_device.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/op/tabulate_multi_device.cc b/source/op/tabulate_multi_device.cc index f9492fdba6..332fc36fef 100644 --- a/source/op/tabulate_multi_device.cc +++ b/source/op/tabulate_multi_device.cc @@ -345,9 +345,9 @@ class TabulateFusionSeAGradGradOp : public OpKernel { "In the process of model compression, the size of the " "last layer of embedding net must be less than 1024!")); } else if (device == "CPU") { - deepmd::tabulate_fusion_se_a_grad_grad_cpu(dz_dy, table, table_info, em_x, - em, dz_dy_dem_x, dz_dy_dem, - nloc, nnei, last_layer_size, is_sorted); + deepmd::tabulate_fusion_se_a_grad_grad_cpu( + dz_dy, table, table_info, em_x, em, dz_dy_dem_x, dz_dy_dem, nloc, + nnei, last_layer_size, is_sorted); } } From 38744bf76a70b863d84962cf017c3bbdaf1ee043 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 15 Jul 2023 02:24:49 -0400 Subject: [PATCH 46/48] fetch attr in the OP Signed-off-by: Jinzhe Zeng --- source/op/tabulate_multi_device.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/source/op/tabulate_multi_device.cc b/source/op/tabulate_multi_device.cc index 332fc36fef..0ac8745f64 100644 --- a/source/op/tabulate_multi_device.cc +++ b/source/op/tabulate_multi_device.cc @@ -294,7 +294,9 @@ template class TabulateFusionSeAGradGradOp : public OpKernel { public: explicit TabulateFusionSeAGradGradOp(OpKernelConstruction* context) - : OpKernel(context) {} + : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("is_sorted", &is_sorted)); + } void Compute(OpKernelContext* context) override { // Grab the 
input tensor int context_input_index = 0; @@ -363,6 +365,7 @@ class TabulateFusionSeAttenOp : public OpKernel { : OpKernel(context) { OP_REQUIRES_OK(context, context->GetAttr("last_layer_size", &last_layer_size)); + OP_REQUIRES_OK(context, context->GetAttr("is_sorted", &is_sorted)); } void Compute(OpKernelContext* context) override { deepmd::safe_compute( @@ -435,7 +438,9 @@ template class TabulateFusionSeAttenGradOp : public OpKernel { public: explicit TabulateFusionSeAttenGradOp(OpKernelConstruction* context) - : OpKernel(context) {} + : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("is_sorted", &is_sorted)); + } void Compute(OpKernelContext* context) override { deepmd::safe_compute( context, [this](OpKernelContext* context) { this->_Compute(context); }); From 06dddcf1462fe2d59084a651c39835dcfe6b7024 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 17 Jul 2023 16:14:48 -0400 Subject: [PATCH 47/48] fix compress training Signed-off-by: Jinzhe Zeng --- deepmd/descriptor/descriptor.py | 42 ------------------ deepmd/model/model.py | 77 ++++++++++++++++++++++++++++++--- deepmd/model/pairwise_dprc.py | 55 ++++++++++++++++++++++- 3 files changed, 125 insertions(+), 49 deletions(-) diff --git a/deepmd/descriptor/descriptor.py b/deepmd/descriptor/descriptor.py index 4578cab277..c885e73145 100644 --- a/deepmd/descriptor/descriptor.py +++ b/deepmd/descriptor/descriptor.py @@ -324,48 +324,6 @@ def prod_force_virial( The atomic virial """ - def get_feed_dict( - self, - coord_: tf.Tensor, - atype_: tf.Tensor, - natoms: tf.Tensor, - box: tf.Tensor, - mesh: tf.Tensor, - ) -> Dict[str, tf.Tensor]: - """Generate the feed_dict for current descriptor. - - Parameters - ---------- - coord_ : tf.Tensor - The coordinate of atoms - atype_ : tf.Tensor - The type of atoms - natoms : tf.Tensor - The number of atoms. This tensor has the length of Ntypes + 2 - natoms[0]: number of local atoms - natoms[1]: total number of atoms held by this processor - natoms[i]: 2 <= i < Ntypes+2, number of type i atoms - box : tf.Tensor - The box. Can be generated by deepmd.model.make_stat_input - mesh : tf.Tensor - For historical reasons, only the length of the Tensor matters. - if size of mesh == 6, pbc is assumed. - if size of mesh == 0, no-pbc is assumed. 
- - Returns - ------- - feed_dict : dict[str, tf.Tensor] - The output feed_dict of current descriptor - """ - feed_dict = { - "t_coord:0": coord_, - "t_type:0": atype_, - "t_natoms:0": natoms, - "t_box:0": box, - "t_mesh:0": mesh, - } - return feed_dict - def init_variables( self, graph: tf.Graph, diff --git a/deepmd/model/model.py b/deepmd/model/model.py index ef5e2426c8..f75c2a7a24 100644 --- a/deepmd/model/model.py +++ b/deepmd/model/model.py @@ -7,6 +7,7 @@ Enum, ) from typing import ( + Dict, List, Optional, Union, @@ -266,14 +267,31 @@ def build_descrpt( suffix=suffix, reuse=reuse, ) - dout = tf.identity(dout, name="o_descriptor") + dout = tf.identity(dout, name="o_descriptor" + suffix) else: tf.constant( - self.rcut, name="descrpt_attr/rcut", dtype=GLOBAL_TF_FLOAT_PRECISION + self.rcut, + name="descrpt_attr%s/rcut" % suffix, + dtype=GLOBAL_TF_FLOAT_PRECISION, ) - tf.constant(self.ntypes, name="descrpt_attr/ntypes", dtype=tf.int32) - feed_dict = self.descrpt.get_feed_dict(coord_, atype_, natoms, box, mesh) - return_elements = [*self.descrpt.get_tensor_names(), "o_descriptor:0"] + tf.constant( + self.ntypes, name="descrpt_attr%s/ntypes" % suffix, dtype=tf.int32 + ) + if "global_feed_dict" in input_dict: + feed_dict = input_dict["global_feed_dict"] + else: + extra_feed_dict = {} + if "fparam" in input_dict: + extra_feed_dict["fparam"] = input_dict["fparam"] + if "aparam" in input_dict: + extra_feed_dict["aparam"] = input_dict["aparam"] + feed_dict = self.get_feed_dict( + coord_, atype_, natoms, box, mesh, **extra_feed_dict + ) + return_elements = [ + *self.descrpt.get_tensor_names(suffix=suffix), + "o_descriptor%s:0" % suffix, + ] if frz_model is not None: imported_tensors = self._import_graph_def_from_frz_model( frz_model, feed_dict, return_elements @@ -390,6 +408,55 @@ def get_ntypes(self) -> int: def data_stat(self, data: dict): """Data staticis.""" + def get_feed_dict( + self, + coord_: tf.Tensor, + atype_: tf.Tensor, + natoms: tf.Tensor, + box: tf.Tensor, + mesh: tf.Tensor, + **kwargs, + ) -> Dict[str, tf.Tensor]: + """Generate the feed_dict for current descriptor. + + Parameters + ---------- + coord_ : tf.Tensor + The coordinate of atoms + atype_ : tf.Tensor + The type of atoms + natoms : tf.Tensor + The number of atoms. This tensor has the length of Ntypes + 2 + natoms[0]: number of local atoms + natoms[1]: total number of atoms held by this processor + natoms[i]: 2 <= i < Ntypes+2, number of type i atoms + box : tf.Tensor + The box. Can be generated by deepmd.model.make_stat_input + mesh : tf.Tensor + For historical reasons, only the length of the Tensor matters. + if size of mesh == 6, pbc is assumed. + if size of mesh == 0, no-pbc is assumed. + **kwargs : dict + The additional arguments + + Returns + ------- + feed_dict : dict[str, tf.Tensor] + The output feed_dict of current descriptor + """ + feed_dict = { + "t_coord:0": coord_, + "t_type:0": atype_, + "t_natoms:0": natoms, + "t_box:0": box, + "t_mesh:0": mesh, + } + if kwargs.get("fparam") is not None: + feed_dict["t_fparam:0"] = kwargs["fparam"] + if kwargs.get("aparam") is not None: + feed_dict["t_aparam:0"] = kwargs["aparam"] + return feed_dict + class StandardModel(Model): """Standard model, which must contain a descriptor and a fitting. 
diff --git a/deepmd/model/pairwise_dprc.py b/deepmd/model/pairwise_dprc.py index b51c022554..bf158434b0 100644 --- a/deepmd/model/pairwise_dprc.py +++ b/deepmd/model/pairwise_dprc.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( + Dict, List, Optional, Union, @@ -111,8 +112,11 @@ def build( suffix: str = "", reuse: Optional[bool] = None, ): - input_dict_qm = {} - input_dict_qmmm = {} + feed_dict = self.get_feed_dict( + coord_, atype_, natoms, box_, mesh, aparam=input_dict["aparam"] + ) + input_dict_qm = {"global_feed_dict": feed_dict} + input_dict_qmmm = {"global_feed_dict": feed_dict} with tf.variable_scope("model_attr" + suffix, reuse=reuse): t_tmap = tf.constant(" ".join(self.type_map), name="tmap", dtype=tf.string) t_mt = tf.constant(self.model_type, name="model_type", dtype=tf.string) @@ -320,6 +324,53 @@ def enable_compression(self, suffix: str = "") -> None: self.qm_model.enable_compression(suffix="_qm" + suffix) self.qmmm_model.enable_compression(suffix="_qmmm" + suffix) + def get_feed_dict( + self, + coord_: tf.Tensor, + atype_: tf.Tensor, + natoms: tf.Tensor, + box: tf.Tensor, + mesh: tf.Tensor, + **kwargs, + ) -> Dict[str, tf.Tensor]: + """Generate the feed_dict for current descriptor. + + Parameters + ---------- + coord_ : tf.Tensor + The coordinate of atoms + atype_ : tf.Tensor + The type of atoms + natoms : tf.Tensor + The number of atoms. This tensor has the length of Ntypes + 2 + natoms[0]: number of local atoms + natoms[1]: total number of atoms held by this processor + natoms[i]: 2 <= i < Ntypes+2, number of type i atoms + box : tf.Tensor + The box. Can be generated by deepmd.model.make_stat_input + mesh : tf.Tensor + For historical reasons, only the length of the Tensor matters. + if size of mesh == 6, pbc is assumed. + if size of mesh == 0, no-pbc is assumed. 
+ aparam : tf.Tensor + The parameters of the descriptor + **kwargs : dict + The keyword arguments + + Returns + ------- + feed_dict : dict[str, tf.Tensor] + The output feed_dict of current descriptor + """ + feed_dict = { + "t_coord:0": coord_, + "t_type:0": atype_, + "t_natoms:0": natoms, + "t_box:0": box, + "t_aparam:0": kwargs["aparam"], + } + return feed_dict + def gather_placeholder( params: tf.Tensor, indices: tf.Tensor, placeholder: float = 0.0, **kwargs From f41da343aada9eb55f1bd40d37c7972a6821e843 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 17 Jul 2023 16:23:05 -0400 Subject: [PATCH 48/48] merge documentation Signed-off-by: Jinzhe Zeng --- README.md | 1 - doc/model/dprc.md | 171 +++++++++++++++++++++++++++++++++++++ doc/model/index.md | 1 - doc/model/index.rst | 1 - doc/model/pairwise-dprc.md | 168 ------------------------------------ 5 files changed, 171 insertions(+), 171 deletions(-) delete mode 100644 doc/model/pairwise-dprc.md diff --git a/README.md b/README.md index 6fafff6979..72d67ca64b 100644 --- a/README.md +++ b/README.md @@ -109,7 +109,6 @@ A full [document](doc/train/train-input-auto.rst) on options in the training inp - [Train a Deep Potential model using `type embedding` approach](doc/model/train-se-e2-a-tebd.md) - [Deep potential long-range](doc/model/dplr.md) - [Deep Potential - Range Correction (DPRc)](doc/model/dprc.md) - - [Pairwise DPRc](doc/model/pairwise-dprc.md) - [Training](doc/train/index.md) - [Training a model](doc/train/training.md) - [Advanced options](doc/train/training-advanced.md) diff --git a/doc/model/dprc.md b/doc/model/dprc.md index 2e4c2220e8..b665facace 100644 --- a/doc/model/dprc.md +++ b/doc/model/dprc.md @@ -78,3 +78,174 @@ Note that {ref}`atom_ener ` only works when { ## Run MD simulations The DPRc model has the best practices with the [AMBER](../third-party/out-of-deepmd-kit.md#amber-interface-to-deepmd-kit) QM/MM module. An example is given by [GitLab RutgersLBSR/AmberDPRc](https://gitlab.com/RutgersLBSR/AmberDPRc/). In theory, DPRc is able to be used with any QM/MM package, as long as the DeePMD-kit package accepts QM atoms and MM atoms within the cutoff range and returns energies and forces. + +## Pairwise DPRc + +If one wants to correct from a low-level method into a full DFT level, and the system is too large to do full DFT calculation, one may try the experimental pairwise DPRc model. +In a pairwise DPRc model, the total energy is divided into QM internal energy and the sum of QM/MM energy for each MM residue $l$: + +$$ E = E_\text{QM} + \sum_{l} E_{\text{QM/MM},l} $$ + +In this way, the interaction between the QM region and each MM fragmentation can be computed and trained separately. +Thus, the pairwise DPRc model is divided into two sub-[DPRc models](./dprc.md). +`qm_model` is for the QM internal interaction and `qmmm_model` is for the QM/MM interaction. +The configuration for these two models is similar to the non-pairwise DPRc model. +It is noted that the [`se_atten` descriptor](./train-se-atten.md) should be used, as it is the only descriptor to support the mixed type. 
+ +```json +{ + "model": { + "type": "pairwise_dprc", + "type_map": [ + "C", + "P", + "O", + "H", + "OW", + "HW" + ], + "type_embedding": { + "neuron": [ + 8 + ], + "precision": "float32" + }, + "qm_model": { + "descriptor": { + "type": "se_atten", + "stripped_type_embedding": true, + "sel": 24, + "rcut_smth": 0.50, + "rcut": 9.00, + "attn_layer": 0, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 12, + "precision": "float32", + "seed": 1 + }, + "fitting_net": { + "type": "ener", + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "precision": "float32", + "atom_ener": [ + null, + null, + null, + null, + 0.0, + 0.0 + ], + "seed": 1 + } + }, + "qmmm_model": { + "descriptor": { + "type": "se_atten", + "stripped_type_embedding": true, + "sel": 27, + "rcut_smth": 0.50, + "rcut": 6.00, + "attn_layer": 0, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 12, + "set_davg_zero": true, + "exclude_types": [ + [ + 0, + 0 + ], + [ + 0, + 1 + ], + [ + 0, + 2 + ], + [ + 0, + 3 + ], + [ + 1, + 1 + ], + [ + 1, + 2 + ], + [ + 1, + 3 + ], + [ + 2, + 2 + ], + [ + 2, + 3 + ], + [ + 3, + 3 + ], + [ + 4, + 4 + ], + [ + 4, + 5 + ], + [ + 5, + 5 + ] + ], + "precision": "float32", + "seed": 1 + }, + "fitting_net": { + "type": "ener", + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "seed": 1, + "precision": "float32", + "atom_ener": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + } + } + } +} +``` + +The pairwise model needs information for MM residues. +The model uses [`aparam`](../data/system.md) with the shape of `nframes x natoms` to get the residue index. +The QM residue should always use `0` as the index. +For example, `0 0 0 1 1 1 2 2 2` means these 9 atoms are grouped into one QM residue and two MM residues. diff --git a/doc/model/index.md b/doc/model/index.md index bf0ee21822..c9c25c9812 100644 --- a/doc/model/index.md +++ b/doc/model/index.md @@ -15,4 +15,3 @@ - [Train a Deep Potential model using `type embedding` approach](train-se-e2-a-tebd.md) - [Deep potential long-range](dplr.md) - [Deep Potential - Range Correction (DPRc)](dprc.md) -- [Pairwise DPRc](pairwise-dprc.md) diff --git a/doc/model/index.rst b/doc/model/index.rst index 6a8c3f2d49..6a01a3b015 100644 --- a/doc/model/index.rst +++ b/doc/model/index.rst @@ -19,4 +19,3 @@ Model train-se-a-mask dplr dprc - pairwise-dprc diff --git a/doc/model/pairwise-dprc.md b/doc/model/pairwise-dprc.md deleted file mode 100644 index 3ba2ec91fc..0000000000 --- a/doc/model/pairwise-dprc.md +++ /dev/null @@ -1,168 +0,0 @@ -# Pairwise DPRc - -In a pairwise DPRc model, the total energy is divided into QM internal energy and the sum of QM/MM energy for each MM residue $l$: - -$$ E = E_\text{QM} + \sum_{l} E_{\text{QM/MM},l} $$ - -Thus, the pairwise DPRc model is divided into two sub-[DPRc models](./dprc.md). -`qm_model` is for the QM internal interaction and `qmmm_model` is for the QM/MM interaction. -The configuration for these two models is similar to [the non-pairwise DPRc model](./dprc.md). -It is noted that the [`se_atten` descriptor](./train-se-atten.md) should be used, as it is the only descriptor to support the mixed type. 
- -```json -{ - "model": { - "type": "pairwise_dprc", - "type_map": [ - "C", - "P", - "O", - "H", - "OW", - "HW" - ], - "type_embedding": { - "neuron": [ - 8 - ], - "precision": "float32" - }, - "qm_model": { - "descriptor": { - "type": "se_atten", - "stripped_type_embedding": true, - "sel": 24, - "rcut_smth": 0.50, - "rcut": 9.00, - "attn_layer": 0, - "neuron": [ - 25, - 50, - 100 - ], - "resnet_dt": false, - "axis_neuron": 12, - "precision": "float32", - "seed": 1 - }, - "fitting_net": { - "type": "ener", - "neuron": [ - 240, - 240, - 240 - ], - "resnet_dt": true, - "precision": "float32", - "atom_ener": [ - null, - null, - null, - null, - 0.0, - 0.0 - ], - "seed": 1 - } - }, - "qmmm_model": { - "descriptor": { - "type": "se_atten", - "stripped_type_embedding": true, - "sel": 27, - "rcut_smth": 0.50, - "rcut": 6.00, - "attn_layer": 0, - "neuron": [ - 25, - 50, - 100 - ], - "resnet_dt": false, - "axis_neuron": 12, - "set_davg_zero": true, - "exclude_types": [ - [ - 0, - 0 - ], - [ - 0, - 1 - ], - [ - 0, - 2 - ], - [ - 0, - 3 - ], - [ - 1, - 1 - ], - [ - 1, - 2 - ], - [ - 1, - 3 - ], - [ - 2, - 2 - ], - [ - 2, - 3 - ], - [ - 3, - 3 - ], - [ - 4, - 4 - ], - [ - 4, - 5 - ], - [ - 5, - 5 - ] - ], - "precision": "float32", - "seed": 1 - }, - "fitting_net": { - "type": "ener", - "neuron": [ - 240, - 240, - 240 - ], - "resnet_dt": true, - "seed": 1, - "precision": "float32", - "atom_ener": [ - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0 - ] - } - } - } -} -``` - -The pairwise model needs information for MM residues. -The model uses [`aparam`](../data/system.md) with the shape of `nframes x natoms` to get the residue index. -The QM residue should always use `0` as the index. -For example, `0 0 0 1 1 1 2 2 2` means these 9 atoms are grouped into one QM residue and two MM residues.
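
Editor's note (not part of the patch series): the inference-side usage that the new `TestPairwiseCompress` test and the merged DPRc documentation describe can be summarized in a short sketch. This is a minimal illustration under stated assumptions — the frozen model filename is a hypothetical placeholder, and the coordinates and residue layout in `aparam` are made-up examples; only the `DeepPotential.eval(..., aparam=...)` call pattern and the output shapes are taken from the test added in this series.

```python
# Minimal sketch (not part of the patches): evaluating a frozen pairwise-DPRc
# model through the Python inference API, mirroring TestPairwiseCompress.
# Assumptions: "dp-pairwise-dprc.pb" is a hypothetical frozen-model path; the
# coordinates and the residue indices below are illustrative only.
import numpy as np

from deepmd import DeepPotential

dp = DeepPotential("dp-pairwise-dprc.pb")  # hypothetical frozen model

natoms = 21
# Arbitrary non-periodic coordinates, shape (nframes, natoms, 3).
coords = np.random.default_rng(0).uniform(0.0, 10.0, size=(1, natoms, 3))
atype = [0, 1, 0, 2, 0, 3, 3, 3, 3, 3, 3, 3, 4, 5, 5, 4, 5, 5, 4, 5, 5]
# Per-atom residue index passed as aparam: 0 marks the QM residue,
# 1, 2, 3 mark the MM residues (one index per atom, nframes x natoms).
aparam = np.array(
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3],
    dtype=np.float64,
)

# box=None selects the non-periodic (nopbc) code path, as in the test.
# Returned shapes follow the test's assertions:
# energy (nframes, 1), force (nframes, natoms, 3), virial (nframes, 9).
e, f, v = dp.eval(coords, None, atype, atomic=False, aparam=aparam)
```

Note that, per the "sort aparam" patches in this series, the per-atom `aparam` is reordered together with the coordinates by both the Python and C++ interfaces, so the caller supplies residue indices in the original atom order.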