diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index f164758304..45b689cb3e 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -42,6 +42,7 @@ jobs: DP_BUILD_TESTING: 1 DP_VARIANT: cuda CUDA_PATH: /usr/local/cuda-12.2 + NUM_WORKERS: 0 - run: dp --version - run: python -m pytest -s --cov=deepmd source/tests --durations=0 - run: source/install/test_cc_local.sh diff --git a/deepmd/pt/utils/dataloader.py b/deepmd/pt/utils/dataloader.py index 7c95f66c9c..7a6684e82e 100644 --- a/deepmd/pt/utils/dataloader.py +++ b/deepmd/pt/utils/dataloader.py @@ -276,13 +276,11 @@ def collate_batch(batch): result[key] = torch.zeros( (n_frames, natoms_extended, 3), dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.PREPROCESS_DEVICE, ) else: result[key] = torch.zeros( (n_frames, natoms_extended), dtype=torch.long, - device=env.PREPROCESS_DEVICE, ) for i in range(len(batch)): natoms_tmp = list[i].shape[0] diff --git a/deepmd/pt/utils/dataset.py b/deepmd/pt/utils/dataset.py index c104e64491..68d4a09ce4 100644 --- a/deepmd/pt/utils/dataset.py +++ b/deepmd/pt/utils/dataset.py @@ -477,11 +477,7 @@ def preprocess(self, batch): if "find_" in kk: pass else: - batch[kk] = torch.tensor( - batch[kk], - dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.PREPROCESS_DEVICE, - ) + batch[kk] = torch.tensor(batch[kk], dtype=env.GLOBAL_PT_FLOAT_PRECISION) if self._data_dict[kk]["atomic"]: batch[kk] = batch[kk].view( n_frames, -1, self._data_dict[kk]["ndof"] @@ -489,9 +485,7 @@ def preprocess(self, batch): for kk in ["type", "real_natoms_vec"]: if kk in batch.keys(): - batch[kk] = torch.tensor( - batch[kk], dtype=torch.long, device=env.PREPROCESS_DEVICE - ) + batch[kk] = torch.tensor(batch[kk], dtype=torch.long) batch["atype"] = batch.pop("type") keys = ["nlist", "nlist_loc", "nlist_type", "shift", "mapping"] @@ -524,13 +518,9 @@ def preprocess(self, batch): batch["nlist_type"] = nlist_type natoms_extended = max([item.shape[0] for item in shift]) batch["shift"] = torch.zeros( - (n_frames, natoms_extended, 3), - dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.PREPROCESS_DEVICE, - ) - batch["mapping"] = torch.zeros( - (n_frames, natoms_extended), dtype=torch.long, device=env.PREPROCESS_DEVICE + (n_frames, natoms_extended, 3), dtype=env.GLOBAL_PT_FLOAT_PRECISION ) + batch["mapping"] = torch.zeros((n_frames, natoms_extended), dtype=torch.long) for i in range(len(shift)): natoms_tmp = shift[i].shape[0] batch["shift"][i, :natoms_tmp] = shift[i] @@ -566,17 +556,13 @@ def single_preprocess(self, batch, sid): pass else: batch[kk] = torch.tensor( - batch[kk][sid], - dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.PREPROCESS_DEVICE, + batch[kk][sid], dtype=env.GLOBAL_PT_FLOAT_PRECISION ) if self._data_dict[kk]["atomic"]: batch[kk] = batch[kk].view(-1, self._data_dict[kk]["ndof"]) for kk in ["type", "real_natoms_vec"]: if kk in batch.keys(): - batch[kk] = torch.tensor( - batch[kk][sid], dtype=torch.long, device=env.PREPROCESS_DEVICE - ) + batch[kk] = torch.tensor(batch[kk][sid], dtype=torch.long) clean_coord = batch.pop("coord") clean_type = batch.pop("type") nloc = clean_type.shape[0] @@ -670,30 +656,22 @@ def single_preprocess(self, batch, sid): NotImplementedError(f"Unknown noise type {self.noise_type}!") noised_coord = _clean_coord.clone().detach() noised_coord[coord_mask] += noise_on_coord - batch["coord_mask"] = torch.tensor( - coord_mask, dtype=torch.bool, device=env.PREPROCESS_DEVICE - ) + batch["coord_mask"] = torch.tensor(coord_mask, dtype=torch.bool) else: noised_coord = _clean_coord batch["coord_mask"] = torch.tensor( - np.zeros_like(coord_mask, dtype=bool), - dtype=torch.bool, - device=env.PREPROCESS_DEVICE, + np.zeros_like(coord_mask, dtype=bool), dtype=torch.bool ) # add mask for type if self.mask_type: masked_type = clean_type.clone().detach() masked_type[type_mask] = self.mask_type_idx - batch["type_mask"] = torch.tensor( - type_mask, dtype=torch.bool, device=env.PREPROCESS_DEVICE - ) + batch["type_mask"] = torch.tensor(type_mask, dtype=torch.bool) else: masked_type = clean_type batch["type_mask"] = torch.tensor( - np.zeros_like(type_mask, dtype=bool), - dtype=torch.bool, - device=env.PREPROCESS_DEVICE, + np.zeros_like(type_mask, dtype=bool), dtype=torch.bool ) if self.pbc: _coord = normalize_coord(noised_coord, region, nloc) @@ -803,7 +781,7 @@ def __len__(self): def __getitem__(self, index): """Get a frame from the selected system.""" b_data = self._data_system._get_item(index) - b_data["natoms"] = torch.tensor(self._natoms_vec, device=env.PREPROCESS_DEVICE) + b_data["natoms"] = torch.tensor(self._natoms_vec) return b_data @@ -878,9 +856,7 @@ def __getitem__(self, index=None): if index is None: index = dp_random.choice(np.arange(self.nsystems), p=self.probs) b_data = self._data_systems[index].get_batch(self._batch_size) - b_data["natoms"] = torch.tensor( - self._natoms_vec[index], device=env.PREPROCESS_DEVICE - ) + b_data["natoms"] = torch.tensor(self._natoms_vec[index]) batch_size = b_data["coord"].shape[0] b_data["natoms"] = b_data["natoms"].unsqueeze(0).expand(batch_size, -1) return b_data @@ -891,9 +867,7 @@ def get_training_batch(self, index=None): if index is None: index = dp_random.choice(np.arange(self.nsystems), p=self.probs) b_data = self._data_systems[index].get_batch_for_train(self._batch_size) - b_data["natoms"] = torch.tensor( - self._natoms_vec[index], device=env.PREPROCESS_DEVICE - ) + b_data["natoms"] = torch.tensor(self._natoms_vec[index]) batch_size = b_data["coord"].shape[0] b_data["natoms"] = b_data["natoms"].unsqueeze(0).expand(batch_size, -1) return b_data @@ -902,10 +876,7 @@ def get_batch(self, sys_idx=None): """TF-compatible batch for testing.""" pt_batch = self[sys_idx] np_batch = {} - for key in ["coord", "box", "force", "energy", "virial"]: - if key in pt_batch.keys(): - np_batch[key] = pt_batch[key].cpu().numpy() - for key in ["atype", "natoms"]: + for key in ["coord", "box", "force", "energy", "virial", "atype", "natoms"]: if key in pt_batch.keys(): np_batch[key] = pt_batch[key].cpu().numpy() batch_size = pt_batch["coord"].shape[0] diff --git a/deepmd/pt/utils/env.py b/deepmd/pt/utils/env.py index 6fa72943c7..559dba0167 100644 --- a/deepmd/pt/utils/env.py +++ b/deepmd/pt/utils/env.py @@ -24,11 +24,6 @@ else: DEVICE = torch.device(f"cuda:{LOCAL_RANK}") -if os.environ.get("PREPROCESS_DEVICE") == "gpu": - PREPROCESS_DEVICE = torch.device(f"cuda:{LOCAL_RANK}") -else: - PREPROCESS_DEVICE = torch.device("cpu") - JIT = False CACHE_PER_SYS = 5 # keep at most so many sets per sys in memory ENERGY_BIAS_TRAINABLE = True diff --git a/deepmd/pt/utils/preprocess.py b/deepmd/pt/utils/preprocess.py index 463ac112ad..18c798138e 100644 --- a/deepmd/pt/utils/preprocess.py +++ b/deepmd/pt/utils/preprocess.py @@ -99,7 +99,7 @@ def build_inside_clist(coord, region: Region3D, ncell): cell_offset[cell_offset < 0] = 0 delta = cell_offset - ncell a2c = compute_serial_cid(cell_offset, ncell) # cell id of atoms - arange = torch.arange(0, loc_ncell, 1, device=env.PREPROCESS_DEVICE) + arange = torch.arange(0, loc_ncell, 1) cellid = a2c == arange.unsqueeze(-1) # one hot cellid c2a = cellid.nonzero() lst = [] @@ -131,18 +131,12 @@ def append_neighbors(coord, region: Region3D, atype, rcut: float): # add ghost atoms a2c, c2a = build_inside_clist(coord, region, ncell) - xi = torch.arange(-ngcell[0], ncell[0] + ngcell[0], 1, device=env.PREPROCESS_DEVICE) - yi = torch.arange(-ngcell[1], ncell[1] + ngcell[1], 1, device=env.PREPROCESS_DEVICE) - zi = torch.arange(-ngcell[2], ncell[2] + ngcell[2], 1, device=env.PREPROCESS_DEVICE) - xyz = xi.view(-1, 1, 1, 1) * torch.tensor( - [1, 0, 0], dtype=torch.long, device=env.PREPROCESS_DEVICE - ) - xyz = xyz + yi.view(1, -1, 1, 1) * torch.tensor( - [0, 1, 0], dtype=torch.long, device=env.PREPROCESS_DEVICE - ) - xyz = xyz + zi.view(1, 1, -1, 1) * torch.tensor( - [0, 0, 1], dtype=torch.long, device=env.PREPROCESS_DEVICE - ) + xi = torch.arange(-ngcell[0], ncell[0] + ngcell[0], 1) + yi = torch.arange(-ngcell[1], ncell[1] + ngcell[1], 1) + zi = torch.arange(-ngcell[2], ncell[2] + ngcell[2], 1) + xyz = xi.view(-1, 1, 1, 1) * torch.tensor([1, 0, 0], dtype=torch.long) + xyz = xyz + yi.view(1, -1, 1, 1) * torch.tensor([0, 1, 0], dtype=torch.long) + xyz = xyz + zi.view(1, 1, -1, 1) * torch.tensor([0, 0, 1], dtype=torch.long) xyz = xyz.view(-1, 3) mask_a = (xyz >= 0).all(dim=-1) mask_b = (xyz < ncell).all(dim=-1) @@ -165,9 +159,7 @@ def append_neighbors(coord, region: Region3D, atype, rcut: float): merged_coord = torch.cat([coord, tmp_coord]) merged_coord_shift = torch.cat([torch.zeros_like(coord), coord_shift[tmp]]) merged_atype = torch.cat([atype, tmp_atype]) - merged_mapping = torch.cat( - [torch.arange(atype.numel(), device=env.PREPROCESS_DEVICE), aid] - ) + merged_mapping = torch.cat([torch.arange(atype.numel()), aid]) return merged_coord_shift, merged_atype, merged_mapping @@ -188,22 +180,16 @@ def build_neighbor_list( distance = coord_l - coord_r distance = torch.linalg.norm(distance, dim=-1) DISTANCE_INF = distance.max().detach() + rcut - distance[:nloc, :nloc] += ( - torch.eye(nloc, dtype=torch.bool, device=env.PREPROCESS_DEVICE) * DISTANCE_INF - ) + distance[:nloc, :nloc] += torch.eye(nloc, dtype=torch.bool) * DISTANCE_INF if min_check: if distance.min().abs() < 1e-6: RuntimeError("Atom dist too close!") if not type_split: sec = sec[-1:] lst = [] - nlist = torch.zeros((nloc, sec[-1].item()), device=env.PREPROCESS_DEVICE).long() - 1 - nlist_loc = ( - torch.zeros((nloc, sec[-1].item()), device=env.PREPROCESS_DEVICE).long() - 1 - ) - nlist_type = ( - torch.zeros((nloc, sec[-1].item()), device=env.PREPROCESS_DEVICE).long() - 1 - ) + nlist = torch.zeros((nloc, sec[-1].item())).long() - 1 + nlist_loc = torch.zeros((nloc, sec[-1].item())).long() - 1 + nlist_type = torch.zeros((nloc, sec[-1].item())).long() - 1 for i, nnei in enumerate(sec): if i > 0: nnei = nnei - sec[i - 1] @@ -216,11 +202,8 @@ def build_neighbor_list( _sorted, indices = torch.topk(tmp, nnei, dim=1, largest=False) else: # when nnei > nall - indices = torch.zeros((nloc, nnei), device=env.PREPROCESS_DEVICE).long() - 1 - _sorted = ( - torch.ones((nloc, nnei), device=env.PREPROCESS_DEVICE).long() - * DISTANCE_INF - ) + indices = torch.zeros((nloc, nnei)).long() - 1 + _sorted = torch.ones((nloc, nnei)).long() * DISTANCE_INF _sorted_nnei, indices_nnei = torch.topk( tmp, tmp.shape[1], dim=1, largest=False ) @@ -284,7 +267,7 @@ def make_env_mat( else: merged_coord_shift = torch.zeros_like(coord) merged_atype = atype.clone() - merged_mapping = torch.arange(atype.numel(), device=env.PREPROCESS_DEVICE) + merged_mapping = torch.arange(atype.numel()) merged_coord = coord.clone() # build nlist diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 18ee4d9abe..eec7179bcd 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -62,14 +62,9 @@ def make_stat_input(datasets, dataloaders, nbatches): shape = torch.zeros( (n_frames, extend, 3), dtype=env.GLOBAL_PT_FLOAT_PRECISION, - device=env.PREPROCESS_DEVICE, ) else: - shape = torch.zeros( - (n_frames, extend), - dtype=torch.long, - device=env.PREPROCESS_DEVICE, - ) + shape = torch.zeros((n_frames, extend), dtype=torch.long) for i in range(len(item)): natoms_tmp = l[i].shape[0] shape[i, :natoms_tmp] = l[i] diff --git a/source/tests/pt/test_descriptor.py b/source/tests/pt/test_descriptor.py index da38cf007f..2dd996349b 100644 --- a/source/tests/pt/test_descriptor.py +++ b/source/tests/pt/test_descriptor.py @@ -18,6 +18,7 @@ ) from deepmd.pt.utils import ( dp_random, + env, ) from deepmd.pt.utils.dataset import ( DeepmdDataSet, @@ -112,29 +113,33 @@ def setUp(self): def test_consistency(self): avg_zero = torch.zeros( - [self.ntypes, self.nnei * 4], dtype=GLOBAL_PT_FLOAT_PRECISION + [self.ntypes, self.nnei * 4], + dtype=GLOBAL_PT_FLOAT_PRECISION, + device=env.DEVICE, ) std_ones = torch.ones( - [self.ntypes, self.nnei * 4], dtype=GLOBAL_PT_FLOAT_PRECISION + [self.ntypes, self.nnei * 4], + dtype=GLOBAL_PT_FLOAT_PRECISION, + device=env.DEVICE, ) base_d, base_force, nlist = base_se_a( rcut=self.rcut, rcut_smth=self.rcut_smth, sel=self.sel, batch=self.np_batch, - mean=avg_zero, - stddev=std_ones, + mean=avg_zero.detach().cpu(), + stddev=std_ones.detach().cpu(), ) - pt_coord = self.pt_batch["coord"] + pt_coord = self.pt_batch["coord"].to(env.DEVICE) pt_coord.requires_grad_(True) - index = self.pt_batch["mapping"].unsqueeze(-1).expand(-1, -1, 3) + index = self.pt_batch["mapping"].unsqueeze(-1).expand(-1, -1, 3).to(env.DEVICE) extended_coord = torch.gather(pt_coord, dim=1, index=index) - extended_coord = extended_coord - self.pt_batch["shift"] + extended_coord = extended_coord - self.pt_batch["shift"].to(env.DEVICE) my_d, _, _ = prod_env_mat_se_a( extended_coord.to(DEVICE), - self.pt_batch["nlist"], - self.pt_batch["atype"], + self.pt_batch["nlist"].to(env.DEVICE), + self.pt_batch["atype"].to(env.DEVICE), avg_zero.reshape([-1, self.nnei, 4]).to(DEVICE), std_ones.reshape([-1, self.nnei, 4]).to(DEVICE), self.rcut, diff --git a/source/tests/pt/test_descriptor_dpa1.py b/source/tests/pt/test_descriptor_dpa1.py index 689fa7e49c..725369d68d 100644 --- a/source/tests/pt/test_descriptor_dpa1.py +++ b/source/tests/pt/test_descriptor_dpa1.py @@ -243,7 +243,7 @@ def test_descriptor_block(self): dparams["ntypes"] = ntypes des = DescrptBlockSeAtten( **dparams, - ) + ).to(env.DEVICE) des.load_state_dict(torch.load(self.file_model_param)) rcut = dparams["rcut"] nsel = dparams["sel"] @@ -260,7 +260,7 @@ def test_descriptor_block(self): extended_coord, extended_atype, nloc, rcut, nsel, distinguish_types=False ) # handel type_embedding - type_embedding = TypeEmbedNet(ntypes, 8) + type_embedding = TypeEmbedNet(ntypes, 8).to(env.DEVICE) type_embedding.load_state_dict(torch.load(self.file_type_embed)) ## to save model parameters @@ -293,7 +293,7 @@ def test_descriptor(self): dparams["concat_output_tebd"] = False des = DescrptDPA1( **dparams, - ) + ).to(env.DEVICE) target_dict = des.state_dict() source_dict = torch.load(self.file_model_param) type_embd_dict = torch.load(self.file_type_embed) @@ -337,7 +337,7 @@ def test_descriptor(self): dparams["concat_output_tebd"] = True des = DescrptDPA1( **dparams, - ) + ).to(env.DEVICE) descriptor, env_mat, diff, rot_mat, sw = des( extended_coord, extended_atype, diff --git a/source/tests/pt/test_descriptor_dpa2.py b/source/tests/pt/test_descriptor_dpa2.py index 45c95961fe..aa6b16964e 100644 --- a/source/tests/pt/test_descriptor_dpa2.py +++ b/source/tests/pt/test_descriptor_dpa2.py @@ -124,7 +124,7 @@ def test_descriptor_hyb(self): dlist, ntypes, hybrid_mode=dparams["hybrid_mode"], - ) + ).to(env.DEVICE) model_dict = torch.load(self.file_model_param) # type_embd of repformer is removed model_dict.pop("descriptor_list.1.type_embd.embedding.weight") @@ -158,7 +158,7 @@ def test_descriptor_hyb(self): ) nlist = torch.cat(nlist_list, -1) # handel type_embedding - type_embedding = TypeEmbedNet(ntypes, 8) + type_embedding = TypeEmbedNet(ntypes, 8).to(env.DEVICE) type_embedding.load_state_dict(torch.load(self.file_type_embed)) ## to save model parameters @@ -186,7 +186,7 @@ def test_descriptor(self): dparams["concat_output_tebd"] = False des = DescrptDPA2( **dparams, - ) + ).to(env.DEVICE) target_dict = des.state_dict() source_dict = torch.load(self.file_model_param) # type_embd of repformer is removed @@ -232,7 +232,7 @@ def test_descriptor(self): dparams["concat_output_tebd"] = True des = DescrptDPA2( **dparams, - ) + ).to(env.DEVICE) descriptor, env_mat, diff, rot_mat, sw = des( extended_coord, extended_atype, diff --git a/source/tests/pt/test_embedding_net.py b/source/tests/pt/test_embedding_net.py index fc98ddc9f9..407f4949b5 100644 --- a/source/tests/pt/test_embedding_net.py +++ b/source/tests/pt/test_embedding_net.py @@ -8,6 +8,10 @@ import tensorflow.compat.v1 as tf import torch +from deepmd.pt.utils import ( + env, +) + tf.disable_eager_execution() from pathlib import ( @@ -148,18 +152,22 @@ def test_consistency(self): # Keep parameter value consistency between 2 implentations param.data.copy_(torch.from_numpy(var)) - pt_coord = self.torch_batch["coord"] + pt_coord = self.torch_batch["coord"].to(env.DEVICE) pt_coord.requires_grad_(True) - index = self.torch_batch["mapping"].unsqueeze(-1).expand(-1, -1, 3) + index = ( + self.torch_batch["mapping"].unsqueeze(-1).expand(-1, -1, 3).to(env.DEVICE) + ) extended_coord = torch.gather(pt_coord, dim=1, index=index) - extended_coord = extended_coord - self.torch_batch["shift"] + extended_coord = extended_coord - self.torch_batch["shift"].to(env.DEVICE) extended_atype = torch.gather( - self.torch_batch["atype"], dim=1, index=self.torch_batch["mapping"] + self.torch_batch["atype"].to(env.DEVICE), + dim=1, + index=self.torch_batch["mapping"].to(env.DEVICE), ) descriptor_out, _, _, _, _ = descriptor( extended_coord, extended_atype, - self.torch_batch["nlist"], + self.torch_batch["nlist"].to(env.DEVICE), ) my_embedding = descriptor_out.cpu().detach().numpy() fake_energy = torch.sum(descriptor_out) diff --git a/source/tests/pt/test_fitting_net.py b/source/tests/pt/test_fitting_net.py index ed2c428de5..e12a397347 100644 --- a/source/tests/pt/test_fitting_net.py +++ b/source/tests/pt/test_fitting_net.py @@ -11,6 +11,9 @@ from deepmd.pt.model.task import ( EnergyFittingNet, ) +from deepmd.pt.utils import ( + env, +) from deepmd.pt.utils.env import ( GLOBAL_NP_FLOAT_PRECISION, ) @@ -105,7 +108,7 @@ def test_consistency(self): neuron=self.n_neuron, bias_atom_e=self.dp_fn.bias_atom_e, distinguish_types=True, - ) + ).to(env.DEVICE) for name, param in my_fn.named_parameters(): matched = re.match( "filter_layers\.networks\.(\d).layers\.(\d)\.([a-z]+)", name @@ -129,9 +132,9 @@ def test_consistency(self): embedding = torch.from_numpy(self.embedding) embedding = embedding.view(4, -1, self.embedding_width) atype = torch.from_numpy(self.atype) - ret = my_fn(embedding, atype) + ret = my_fn(embedding.to(env.DEVICE), atype.to(env.DEVICE)) my_energy = ret["energy"] - my_energy = my_energy.detach() + my_energy = my_energy.detach().cpu() np.testing.assert_allclose(dp_energy, my_energy.numpy().reshape([-1])) diff --git a/source/tests/pt/test_mlp.py b/source/tests/pt/test_mlp.py index c06047b2a5..26f0041bf9 100644 --- a/source/tests/pt/test_mlp.py +++ b/source/tests/pt/test_mlp.py @@ -5,6 +5,9 @@ import numpy as np import torch +from deepmd.pt.utils import ( + env, +) from deepmd.pt.utils.env import ( PRECISION_DICT, ) @@ -104,23 +107,27 @@ def test_match_native_layer( inp_shap = ashp + inp_shap rtol, atol = get_tols(prec) dtype = PRECISION_DICT[prec] - xx = torch.arange(np.prod(inp_shap), dtype=dtype).view(inp_shap) + xx = torch.arange(np.prod(inp_shap), dtype=dtype, device=env.DEVICE).view( + inp_shap + ) # def mlp layer - ml = MLPLayer(ninp, nout, bias, ut, ac, resnet, precision=prec) + ml = MLPLayer(ninp, nout, bias, ut, ac, resnet, precision=prec).to( + env.DEVICE + ) # check consistency nl = NativeLayer.deserialize(ml.serialize()) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - nl.call(xx.detach().numpy()), + ml.forward(xx).detach().cpu().numpy(), + nl.call(xx.detach().cpu().numpy()), rtol=rtol, atol=atol, err_msg=f"(i={ninp}, o={nout}) bias={bias} use_dt={ut} act={ac} resnet={resnet} prec={prec}", ) # check self-consistency - ml1 = MLPLayer.deserialize(ml.serialize()) + ml1 = MLPLayer.deserialize(ml.serialize()).to(env.DEVICE) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - ml1.forward(xx).detach().numpy(), + ml.forward(xx).detach().cpu().numpy(), + ml1.forward(xx).detach().cpu().numpy(), rtol=rtol, atol=atol, err_msg=f"(i={ninp}, o={nout}) bias={bias} use_dt={ut} act={ac} resnet={resnet} prec={prec}", @@ -157,7 +164,9 @@ def test_match_native_net( inp_shap = ashp + inp_shap rtol, atol = get_tols(prec) dtype = PRECISION_DICT[prec] - xx = torch.arange(np.prod(inp_shap), dtype=dtype).view(inp_shap) + xx = torch.arange(np.prod(inp_shap), dtype=dtype, device=env.DEVICE).view( + inp_shap + ) # def MLP layers = [] for ii in range(1, len(ndims)): @@ -166,21 +175,21 @@ def test_match_native_net( ndims[ii - 1], ndims[ii], bias, ut, ac, resnet, precision=prec ).serialize() ) - ml = MLP(layers) + ml = MLP(layers).to(env.DEVICE) # check consistency nl = NativeNet.deserialize(ml.serialize()) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - nl.call(xx.detach().numpy()), + ml.forward(xx).detach().cpu().numpy(), + nl.call(xx.detach().cpu().numpy()), rtol=rtol, atol=atol, err_msg=f"net={ndims} bias={bias} use_dt={ut} act={ac} resnet={resnet} prec={prec}", ) # check self-consistency - ml1 = MLP.deserialize(ml.serialize()) + ml1 = MLP.deserialize(ml.serialize()).to(env.DEVICE) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - ml1.forward(xx).detach().numpy(), + ml.forward(xx).detach().cpu().numpy(), + ml1.forward(xx).detach().cpu().numpy(), rtol=rtol, atol=atol, err_msg=f"net={ndims} bias={bias} use_dt={ut} act={ac} resnet={resnet} prec={prec}", @@ -219,23 +228,23 @@ def test_match_embedding_net( # input rtol, atol = get_tols(prec) dtype = PRECISION_DICT[prec] - xx = torch.arange(idim, dtype=dtype) + xx = torch.arange(idim, dtype=dtype, device=env.DEVICE) # def MLP - ml = EmbeddingNet(idim, nn, act, idt, prec) + ml = EmbeddingNet(idim, nn, act, idt, prec).to(env.DEVICE) # check consistency nl = DPEmbeddingNet.deserialize(ml.serialize()) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - nl.call(xx.detach().numpy()), + ml.forward(xx).detach().cpu().numpy(), + nl.call(xx.detach().cpu().numpy()), rtol=rtol, atol=atol, err_msg=f"idim={idim} nn={nn} use_dt={idt} act={act} prec={prec}", ) # check self-consistency - ml1 = EmbeddingNet.deserialize(ml.serialize()) + ml1 = EmbeddingNet.deserialize(ml.serialize()).to(env.DEVICE) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - ml1.forward(xx).detach().numpy(), + ml.forward(xx).detach().cpu().numpy(), + ml1.forward(xx).detach().cpu().numpy(), rtol=rtol, atol=atol, err_msg=f"idim={idim} nn={nn} use_dt={idt} act={act} prec={prec}", @@ -246,8 +255,8 @@ def test_jit( ): for idim, nn, act, idt, prec in self.test_cases: # def MLP - ml = EmbeddingNet(idim, nn, act, idt, prec) - ml1 = EmbeddingNet.deserialize(ml.serialize()) + ml = EmbeddingNet(idim, nn, act, idt, prec).to(env.DEVICE) + ml1 = EmbeddingNet.deserialize(ml.serialize()).to(env.DEVICE) model = torch.jit.script(ml) model = torch.jit.script(ml1) @@ -272,7 +281,7 @@ def test_match_fitting_net( # input rtol, atol = get_tols(prec) dtype = PRECISION_DICT[prec] - xx = torch.arange(idim, dtype=dtype) + xx = torch.arange(idim, dtype=dtype, device=env.DEVICE) # def MLP ml = FittingNet( idim, @@ -282,21 +291,21 @@ def test_match_fitting_net( resnet_dt=idt, precision=prec, bias_out=ob, - ) + ).to(env.DEVICE) # check consistency nl = DPFittingNet.deserialize(ml.serialize()) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - nl.call(xx.detach().numpy()), + ml.forward(xx).detach().cpu().numpy(), + nl.call(xx.detach().cpu().numpy()), rtol=rtol, atol=atol, err_msg=f"idim={idim} nn={nn} use_dt={idt} act={act} prec={prec}", ) # check self-consistency - ml1 = FittingNet.deserialize(ml.serialize()) + ml1 = FittingNet.deserialize(ml.serialize()).to(env.DEVICE) np.testing.assert_allclose( - ml.forward(xx).detach().numpy(), - ml1.forward(xx).detach().numpy(), + ml.forward(xx).detach().cpu().numpy(), + ml1.forward(xx).detach().cpu().numpy(), rtol=rtol, atol=atol, err_msg=f"idim={idim} nn={nn} use_dt={idt} act={act} prec={prec}", @@ -315,7 +324,7 @@ def test_jit( resnet_dt=idt, precision=prec, bias_out=ob, - ) - ml1 = FittingNet.deserialize(ml.serialize()) + ).to(env.DEVICE) + ml1 = FittingNet.deserialize(ml.serialize()).to(env.DEVICE) model = torch.jit.script(ml) model = torch.jit.script(ml1) diff --git a/source/tests/pt/test_model.py b/source/tests/pt/test_model.py index c6595e6471..e87a53969c 100644 --- a/source/tests/pt/test_model.py +++ b/source/tests/pt/test_model.py @@ -7,6 +7,10 @@ import tensorflow.compat.v1 as tf import torch +from deepmd.pt.utils import ( + env, +) + tf.disable_eager_execution() from pathlib import ( @@ -340,10 +344,16 @@ def test_consistency(self): batch["natoms_vec"], device=batch["coord"].device ).unsqueeze(0) model_predict = my_model( - batch["coord"], batch["atype"], batch["box"], do_atomic_virial=True + batch["coord"].to(env.DEVICE), + batch["atype"].to(env.DEVICE), + batch["box"].to(env.DEVICE), + do_atomic_virial=True, ) model_predict_1 = my_model( - batch["coord"], batch["atype"], batch["box"], do_atomic_virial=False + batch["coord"].to(env.DEVICE), + batch["atype"].to(env.DEVICE), + batch["box"].to(env.DEVICE), + do_atomic_virial=False, ) p_energy, p_force, p_virial, p_atomic_virial = ( model_predict["energy"], @@ -357,8 +367,8 @@ def test_consistency(self): "force": p_force, } label = { - "energy": batch["energy"], - "force": batch["force"], + "energy": batch["energy"].to(env.DEVICE), + "force": batch["force"].to(env.DEVICE), } loss, _ = my_loss(model_pred, label, int(batch["natoms"][0, 0]), cur_lr) np.testing.assert_allclose( diff --git a/source/tests/pt/test_saveload_dpa1.py b/source/tests/pt/test_saveload_dpa1.py index d1043f7029..1b4c41a204 100644 --- a/source/tests/pt/test_saveload_dpa1.py +++ b/source/tests/pt/test_saveload_dpa1.py @@ -129,13 +129,13 @@ def get_data(self): input_dict = {} for item in ["coord", "atype", "box"]: if item in batch_data: - input_dict[item] = batch_data[item] + input_dict[item] = batch_data[item].to(env.DEVICE) else: input_dict[item] = None label_dict = {} for item in ["energy", "force", "virial"]: if item in batch_data: - label_dict[item] = batch_data[item] + label_dict[item] = batch_data[item].to(env.DEVICE) return input_dict, label_dict def test_saveload(self): diff --git a/source/tests/pt/test_saveload_se_e2_a.py b/source/tests/pt/test_saveload_se_e2_a.py index 95d7f97a88..7f8364a16f 100644 --- a/source/tests/pt/test_saveload_se_e2_a.py +++ b/source/tests/pt/test_saveload_se_e2_a.py @@ -123,13 +123,13 @@ def get_data(self): input_dict = {} for item in ["coord", "atype", "box"]: if item in batch_data: - input_dict[item] = batch_data[item] + input_dict[item] = batch_data[item].to(env.DEVICE) else: input_dict[item] = None label_dict = {} for item in ["energy", "force", "virial"]: if item in batch_data: - label_dict[item] = batch_data[item] + label_dict[item] = batch_data[item].to(env.DEVICE) return input_dict, label_dict def test_saveload(self):