diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
new file mode 100644
index 00000000..0ce4cfa6
--- /dev/null
+++ b/.github/workflows/python-app.yml
@@ -0,0 +1,47 @@
+name: Test NeuralForceField package
+
+on: [push]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # python-version: ["pypy3.10", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.10"]
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Display Python version
+        run: python -c "import sys; print(sys.version)"
+      - name: Install basics
+        run: python -m pip install --upgrade pip setuptools wheel
+      - name: Install package
+        run: python -m pip install .
+      # - name: Install linters
+      #   run: python -m pip install flake8 mypy pylint
+      # - name: Install documentation requirements
+      #   run: python -m pip install -r docs/requirements.txt
+      # - name: Test with flake8
+      #   run: flake8 nff
+      # - name: Test with mypy
+      #   run: mypy nff
+      # - name: Test with pylint
+      #   run: pylint nff
+      - name: Test with pytest
+        run: |
+          pip install pytest pytest-cov
+          pytest nff/tests --doctest-modules --junitxml=junit/test-results-${{ matrix.python-version }}.xml --cov=nff --cov-report=xml --cov-report=html
+      - name: Upload pytest test results
+        uses: actions/upload-artifact@v4
+        with:
+          name: pytest-results-${{ matrix.python-version }}
+          path: junit/test-results-${{ matrix.python-version }}.xml
+        if: ${{ always() }}
+      # - name: Test documentation
+      #   run: sphinx-build docs/source docs/build
diff --git a/.gitignore b/.gitignore
index 6181965e..4b6cfd09 100644
--- a/.gitignore
+++ b/.gitignore
@@ -66,5 +66,17 @@ dist/
 sandbox_excited/
 build/
 
+# Editor files
+# vim
+*.swp
+*.swo
+
+# pycharm
+.idea/
+
+# coverage and tests
+junit
+.coverage
+
 # required exceptions
 !tutorials/models/ammonia/Ammonia.xyz
diff --git a/nff/data/dataset.py b/nff/data/dataset.py
index 974ccb8c..7bd20958 100644
--- a/nff/data/dataset.py
+++ b/nff/data/dataset.py
@@ -86,6 +86,7 @@ def __init__(
         units: str = "kcal/mol",
         check_props: bool = True,
         do_copy: bool = True,
+        device: str = "cuda",
     ) -> None:
         """Constructor for Dataset class.
 
@@ -108,6 +109,7 @@
         self.props = props
         self.units = units
         self.to_units(units)
+        self.device = device
 
     def __len__(self) -> int:
         """Length of the dataset.
@@ -289,6 +291,7 @@ def _get_periodic_neighbor_list(
             pbc=True,
             cutoff=cutoff,
             directed=(not undirected),
+            device=self.device,
         )
         nbrs, offs = atoms.update_nbr_list()
         nbrlist.append(nbrs)
@@ -444,6 +447,7 @@ def unwrap_xyz(self, mol_dic: dict) -> None:
                 numbers=self.props["nxyz"][i][:, 0],
                 cell=self.props["cell"][i],
                 pbc=True,
+                device=self.device,
             )
 
             # reconstruct coordinates based on subgraphs index
@@ -577,6 +581,7 @@ def gen_bond_prior(self, cutoff: float, bond_len_dict: dict | None = None) -> None:
                 "cutoff": cutoff,
                 "cell": cell,
                 "nbr_torch": False,
+                "device": self.device,
             }
 
             # the coordinates have been unwrapped; now try to compute the resulting offsets
diff --git a/nff/data/tests/__init__.py b/nff/tests/__init__.py
similarity index 100%
rename from nff/data/tests/__init__.py
rename to nff/tests/__init__.py
diff --git a/nff/tests/conftest.py b/nff/tests/conftest.py
new file mode 100644
index 00000000..66ca0a47
--- /dev/null
+++ b/nff/tests/conftest.py
@@ -0,0 +1,15 @@
+
+import os
+import pytest
+import torch
+
+torch.set_num_threads(int(os.getenv("OMP_NUM_THREADS", 1)))
+
+
+def pytest_addoption(parser):
+    parser.addoption("--device", action="store", default="cpu", help="Device to run the tests on ('cpu' or 'cuda')")
+
+
+@pytest.fixture
+def device(request):
+    return request.config.getoption("--device")
diff --git a/nff/tests/data/azo_diabat.pth.tar b/nff/tests/data/azo_diabat.pth.tar
new file mode 100644
index 00000000..1065bcae
Binary files /dev/null and b/nff/tests/data/azo_diabat.pth.tar differ
diff --git a/nff/tests/data/dataset.pth.tar b/nff/tests/data/dataset.pth.tar
new file mode 100644
index 00000000..51dff90c
Binary files /dev/null and b/nff/tests/data/dataset.pth.tar differ
diff --git a/nff/md/zhu_nakamura/dynamics_test.py b/nff/tests/dynamics_test.py
similarity index 99%
rename from nff/md/zhu_nakamura/dynamics_test.py
rename to nff/tests/dynamics_test.py
index bf391d24..41eb460a 100644
--- a/nff/md/zhu_nakamura/dynamics_test.py
+++ b/nff/tests/dynamics_test.py
@@ -15,8 +15,8 @@
 from ase.io.trajectory import Trajectory
 from ase import Atoms
 
-from nff.md.utils import mol_dot, mol_norm, ZhuNakamuraLogger, atoms_to_nxyz
-from nff.md.nvt_test import NoseHoover, NoseHooverChain
+from nff.md.utils_ax import mol_dot, mol_norm, ZhuNakamuraLogger, atoms_to_nxyz
+from nff.md.nvt_ax import NoseHoover, NoseHooverChain
 from nff.utils.constants import BOHR_RADIUS, FS_TO_AU, AMU_TO_AU, FS_TO_ASE, ASE_TO_FS, EV_TO_AU
 from nff.data import Dataset, collate_dicts
 from nff.utils.cuda import batch_to
diff --git a/nff/io/tests/test_ase.py b/nff/tests/test_ase.py
similarity index 92%
rename from nff/io/tests/test_ase.py
rename to nff/tests/test_ase.py
index 14ee02d7..6ebe735b 100644
--- a/nff/io/tests/test_ase.py
+++ b/nff/tests/test_ase.py
@@ -5,6 +5,8 @@
 import numpy as np
 from ase import Atoms
 
+import pytest
+
 from nff.io.ase import AtomsBatch
 
 
@@ -19,6 +21,8 @@ def compare_dicts(d1: dict, d2: dict):
     for key, value in d1.items():
         if isinstance(value, dict):
             compare_dicts(value, d2[key])
+        elif isinstance(value, str):
+            assert value == d2[key]
         elif isinstance(value, Iterable):
             assert np.allclose(value, d2[key])
         else:
@@ -47,10 +51,17 @@ def get_ethanol():
 
     return Atoms(nxyz[:, 0].astype(int), positions=nxyz[:, 1:])
 
 
-# @ut.skip("skip this for now")
+@pytest.mark.usefixtures("device")  # Ensure the fixture is accessible
 class TestAtomsBatch(ut.TestCase):
     def setUp(self):
         self.ethanol = get_ethanol()
+        # Access the device value from the pytest fixture
+        self.device = self._test_fixture_device
+
+    @pytest.fixture(autouse=True)
+    def inject_device(self, device):
+        # Automatically set the fixture value to an attribute
+        self._test_fixture_device = device
 
     @ut.skip("skip this for now")
     def test_AtomsBatch(self):
@@ -111,7 +122,7 @@
             ]
         )
 
-        atoms_batch = AtomsBatch(self.ethanol, cutoff=2.5)
+        atoms_batch = AtomsBatch(self.ethanol, cutoff=2.5, device=self.device)
         atoms_batch.update_nbr_list()
 
         G1 = nx.from_edgelist(expected_nbrlist_cutoff_2dot5)
@@ -120,13 +131,13 @@
         assert nx.is_isomorphic(G1, G2)
 
     def test_get_batch(self):
-        atoms_batch = AtomsBatch(self.ethanol, cutoff=5)
+        atoms_batch = AtomsBatch(self.ethanol, cutoff=5, device=self.device)
         batch = atoms_batch.get_batch()
 
         assert "nxyz" in batch
 
     def test_from_atoms(self):
-        atoms_batch = AtomsBatch.from_atoms(self.ethanol, cutoff=2.5)
+        atoms_batch = AtomsBatch.from_atoms(self.ethanol, cutoff=2.5, device=self.device)
 
         # ensure atomic numbers, positions, and cell are the same
         assert np.allclose(atoms_batch.get_atomic_numbers(), self.ethanol.get_atomic_numbers())
@@ -134,7 +145,7 @@
         assert np.allclose(atoms_batch.get_cell(), self.ethanol.get_cell())
 
     def test_copy(self):
-        atoms_batch = AtomsBatch(self.ethanol, cutoff=2.5)
+        atoms_batch = AtomsBatch(self.ethanol, cutoff=2.5, device=self.device)
         atoms_batch.get_batch()  # update props
 
         atoms_batch_copy = atoms_batch.copy()
@@ -154,7 +165,7 @@
         assert atoms_batch.requires_large_offsets == atoms_batch_copy.requires_large_offsets
 
     def test_fromdict(self):
-        atoms_batch = AtomsBatch(self.ethanol, cutoff=2.5)
+        atoms_batch = AtomsBatch(self.ethanol, cutoff=2.5, device=self.device)
         ab_dict = atoms_batch.todict(update_props=True)
 
         ab_from_dict = AtomsBatch.fromdict(ab_dict)
@@ -183,6 +194,7 @@
         compare_dicts(ab_dict_props, ab_dict_again_props)
 
 
+@pytest.mark.usefixtures("device")  # Ensure the fixture is loaded
 class TestPeriodic(ut.TestCase):
     def setUp(self):
         nxyz = np.array(
@@ -205,9 +217,15 @@
             [0.0, 0.0, 5.51891759],
             ]
         )
-        self.quartz = AtomsBatch(nxyz[:, 0].astype(int), positions=nxyz[:, 1:], cell=lattice, pbc=True)
-
-    def test_ase(self):
+        self.quartz = AtomsBatch(nxyz[:, 0].astype(int), positions=nxyz[:, 1:], cell=lattice, pbc=True,
+                                 device=self._test_fixture_device)
+
+    @pytest.fixture(autouse=True)
+    def inject_device(self, device):
+        # Automatically set the fixture value to an attribute
+        self._test_fixture_device = device
+
+    def test_print(self):
         print(self.quartz)
 
     def test_nbrlist(self):
@@ -469,7 +487,6 @@
             ]
         )
         assert np.allclose(nbrlist, expected_nbrlist)
-        print(offsets)
 
 
 if __name__ == "__main__":
diff --git a/nff/io/tests/__init__.py b/nff/tests/test_data/__init__.py
similarity index 100%
rename from nff/io/tests/__init__.py
rename to nff/tests/test_data/__init__.py
diff --git a/nff/data/tests/data/SrIrO3_bulk_55_nff_all_dataset.pth.tar b/nff/tests/test_data/data/SrIrO3_bulk_55_nff_all_dataset.pth.tar
similarity index 100%
rename from nff/data/tests/data/SrIrO3_bulk_55_nff_all_dataset.pth.tar
rename to nff/tests/test_data/data/SrIrO3_bulk_55_nff_all_dataset.pth.tar
diff --git a/nff/data/tests/test_dataset.py b/nff/tests/test_data/test_dataset.py
similarity index 93%
rename from nff/data/tests/test_dataset.py
rename to nff/tests/test_data/test_dataset.py
index cdbf73bf..16ee7fc4 100644
--- a/nff/data/tests/test_dataset.py
+++ b/nff/tests/test_data/test_dataset.py
@@ -6,6 +6,8 @@
 import numpy as np
 import torch
 
+import pytest
+
 from nff.data.dataset import (
     Dataset,
     concatenate_dict,
@@ -14,8 +16,8 @@
 )
 
 current_path = Path(__file__).parent
-DATASET_PATH = current_path / "../../../tutorials/data/dataset.pth.tar"
-PEROVSKITE_DATA_PATH = current_path / "./data/SrIrO3_bulk_55_nff_all_dataset.pth.tar"
+DATASET_PATH = os.path.join(current_path, "..", "..", "..", "tutorials", "data", "dataset.pth.tar")
+PEROVSKITE_DATA_PATH = os.path.join(current_path, "data", "SrIrO3_bulk_55_nff_all_dataset.pth.tar")
 TARG_NAME = "formula"
 VAL_SIZE = 0.1
 TEST_SIZE = 0.1
@@ -223,6 +225,7 @@
         self.assertEqual(ab, expected)
 
 
+@pytest.mark.usefixtures("device")  # Ensure the fixture is accessible
 class TestPeriodicDataset(unittest.TestCase):
     def setUp(self):
         self.quartz = {
@@ -248,7 +251,12 @@
             ),
         }
 
-        self.qtz_dataset = Dataset(concatenate_dict(*[self.quartz] * 3))
+        self.qtz_dataset = Dataset(concatenate_dict(*[self.quartz] * 3), device=self._test_fixture_device)
+
+    @pytest.fixture(autouse=True)
+    def inject_device(self, device):
+        # Automatically set the fixture value to an attribute
+        self._test_fixture_device = device
 
     def test_neighbor_list(self):
         nbrs, offs = self.qtz_dataset.generate_neighbor_list(cutoff=5)
diff --git a/nff/data/tests/test_stats.py b/nff/tests/test_data/test_stats.py
similarity index 100%
rename from nff/data/tests/test_stats.py
rename to nff/tests/test_data/test_stats.py
diff --git a/nff/tests/test_excited_states_training.py b/nff/tests/test_excited_states_training.py
new file mode 100644
index 00000000..8754539d
--- /dev/null
+++ b/nff/tests/test_excited_states_training.py
@@ -0,0 +1,136 @@
+
+import os
+import pathlib
+
+import torch
+from torch.optim import Adam
+from torch.utils.data import DataLoader
+from torch.utils.data.sampler import RandomSampler
+
+import pytest
+
+from nff.data import Dataset, split_train_validation_test, collate_dicts
+from nff.train import Trainer, get_model, loss, hooks, metrics, evaluate
+
+
+@pytest.mark.skip("still taking too long, disable for now")
+def test_excited_training(device, tmpdir):
+    # define loss
+    loss_dict = {
+        "mse": [
+            {"coef": 0.01, "params": {"key": "d_00"}},
+            {"coef": 0.01, "params": {"key": "d_11"}},
+            {"coef": 0.01, "params": {"key": "d_22"}},
+            {"coef": 0.2, "params": {"key": "energy_0"}},
+            {"coef": 1, "params": {"key": "energy_0_grad"}},
+            {"coef": 0.1, "params": {"key": "energy_1"}},
+            {"coef": 1, "params": {"key": "energy_1_grad"}},
+            {"coef": 0.5, "params": {"key": "energy_1_energy_0_delta"}},
+        ],
+        "nacv": [{"coef": 1, "params": {"abs": False, "key": "force_nacv_10", "max": False}}],
+    }
+    loss_fn = loss.build_multi_loss(loss_dict)
+
+    # define model
+    diabat_keys = [["d_00", "d_01", "d_02"], ["d_01", "d_11", "d_12"], ["d_02", "d_12", "d_22"]]
+    modelparams = {
+        "feat_dim": 128,
+        "activation": "swish",
+        "n_rbf": 20,
+        "cutoff": 5.0,
+        "num_conv": 3,
+        "output_keys": ["energy_0", "energy_1"],
+        "grad_keys": ["energy_0_grad", "energy_1_grad"],
+        "diabat_keys": diabat_keys,
+        "add_nacv": True,
+    }
+    model = get_model(modelparams, model_type="PainnDiabat")
+
+    # define training
+    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
+    optimizer = Adam(trainable_params, lr=1e-4)
+    train_metrics = [
+        metrics.MeanAbsoluteError("energy_0"),
+        metrics.MeanAbsoluteError("energy_1"),
+        metrics.MeanAbsoluteError("energy_0_grad"),
+        metrics.MeanAbsoluteError("energy_1_grad"),
+        metrics.MeanAbsoluteError("energy_1_energy_0_delta"),
+    ]
+
+    # output
+    outdir = tmpdir
+    train_hooks = [
+        hooks.CSVHook(
+            outdir,
+            metrics=train_metrics,
+        ),
+        hooks.PrintingHook(outdir, metrics=train_metrics, separator=" | ", time_strf="%M:%S"),
+        hooks.ReduceLROnPlateauHook(
+            optimizer=optimizer,
+            # patience in the original paper
+            patience=50,
+            factor=0.5,
+            min_lr=1e-7,
+            window_length=1,
+            stop_after_min=True,
+        ),
+    ]
+
+    # data set
+    dset = Dataset.from_file(os.path.join(pathlib.Path(__file__).parent.absolute(), "data/azo_diabat.pth.tar"))
+    train, val, test = split_train_validation_test(dset, val_size=0.1, test_size=0.1)
+    batch_size = 20
+    train_loader = DataLoader(train, batch_size=batch_size, collate_fn=collate_dicts, sampler=RandomSampler(train))
+    val_loader = DataLoader(val, batch_size=batch_size, collate_fn=collate_dicts)
+    test_loader = DataLoader(test, batch_size=batch_size, collate_fn=collate_dicts)
+
+
+    # train
+    T = Trainer(
+        model_path=outdir,
+        model=model,
+        loss_fn=loss_fn,
+        optimizer=optimizer,
+        train_loader=train_loader,
+        validation_loader=val_loader,
+        checkpoint_interval=1,
+        hooks=train_hooks,
+        mini_batches=1,
+    )
+    T.train(device=device, n_epochs=10)
+
+    # evaluation
+    def correct_nacv(results, targets, key):
+        num_atoms = targets["num_atoms"]
+        if not isinstance(num_atoms, list):
+            num_atoms = num_atoms.tolist()
+        pred = torch.split(torch.cat(results[key]), num_atoms)
+        targ = torch.split(torch.cat(targets[key]), num_atoms)
+
+        real_pred = []
+
+        for p, t in zip(pred, targ):
+            sub_err = (p - t).abs().mean()
+            add_err = (p + t).abs().mean()
+            sign = 1 if sub_err < add_err else -1
+            real_pred.append(sign * p)
+
+        return real_pred
+
+    results, targets, test_loss = evaluate(
+        T.get_best_model(), test_loader, loss_fn=lambda x, y: torch.Tensor([0]), device=device
+    )
+    real_nacv = correct_nacv(results, targets, "force_nacv_10")
+    results["force_nacv_10"] = real_nacv
+
+    en_keys = ["energy_0", "energy_1", "energy_1_energy_0_delta"]
+    grad_keys = ["energy_0_grad", "energy_1_grad"]
+
+    for key in [*en_keys, *grad_keys, "force_nacv_10"]:
+        pred = results[key]
+        targ = targets[key]
+        targ_dim = len(targets["energy_0"][0].shape)
+        fn = torch.stack if targ_dim == 0 else torch.cat
+        pred = torch.cat(pred).reshape(-1)
+        targ = fn(targ).reshape(-1)
+        assert abs(pred - targ).mean() < 12.0
diff --git a/nff/tests/test_training.py b/nff/tests/test_training.py
new file mode 100644
index 00000000..8e7772d5
--- /dev/null
+++ b/nff/tests/test_training.py
@@ -0,0 +1,72 @@
+import os
+import pathlib
+
+import torch
+from torch.optim import Adam
+from torch.utils.data import DataLoader
+
+from nff.data import Dataset, split_train_validation_test, collate_dicts, to_tensor
+from nff.train import Trainer, get_model, loss, hooks, metrics, evaluate
+
+
+def test_training(device, tmpdir):
+    # data set
+    OUTDIR = tmpdir
+    dataset = Dataset.from_file(os.path.join(pathlib.Path(__file__).parent.absolute(), "data", "dataset.pth.tar"))
+    train, val, test = split_train_validation_test(dataset, val_size=0.2, test_size=0.2)
+    train_loader = DataLoader(train, batch_size=50, collate_fn=collate_dicts)
+    val_loader = DataLoader(val, batch_size=50, collate_fn=collate_dicts)
+    test_loader = DataLoader(test, batch_size=50, collate_fn=collate_dicts)
+
+    # define model
+    params = {
+        "n_atom_basis": 256,
+        "n_filters": 256,
+        "n_gaussians": 32,
+        "n_convolutions": 4,
+        "cutoff": 5.0,
+        "trainable_gauss": True,
+        "dropout_rate": 0.2,
+    }
+    model = get_model(params)
+
+
+    # define training
+    loss_fn = loss.build_mse_loss(loss_coef={"energy": 0.01, "energy_grad": 1})
+    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
+    optimizer = Adam(trainable_params, lr=3e-4)
+    train_metrics = [metrics.MeanAbsoluteError("energy"), metrics.MeanAbsoluteError("energy_grad")]
+
+    # output
+    train_hooks = [
+        hooks.MaxEpochHook(7),
+        hooks.CSVHook(
+            OUTDIR,
+            metrics=train_metrics,
+        ),
+        hooks.PrintingHook(OUTDIR, metrics=train_metrics, separator=" | ", time_strf="%M:%S"),
+        hooks.ReduceLROnPlateauHook(
+            optimizer=optimizer, patience=30, factor=0.5, min_lr=1e-7, window_length=1, stop_after_min=True
+        ),
+    ]
+
+    # train
+    T = Trainer(
+        model_path=OUTDIR,
+        model=model,
+        loss_fn=loss_fn,
+        optimizer=optimizer,
+        train_loader=train_loader,
+        validation_loader=val_loader,
+        checkpoint_interval=1,
+        hooks=train_hooks,
+    )
+    T.train(device=device, n_epochs=7)
+
+    # evaluation on the held-out test set
+    results, targets, test_loss = evaluate(T.get_best_model(), test_loader, loss_fn, device=device)
+    for key in ["energy_grad", "energy"]:
+        pred = torch.stack(results[key], dim=0).view(-1).detach().cpu().numpy()
+        targ = torch.stack(targets[key], dim=0).view(-1).detach().cpu().numpy()
+        mae = abs(pred - targ).mean()
+        assert mae < 10.0
diff --git a/pyproject.toml b/pyproject.toml
index 2e3c0e6c..01fd4ec8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,7 +16,7 @@ dependencies = [
     "rdkit",
     "scikit-learn",
     "scipy",
-    "torch>=2.2.0",
+    "torch>=2.2.0,<2.6.0",
     "tqdm",
     "mace-torch>=0.3.4",
     "chgnet>=0.3.5",
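
For reference, a minimal sketch of how the new `--device` option from `nff/tests/conftest.py` is consumed by a plain pytest function. The module name and test body below are illustrative assumptions, not part of this diff; only `unittest.TestCase` classes need the `inject_device` shim used above, since plain test functions can take the `device` fixture directly:

    # hypothetical nff/tests/test_device_example.py -- illustration only
    import torch

    def test_tensor_on_requested_device(device):
        # `device` is the conftest.py fixture: the value passed via
        # `pytest --device <name>`, defaulting to "cpu"
        x = torch.ones(3, device=device)
        assert str(x.device).startswith(device)

Usage: `pytest nff/tests --device cpu`, or `--device cuda` on a GPU runner.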