Merge branch 'main' of github.com:materialsvirtuallab/m3gnet

materialsvirtuallab · Jun 6, 2023 · 1f89ecb · 1f89ecb
2 parents 35167cb + 101625a
commit 1f89ecb
Show file tree

Hide file tree

Showing 8 changed files with 73 additions and 32 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -6,40 +6,40 @@ ci:
 
 repos:
  - repo: https://github.com/psf/black
- rev: 22.10.0
+ rev: 23.3.0
  hooks:
  - id: black-jupyter
 
  - repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v4.3.0
+ rev: v4.4.0
  hooks:
  - id: check-yaml
  - id: end-of-file-fixer
  - id: trailing-whitespace
 
  - repo: https://github.com/PyCQA/flake8
- rev: 5.0.4
+ rev: 6.0.0
  hooks:
  - id: flake8
 
  - repo: https://github.com/asottile/pyupgrade
- rev: v3.2.0
+ rev: v3.4.0
  hooks:
  - id: pyupgrade
  args: [--py38-plus]
 
  - repo: https://github.com/PyCQA/autoflake
- rev: v1.7.7
+ rev: v2.1.1
  hooks:
  - id: autoflake
 
  - repo: https://github.com/pre-commit/mirrors-mypy
- rev: v0.982
+ rev: v1.3.0
  hooks:
  - id: mypy
 
  - repo: https://github.com/nbQA-dev/nbQA
- rev: 1.5.3
+ rev: 1.7.0
  hooks:
  - id: nbqa-flake8
  args: [--ignore=E402]

diff --git a/README.md b/README.md
@@ -10,10 +10,7 @@
 
 # M3GNet
 
-M3GNet is a new materials graph neural network architecture that incorporates 3-body interactions. A key difference
-with prior materials graph implementations such as [MEGNet](https://github.com/materialsvirtuallab/megnet) is the
-addition of the coordinates for atoms and the 3×3 lattice matrix in crystals, which are necessary for obtaining
-tensorial quantities such as forces and stresses via auto-differentiation.
+[M3GNet](https://www.nature.com/articles/s43588-022-00349-3) is a new materials graph neural network architecture that incorporates 3-body interactions. A key difference with prior materials graph implementations such as [MEGNet](https://github.com/materialsvirtuallab/megnet) is the addition of the coordinates for atoms and the 3×3 lattice matrix in crystals, which are necessary for obtaining tensorial quantities such as forces and stresses via auto-differentiation.
 
 As a framework, M3GNet has diverse applications, including:
 
@@ -325,6 +322,56 @@ VASP calculations (default unit is kBar) should be multiplied by -0.1 to work di
 
 We use validation dataset to select the stopping epoch number. The dataset has similar format as the training dataset.
 
+If you want to use the official MPF dataset shared above, the following code examples show how to load the dataset and train your own model.
+
+First, load the MPF dataset, which consists of block_0 and block_1:
+
+```python
+import pickle as pk
+import pandas as pd
+import pymatgen
+
+print('loading the MPF dataset 2021')
+with open('/yourpath/block_0.p', 'rb') as f:
+ data = pk.load(f)
+
+with open('/yourpath/block_1.p', 'rb') as f:
+ data2 = pk.load(f)
+print('MPF dataset 2021 loaded')
+data.update(data2)
+df = pd.DataFrame.from_dict(data)
+```
+
+Then, split the data based on material ID and map the energy to formation energy in units of eV/atom:
+
+```python
+id_train, id_val, id_test = get_id_train_val_test(
+ total_size=len(data),
+ split_seed=42,
+ train_ratio=0.90,
+ val_ratio=0.05,
+ test_ratio=0.05,
+ keep_data_order=False,
+)
+
+cnt = 0
+for idx, item in df.items():
+ # import pdb; pdb.set_trace()
+ if cnt in id_train:
+ for iid in range(len(item['energy'])):
+ dataset_train.append({"atoms":item['structure'][iid], "energy":item['energy'][iid] / len(item['force'][iid]), "force": np.array(item['force'][iid])})
+ elif cnt in id_val:
+ for iid in range(len(item['energy'])):
+ dataset_val.append({"atoms":item['structure'][iid], "energy":item['energy'][iid] / len(item['force'][iid]), "force": np.array(item['force'][iid])})
+ elif cnt in id_test:
+ for iid in range(len(item['energy'])):
+ dataset_test.append({"atoms":item['structure'][iid], "energy":item['energy'][iid] / len(item['force'][iid]), "force": np.array(item['force'][iid])})
+ cnt += 1
+
+print('using %d samples to train, %d samples to evaluate, and %d samples to test'%(len(dataset_train), len(dataset_val), len(dataset_test)))
+```
+After this, you can use the dataset_train to train, dataset_val to evaluate, and dataset_test to test.
+
 A minimal example of model training is shown below.
 
 ```python
@@ -376,10 +423,7 @@ The training data used to develop the universal M3GNet IAP is `MPF.2021.2.8` and
 
 Please cite the following work:
 
-```txt
-Chi Chen, and Shyue Ping Ong. "A Universal Graph Deep Learning Interatomic Potential for the Periodic Table."
-arXiv preprint [arXiv:2202.02450](https://arxiv.org/abs/2202.02450) (2022).
-```
+> Chen, C., Ong, S.P. A universal graph deep learning interatomic potential for the periodic table. Nat Comput Sci 2, 718–728 (2022). https://doi.org/10.1038/s43588-022-00349-3.
 
 # Acknowledgements
 

diff --git a/examples/Cubic Crystal Test.ipynb b/examples/Cubic Crystal Test.ipynb
@@ -9,9 +9,7 @@
  "\n",
  "This notebook is written to demonstrate the use of M3GNet as a structure relaxer as well as to provide more comprehensive benchmarks for cubic crystals based on exp data on Wikipedia and MP DFT data. This benchmark is limited to cubic crystals for ease of comparison since there is only one lattice parameter. \n",
  "\n",
- "If you are running this notebook from Google Colab, uncomment the next code box to install m3gnet first.\n",
- "\n",
- "> Note: Because Google Colab's python version is sometimes rather old (v3.7 at the time of writing this notebook (Jul 2022), you will need to install m3gnet without dependencies and install the dependencies manually."
+ "If you are running this notebook from Google Colab, uncomment the next code box to install m3gnet first."
  ]
  },
  {
@@ -21,9 +19,7 @@
  "metadata": {},
  "outputs": [],
  "source": [
- "# !pip install --no-deps m3gnet\n",
- "# !pip install pymatgen ase\n",
- "# !pip install lxml"
+ "# !pip install m3gnet"
  ]
  },
  {

diff --git a/examples/Relaxation of LiFePO4.ipynb b/examples/Relaxation of LiFePO4.ipynb
@@ -9,9 +9,7 @@
  "\n",
  "This notebook shows an example of relaxing the LiFePO4 crystal.\n",
  "\n",
- "If you are running this notebook from Google Colab, uncomment the next code box to install m3gnet first.\n",
- "\n",
- "> Note: Because Google Colab's python version is sometimes rather old (v3.7 at the time of writing this notebook (Jul 2022), you will need to install m3gnet without dependencies and install the dependencies manually."
+ "If you are running this notebook from Google Colab, uncomment the next code box to install m3gnet first."
  ]
  },
  {
@@ -21,9 +19,7 @@
  "metadata": {},
  "outputs": [],
  "source": [
- "# !pip install --no-deps m3gnet\n",
- "# !pip install pymatgen ase\n",
- "# !pip install lxml"
+ "# !pip install m3gnet"
  ]
  },
  {

diff --git a/m3gnet/graph/_compute.py b/m3gnet/graph/_compute.py
@@ -156,7 +156,6 @@ def _list_include_threebody_indices(graph: list, threebody_cutoff: float | None
  ij_reverse_map = None
  original_index = np.arange(n_bond)
  if bond_atom_indices.shape[0] > 0:
-
  bond_indices, n_triple_ij, n_triple_i, n_triple_s = _compute_3body(
  bond_atom_indices,
  graph[Index.N_ATOMS],

diff --git a/m3gnet/models/tests/test_model.py b/m3gnet/models/tests/test_model.py
@@ -17,9 +17,9 @@ def setUpClass(cls) -> None:
  cls.mol = Molecule(["C", "O"], [[0, 0, 0], [1.5, 0, 0]])
  cls.structure = Structure(Lattice.cubic(3.30), ["Mo", "Mo"], [[0, 0, 0], [0.5, 0.5, 0.5]])
  cls.atoms = Atoms(["Mo", "Mo"], [[0, 0, 0], [0.5, 0.5, 0.5]], cell=np.eye(3) * 3.30, pbc=True)
+ cls.single_atoms = Structure(Lattice.cubic(6.0), ["Mo"], [[0, 0, 0]])
 
  def test_m3gnet(self):
-
  g = self.model.graph_converter(self.mol)
 
  val = self.model.predict_structure(self.mol).numpy().ravel()
@@ -46,6 +46,12 @@ def test_potential(self):
  )
  )
 
+ def test_single_atoms(self):
+ self.potential.get_efs(self.structure)
+ e, f, s = self.potential.get_efs(self.single_atoms)
+ shapes = f.numpy().shape
+ self.assertTupleEqual(shapes, (1, 3))
+
  def test_relaxer(self):
  relaxer = Relaxer() # this loads the default model
 

diff --git a/mypy.ini b/mypy.ini
@@ -1,5 +1,5 @@
 [mypy]
 exclude = ["pretrained"]
 ignore_missing_imports = True
-follow_imports = skip
-follow_imports_for_stubs = True
+#follow_imports = skip
+#follow_imports_for_stubs = True
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,3 @@
 pymatgen==2022.11.7
-tensorflow==2.11.0
+tensorflow==2.11.1
 ase==3.22.1