cattsunami packaging (#678)

* pyproject.toml for cattsunami * adding tests for cattsunami * ocpneb packaging * cleanup ocpneb tests * normalize project names * move cattsunami tests to root test folder * fix imports * allow ase imports from latest version and development version * pip install cattsunami in tests * cleaning up paths and getting tests to function * install cattsunami for docs as well * sneak-in a doc syntax fix * no space * updating doc md * no pytest in reaction * refactor ocpneb package * fix ocpneb imports * move README.md * fix ocpneb install * rename test directory * test lint * renaming to cattsunami * updating gitbook build with package name * changing runner test yaml to cattsunami * changing adsorbates pkl to adsorbate pkl * update cattsunami dist build directory * updating readme with monorepo links * trimming down test time * reducing batch size incase of mem issues --------- Co-authored-by: Brook Wander <brook.l.wander@gmail.com>
FAIR-Chem · May 14, 2024 · 4c094c4 · 4c094c4
1 parent 7e8aea5
commit 4c094c4
Show file tree

Hide file tree

Showing 38 changed files with 555 additions and 97 deletions.
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -29,6 +29,7 @@ jobs:
         pip install -e packages/fairchem-core[docs,adsorbml]
         pip install -e packages/fairchem-data-oc[dev]
         pip install -e packages/fairchem-demo-ocpapi[dev]
+        pip install -e packages/fairchem-applications-cattsunami
 
     # Build the book
     - name: Build the book
@@ -49,4 +50,3 @@ jobs:
         external_repository: FAIR-Chem/fair-chem.github.io
         publish_branch: gh-pages
         publish_dir: docs/_build/html
-
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -36,6 +36,7 @@ jobs:
           pip install -e packages/fairchem-core[dev]
           pip install -e packages/fairchem-data-oc[dev]
           pip install -e packages/fairchem-demo-ocpapi[dev]
+          pip install -e packages/fairchem-applications-cattsunami
 
       - name: Test core with pytest
         run: |

diff --git a/docs/index.md b/docs/index.md
@@ -17,10 +17,10 @@ this repo:
 * [Open Catalyst Project (OCP)](https://opencatalystproject.org/)
 * [Open Direct Air Capture (OpenDAC)](https://open-dac.github.io/)
 
-:::{note}
+```{note}
 We re-organized and rebranded the repository in 2024 (previously the `fairchem` repo) to reflect the increasingly
 general usability of these models beyond catalysis, including things like direct air capture.
-:::
+```
 
 ### Datasets in `fairchem`:
 `fairchem` provides training and evaluation code for tasks and models that take arbitrary

diff --git a/docs/tutorials/cattsunami_walkthrough.md b/docs/tutorials/cattsunami_walkthrough.md
@@ -14,24 +14,24 @@ kernelspec:
 # CatTSunami tutorial
 
 ```{code-cell} ipython3
-from ocpneb.core.reaction import Reaction
+from fairchem.applications.cattsunami.core import Reaction
 from fairchem.data.oc.core import Slab, Adsorbate, Bulk, AdsorbateSlabConfig
 from fairchem.core.common.relaxation.ase_utils import OCPCalculator
 from ase.optimize import BFGS
 from x3dase.visualize import view_x3d_n
 from ase.io import read
 from x3dase.x3d import X3D
-from ocpneb.databases import DISSOCIATION_REACTION_DB_PATH
-from fairchem.data.oc.databases.pkls import ADSORBATES_PKL_PATH, BULK_PKL_PATH
+from fairchem.applications.cattsunami.databases import DISSOCIATION_REACTION_DB_PATH
+from fairchem.data.oc.databases.pkls import ADSORBATE_PKL_PATH, BULK_PKL_PATH
 from fairchem.core.models.model_registry import model_name_to_local_file
 import matplotlib.pyplot as plt
-from ocpneb.core.autoframe import AutoFrameDissociation
-from ocpneb.core import OCPNEB
+from fairchem.applications.cattsunami.core.autoframe import AutoFrameDissociation
+from fairchem.applications.cattsunami.core import OCPNEB
 from ase.io import read
-
-#Optional
 from IPython.display import Image
-from x3dase.x3d import X3D 
+
+# Optional
+# from x3dase.x3d import X3D
 ```
 
 ## Do enumerations in an AdsorbML style for CH dissociation on Ru (001)
@@ -43,12 +43,12 @@ To start, we generate placements for the reactant and product species on the sur
 # Instantiate the reaction class for the reaction of interest
 reaction = Reaction(reaction_str_from_db="*CH -> *C + *H",
                     reaction_db_path=DISSOCIATION_REACTION_DB_PATH,
-                    adsorbate_db_path = ADSORBATES_PKL_PATH)
+                    adsorbate_db_path = ADSORBATE_PKL_PATH)
 
 # Instantiate our adsorbate class for the reactant and product
-reactant = Adsorbate(adsorbate_id_from_db=reaction.reactant1_idx, adsorbate_db_path=ADSORBATES_PKL_PATH)
-product1 = Adsorbate(adsorbate_id_from_db=reaction.product1_idx, adsorbate_db_path=ADSORBATES_PKL_PATH)
-product2 = Adsorbate(adsorbate_id_from_db=reaction.product2_idx, adsorbate_db_path=ADSORBATES_PKL_PATH)
+reactant = Adsorbate(adsorbate_id_from_db=reaction.reactant1_idx, adsorbate_db_path=ADSORBATE_PKL_PATH)
+product1 = Adsorbate(adsorbate_id_from_db=reaction.product1_idx, adsorbate_db_path=ADSORBATE_PKL_PATH)
+product2 = Adsorbate(adsorbate_id_from_db=reaction.product2_idx, adsorbate_db_path=ADSORBATE_PKL_PATH)
 
 # Grab the bulk and cut the slab we are interested in
 bulk = Bulk(bulk_src_id_from_db="mp-33", bulk_db_path=BULK_PKL_PATH)
@@ -73,15 +73,15 @@ product2_configs = AdsorbateSlabConfig(slab = slab[0], adsorbate = product2,
 # NOTE: Change the checkpoint path to locally downloaded files as needed
 checkpoint_path = model_name_to_local_file('EquiformerV2-31M-S2EF-OC20-All+MD', local_cache='/tmp/ocp_checkpoints/')
 cpu = True
-calc = OCPCalculator(checkpoint_path = CHECKPOINT_PATH, cpu = cpu)
+calc = OCPCalculator(checkpoint_path = checkpoint_path, cpu = cpu)
 ```
 
 ### Run ML local relaxations:
 
 There are 2 options for how to do this.
  1. Using `OCPCalculator` as the calculator within the ASE framework
  2. By writing objects to lmdb and relaxing them using `main.py` in the ocp repo
- 
+
 (1) is really only adequate for small workloads and is what I will show here, but if you plan to run many relaxations, you should definitely use (2). More details about writing lmdbs have been provided [here](https://github.com/Open-Catalyst-Project/ocp/blob/main/tutorials/lmdb_dataset_creation.ipynb) - follow the IS2RS/IS2RE instructions. More information about running relaxations once the lmdb has been written is [here](https://github.com/Open-Catalyst-Project/ocp/blob/main/TRAIN.md#initial-structure-to-relaxed-structure-is2rs).
 
 You need to provide the calculator with a path to a model checkpoint file, which can be downloaded [here](../core/model_checkpoints).
@@ -155,7 +155,7 @@ Here we use the custom child class we created to run NEB relaxations using ML. T
 # for idx, frame_set in enumerate(frame_sets):
 #     neb = OCPNEB(
 #         frame_set,
-#         checkpoint_path=CHECKPOINT_PATH,
+#         checkpoint_path=checkpoint_path,
 #         k=1,
 #         batch_size=8,
 #         cpu = cpu,
@@ -170,7 +170,7 @@ Here we use the custom child class we created to run NEB relaxations using ML. T
 #         conv = optimizer.run(fmax=fmax, steps=300)
 #         if conv:
 #             converged_idxs.append(idx)
-            
+
 # print(converged_idxs)
 ```
 
@@ -180,7 +180,7 @@ fmax = 0.05 # [eV / ang**2]
 delta_fmax_climb = 0.4
 neb = OCPNEB(
     frame_sets[0],
-    checkpoint_path=CHECKPOINT_PATH,
+    checkpoint_path=checkpoint_path,
     k=1,
     batch_size=8,
     cpu = cpu,

diff --git a/packages/fairchem-applications-cattsunami/pyproject.toml b/packages/fairchem-applications-cattsunami/pyproject.toml
@@ -0,0 +1,36 @@
+[build-system]
+requires = ["hatchling", "hatch-vcs", "hatch-fancy-pypi-readme>=24"]
+build-backend = "hatchling.build"
+
+[project]
+name = "fairchem-applications-cattsunami"
+dynamic = ["version", "readme"]  # both are filled in by the hatch plugins configured below
+description = "Accelerating Transition State Energy Calculations with Pre-trained Graph Neural Networks"
+license = {text = "MIT License"}
+
+[project.urls]
+repository = "https://github.com/FAIR-Chem/fairchem/tree/main/src/fairchem/applications/cattsunami"
+
+[tool.hatch.version]
+source = "vcs"
+
+[tool.hatch.version.raw-options]
+root = "../../"  # this package directory sits two levels below the repository root
+git_describe_command = 'git describe --tags --match fairchem_applications_cattsunami-*'
+
+[tool.hatch.build]
+directory = "../../dist-applications-cattsunami"
+
+[tool.hatch.build.targets.sdist]
+only-include = ["src/fairchem/applications/cattsunami"]
+
+[tool.hatch.build.targets.wheel]
+sources = ["src"]
+only-include = ["src/fairchem/applications/cattsunami"]  # must match the sdist target, not another package's sources
+
+[tool.hatch.metadata.hooks.fancy-pypi-readme]
+content-type = "text/markdown"
+fragments = [
+  { path = "src/fairchem/applications/cattsunami/README.md" },
+  { path = "src/fairchem/applications/cattsunami/DATASET.md" }
+]
diff --git a/packages/fairchem-applications-cattsunami/src b/packages/fairchem-applications-cattsunami/src
@@ -0,0 +1 @@
+../../src/
diff --git a/src/fairchem/applications/CatTSunami/ocpneb/core/__init__.py b/src/fairchem/applications/CatTSunami/ocpneb/core/__init__.py
diff --git a/...irchem/applications/CatTSunami/DATASET.md → ...irchem/applications/cattsunami/DATASET.md b/...irchem/applications/CatTSunami/DATASET.md → ...irchem/applications/cattsunami/DATASET.md
@@ -46,4 +46,4 @@ conv = optimizer.run(fmax=0.45, steps=200)
 if conv:
     neb.climb = True
     conv = optimizer.run(fmax=0.05, steps=300)
-```
+```
diff --git a/...airchem/applications/CatTSunami/README.md → ...airchem/applications/cattsunami/README.md b/...airchem/applications/CatTSunami/README.md → ...airchem/applications/cattsunami/README.md
@@ -2,15 +2,16 @@
 
 ![summary](https://github.com/Open-Catalyst-Project/CatTSunami/blob/master/summary_fig.png)
 
-CatTSunami is a framework for high-throughput enumeration of nudged elastic band (NEB) frame sets. It was built for use with machine learned (ML) models trained on [OC20](https://arxiv.org/abs/2010.09990), which were demonstrated to be performant on this auxiliary task. To train your own model or obtain pre-trained checkpoints, please see [`ocp`](https://github.com/Open-Catalyst-Project/ocp).
+CatTSunami is a framework for high-throughput enumeration of nudged elastic band (NEB) frame sets. It was built for use with machine learned (ML) models trained on [OC20](https://arxiv.org/abs/2010.09990), which were demonstrated to be performant on this auxiliary task. To train your own model or obtain pre-trained checkpoints, please see [`fairchem-core`](https://github.com/FAIR-Chem/fairchem/tree/main/src/fairchem/core).
 
 This repository contains the validation dataset, framework for enumeration, and accompanying code to run ML-accelerated NEBs and validate new models. For more information, please read the manuscript [paper](https://arxiv.org/abs/2405.02078).
 
 ### Getting started
-1. Install [`Open-Catalyst-Dataset`](https://github.com/Open-Catalyst-Project/Open-Catalyst-Dataset) and [`ocp`](https://github.com/Open-Catalyst-Project/ocp)
-2. Clone this repository
-3. `cd CatTSunami && python setup.py develop`
-4. Check out the [tutorial notebook](https://github.com/Open-Catalyst-Project/CatTSunami/blob/master/tutorial/workbook.ipynb) 
+To set up for local development:
+1. Clone the [`fairchem` repo](https://github.com/FAIR-Chem/fairchem/tree/main)
+2. Install `fairchem-data-oc` and `fairchem-core` by following the [installation instructions](https://fair-chem.github.io/core/install.html)
+3. Install this package: `pip install -e packages/fairchem-applications-cattsunami`
+4. Check out the [tutorial notebook](https://github.com/FAIR-Chem/fairchem/blob/main/src/fairchem/applications/cattsunami/tutorial/workbook.ipynb)
 
 
 ### Validation Dataset

diff --git a/...rchem/applications/CatTSunami/__init__.py → ...rchem/applications/cattsunami/__init__.py b/...rchem/applications/CatTSunami/__init__.py → ...rchem/applications/cattsunami/__init__.py
diff --git a/src/fairchem/applications/cattsunami/core/__init__.py b/src/fairchem/applications/cattsunami/core/__init__.py
@@ -0,0 +1,4 @@
+from .reaction import Reaction
+from .ocpneb import OCPNEB
+
+__all__ = ["Reaction", "OCPNEB"]
diff --git a/...tions/CatTSunami/ocpneb/core/autoframe.py → ...applications/cattsunami/core/autoframe.py b/...tions/CatTSunami/ocpneb/core/autoframe.py → ...applications/cattsunami/core/autoframe.py
@@ -13,8 +13,8 @@
 import torch
 from fairchem.data.oc.utils import DetectTrajAnomaly
 import networkx as nx
-import ocpneb
 from copy import deepcopy
+from fairchem.applications.cattsunami.core import Reaction
 
 
 class AutoFrame:
@@ -156,7 +156,7 @@ def are_all_adsorbate_atoms_overlapping(
 class AutoFrameDissociation(AutoFrame):
     def __init__(
         self,
-        reaction: ocpneb.core.Reaction,
+        reaction: Reaction,
         reactant_system: ase.Atoms,
         product1_systems: list,
         product1_energies: list,
@@ -480,7 +480,7 @@ def get_sites_within_r(
 class AutoFrameTransfer(AutoFrame):
     def __init__(
         self,
-        reaction: ocpneb.core.Reaction,
+        reaction: Reaction,
         reactant1_systems: list,
         reactant2_systems: list,
         reactant1_energies: list,
@@ -902,7 +902,7 @@ def get_system_pairs_final(self, system1_coord, system2_coord):
 class AutoFrameDesorption(AutoFrame):
     def __init__(
         self,
-        reaction: ocpneb.core.Reaction,
+        reaction: Reaction,
         reactant_systems: list,
         reactant_energies: list,
         z_desorption: float,
@@ -911,7 +911,7 @@ def __init__(
         Initialize class to handle the automatic generation of NEB frames for desorption reactions.
 
         Args:
-            reaction (ocpneb.core.Reaction): the reaction object which provides pertinent info
+            reaction (Reaction): the reaction object which provides pertinent info
             reactant_systems (list[ase.Atoms]): the relaxed atoms objects of the adsorbed system.
                 A list of multiple relaxed adsorbate placements should be provided so that multiple
                 possible NEBs can be created.
@@ -1038,7 +1038,7 @@ def interpolate_and_correct_frames(
     initial: ase.Atoms,
     final: ase.Atoms,
     n_frames: int,
-    reaction: ocpneb.core.Reaction,
+    reaction: Reaction,
     map_idx: int,
 ):
     """
@@ -1050,7 +1050,7 @@ def interpolate_and_correct_frames(
         initial (ase.Atoms): the initial frame of the NEB
         final (ase.Atoms): the proposed final frame of the NEB
         n_frames (int): The desired number of frames for the NEB (not including initial and final)
-        reaction (ocpneb.core.Reaction): the reaction object which provides pertinent info
+        reaction (Reaction): the reaction object which provides pertinent info
         map_idx (int): the index of the mapping to use for the final frame
     """
     # Perform checks
@@ -1139,7 +1139,7 @@ def get_shortest_path(
 
 
 def traverse_adsorbate_transfer(
-    reaction: ocpneb.core.Reaction,
+    reaction: Reaction,
     initial: ase.Atoms,
     final: ase.Atoms,
     initial_tiled: ase.Atoms,
@@ -1153,7 +1153,7 @@ def traverse_adsorbate_transfer(
     and avoids accidental bond breaking events over pbc.
 
     Args:
-        reaction (ocpneb.core.Reaction): the reaction object which provides pertinent info
+        reaction (Reaction): the reaction object which provides pertinent info
         initial (ase.Atoms): the initial frame of the NEB
         final (ase.Atoms): the proposed final frame of the NEB to be corrected
         initial_tiled (ase.Atoms): the initial frame tiled (3,3,1)
@@ -1248,7 +1248,7 @@ def traverse_adsorbate_transfer(
 
 
 def traverse_adsorbate_dissociation(
-    reaction: ocpneb.core.Reaction,
+    reaction: Reaction,
     initial: ase.Atoms,
     final: ase.Atoms,
     initial_tiled: ase.Atoms,
@@ -1262,7 +1262,7 @@ def traverse_adsorbate_dissociation(
     and avoids accidental bond breaking events over pbc.
 
     Args:
-        reaction (ocpneb.core.Reaction): the reaction object which provides pertinent info
+        reaction (Reaction): the reaction object which provides pertinent info
         initial (ase.Atoms): the initial frame of the NEB
         final (ase.Atoms): the proposed final frame of the NEB to be corrected
         initial_tiled (ase.Atoms): the initial frame tiled (3,3,1)
@@ -1341,7 +1341,7 @@ def traverse_adsorbate_dissociation(
 
 
 def traverse_adsorbate_desorption(
-    reaction: ocpneb.core.Reaction,
+    reaction: Reaction,
     initial: ase.Atoms,
     final: ase.Atoms,
     initial_tiled: ase.Atoms,
@@ -1354,7 +1354,7 @@ def traverse_adsorbate_desorption(
     and avoids accidental bond breaking events over pbc.
 
     Args:
-        reaction (ocpneb.core.Reaction): the reaction object which provides pertinent info
+        reaction (Reaction): the reaction object which provides pertinent info
         initial (ase.Atoms): the initial frame of the NEB
         final (ase.Atoms): the proposed final frame of the NEB to be corrected
         initial_tiled (ase.Atoms): the initial frame tiled (3,3,1)
@@ -1399,7 +1399,7 @@ def traverse_adsorbate_desorption(
 
 
 def get_product2_idx(
-    reaction: ocpneb.core.Reaction,
+    reaction: Reaction,
     edge_list_final: list,
     traversal_rxt1_final: list,
 ):
@@ -1409,7 +1409,7 @@ def get_product2_idx(
     as the binding index for traversal in `traverse_adsorbate_dissociation`.
 
     Args:
-        reaction (ocpneb.core.Reaction): the reaction object which provides pertinent info
+        reaction (Reaction): the reaction object which provides pertinent info
         edge_list_final (list): the edge list of the final frame corrected with mapping
             idx changes
         traversal_rxt1_final (list): the traversal of reactant 1 for the final frame
@@ -1473,7 +1473,7 @@ def traverse_adsorbate_general(
 def unwrap_atoms(
     initial: ase.Atoms,
     final: ase.Atoms,
-    reaction: ocpneb.core.Reaction,
+    reaction: Reaction,
     map_idx: int,
 ):
     """
@@ -1487,7 +1487,7 @@ def unwrap_atoms(
         initial (ase.Atoms): the initial atoms object to which the final atoms should
             be proximate
         final (ase.Atoms): the final atoms object to be corrected
-        reaction (ocpneb.core.Reaction): the reaction object which provides pertinent info
+        reaction (Reaction): the reaction object which provides pertinent info
         map_idx (int): the index of the mapping to use for the final frame
     """
 

diff --git a/...ications/CatTSunami/ocpneb/core/ocpneb.py → ...em/applications/cattsunami/core/ocpneb.py b/...ications/CatTSunami/ocpneb/core/ocpneb.py → ...em/applications/cattsunami/core/ocpneb.py
@@ -1,19 +1,19 @@
 import logging
-import warnings
 
 import numpy as np
 import torch
-from ase.neb import DyNEB, NEBState
+
 from ase.optimize.precon import Precon, PreconImages
 from fairchem.core.common.registry import registry
 from fairchem.core.common.utils import setup_imports, setup_logging
 from fairchem.core.datasets import data_list_collater
 from fairchem.core.preprocessing import AtomsToGraphs
 from torch.utils.data import DataLoader
-from ase.constraints import FixAtoms
 
-from tqdm import tqdm
-from functools import partialmethod
+try:
+    from ase.neb import DyNEB, NEBState
+except ImportError:  # newest unreleased version has changed imports
+    from ase.mep.neb import DyNEB, NEBState
 
 
 class OCPNEB(DyNEB):
@@ -81,8 +81,10 @@ def __init__(
 
         # Silence otf_graph warnings
         logging.disable(logging.WARNING)
-
-        ckpt = torch.load(checkpoint_path)
+        if cpu:
+            ckpt = torch.load(checkpoint_path, map_location=torch.device("cpu"))
+        else:
+            ckpt = torch.load(checkpoint_path)
         config = ckpt["config"]
         if "normalizer" not in config:
             del config["dataset"]["src"]