FAIR-Chem · abhshkdz · Jan 24, 2023 · Sep 30, 2022 · Oct 6, 2022 · Oct 6, 2022
diff --git a/configs/oc22/is2re/base.yml b/configs/oc22/is2re/base.yml
@@ -4,8 +4,10 @@ dataset:
   train:
     src: data/oc22/is2re/train
     normalize_labels: False
+    total_energy: True
   val:
     src: data/oc22/is2re/val_id
+    total_energy: True
 
 logger: wandb
 

diff --git a/configs/oc22/s2ef/base.yml b/configs/oc22/s2ef/base.yml
@@ -4,8 +4,10 @@ dataset:
   train:
     src: data/oc22/s2ef/train
     normalize_labels: False
+    total_energy: True
   val:
     src: data/oc22/s2ef/val_id
+    total_energy: True
 
 logger: wandb
 

diff --git a/ocpmodels/trainers/forces_trainer.py b/ocpmodels/trainers/forces_trainer.py
@@ -22,8 +22,8 @@
 from ocpmodels.common.utils import check_traj_files
 from ocpmodels.modules.evaluator import Evaluator
 from ocpmodels.modules.normalizer import Normalizer
-from ocpmodels.trainers.base_trainer import BaseTrainer
 from ocpmodels.modules.scaling.util import ensure_fitted
+from ocpmodels.trainers.base_trainer import BaseTrainer
 
 
 @registry.register_trainer("forces")
@@ -208,14 +208,22 @@ def predict(
                     )
                 ]
                 predictions["id"].extend(systemids)
-                predictions["energy"].extend(
-                    out["energy"].to(torch.float16).tolist()
-                )
                 batch_natoms = torch.cat(
                     [batch.natoms for batch in batch_list]
                 )
                 batch_fixed = torch.cat([batch.fixed for batch in batch_list])
-                forces = out["forces"].cpu().detach().to(torch.float16)
+                # Total-energy predictions must be saved in float32; the
+                # default (adsorption-energy) predictions use float16.
+                if self.config["dataset"].get("total_energy", False):
+                    predictions["energy"].extend(
+                        out["energy"].to(torch.float32).tolist()
+                    )
+                    forces = out["forces"].cpu().detach().to(torch.float32)
+                else:
+                    predictions["energy"].extend(
+                        out["energy"].to(torch.float16).tolist()
+                    )
+                    forces = out["forces"].cpu().detach().to(torch.float16)
                 per_image_forces = torch.split(forces, batch_natoms.tolist())
                 per_image_forces = [
                     force.numpy() for force in per_image_forces
@@ -247,9 +255,21 @@ def predict(
                     self.ema.restore()
                 return predictions
 
-        predictions["forces"] = np.array(predictions["forces"])
+        if self.config["dataset"].get("total_energy", False):
+            predictions["forces"] = np.array(
+                predictions["forces"], dtype="float32"
+            )
+            predictions["energy"] = np.array(
+                predictions["energy"], dtype="float32"
+            )
+        else:
+            predictions["forces"] = np.array(
+                predictions["forces"], dtype="float16"
+            )
+            predictions["energy"] = np.array(
+                predictions["energy"], dtype="float16"
+            )
         predictions["chunk_idx"] = np.array(predictions["chunk_idx"])
-        predictions["energy"] = np.array(predictions["energy"])
         predictions["id"] = np.array(predictions["id"])
         self.save_results(
             predictions, results_file, keys=["energy", "forces", "chunk_idx"]

diff --git a/scripts/make_submission_file.py b/scripts/make_submission_file.py
@@ -11,30 +11,36 @@
 
 import numpy as np
 
+SPLITS = {
+    "OC20": ["id", "ood_ads", "ood_cat", "ood_both"],
+    "OC22": ["id", "ood"],
+}
 
-def write_is2re_relaxations(paths, filename, hybrid):
+
+def write_is2re_relaxations(args, dataset):
     import ase.io
     from tqdm import tqdm
 
     submission_file = {}
 
-    if not hybrid:
-        for idx, split in enumerate(["id", "ood_ads", "ood_cat", "ood_both"]):
+    if not args.hybrid:
+        for split in SPLITS[dataset]:
             ids = []
             energies = []
-            systems = glob.glob(os.path.join(paths[idx], "*.traj"))
+            systems = glob.glob(os.path.join(vars(args)[split], "*.traj"))
             for system in tqdm(systems):
                 sid, _ = os.path.splitext(os.path.basename(system))
                 ids.append(str(sid))
+                # Read the last frame of the ML trajectory. Change "-1" to select a different frame.
                 traj = ase.io.read(system, "-1")
                 energies.append(traj.get_potential_energy())
 
             submission_file[f"{split}_ids"] = np.array(ids)
             submission_file[f"{split}_energy"] = np.array(energies)
 
     else:
-        for idx, split in enumerate(["id", "ood_ads", "ood_cat", "ood_both"]):
-            preds = np.load(paths[idx])
+        for split in SPLITS[dataset]:
+            preds = np.load(vars(args)[split])
             ids = []
             energies = []
             for sid, energy in zip(preds["ids"], preds["energy"]):
@@ -45,54 +51,71 @@ def write_is2re_relaxations(paths, filename, hybrid):
             submission_file[f"{split}_ids"] = np.array(ids)
             submission_file[f"{split}_energy"] = np.array(energies)
 
-    np.savez_compressed(filename, **submission_file)
+    np.savez_compressed(args.out_path, **submission_file)
 
 
-def write_predictions(paths, filename):
-    submission_file = {}
+def write_predictions(args, dataset):
+    if args.is2re_relaxations:
+        write_is2re_relaxations(args, dataset=dataset)
+    else:
+        submission_file = {}
+
+        for split in SPLITS[dataset]:
+            res = np.load(vars(args)[split], allow_pickle=True)
+            verify_dtype(res, dataset)
+            contents = res.files
+            for i in contents:
+                key = "_".join([split, i])
+                submission_file[key] = res[i]
+
+        np.savez_compressed(args.out_path, **submission_file)
 
-    for idx, split in enumerate(["id", "ood_ads", "ood_cat", "ood_both"]):
-        res = np.load(paths[idx], allow_pickle=True)
-        contents = res.files
-        for i in contents:
-            key = "_".join([split, i])
-            submission_file[key] = res[i]
 
-    np.savez_compressed(filename, **submission_file)
+def verify_dtype(preds, dataset):
+    if dataset == "OC22":
+        if "energy" in preds:
+            assert preds["energy"].dtype in [
+                np.float32,
+                np.float64,
+            ], "Predictions written in the wrong precision. Ensure `total_energy` flag is True in the config."
+        if "forces" in preds:
+            assert preds["forces"].dtype in [
+                np.float32,
+                np.float64,
+            ], "Predictions written in the wrong precision. Ensure `total_energy` flag is True in the config."
 
 
 def main(args):
-    id_path = args.id
-    ood_ads_path = args.ood_ads
-    ood_cat_path = args.ood_cat
-    ood_both_path = args.ood_both
+    if args.oc22:
+        for split in SPLITS["OC22"]:
+            assert vars(args).get(split), f"Missing {split} split for OC22"
+        dataset = "OC22"
+    else:
+        for split in SPLITS["OC20"]:
+            assert vars(args).get(split), f"Missing {split} split for OC20"
+        dataset = "OC20"
 
-    paths = [id_path, ood_ads_path, ood_cat_path, ood_both_path]
     if not args.out_path.endswith(".npz"):
         args.out_path = args.out_path + ".npz"
 
-    if not args.is2re_relaxations:
-        write_predictions(paths, filename=args.out_path)
-    else:
-        write_is2re_relaxations(
-            paths, filename=args.out_path, hybrid=args.hybrid
-        )
+    write_predictions(args, dataset=dataset)
     print(f"Results saved to {args.out_path} successfully.")
 
 
 if __name__ == "__main__":
     """
     Create a submission file for evalAI. Ensure that for the task you are
-    submitting for you have generated results files on each of the 4 splits -
-    id, ood_ads, ood_cat, ood_both.
+    submitting for, you have generated results files for each of the splits:
+        OC20: id, ood_ads, ood_cat, ood_both
+        OC22: id, ood
 
     Results file can be obtained as follows for the various tasks:
 
     S2EF: config["mode"] = "predict"
     IS2RE: config["mode"] = "predict"
     IS2RS: config["mode"] = "run-relaxations" and config["task"]["write_pos"] = True
 
-    Use this script to join the 4 results files in the format evalAI expects
+    Use this script to join the results files (4 for OC20, 2 for OC22) into the format evalAI expects for
     submissions.
 
     If writing IS2RE predictions from relaxations, paths must be directories
@@ -106,10 +129,21 @@ def main(args):
     """
 
     parser = argparse.ArgumentParser()
-    parser.add_argument("--id", help="Path to ID results")
-    parser.add_argument("--ood-ads", help="Path to OOD-Ads results")
-    parser.add_argument("--ood-cat", help="Path to OOD-Cat results")
-    parser.add_argument("--ood-both", help="Path to OOD-Both results")
+    parser.add_argument(
+        "--id", help="Path to ID results. Required for OC20 and OC22."
+    )
+    parser.add_argument(
+        "--ood-ads", help="Path to OOD-Ads results. Required only for OC20."
+    )
+    parser.add_argument(
+        "--ood-cat", help="Path to OOD-Cat results. Required only for OC20."
+    )
+    parser.add_argument(
+        "--ood-both", help="Path to OOD-Both results. Required only for OC20."
+    )
+    parser.add_argument(
+        "--ood", help="Path to OOD OC22 results. Required only for OC22."
+    )
     parser.add_argument("--out-path", help="Path to write predictions to.")
     parser.add_argument(
         "--is2re-relaxations",
@@ -121,6 +155,9 @@ def main(args):
         action="store_true",
         help="Write IS2RE results from S2EF prediction files. Paths specified correspond to S2EF NPZ files.",
     )
+    parser.add_argument(
+        "--oc22", action="store_true", help="Write OC22 prediction files."
+    )
 
     args = parser.parse_args()
     main(args)