Skip to content

Commit

Permalink
New normalization energy + Fixes + Merge
Browse files: browse the repository at this point in the history
  • Loading branch information
FNTwin committed Jan 24, 2024
1 parent 14d3e85 commit 2b88e10
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 8 deletions.
29 changes: 22 additions & 7 deletions src/openqdc/datasets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,18 @@ def _precompute_statistics(self, overwrite_local_cache: bool = False):
logger.info("Loaded precomputed statistics")
else:
logger.info("Precomputing relevant statistics")
(formation_E_mean, formation_E_std, total_E_mean, total_E_std) = self._precompute_E()
(
inter_E_mean,
inter_E_std,
formation_E_mean,
formation_E_std,
total_E_mean,
total_E_std,
) = self._precompute_E()
forces_dict = self._precompute_F()
stats = {
"formation": {"energy": {"mean": formation_E_mean, "std": formation_E_std}, "forces": forces_dict},
"inter": {"energy": {"mean": inter_E_mean, "std": inter_E_std}, "forces": forces_dict},
"total": {"energy": {"mean": total_E_mean, "std": total_E_std}, "forces": forces_dict},
}
with open(local_path, "wb") as f:
Expand All @@ -162,20 +170,24 @@ def _precompute_E(self):
s = np.array(self.data["atomic_inputs"][:, :2], dtype=int)
s[:, 1] += IsolatedAtomEnergyFactory.max_charge
matrixs = [matrix[s[:, 0], s[:, 1]] for matrix in self.__isolated_atom_energies__]
converted_energy_data = self.convert_energy(self.data["energies"])
converted_energy_data = self.data["energies"]
# calculation per molecule formation energy statistics
E = []
for i, matrix in enumerate(matrixs):
c = np.cumsum(np.append([0], matrix))[splits_idx]
c[1:] = c[1:] - c[:-1]
E.append(converted_energy_data[:, i] - c)
E = np.array(E).T
inter_E_mean = np.nanmean(E / self.data["n_atoms"][:, None], axis=0)
inter_E_std = np.nanstd(E / self.data["n_atoms"][:, None], axis=0)
formation_E_mean = np.nanmean(E, axis=0)
formation_E_std = np.nanstd(E, axis=0)
total_E_mean = np.nanmean(converted_energy_data, axis=0)
total_E_std = np.nanstd(converted_energy_data, axis=0)

return (
np.atleast_2d(inter_E_mean),
np.atleast_2d(inter_E_std),
np.atleast_2d(formation_E_mean),
np.atleast_2d(formation_E_std),
np.atleast_2d(total_E_mean),
Expand Down Expand Up @@ -265,10 +277,13 @@ def atoms_per_molecules(self):

def _set_units(self, en, ds):
old_en, old_ds = self.energy_unit, self.distance_unit
if en is not None:
self.set_energy_unit(en)
if ds is not None:
self.set_distance_unit(ds)
en = en if en is not None else old_en
ds = ds if ds is not None else old_ds

# if en is None:
self.set_energy_unit(en)
# if ds is not None:
self.set_distance_unit(ds)
if self.__force_methods__:
self.__forces_unit__ = self.energy_unit + "/" + self.distance_unit
self.__class__.__fn_forces__ = get_conversion(old_en + "/" + old_ds, self.__forces_unit__)
Expand Down Expand Up @@ -566,7 +581,7 @@ def get_statistics(self, normalization: str = "formation", return_none: bool = T
"""
Get the statistics of the dataset.
normalization : str, optional
Type of energy, by default "formation", must be one of ["formation", "total"]
Type of energy, by default "formation", must be one of ["formation", "total", "inter"]
return_none : bool, optional
Whether to return None if the statistics for the forces are not available, by default True
Otherwise, the statistics for the forces are set to 0.0
Expand Down
1 change: 1 addition & 0 deletions src/openqdc/utils/atomization_energies.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,7 @@ def get_matrix(level_of_theory: str) -> np.ndarray:
("V", -3): -942.9562885518893,
("V", -2): -943.4308412125442,
("V", -1): -943.6771718004992,
("V", 0): -943.5386343398394,
("V", 1): -943.4482869898394,
("V", 2): -942.9322435731367,
("V", 3): -941.7985542135455,
Expand Down
2 changes: 1 addition & 1 deletion src/openqdc/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

BOHR2ANG = 0.52917721092

POSSIBLE_NORMALIZATION = ["formation", "total"]
POSSIBLE_NORMALIZATION = ["formation", "total", "inter"]

NOT_DEFINED = {
"mean": None,
Expand Down

0 comments on commit 2b88e10

Please sign in to comment.