run black

mir-group · Jun 9, 2022 · 3f36cb6 · 3f36cb6
1 parent a62306c
commit 3f36cb6
Show file tree

Hide file tree

Showing 9 changed files with 238 additions and 17 deletions.
diff --git a/flare/bffs/sgp/calculator.py b/flare/bffs/sgp/calculator.py
@@ -1,6 +1,7 @@
 from ase.calculators.calculator import Calculator, all_changes
 from flare.utils import NumpyEncoder
 import warnings
+
 try:
     from ._C_flare import Structure
 except Exception as e:
@@ -165,13 +166,17 @@ def from_file(name):
 
         return calc, kernels
 
-    def build_map(self, filename="lmp.flare", contributor="user", map_uncertainty=False):
+    def build_map(
+        self, filename="lmp.flare", contributor="user", map_uncertainty=False
+    ):
         # write potential file for lammps
         self.gp_model.sparse_gp.write_mapping_coefficients(filename, contributor, 0)
 
         # write uncertainty file(s)
         if map_uncertainty:
-            self.gp_model.write_varmap_coefficients(f"map_unc_{filename}", contributor, 0)
+            self.gp_model.write_varmap_coefficients(
+                f"map_unc_{filename}", contributor, 0
+            )
         else:
             # write L_inv and sparse descriptors for variance in lammps
             self.gp_model.sparse_gp.write_L_inverse(f"L_inv_{filename}", contributor)

diff --git a/flare/bffs/sgp/sparse_gp.py b/flare/bffs/sgp/sparse_gp.py
@@ -6,6 +6,7 @@
 import warnings
 from flare.atoms import FLARE_Atoms
 from flare.utils import NumpyEncoder
+
 try:
     from ._C_flare import SparseGP, Structure, NormalizedDotProduct, B2
 except Exception as e:
@@ -303,7 +304,7 @@ def update_db(
         energy: float = None,
         stress: "ndarray" = None,
         mode: str = "specific",
-        sgp = None,  # for creating sgp_var
+        sgp=None,  # for creating sgp_var
         update_qr=True,
         atom_indices=[-1],
     ):

diff --git a/flare/io/output.py b/flare/io/output.py
@@ -631,6 +631,7 @@ def set_logger(
         add_file(logger, fileout_name, verbose)
     return logger
 
+
 def compute_mae(
     atoms,
     output_name,
@@ -672,9 +673,7 @@ def compute_mae(
     per_species_num = np.zeros(len(unique_species))
     for a in range(atoms.nat):
         species_ind = unique_species.index(atoms.numbers[a])
-        per_species_mae[species_ind] += np.mean(
-            np.abs(dft_forces[a] - gp_forces[a])
-        )
+        per_species_mae[species_ind] += np.mean(np.abs(dft_forces[a] - gp_forces[a]))
         per_species_mav[species_ind] += np.mean(np.abs(dft_forces[a]))
         per_species_num[species_ind] += 1
     per_species_mae /= per_species_num

diff --git a/flare/learners/otf.py b/flare/learners/otf.py
@@ -666,7 +666,7 @@ def update_gp(
     def train_gp(self):
         """Optimizes the hyperparameters of the current GP model."""
         tic = time.time()
-        
+
         self.gp.train(logger_name=self.output.basename + "hyps")
 
         self.output.write_wall_time(tic, task="Train Hyps")

diff --git a/flare/learners/utils.py b/flare/learners/utils.py
@@ -419,7 +419,7 @@ def get_env_indices(
         "target_atoms" in structure.info
     ), "The current frame does not have target_atoms"
     target_atoms = structure.info.get("target_atoms")
-    if isinstance(target_atoms, (int, np.int64)): # length 1
+    if isinstance(target_atoms, (int, np.int64)):  # length 1
         target_atoms = [target_atoms]
     elif target_atoms is None:
         target_atoms = []

diff --git a/flare/md/fake.py b/flare/md/fake.py
@@ -16,10 +16,11 @@ class FakeMD(MolecularDynamics):
         timestep (float): The time step.
         filenames (list): The name of the trajectory file to read in.
         format (str): The format supported by ASE IO.
-        index (float or str): The indices of frames to read from the 
+        index (float or str): The indices of frames to read from the
             trajectory. Default is ":", which reads the whole trajectory.
         io_kwargs (dict): The arguments needed for reading specific format.
     """
+
     def __init__(
         self,
         atoms,
@@ -112,7 +113,7 @@ def data_distribution(self, dft_frames):
         dft_frames = np.array(dft_frames)
         stat_strings = ["Data distribution:"]
         for i in range(N):
-            dft_num = np.sum((dft_frames < cum_sum[i+1]) * (dft_frames >= cum_sum[i]))
+            dft_num = np.sum((dft_frames < cum_sum[i + 1]) * (dft_frames >= cum_sum[i]))
             stat_strings.append(f"{self.stat_trj[i]}: {dft_num}")
         return stat_strings
 
@@ -124,7 +125,7 @@ class FakeDFT(Calculator):
     Args:
         filename (str): The name of the trajectory file to read in.
         format (str): The format supported by ASE IO.
-        index (float or str): The indices of frames to read from the 
+        index (float or str): The indices of frames to read from the
             trajectory. Default is ":", which reads the whole trajectory.
     """
 

diff --git a/flare/md/lammps.py b/flare/md/lammps.py
@@ -20,7 +20,7 @@ class LAMMPS_MOD(LAMMPS):
     to allow for more flexible input parameters, including compute,
     region, dump, fix/nvt, fix/npt etc.
 
-    The input arguments are the same as 
+    The input arguments are the same as
     https://databases.fysik.dtu.dk/ase/ase/calculators/lammps.html#ase.calculators.lammpsrun.LAMMPS
 
     Example:
@@ -34,7 +34,7 @@ class LAMMPS_MOD(LAMMPS):
         Ni = bulk("Ni", cubic=True)
         H = Atom("H", position=Ni.cell.diagonal() / 2)
         NiH = Ni + H
-    
+
         files = []
         param_dict = {
             "pair_style": "lj/cut 2.5",
@@ -46,7 +46,7 @@ class LAMMPS_MOD(LAMMPS):
             "timestep": 0.001,
             "keep_alive": False,
         }
-    
+
         lmp_calc = LAMMPS_MOD(
             command=<lammps_executable>,
             label="my_lammps",
@@ -61,7 +61,7 @@ class LAMMPS_MOD(LAMMPS):
 
     Supported customized commands for LAMMPS input:
 
-    .. code-block:: 
+    .. code-block::
 
         mass (set by arg `masses`)
         package
@@ -98,7 +98,7 @@ class LAMMPS_MOD(LAMMPS):
         thermo_style custom (thermo_args)
         thermo_modify flush yes format float %23.16g
         thermo 1
-    
+
 
     Customized parameters:
 
@@ -484,7 +484,10 @@ def check_sgp_match(atoms, sgp_calc, logger, specorder, command):
         assert np.allclose(lmp_energy, gp_energy, atol=1e-3)
         assert np.allclose(lmp_forces, gp_forces, atol=1e-3)
         assert np.allclose(lmp_stress, gp_stress, atol=1e-4)
-        assert np.allclose(lmp_stds[:, 0], gp_stds[:, 0], atol=1e-4), (lmp_stds[:, 0], gp_stds[:, 0])
+        assert np.allclose(lmp_stds[:, 0], gp_stds[:, 0], atol=1e-4), (
+            lmp_stds[:, 0],
+            gp_stds[:, 0],
+        )
     except:
         # if the trajectory does not match sgp, this is probably because
         # 1. the dumped atomic positions in LAMMPS lose precision, or

diff --git a/flare/scripts/rebuild.py b/flare/scripts/rebuild.py
@@ -0,0 +1,30 @@
+import yaml, sys
+from flare.bffs.sgp.calculator import SGP_Calculator
+
+
+def build_map(config):
+    flare_file = config.get("file")
+    lmp_name = config.get("lmp_name")
+    contributor = config.get("contributor")
+    sgp_calc, _ = SGP_Calculator.from_file(flare_file)
+    sgp_calc.gp_model.write_mapping_coefficients(lmp_name, contributor, 0)
+
+
+def rebuild_gp(config):
+    flare_config = config["flare_calc"]
+
+    # get the SGP settings from file
+    flare_from_file, _ = SGP_Calculator.from_file(flare_config.get("file"), build=False)
+
+    flare_from_file
+
+
+def main():
+    with open(sys.argv[1], "r") as f:
+        config = yaml.safe_load(f)
+
+    mode = config.get("otf").get("mode", "fresh")
+    if mode == "fresh":
+        fresh_start_otf(config)
+    elif mode == "restart":
+        restart_otf(config)
diff --git a/flare/utils/regressor.py b/flare/utils/regressor.py
@@ -0,0 +1,182 @@
+# Downloaded from: https://github.com/mir-group/nequip/blob/main/nequip/utils/regressor.py
+# nequip/nequip/utils/regressor.py
+# Author: Lixin Sun
+
+import logging
+import numpy as np
+from typing import Optional
+from sklearn.gaussian_process import GaussianProcessRegressor
+from sklearn.gaussian_process.kernels import DotProduct, Kernel, Hyperparameter
+
+
+def solver(X, y, regressor: Optional[str] = "NormalizedGaussianProcess", **kwargs):
+    if regressor == "GaussianProcess":
+        return gp(X, y, **kwargs)
+    elif regressor == "NormalizedGaussianProcess":
+        return normalized_gp(X, y, **kwargs)
+    else:
+        raise NotImplementedError(f"{regressor} is not implemented")
+
+
+def normalized_gp(X, y, **kwargs):
+    feature_rms = 1.0 / np.sqrt(np.average(X**2, axis=0))
+    feature_rms = np.nan_to_num(feature_rms, 1)
+    y_mean = np.sum(y) / np.sum(X)
+    mean, std = base_gp(
+        X,
+        y - (np.sum(X, axis=1) * y_mean).reshape(y.shape),
+        NormalizedDotProduct,
+        {"diagonal_elements": feature_rms},
+        **kwargs,
+    )
+    return mean + y_mean, std
+
+
+def gp(X, y, **kwargs):
+    return base_gp(
+        X, y, DotProduct, {"sigma_0": 0, "sigma_0_bounds": "fixed"}, **kwargs
+    )
+
+
+def base_gp(
+    X,
+    y,
+    kernel,
+    kernel_kwargs,
+    alpha: Optional[float] = 0.1,
+    max_iteration: int = 20,
+    stride: Optional[int] = None,
+):
+
+    if len(y.shape) == 1:
+        y = y.reshape([-1, 1])
+
+    if stride is not None:
+        X = X[::stride]
+        y = y[::stride]
+
+    not_fit = True
+    iteration = 0
+    mean = None
+    std = None
+    while not_fit:
+        logging.debug(f"GP fitting iteration {iteration} {alpha}")
+        try:
+            _kernel = kernel(**kernel_kwargs)
+            gpr = GaussianProcessRegressor(kernel=_kernel, random_state=0, alpha=alpha)
+            gpr = gpr.fit(X, y)
+
+            vec = np.diag(np.ones(X.shape[1]))
+            mean, std = gpr.predict(vec, return_std=True)
+
+            mean = np.reshape(mean, -1)
+            # ignore all the off-diagonal terms
+            std = np.reshape(std, -1)
+            likelihood = gpr.log_marginal_likelihood()
+
+            res = np.sqrt(np.mean(np.square(np.matmul(X, mean.reshape([-1, 1])) - y)))
+
+            logging.debug(
+                f"GP fitting: alpha {alpha}:\n"
+                f"            residue {res}\n"
+                f"            mean {mean} std {std}\n"
+                f"            log marginal likelihood {likelihood}"
+            )
+            not_fit = False
+
+        except Exception as e:
+            logging.info(f"GP fitting failed for alpha={alpha} and {e.args}")
+            if alpha == 0 or alpha is None:
+                logging.info("try a non-zero alpha")
+                not_fit = False
+                raise ValueError(
+                    f"Please set the {alpha} to non-zero value. \n"
+                    "The dataset energy is rank deficient to be solved with GP"
+                )
+            else:
+                alpha = alpha * 2
+                iteration += 1
+                logging.debug(f"           increase alpha to {alpha}")
+
+            if iteration >= max_iteration or not_fit is False:
+                raise ValueError(
+                    "Please set the per species shift and scale to zeros and ones. \n"
+                    "The dataset energy is to diverge to be solved with GP"
+                )
+
+    return mean, std
+
+
+class NormalizedDotProduct(Kernel):
+    r"""Dot-Product kernel.
+    .. math::
+        k(x_i, x_j) = x_i \cdot A \cdot x_j
+    """
+
+    def __init__(self, diagonal_elements):
+        # TO DO: check shape
+        self.diagonal_elements = diagonal_elements
+        self.A = np.diag(diagonal_elements)
+
+    def __call__(self, X, Y=None, eval_gradient=False):
+        """Return the kernel k(X, Y) and optionally its gradient.
+        Parameters
+        ----------
+        X : ndarray of shape (n_samples_X, n_features)
+            Left argument of the returned kernel k(X, Y)
+        Y : ndarray of shape (n_samples_Y, n_features), default=None
+            Right argument of the returned kernel k(X, Y). If None, k(X, X)
+            if evaluated instead.
+        eval_gradient : bool, default=False
+            Determines whether the gradient with respect to the log of
+            the kernel hyperparameter is computed.
+            Only supported when Y is None.
+        Returns
+        -------
+        K : ndarray of shape (n_samples_X, n_samples_Y)
+            Kernel k(X, Y)
+        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
+                optional
+            The gradient of the kernel k(X, X) with respect to the log of the
+            hyperparameter of the kernel. Only returned when `eval_gradient`
+            is True.
+        """
+        X = np.atleast_2d(X)
+        if Y is None:
+            K = (X.dot(self.A)).dot(X.T)
+        else:
+            if eval_gradient:
+                raise ValueError("Gradient can only be evaluated when Y is None.")
+            K = (X.dot(self.A)).dot(Y.T)
+
+        if eval_gradient:
+            return K, np.empty((X.shape[0], X.shape[0], 0))
+        else:
+            return K
+
+    def diag(self, X):
+        """Returns the diagonal of the kernel k(X, X).
+        The result of this method is identical to np.diag(self(X)); however,
+        it can be evaluated more efficiently since only the diagonal is
+        evaluated.
+        Parameters
+        ----------
+        X : ndarray of shape (n_samples_X, n_features)
+            Left argument of the returned kernel k(X, Y).
+        Returns
+        -------
+        K_diag : ndarray of shape (n_samples_X,)
+            Diagonal of kernel k(X, X).
+        """
+        return np.einsum("ij,ij,jj->i", X, X, self.A)
+
+    def __repr__(self):
+        return ""
+
+    def is_stationary(self):
+        """Returns whether the kernel is stationary."""
+        return False
+
+    @property
+    def hyperparameter_diagonal_elements(self):
+        return Hyperparameter("diagonal_elements", "numeric", "fixed")