mir-group · nw13slx · Oct 21, 2019 · Oct 14, 2019 · Oct 14, 2019 · Oct 14, 2019
diff --git a/flare/gp.py b/flare/gp.py
@@ -1,20 +1,24 @@
 """Gaussian process model of the Born Oppenheimer potential energy surface."""
 import math
-from typing import List, Callable
+import pickle
+import json
+
 import numpy as np
+
+from typing import List, Callable
 from scipy.linalg import solve_triangular
 from scipy.optimize import minimize
+
 from flare.env import AtomicEnvironment
 from flare.struc import Structure
 from flare.gp_algebra import get_ky_and_hyp, get_like_grad_from_mats, \
     get_neg_likelihood, get_neg_like_grad, get_ky_and_hyp_par
 from flare.kernels import str_to_kernel
 from flare.mc_simple import str_to_mc_kernel
-
+from flare.util import NumpyEncoder
 
 class GaussianProcess:
     """ Gaussian Process Regression Model.
-
     Implementation is based on Algorithm 2.1 (pg. 19) of
     "Gaussian Processes for Machine Learning" by Rasmussen and Williams"""
 
@@ -59,7 +63,6 @@ def __init__(self, kernel: Callable,
     def update_db(self, struc: Structure, forces: list,
                   custom_range: List[int] = ()):
         """Given structure and forces, add to training set.
-
         :param struc: structure to add to db
         :type struc: Structure
         :param forces: list of corresponding forces to add to db
@@ -86,7 +89,6 @@ def add_one_env(self, env: AtomicEnvironment,
                     force, train: bool = False, **kwargs):
         """
         Tool to add a single environment / force pair into the training set.
-
         :param force:
         :param env:
         :param force: (x,y,z) component associated with environment
@@ -103,7 +105,6 @@ def add_one_env(self, env: AtomicEnvironment,
     @staticmethod
     def force_list_to_np(forces: list):
         """ Convert list of forces to numpy array of forces.
-
         :param forces: list of forces to convert
         :type forces: list<float>
         :return: numpy array forces
@@ -203,7 +204,6 @@ def predict(self, x_t: AtomicEnvironment, d: int) -> [float, float]:
 
     def predict_local_energy(self, x_t: AtomicEnvironment) -> float:
         """Predict the local energy of an atomic environment.
-
         :param x_t: Atomic environment of test atom.
         :type x_t: AtomicEnvironment
         :return: local energy in eV (up to a constant).
@@ -238,7 +238,6 @@ def get_kernel_vector(self, x: AtomicEnvironment,
         """
         Compute kernel vector, comparing input environment to all environments
         in the GP's training set.
-
         :param x: data point to compare against kernel matrix
         :type x: AtomicEnvironment
         :param d_1: Cartesian component of force vector to get (1=x,2=y,3=z)
@@ -427,3 +426,27 @@ def from_dict(dictionary):
         new_gp.likelihood_gradient = dictionary['likelihood_gradient']
 
         return new_gp
+
+    def write_model(self, name: str, format: str = 'json'):
+        """
+        Write model in a variety of formats to a file for later re-use.
+
+        :param name: Output name
+        :param format:
+        :return:
+        """
+
+        supported_formats = ['json', 'pickle', 'binary']
+
+        if format.lower() == 'json':
+            with open(name, 'w') as f:
+                json.dump(self.as_dict(), f, cls=NumpyEncoder)
+
+        elif format.lower() == 'pickle' or format.lower() == 'binary':
+            with open(name, 'wb') as f:
+                pickle.dump(self, f)
+
+        else:
+            raise ValueError("Output format not supported: try from "
+                             "{}".format(supported_formats))
+
diff --git a/flare/gp_from_aimd.py b/flare/gp_from_aimd.py
@@ -4,17 +4,18 @@
 MD engine run.
 """
 import time
-from typing import List, Tuple
-from copy import deepcopy
-
+import json
 import pickle
 import numpy as np
 
+from typing import List, Tuple
+from copy import deepcopy
+
 from flare.output import Output
-from flare.struc import Structure, get_unique_species
+from flare.struc import Structure
 from flare.gp import GaussianProcess
 from flare.env import AtomicEnvironment
-from flare.util import Z_to_element
+from flare.util import Z_to_element, element_to_Z
 from flare.predict import predict_on_structure, \
     predict_on_structure_par, predict_on_structure_en, \
     predict_on_structure_par_en
@@ -37,7 +38,9 @@ def __init__(self, frames: List[Structure],
                  pre_train_seed_frames: List[Structure] = None,
                  pre_train_seed_envs: List[Tuple[AtomicEnvironment,
                                                  np.array]] = None,
-                 pre_train_atoms_per_element: dict = None):
+                 pre_train_atoms_per_element: dict = None,
+                 checkpoint_interval: int = None,
+                 model_format: str = 'json'):
         """
         Class which trains a GP off of an AIMD trajectory, and generates
         error statistics between the DFT and GP calls.
@@ -57,12 +60,14 @@ def __init__(self, frames: List[Structure],
         :param n_cpus: Number of CPUs to parallelize over
         :param shuffle_frames: Randomize order of frames for better training
         :param verbose: 0: Silent, 1: Minimal, 2: Lots of information
-        :param model_write: Write output model here
+        :param model_write: Where to write output model
         :param pre_train_on_skips: Train model on every n frames before running
         :param pre_train_seed_frames: Frames to train on before running
         :param pre_train_seed_envs: Environments to train on before running
         :param pre_train_atoms_per_element: Max # of environments to add from
         each species in the seed pre-training steps
+        :param checkpoint_interval: How often to write model after trainings
+        :param model_format: Format to write GP model to
         """
 
         self.frames = frames
@@ -97,15 +102,26 @@ def __init__(self, frames: List[Structure],
 
         # To later be filled in using the time library
         self.start_time = None
-        self.pickle_name = model_write
 
         self.pre_train_on_skips = pre_train_on_skips
         self.seed_envs = [] if pre_train_seed_envs is None else \
             pre_train_seed_envs
         self.seed_frames = [] if pre_train_seed_frames is None \
             else pre_train_seed_frames
         self.pre_train_env_per_species = {} if pre_train_atoms_per_element \
-                                               is None else pre_train_atoms_per_element
+                                     is None else pre_train_atoms_per_element
+
+        # Convert to Coded Species
+        if self.pre_train_env_per_species:
+            pre_train_species = list(self.pre_train_env_per_species.keys())
+            for key in pre_train_species:
+                self.pre_train_env_per_species[element_to_Z(key)] = \
+                    self.pre_train_env_per_species[key]
+
+        # Output parameters
+        self.checkpoint_interval = checkpoint_interval
+        self.model_write = model_write
+        self.model_format = model_format
 
     def pre_run(self):
         """
@@ -244,9 +260,8 @@ def run(self):
 
         self.output.conclude_run()
 
-        if self.pickle_name:
-            with open(self.pickle_name, 'wb') as f:
-                pickle.dump(self.gp, f)
+        if self.model_write:
+            self.gp.write_model(self.model_write, self.model_format)
 
     def update_gp_and_print(self, frame: Structure, train_atoms: List[int],
                             train: bool=True):
@@ -284,6 +299,11 @@ def train_gp(self):
                                self.gp.likelihood, self.gp.likelihood_gradient)
         self.train_count += 1
 
+        if self.checkpoint_interval \
+                and self.train_count % self.checkpoint_interval == 0 \
+                and self.model_write:
+            self.gp.write_model(self.model_write, self.model_format)
+
     def is_std_in_bound(self, frame: Structure)->(bool, List[int]):
         """
         If the predicted variance is too high, returns a list of atoms
@@ -324,3 +344,6 @@ def is_std_in_bound(self, frame: Structure)->(bool, List[int]):
             return False, target_atoms
         else:
             return True, [-1]
+
+
+
diff --git a/tests/test_gp.py b/tests/test_gp.py
@@ -1,5 +1,11 @@
 import pytest
+import pickle
+import os
+import json
 import numpy as np
+
+from pytest import raises
+
 from flare.gp import GaussianProcess
 from flare.env import AtomicEnvironment
 from flare.struc import Structure
@@ -256,3 +262,29 @@ def test_serialization_method(two_body_gp, test_point):
     for d in [0, 1, 2]:
         assert np.all(two_body_gp.predict(x_t=test_point, d=d) ==
                       new_gp.predict(x_t=test_point, d=d))
+
+
+def test_load_and_reload(two_body_gp, test_point):
+
+    two_body_gp.write_model('two_body.pickle', 'pickle')
+
+    with open('two_body.pickle', 'rb') as f:
+        new_gp = pickle.load(f)
+
+    for d in [0, 1, 2]:
+        assert np.all(two_body_gp.predict(x_t=test_point, d=d) ==
+                      new_gp.predict(x_t=test_point, d=d))
+    os.remove('two_body.pickle')
+
+    two_body_gp.write_model('two_body.json')
+    with open('two_body.json', 'r') as f:
+        new_gp = GaussianProcess.from_dict(json.loads(f.readline()))
+    for d in [0, 1, 2]:
+        assert np.all(two_body_gp.predict(x_t=test_point, d=d) ==
+                      new_gp.predict(x_t=test_point, d=d))
+    os.remove('two_body.json')
+
+    with raises(ValueError):
+        two_body_gp.write_model('two_body.pickle', 'cucumber')
+
+
diff --git a/tests/test_gp_from_aimd.py b/tests/test_gp_from_aimd.py
@@ -1,7 +1,7 @@
 import pytest
 import numpy as np
-import sys
-from flare.gp import GaussianProcess
+import pickle
+
 from flare.env import AtomicEnvironment
 from flare.struc import Structure
 from flare.kernels import two_plus_three_body, two_plus_three_body_grad
@@ -70,6 +70,7 @@ def test_load_trained_gp_and_run(methanol_gp):
     os.system('rm ./gp_from_aimd.xyz')
     os.system('rm ./gp_from_aimd-f.xyz')
 
+
 def test_load_one_frame_and_run():
     the_gp = GaussianProcess(kernel=two_plus_three_body_mc,
                              kernel_grad=two_plus_three_body_mc_grad,
@@ -123,15 +124,27 @@ def test_seed_and_run():
                            skip=15,
                            pre_train_seed_envs=seeds,
                            pre_train_seed_frames=[frames[-1]],
-                           model_write='test_methanol_gp.pickle',
-                           max_atoms_from_frame=4)
+                           max_atoms_from_frame=4,
+                           model_write='meth_test.pickle',
+                           model_format='pickle',
+                           checkpoint_interval=1,
+                           pre_train_atoms_per_element={'H': 1})
 
     tt.run()
 
+    with open('meth_test.pickle', 'rb') as f:
+        new_gp = pickle.load(f)
+
+    test_env = envs[0]
+
+    for d in [0, 1, 2]:
+        assert np.all(the_gp.predict(x_t=test_env, d=d) ==
+                      new_gp.predict(x_t=test_env, d=d))
+
     os.system('rm ./gp_from_aimd.out')
     os.system('rm ./gp_from_aimd.xyz')
     os.system('rm ./gp_from_aimd-f.xyz')
-    os.system('rm ./test_methanol_gp.pickle')
+    os.system('rm ./meth_test.pickle')
 
 
 def test_uncertainty_threshold(fake_gp):