diff --git a/docs/source/tutorials/after_training.ipynb b/docs/source/tutorials/after_training.ipynb
index f10468105..44e6bb6f1 100644
--- a/docs/source/tutorials/after_training.ipynb
+++ b/docs/source/tutorials/after_training.ipynb
@@ -21,7 +21,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -45,24 +45,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 2,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Final name of the gp instance is default_gp_2\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "gp_model = otf_object.make_gp(hyp_no=hyp_no)\n",
     "gp_model.parallel = True\n",
     "gp_model.hyp_labels = ['sig2', 'ls2', 'sig3', 'ls3', 'noise']\n",
     "\n",
     "# write model to a binary file\n",
-    "gp_model.write_model('AgI.gp', format='pickle')"
+    "gp_model.write_model('AgI.gp', format='json')"
    ]
   },
   {
@@ -75,21 +67,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 3,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Final name of the gp instance is default_gp_2_2\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from flare.gp import GaussianProcess\n",
     "\n",
-    "gp_model = GaussianProcess.from_file('AgI.gp.pickle')"
+    "gp_model = GaussianProcess.from_file('AgI.gp.json')"
    ]
   },
   {
@@ -106,9 +90,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/xiey/Google Drive/flare/flare/mgp/mapxb.py:525: UserWarning: The minimal distance in training data is lower than the current lower bound, will reset lower bound to 2.129780094032895\n",
+      "  f\"The minimal distance in training data is lower than \"\n",
+      "/Users/xiey/Google Drive/flare/flare/mgp/mapxb.py:525: UserWarning: The minimal distance in training data is lower than the current lower bound, will reset lower bound to 2.129780094032895\n",
+      "  f\"The minimal distance in training data is lower than \"\n",
+      "/Users/xiey/Google Drive/flare/flare/mgp/mapxb.py:525: UserWarning: The minimal distance in training data is lower than the current lower bound, will reset lower bound to 2.129780094032895\n",
+      "  f\"The minimal distance in training data is lower than \"\n"
+     ]
+    }
+   ],
    "source": [
     "from flare.mgp import MappedGaussianProcess\n",
     "\n",
@@ -116,10 +113,10 @@
     "               'threebody': {'grid_num': [20, 20, 20]}}\n",
     "\n",
     "data = gp_model.training_statistics\n",
-    "lammps_location = 'AgI_Molten_15.txt'\n",
+    "lammps_location = 'AgI_Molten'\n",
     "\n",
     "mgp_model = MappedGaussianProcess(grid_params, data['species'], \n",
-    "    map_force=False, lmp_file_name='AgI_Molten_15.txt', n_cpus=1)\n",
+    "    var_map=None, lmp_file_name='AgI_Molten', n_cpus=1)\n",
     "mgp_model.build_map(gp_model)"
    ]
   },
@@ -145,28 +142,36 @@
     "pair_coeff * * <lmp_file_name> <chemical_symbols> yes/no yes/no\n",
     "```\n",
     "\n",
-    "An example is using coefficient file `AgI_Molten_15.txt` for AgI system, \n",
+    "An example is using coefficient file `AgI_Molten.mgp` for AgI system, \n",
     "with two-body (the 1st `yes`) together with three-body (the 2nd `yes`).\n",
     "\n",
     "```\n",
-    "pair_coeff * * AgI_Molten_15.txt Ag I yes yes\n",
-    "```\n",
-    "\n",
-    "**Note**: if you build force mapping (`map_force=True`) instead of energy mapping, please use\n",
-    "```\n",
-    "pair_style mgpf\n",
+    "pair_coeff * * AgI_Molten.mgp Ag I yes yes\n",
     "```\n",
     "\n",
-    "2. The third way is to use the ASE LAMMPS interface"
+    "2. Another way is to use the ASE LAMMPS interface"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/xiey/anaconda3/lib/python3.7/site-packages/ase/calculators/lammpsrun.py:189: UserWarning: You are using an old syntax to set 'parameters'.\n",
+      "Please use LAMMPSRUN.set().\n",
+      "  warnings.warn(self.legacy_warn_string.format(\"parameters\"))\n"
+     ]
+    }
+   ],
    "source": [
+    "import os\n",
+    "from flare.utils.element_coder import _Z_to_mass, _element_to_Z\n",
     "from flare.ase.calculator import FLARE_Calculator\n",
+    "from ase.calculators.lammpsrun import LAMMPS\n",
     "\n",
     "# get chemical symbols, masses etc.\n",
     "species = gp_model.training_statistics['species']\n",
@@ -177,9 +182,9 @@
     "parameters = {'command': os.environ.get('lmp'), # set up executable for ASE\n",
     "              'newton': 'off',\n",
     "              'pair_style': 'mgp',\n",
-    "              'pair_coeff': [f'* * {lammps_location} {specie_symbol_list} yes yes'],\n",
+    "              'pair_coeff': [f'* * {lammps_location + \".mgp\"} {specie_symbol_list} yes yes'],\n",
     "              'mass': masses}\n",
-    "files = [lammps_location]\n",
+    "files = [lammps_location + \".mgp\"]\n",
     "\n",
     "# create ASE calc\n",
     "lmp_calc = LAMMPS(label=f'tmp_AgI', keep_tmp_files=True, tmp_dir='./tmp/',\n",
@@ -190,7 +195,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "3. The second way to run LAMMPS is using our LAMMPS interface, please set the\n",
+    "3. The third way to run LAMMPS is using our LAMMPS interface, please set the\n",
     "environment variable `$lmp` to the executable."
    ]
   },
diff --git a/docs/source/tutorials/ase.ipynb b/docs/source/tutorials/ase.ipynb
index 0f5bc0073..e12b1afc8 100644
--- a/docs/source/tutorials/ase.ipynb
+++ b/docs/source/tutorials/ase.ipynb
@@ -6,6 +6,8 @@
    "source": [
     "# On-the-fly training using ASE\n",
     "\n",
+    "Yu Xie (xiey@g.harvard.edu)\n",
+    "\n",
     "This is a quick introduction of how to set up our ASE-OTF interface to train a force field. We will train a force field model for diamond. To run the on-the-fly training, we will need to\n",
     "\n",
     "1. Create a supercell with ASE Atoms object\n",
@@ -49,22 +51,14 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [
     {
-     "name": "stderr",
      "output_type": "stream",
-     "text": [
-      "twobody0 cutoff is not define. it's going to use the universal cutoff.\n",
-      "threebody0 cutoff is not define. it's going to use the universal cutoff.\n"
-     ]
-    },
-    {
      "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "hyps [0.92961609 0.31637555 0.18391881 0.20456028 0.05      ]\n"
-     ]
+     "text": "hyps [0.92961609 0.31637555 0.18391881 0.20456028 0.05      ]\n"
     }
    ],
    "source": [
@@ -88,7 +82,7 @@
     "\n",
     "gp_model = GaussianProcess(\n",
     "    kernels = kernels,\n",
-    "    component = 'sc', # single-component. For multi-comp, use 'mc'\n",
+    "    component = 'mc', # If you are using ASE, please set to \"mc\" no matter for single-component or multi-component\n",
     "    hyps = hyps,\n",
     "    cutoffs = cut,\n",
     "    hyp_labels = ['sig2','ls2','sig3','ls3','noise'],\n",
@@ -120,8 +114,7 @@
     "mgp_model = MappedGaussianProcess(grid_params, \n",
     "                                  unique_species = [6], \n",
     "                                  n_cpus = 1,\n",
-    "                                  map_force = False, \n",
-    "                                  mean_only = False)"
+    "                                  var_map=None)"
    ]
   },
   {
@@ -237,7 +230,9 @@
     "**Note**: Currently, only VelocityVerlet is tested on real system, NPT may have issue with pressure and stress.\n",
     "\n",
     "Set up ASE_OTF training engine:\n",
+    "\n",
     "1. Initialize the velocities of atoms as 500K\n",
+    "\n",
     "2. Set up MD arguments as a dictionary based on [ASE MD parameters](https://wiki.fysik.dtu.dk/ase/ase/md.html). For VelocityVerlet, we don't need to set up extra parameters.\n",
     "   \n",
     "    E.g. for NVTBerendsen, we can set `md_kwargs = {'temperature': 500, 'taut': 0.5e3 * units.fs}`"
@@ -266,8 +261,11 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "3. Set up parameters for On-The-Fly (OTF) training. The descriptions of the parameters are in [ASE OTF module](https://flare-yuuuu.readthedocs.io/en/development/flare/ase/otf.html).\n",
+    "3. Set up parameters for On-The-Fly (OTF) training. The descriptions of the parameters are in [ASE OTF module](https://flare.readthedocs.io/en/latest/flare/ase/otf.html).\n",
     "4. Set up the ASE_OTF training engine, and run\n",
+    "\n",
+    "    **Note**: the ASE Trajectory is supported, but NOT recommended. \n",
+    "\n",
     "5. Check `otf.out` after the training is done."
    ]
   },
@@ -275,176 +273,7 @@
    "cell_type": "code",
    "execution_count": 8,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:otflog:2020-06-10 14:53:00.574573\n",
-      "INFO:otflog:\n",
-      "GaussianProcess Object\n",
-      "Number of cpu cores: 1\n",
-      "Kernel: ['twobody', 'threebody']\n",
-      "Training points: 0\n",
-      "Cutoffs: {'twobody': 5.0, 'threebody': 3.5}\n",
-      "Model Likelihood: None\n",
-      "Number of hyperparameters: 5\n",
-      "Hyperparameters_array: [0.92961609 0.31637555 0.18391881 0.20456028 0.05      ]\n",
-      "Hyperparameters: \n",
-      "sig2: 0.9296160928171479 \n",
-      "ls2: 0.3163755545817859 \n",
-      "sig3: 0.18391881167709445 \n",
-      "ls3: 0.2045602785530397 \n",
-      "noise: 0.05 \n",
-      "Hyps_mask train_noise: True \n",
-      "Hyps_mask nspecie: 1 \n",
-      "Hyps_mask twobody_start: 0 \n",
-      "Hyps_mask ntwobody: 1 \n",
-      "Hyps_mask threebody_start: 2 \n",
-      "Hyps_mask nthreebody: 1 \n",
-      "Hyps_mask kernels: ['twobody', 'threebody'] \n",
-      "Hyps_mask cutoffs: {'twobody': 5.0, 'threebody': 3.5} \n",
-      "\n",
-      "uncertainty tolerance: 2 times noise hyperparameter \n",
-      "timestep (ps): 0.09822694788464063\n",
-      "number of frames: 3\n",
-      "number of atoms: 8\n",
-      "system species: {'C'}\n",
-      "periodic cell: \n",
-      "[[3.52678 0.      0.     ]\n",
-      " [0.      3.52678 0.     ]\n",
-      " [0.      0.      3.52678]]\n",
-      "\n",
-      "previous positions (A):\n",
-      "C        0.0000    0.0000    0.0000\n",
-      "C        0.8817    0.8817    0.8817\n",
-      "C        0.0000    1.7634    1.7634\n",
-      "C        0.8817    2.6451    2.6451\n",
-      "C        1.7634    0.0000    1.7634\n",
-      "C        2.6451    0.8817    2.6451\n",
-      "C        1.7634    1.7634    0.0000\n",
-      "C        2.6451    2.6451    0.8817\n",
-      "--------------------------------------------------------------------------------\n",
-      "\n",
-      "INFO:otflog:\n",
-      "Calling DFT...\n",
-      "\n",
-      "INFO:otflog:DFT run complete.\n",
-      "INFO:otflog:number of DFT calls: 1\n",
-      "INFO:otflog:wall time from start: 0.03 s\n",
-      "INFO:otflog:Adding atom [0, 1, 2, 3] to the training set.\n",
-      "INFO:otflog:Uncertainty: [0. 0. 0.]\n",
-      "INFO:otflog:\n",
-      "GP hyperparameters: \n",
-      "INFO:otflog:Hyp0 : sig2 = 0.9296\n",
-      "INFO:otflog:Hyp1 : ls2 = 0.3164\n",
-      "INFO:otflog:Hyp2 : sig3 = 0.1839\n",
-      "INFO:otflog:Hyp3 : ls3 = 0.2046\n",
-      "INFO:otflog:Hyp4 : noise = 0.0010\n",
-      "INFO:otflog:likelihood: 71.8658\n",
-      "INFO:otflog:likelihood gradient: [-1.79487637e-06 -6.53005436e-06 -8.57610391e-06 -1.76345959e-05\n",
-      " -1.19999904e+04]\n",
-      "INFO:otflog:wall time from start: 7.23 s\n",
-      "INFO:otflog:\n",
-      "*-Frame: 0 \n",
-      "Simulation Time: 0.0 ps \n",
-      "El            Position (A)                    DFT Force (ev/A)                  Std. Dev (ev/A)                  Velocities (A/ps)       \n",
-      "C        0.0000    0.0000    0.0000        0.0000    0.0000    0.0000        0.0000    0.0000    0.0000        0.0417    0.0382    0.0035\n",
-      "C        0.8817    0.8817    0.8817        0.0000    0.0000    0.0000        0.0000    0.0000    0.0000       -0.0135    0.0222    0.0320\n",
-      "C        0.0000    1.7634    1.7634        0.0000    0.0000   -0.0000        0.0000    0.0000    0.0000        0.0204   -0.0705    0.0194\n",
-      "C        0.8817    2.6451    2.6451       -0.0000   -0.0000   -0.0000        0.0000    0.0000    0.0000       -0.0013    0.0346    0.0278\n",
-      "C        1.7634    0.0000    1.7634        0.0000    0.0000   -0.0000        0.0000    0.0000    0.0000       -0.0676   -0.0129    0.0242\n",
-      "C        2.6451    0.8817    2.6451       -0.0000   -0.0000   -0.0000        0.0000    0.0000    0.0000       -0.0027   -0.0121   -0.0340\n",
-      "C        1.7634    1.7634    0.0000        0.0000   -0.0000    0.0000        0.0000    0.0000    0.0000        0.0658   -0.0278   -0.0497\n",
-      "C        2.6451    2.6451    0.8817       -0.0000   -0.0000    0.0000        0.0000    0.0000    0.0000       -0.0427    0.0283   -0.0231\n",
-      "\n",
-      "temperature: 199.00 K \n",
-      "kinetic energy: 0.180060 eV \n",
-      "\n",
-      "INFO:otflog:wall time from start: 7.23 s\n",
-      "INFO:otflog:--------------------------------------------------------------------------------\n",
-      "-Frame: 1 \n",
-      "Simulation Time: 0.0982 ps \n",
-      "El            Position (A)                    GP Force (ev/A)                   Std. Dev (ev/A)                  Velocities (A/ps)       \n",
-      "C        0.0082    0.0075    0.0007       -0.0000   -0.0000   -0.0000       16.1332   16.9877    6.7169        0.0417    0.0382    0.0035\n",
-      "C        0.8790    0.8861    0.8880        0.0000   -0.0000   -0.0000       15.3645    9.5079   17.2434       -0.0135    0.0222    0.0320\n",
-      "C        0.0040    1.7495    1.7672       -0.0000    0.0000   -0.0000       10.8208   37.7660    9.7448        0.0204   -0.0705    0.0194\n",
-      "C        0.8814    2.6519    2.6505       -0.0000   -0.0000    0.0000        9.5129   20.9930    4.7440       -0.0013    0.0346    0.0278\n",
-      "C        1.7501   -0.0025    1.7682        0.0000    0.0000   -0.0000       23.4305   10.7241   12.5271       -0.0676   -0.0129    0.0242\n",
-      "C        2.6446    0.8793    2.6384        0.0000   -0.0000    0.0000        3.1513   13.4187    7.3315       -0.0027   -0.0121   -0.0340\n",
-      "C        1.7763    1.7579   -0.0098       -0.0000    0.0000    0.0000       20.5584    9.0554   17.2228        0.0658   -0.0278   -0.0497\n",
-      "C        2.6367    2.6506    0.8772        0.0000   -0.0000    0.0000       17.8540    7.7304   12.6641       -0.0427    0.0283   -0.0231\n",
-      "\n",
-      "temperature: 199.00 K \n",
-      "kinetic energy: 0.180060 eV \n",
-      "\n",
-      "INFO:otflog:wall time from start: 9.65 s\n",
-      "INFO:otflog:\n",
-      "Calling DFT...\n",
-      "\n",
-      "INFO:otflog:DFT run complete.\n",
-      "INFO:otflog:number of DFT calls: 2\n",
-      "INFO:otflog:wall time from start: 9.67 s\n",
-      "INFO:otflog:\n",
-      "*-Frame: 1 \n",
-      "Simulation Time: 0.0982 ps \n",
-      "El            Position (A)                    DFT Force (ev/A)                  Std. Dev (ev/A)                  Velocities (A/ps)       \n",
-      "C        0.0164    0.0150    0.0013        0.0372   -0.0001   -0.0205       16.1332   16.9877    6.7169        0.0835    0.0764    0.0069\n",
-      "C        0.8763    0.8904    0.8943       -0.0669   -0.0327    0.0578       15.3645    9.5079   17.2434       -0.0274    0.0442    0.0641\n",
-      "C        0.0080    1.7356    1.7710        0.0322   -0.1194    0.0093       10.8208   37.7660    9.7448        0.0409   -0.1414    0.0388\n",
-      "C        0.8812    2.6587    2.6560        0.0353    0.0737   -0.0165        9.5129   20.9930    4.7440       -0.0025    0.0695    0.0555\n",
-      "C        1.7368   -0.0051    1.7729       -0.0330    0.0001    0.0250       23.4305   10.7241   12.5271       -0.1353   -0.0259    0.0486\n",
-      "C        2.6440    0.8770    2.6317       -0.0014    0.0643    0.0114        3.1513   13.4187    7.3315       -0.0053   -0.0238   -0.0680\n",
-      "C        1.7893    1.7525   -0.0195        0.0479    0.0129   -0.0207       20.5584    9.0554   17.2228        0.1317   -0.0555   -0.0994\n",
-      "C        2.6283    2.6562    0.8726       -0.0513    0.0013   -0.0459       17.8540    7.7304   12.6641       -0.0856    0.0565   -0.0465\n",
-      "\n",
-      "temperature: 798.57 K \n",
-      "kinetic energy: 0.722563 eV \n",
-      "\n",
-      "INFO:otflog:wall time from start: 9.68 s\n",
-      "INFO:otflog:mean absolute error: 0.0340 eV/A\n",
-      "INFO:otflog:mean absolute dft component: 0.0340 eV/A\n",
-      "INFO:otflog:Adding atom [6, 3, 4, 2] to the training set.\n",
-      "INFO:otflog:Uncertainty: [20.55839508  9.05540846 17.22284583]\n",
-      "INFO:otflog:\n",
-      "GP hyperparameters: \n",
-      "INFO:otflog:Hyp0 : sig2 = 0.7038\n",
-      "INFO:otflog:Hyp1 : ls2 = 2.0405\n",
-      "INFO:otflog:Hyp2 : sig3 = 0.0000\n",
-      "INFO:otflog:Hyp3 : ls3 = 9.6547\n",
-      "INFO:otflog:Hyp4 : noise = 0.0010\n",
-      "INFO:otflog:likelihood: 122.4930\n",
-      "INFO:otflog:likelihood gradient: [-1.34065483e+00 -1.79554908e-01 -4.94110742e-02  1.54534584e-10\n",
-      " -1.82026091e+04]\n",
-      "INFO:otflog:wall time from start: 30.46 s\n",
-      "INFO:otflog:--------------------------------------------------------------------------------\n",
-      "-Frame: 2 \n",
-      "Simulation Time: 0.196 ps \n",
-      "El            Position (A)                    GP Force (ev/A)                   Std. Dev (ev/A)                  Velocities (A/ps)       \n",
-      "C        0.0247    0.0225    0.0020        0.0748   -0.0003   -0.0400        0.0008    0.0015    0.0008        0.0000    0.0000    0.0000\n",
-      "C        0.8735    0.8947    0.9007       -0.1357   -0.0645    0.1173        0.0014    0.0015    0.0010        0.0000    0.0000    0.0000\n",
-      "C        0.0121    1.7215    1.7748        0.0632   -0.2385    0.0151        0.0010    0.0015    0.0016        0.0000    0.0000    0.0000\n",
-      "C        0.8810    2.6657    2.6614        0.0692    0.1497   -0.0328        0.0011    0.0013    0.0010        0.0000    0.0000    0.0000\n",
-      "C        1.7235   -0.0076    1.7778       -0.0661   -0.0006    0.0515        0.0015    0.0013    0.0008        0.0000    0.0000    0.0000\n",
-      "C        2.6435    0.8748    2.6251       -0.0019    0.1297    0.0235        0.0009    0.0017    0.0010        0.0000    0.0000    0.0000\n",
-      "C        1.8023    1.7471   -0.0293        0.0980    0.0221   -0.0451        0.0015    0.0018    0.0012        0.0000    0.0000    0.0000\n",
-      "C        2.6197    2.6617    0.8679       -0.1015    0.0024   -0.0895        0.0013    0.0012    0.0012        0.0000    0.0000    0.0000\n",
-      "\n",
-      "temperature: 0.00 K \n",
-      "kinetic energy: 0.000000 eV \n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "INFO:otflog:wall time from start: 33.38 s\n",
-      "INFO:otflog:--------------------\n",
-      "INFO:otflog:Run complete.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from flare.ase.otf import ASE_OTF\n",
     "\n",
@@ -452,7 +281,8 @@
     "              'output_name': 'otf',\n",
     "              'std_tolerance_factor': 2,\n",
     "              'max_atoms_added' : 4,\n",
-    "              'freeze_hyps': 10}\n",
+    "              'freeze_hyps': 10,\n",
+    "              'write_model': 3}\n",
     "\n",
     "test_otf = ASE_OTF(super_cell, \n",
     "                   timestep = 1 * units.fs,\n",
@@ -464,6 +294,37 @@
     "\n",
     "test_otf.run()"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Then check the `*.out` file for the training log.\n",
+    "\n",
+    "## Step 5 (Optional): Resume Interrupted Training\n",
+    "\n",
+    "At the end of each OTF training step, there will be several checkpoint files dumpped\n",
+    "\n",
+    "- `<output_name>_checkpt.json`: checkpoint of the current MD step of OTF. In the above example, `al_otf_qe_checkpt.json`. \n",
+    "\n",
+    "- `<output_name>_flare.json`: If you've set `write_model=3`, then there will be another file saving the trained FLARE calculator, which will be loaded when restarting OTF.\n",
+    "\n",
+    "- `<output_name>_atoms.json`: The ASE Atoms of the current MD step in the format of `json`\n",
+    "\n",
+    "- `<output_name>_dft.pickle`: The DFT calculator saved in the format of `.pickle`.\n",
+    "\n",
+    "Then, use `ASE_OTF.from_checkpoint(<output_name>_checkpt.json)` to load the OTF state, and resume the training by `run()`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "new_otf = ASE_OTF.from_checkpoint(\"<output_name>_checkpt.json\")\n",
+    "new_otf.run()"
+   ]
   }
  ],
  "metadata": {
@@ -482,9 +343,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.5"
+   "version": "3.7.5-final"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
\ No newline at end of file
diff --git a/docs/source/tutorials/lammps.rst b/docs/source/tutorials/lammps.rst
index 80bbc5e8f..9f99117f9 100644
--- a/docs/source/tutorials/lammps.rst
+++ b/docs/source/tutorials/lammps.rst
@@ -1,9 +1,17 @@
 Compile LAMMPS with MGP Pair Style
 ==================================
-Anders Johansson
+Anders Johansson, Yu Xie
 
 
-After downloading MGP pair style files (`pair_mgp.*`, `pair_mgpf.*`), please follow the instruction below to compile the LAMMPS executable.
+Download
+--------
+
+If you want to use MGP force field in LAMMPS, the first step is to download our MGP pair_style source code from:
+https://github.com/mir-group/flare/tree/master/flare/mgp/pair_styles
+
+If you want to install without Kokkos acceleration, please download `pair_mgp.*`, otherwise, download `pair_mgp_kokkos.*`.
+
+Then, follow the instruction below to compile the LAMMPS executable.
 
 MPI
 ---
diff --git a/docs/source/tutorials/otf.ipynb b/docs/source/tutorials/otf.ipynb
index 18423f41e..568f491ed 100644
--- a/docs/source/tutorials/otf.ipynb
+++ b/docs/source/tutorials/otf.ipynb
@@ -107,10 +107,14 @@
     "freeze_hyps = 3\n",
     "\n",
     "otf = OTF(qe_input, dt, number_of_steps, gp, dft_loc,\n",
-    "          std_tolerance_factor, init_atoms=[0],\n",
-    "          calculate_energy=True, output_name='al_otf_qe',\n",
-    "          freeze_hyps=freeze_hyps, skip=5,\n",
-    "          max_atoms_added=max_atoms_added)"
+    "          std_tolerance_factor, \n",
+    "          init_atoms=[0],\n",
+    "          calculate_energy=True, \n",
+    "          output_name='al_otf_qe',\n",
+    "          freeze_hyps=freeze_hyps, \n",
+    "          skip=5,\n",
+    "          max_atoms_added=max_atoms_added,\n",
+    "          write_model=3)"
    ]
   },
   {
@@ -137,6 +141,16 @@
     "\n",
     "- `skip`: record/dump the information every skip steps.\n",
     "\n",
+    "- `write_model`: identify the frequency of dumpping GP model during the training. \n",
+    "\n",
+    "    - `1`: default, dump GP model only when the OTF is complete\n",
+    "\n",
+    "    - `2`: dump GP model every time after training\n",
+    "    \n",
+    "    - `3`: dump GP model at every step\n",
+    "\n",
+    "    (We recommend using `3` if you are possibly gonna resume the OTF training, as instructed by the next section.)\n",
+    "\n",
     "## Step 4: Launch the OTF Training\n",
     "Finally, let’s run it!"
    ]
@@ -157,6 +171,29 @@
    "source": [
     "After OTF training is finished, we can check log file `al_otf_qe.out` for all the information dumped. This output file can be parsed using our `otf_parser.py` module, which will be introduced in the [after-training-tutorial](https://flare.readthedocs.io/en/latest/tutorials/after_training.html)"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 5 (Optional): Resume Interrupted Training\n",
+    "\n",
+    "At the end of each OTF training step, there will be a checkpoint file dumpped of name `<output_name>_checkpt.json`, in the above example, `al_otf_qe_checkpt.json`. This file saves the state at the current OTF MD step. \n",
+    "\n",
+    "If you've set `write_model=3`, then there will be another file saving the trained GP model, `<output_name>_gp.json`, which will be loaded when restarting OTF.\n",
+    "\n",
+    "Then, use `OTF.from_checkpoint(<output_name>_checkpt.json)` to load the OTF state, and resume the training by `run()`.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "new_otf = OTF.from_checkpoint(log_name + \"_checkpt.json\")\n",
+    "new_otf.run()"
+   ]
   }
  ],
  "metadata": {
@@ -180,4 +217,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
\ No newline at end of file
diff --git a/flare/ase/atoms.py b/flare/ase/atoms.py
index dc336b1a7..c96506496 100644
--- a/flare/ase/atoms.py
+++ b/flare/ase/atoms.py
@@ -6,6 +6,7 @@
 
 import numpy as np
 from ase import Atoms
+from ase.io import read, write
 from flare.utils.learner import get_max_cutoff
 
 
@@ -112,3 +113,11 @@ def wrapped_positions(self):
     @property
     def max_cutoff(self):
         return get_max_cutoff(self.cell)
+
+    def as_dict(self):
+        return self.todict()
+
+    @staticmethod
+    def from_dict(dct):
+        atoms = Atoms.fromdict(dct)
+        return FLARE_Atoms.from_ase_atoms(atoms)
diff --git a/flare/ase/calculator.py b/flare/ase/calculator.py
index 84a37065e..a924855ef 100644
--- a/flare/ase/calculator.py
+++ b/flare/ase/calculator.py
@@ -8,8 +8,10 @@
 import warnings
 import numpy as np
 import multiprocessing as mp
+import json
+
 from flare.env import AtomicEnvironment
-from flare.struc import Structure
+from flare.gp import GaussianProcess
 from flare.mgp import MappedGaussianProcess
 from flare.predict import (
     predict_on_structure_par_en,
@@ -17,6 +19,8 @@
     predict_on_structure_efs,
     predict_on_structure_efs_par,
 )
+from flare.utils.element_coder import NumpyEncoder
+
 from ase.calculators.calculator import Calculator
 
 
@@ -54,7 +58,9 @@ class FLARE_Calculator(Calculator):
             by default.
     """
 
-    def __init__(self, gp_model, mgp_model=None, par=False, use_mapping=False):
+    def __init__(
+        self, gp_model, mgp_model=None, par=False, use_mapping=False, **kwargs
+    ):
         super().__init__()  # all set to default values, TODO: change
         self.mgp_model = mgp_model
         self.gp_model = gp_model
@@ -185,3 +191,54 @@ def calculate_mgp(self, atoms):
 
     def calculation_required(self, atoms, quantities):
         return True
+
+    def as_dict(self):
+        outdict = {}
+
+        gp_dict = self.gp_model.as_dict()
+        outdict["gp_model"] = gp_dict
+
+        outdict["use_mapping"] = self.use_mapping
+        if self.use_mapping:
+            mgp_dict = self.mgp_model.as_dict()
+            outdict["mgp_model"] = mgp_dict
+        else:
+            outdict["mgp_model"] = None
+
+        outdict["par"] = self.par
+        outdict["results"] = self.results
+        return outdict
+
+    @staticmethod
+    def from_dict(dct):
+        dct["gp_model"] = GaussianProcess.from_dict(dct["gp_model"])
+        if dct["use_mapping"]:
+            dct["mgp_model"] = MappedGaussianProcess.from_dict(dct["mgp_model"])
+
+        calc = FLARE_Calculator(**dct)
+        res = dct["results"]
+        for key in res:
+            if isinstance(res[key], float):
+                calc.results[key] = res[key]
+            if isinstance(res[key], list):
+                calc.results[key] = np.array(res[key])
+
+        if dct["use_mapping"]:
+            for xb in calc.mgp_model.maps:
+                xb_map = calc.mgp_model.maps[xb]
+                xb_map.hyps_mask = calc.gp_model.hyps_mask
+
+        return calc
+
+    def write_model(self, name):
+        if ".json" != name[-5:]:
+            name += ".json"
+        with open(name, "w") as f:
+            json.dump(self.as_dict(), f, cls=NumpyEncoder)
+
+    @staticmethod
+    def from_file(name):
+        with open(name, "r") as f:
+            calc = FLARE_Calculator.from_dict(json.loads(f.readline()))
+
+        return calc
diff --git a/flare/ase/otf.py b/flare/ase/otf.py
index f67c57806..0f1a565c7 100644
--- a/flare/ase/otf.py
+++ b/flare/ase/otf.py
@@ -1,12 +1,14 @@
 """
-:class:`OTF` is the on-the-fly training module for ASE, WITHOUT molecular dynamics engine. 
+:class:`ASE_OTF` is the on-the-fly training module for ASE, WITHOUT molecular dynamics engine. 
 It needs to be used adjointly with ASE MD engine. 
 """
 import os
 import sys
 import inspect
+import pickle
 from time import time
 from copy import deepcopy
+import logging
 
 import numpy as np
 from ase.md.npt import NPT
@@ -15,7 +17,9 @@
 from ase.md.verlet import VelocityVerlet
 from ase.md.langevin import Langevin
 from ase import units
+from ase.io import read, write
 
+import flare
 from flare.otf import OTF
 from flare.utils.learner import is_std_in_bound
 
@@ -24,6 +28,17 @@
 import flare.ase.dft as dft_source
 
 
+def reset_npt_momenta(npt_engine, force):
+    # in the last step, the momenta was set by flare forces, change to dft forces
+    npt_engine._calculate_q_future(force)
+    npt_engine.atoms.set_momenta(
+        np.dot(
+            npt_engine.q_future - npt_engine.q_past, npt_engine.h / (2 * npt_engine.dt)
+        )
+        * npt_engine._getmasses()
+    )
+
+
 class ASE_OTF(OTF):
 
     """
@@ -45,7 +60,7 @@ class ASE_OTF(OTF):
             currently in experiment.
 
     The following arguments are for on-the-fly training, the user can also
-    refer to :class:`OTF`
+    refer to :class:`flare.otf.OTF`
 
     Args:
         prev_pos_init ([type], optional): Previous positions. Defaults
@@ -97,7 +112,9 @@ def __init__(
     ):
 
         self.atoms = FLARE_Atoms.from_ase_atoms(atoms)
+        self.timestep = timestep
         self.md_engine = md_engine
+        self.md_kwargs = md_kwargs
 
         if md_engine == "VelocityVerlet":
             MD = VelocityVerlet
@@ -107,6 +124,10 @@ def __init__(
             MD = NPTBerendsen
         elif md_engine == "NPT":
             MD = NPT
+            # TODO: solve the md step
+            assert (
+                md_kwargs["pfactor"] is None
+            ), "Current MD OTF only supports pfactor=None"
         elif md_engine == "Langevin":
             MD = Langevin
         else:
@@ -132,6 +153,10 @@ def __init__(
             **otf_kwargs
         )
 
+        self.flare_name = self.output_name + "_flare.json"
+        self.dft_name = self.output_name + "_dft.pickle"
+        self.atoms_name = self.output_name + "_atoms.json"
+
     def get_structure_from_input(self, prev_pos_init):
         self.structure = self.atoms
         if prev_pos_init is None:
@@ -163,9 +188,10 @@ def compute_properties(self):
 
         # Reset FLARE calculator if necessary.
         if not isinstance(self.atoms.calc, FLARE_Calculator):
-            self.atoms.set_calculator(self.flare_calc)
+            self.atoms.calc = self.flare_calc
 
-        self.atoms.calc.calculate(self.atoms)
+        if not self.flare_calc.results:
+            self.atoms.calc.calculate(self.atoms)
 
     def md_step(self):
         """
@@ -176,12 +202,30 @@ def md_step(self):
         self.structure.prev_positions = np.copy(self.structure.positions)
 
         # Take MD step.
-        self.md.step()
+        f = self.atoms.get_forces()
+
+        if self.md_engine == "NPT":
+            self.flare_calc.results = {}  # init flare calculator
+
+            if self.dft_step:
+                reset_npt_momenta(self.md, f)
+                self.atoms.calc = self.flare_calc
+
+            self.md.step()  # use flare to get force for next step
+        else:
+            self.flare_calc.results = {}  # init flare calculator
+            if self.dft_step:
+                self.atoms.calc = self.flare_calc
+
+            self.md.step(f)
 
         # Update the positions and cell of the structure object.
         self.structure.cell = np.copy(self.atoms.cell)
         self.structure.positions = np.copy(self.atoms.positions)
 
+    def write_gp(self):
+        self.flare_calc.write_model(self.flare_name)
+
     def update_positions(self, new_pos):
         # call OTF method
         super().update_positions(new_pos)
@@ -201,8 +245,84 @@ def update_temperature(self):
         # Convert velocities to Angstrom / ps.
         self.velocities = self.atoms.get_velocities() * units.fs * 1e3
 
-    def update_gp(self, train_atoms, dft_frcs):
-        super().update_gp(train_atoms, dft_frcs)
+    def update_gp(self, train_atoms, dft_frcs, dft_energy=None, dft_stress=None):
+        self.output.add_atom_info(train_atoms, self.structure.stds)
+
+        # update gp model
+        self.gp.update_db(
+            self.structure,
+            dft_frcs,
+            custom_range=train_atoms,
+            energy=dft_energy,
+            stress=dft_stress,
+        )
+
+        self.gp.set_L_alpha()
 
+        # train model
+        if (self.dft_count - 1) < self.freeze_hyps:
+            self.train_gp()
+
+        # update mgp model
         if self.flare_calc.use_mapping:
             self.flare_calc.mgp_model.build_map(self.flare_calc.gp_model)
+
+        # write model
+        if (self.dft_count - 1) < self.freeze_hyps:
+            if self.write_model == 2:
+                self.write_gp()
+        if self.write_model == 3:
+            self.write_gp()
+
+    def as_dict(self):
+
+        # DFT module and Trajectory will cause issue in deepcopy
+        self.dft_module = self.dft_module.__name__
+        md = self.md
+        self.md = None
+
+        dct = deepcopy(dict(vars(self)))
+        self.dft_module = eval(self.dft_module)
+        self.md = md
+
+        # write atoms and flare calculator to separate files
+        write(self.atoms_name, self.atoms)
+        dct["atoms"] = self.atoms_name
+
+        self.flare_calc.write_model(self.flare_name)
+        dct["flare_calc"] = self.flare_name
+
+        # dump dft calculator as pickle
+        with open(self.dft_name, "wb") as f:
+            pickle.dump(self.dft_loc, f)  # dft_loc is the dft calculator
+        dct["dft_loc"] = self.dft_name
+
+        dct["gp"] = self.gp_name
+
+        for key in ["output", "pred_func", "structure", "dft_input", "md"]:
+            dct.pop(key)
+
+        return dct
+
+    @staticmethod
+    def from_dict(dct):
+        flare_calc = FLARE_Calculator.from_file(dct["flare_calc"])
+        dct["atoms"] = read(dct["atoms"])
+        dct["atoms"].calc = flare_calc
+        dct.pop("gp")
+
+        with open(dct["dft_loc"], "rb") as f:
+            dct["dft_calc"] = pickle.load(f)
+
+        for key in ["dt", "dft_loc"]:
+            dct.pop(key)
+
+        new_otf = ASE_OTF(**dct)
+        new_otf.dft_count = dct["dft_count"]
+        new_otf.curr_step = dct["curr_step"]
+
+        if new_otf.md_engine == "NPT":
+            if not new_otf.md.initialized:
+                new_otf.md.initialize()
+
+        return new_otf
diff --git a/flare/gp.py b/flare/gp.py
index 1a30197cf..12c4a0055 100644
--- a/flare/gp.py
+++ b/flare/gp.py
@@ -1,5 +1,4 @@
 import json
-import json
 import logging
 import multiprocessing as mp
 import pickle
@@ -195,6 +194,16 @@ def __init__(self, kernels: List[str] = None,
 
         self.check_instantiation()
 
+    @property
+    def force_noise(self):
+        return Parameters.get_noise(self.hyps_mask, self.hyps,
+                                    constraint=False)
+
+    @property
+    def hyps_and_labels(self):
+        return Parameters.get_hyps(
+            self.hyps_mask, self.hyps, constraint=False, label=True)
+
     def check_instantiation(self):
         """
         Runs a series of checks to ensure that the user has not supplied
@@ -271,8 +280,9 @@ def update_kernel(self, kernels: List[str], component: str = "mc",
         if hyps is not None:
             self.hyps = hyps
 
-    def update_db(self, struc: Structure, forces: List,
-                  custom_range: List[int] = (), energy: float = None):
+    def update_db(self, struc: Structure, forces: 'ndarray',
+                  custom_range: List[int] = (), energy: float = None,
+                  stress: 'ndarray' = None):
         """Given a structure and forces, add local environments from the
         structure to the training set of the GP. If energy is given, add the
         entire structure to the training set.
@@ -287,6 +297,10 @@ def update_db(self, struc: Structure, forces: List,
                 environments will be added to the training set of the GP.
 
             energy (float): Energy of the structure.
+
+            stress (np.ndarray): Stress tensor of the structure. The stress
+                tensor components should be given in the following order:
+                xx, xy, xz, yy, yz, zz.
         """
 
         # By default, use all atoms in the structure
@@ -720,9 +734,8 @@ def as_dict(self):
         # Remove the callables
         for key in ['kernel', 'kernel_grad', 'energy_kernel',
                     'energy_force_kernel', 'efs_energy_kernel',
-                    'efs_force_kernel', 'efs_self_kernel']:
-            if out_dict.get(key) is not None:
-                del out_dict[key]
+                    'efs_force_kernel', 'efs_self_kernel', 'output']:
+            out_dict.pop(key)
 
         return out_dict
 
diff --git a/flare/gp_algebra.py b/flare/gp_algebra.py
index aa7849e34..be4a39123 100644
--- a/flare/gp_algebra.py
+++ b/flare/gp_algebra.py
@@ -844,8 +844,10 @@ def force_force_vector_unit(name, s, e, x, kernel, hyps, cutoffs, hyps_mask,
 
 def efs_force_vector_unit(name, s, e, x, efs_force_kernel, hyps, cutoffs,
                           hyps_mask):
-    size = e - s
 
+    training_data = _global_training_data[name]
+
+    size = e - s
     k_ef = np.zeros((1, size * 3))
     k_ff = np.zeros((3, size * 3))
     k_sf = np.zeros((6, size * 3))
@@ -853,7 +855,7 @@ def efs_force_vector_unit(name, s, e, x, efs_force_kernel, hyps, cutoffs,
     args = from_mask_to_args(hyps, cutoffs, hyps_mask)
 
     for m_index in range(size):
-        x_2 = _global_training_data[name][m_index + s]
+        x_2 = training_data[m_index + s]
         ef, ff, sf = efs_force_kernel(x, x_2, *args)
 
         ind1 = m_index * 3
diff --git a/flare/mgp/map2b.py b/flare/mgp/map2b.py
index e2da674d4..f014caa43 100644
--- a/flare/mgp/map2b.py
+++ b/flare/mgp/map2b.py
@@ -26,6 +26,7 @@ def __init__(
         self.bodies = 2
         self.pred_perm = [[0]]
         self.spc_perm = [[0, 1]]
+        self.num_lmp_maps = 0 
         super().__init__(**kwargs)
 
     def build_bond_struc(self, species_list):
@@ -35,10 +36,12 @@ def build_bond_struc(self, species_list):
 
         # 2 body (2 atoms (1 bond) config)
         self.spc = []
+        self.num_lmp_maps = 0
         for spc1_ind, spc1 in enumerate(species_list):
             for spc2 in species_list[spc1_ind:]:
                 species = [spc1, spc2]
                 self.spc.append(sorted(species))
+                self.num_lmp_maps += 1
 
     def get_arrays(self, atom_env):
         return get_bonds(atom_env.ctype, atom_env.etypes, atom_env.bond_array_2)
diff --git a/flare/mgp/map3b.py b/flare/mgp/map3b.py
index 82753e0f6..fddfbd796 100644
--- a/flare/mgp/map3b.py
+++ b/flare/mgp/map3b.py
@@ -21,6 +21,7 @@ def __init__(self, **kwargs):
         self.bodies = 3
         self.pred_perm = [[0, 1, 2], [1, 0, 2]]
         self.spc_perm = [[0, 1, 2], [0, 2, 1]]
+        self.num_lmp_maps = 0
         super().__init__(**kwargs)
 
     def build_bond_struc(self, species_list):
@@ -31,12 +32,12 @@ def build_bond_struc(self, species_list):
         # 2 body (2 atoms (1 bond) config)
         self.spc = []
         N_spc = len(species_list)
+        self.num_lmp_maps = N_spc ** 3
         for spc1 in species_list:
             for spc2 in species_list:
                 for spc3 in species_list:
-                    if spc2 <= spc3:
-                        species = [spc1, spc2, spc3]
-                        self.spc.append(species)
+                    species = [spc1, spc2, spc3]
+                    self.spc.append(species)
 
     def get_arrays(self, atom_env):
 
@@ -55,6 +56,7 @@ def find_map_index(self, spc):
         return self.spc.index(spc)
 
 
+
 class SingleMap3body(SingleMapXbody):
     def __init__(self, **kwargs):
         """
@@ -73,13 +75,7 @@ def __init__(self, **kwargs):
         self.set_bounds(None, None)
 
         spc = self.species
-        self.species_code = (
-            Z_to_element(spc[0])
-            + "_"
-            + Z_to_element(spc[1])
-            + "_"
-            + Z_to_element(spc[2])
-        )
+        self.species_code = "_".join([Z_to_element(spc) for spc in self.species])
         self.kv3name = f"kv3_{self.species_code}"
 
     def set_bounds(self, lower_bound, upper_bound):
diff --git a/flare/mgp/mapxb.py b/flare/mgp/mapxb.py
index 5629fce32..8dfaf65ab 100644
--- a/flare/mgp/mapxb.py
+++ b/flare/mgp/mapxb.py
@@ -44,7 +44,7 @@ def __init__(
         lower_bound_relax: float = 0.1,
         GP: GaussianProcess = None,
         n_cpus: int = None,
-        n_sample: int = 100,
+        n_sample: int = 10,
         hyps_mask: dict = None,
         hyps: list = None,
         **kwargs,
@@ -106,11 +106,6 @@ def build_map(self, GP):
             m.build_map(GP)
 
     def predict(self, atom_env):
-
-        assert Parameters.compare_dict(
-            self.hyps_mask, atom_env.cutoffs_mask
-        ), "GP.hyps_mask is not the same as atom_env.cutoffs_mask"
-
         f_spcs = np.zeros(3)
         vir_spcs = np.zeros(6)
         v_spcs = 0
@@ -138,21 +133,20 @@ def predict(self, atom_env):
             map_ind = self.find_map_index(spc)
             try:
                 f, vir, v, e = self.maps[map_ind].predict(lengths, xyzs)
+                f_spcs += f
+                vir_spcs += vir
+                v_spcs += v
+                e_spcs += e
             except ValueError as err_msg:
                 rebuild_spc.append(err_msg.args[0])
                 new_bounds.append(err_msg.args[1])
 
-            if len(rebuild_spc) > 0:
-                raise ValueError(
-                    rebuild_spc,
-                    new_bounds,
-                    f"The {self.kernel_name} map needs re-constructing.",
-                )
-
-            f_spcs += f
-            vir_spcs += vir
-            v_spcs += v
-            e_spcs += e
+        if len(rebuild_spc) > 0:
+            raise ValueError(
+                rebuild_spc,
+                new_bounds,
+                f"The {self.kernel_name} map needs re-constructing.",
+            )
 
         return f_spcs, vir_spcs, kern, v_spcs, e_spcs
 
@@ -268,6 +262,17 @@ def set_bounds(self, lower_bound, upper_bound):
     def construct_grids(self):
         raise NotImplementedError("need to be implemented in child class")
 
+    def LoadGrid(self):
+        if "mgp_grids" not in os.listdir(self.load_grid):
+            raise FileNotFoundError(
+                "Please set 'load_grid' as the location of mgp_grids folder"
+            )
+
+        grid_path = f"{self.load_grid}/mgp_grids/{self.bodies}_{self.species_code}"
+        grid_mean = np.load(f"{grid_path}_mean.npy")
+        grid_vars = np.load(f"{grid_path}_var.npy", allow_pickle=True)
+        return grid_mean, grid_vars
+
     def GenGrid(self, GP):
         """
         To use GP to predict value on each grid point, we need to generate the
@@ -282,6 +287,9 @@ def GenGrid(self, GP):
            with GP.alpha
         """
 
+        if self.load_grid is not None:
+            return self.LoadGrid()
+
         if self.n_cpus is None:
             processes = mp.cpu_count()
         else:
@@ -386,6 +394,14 @@ def _gengrid_inner(self, name, kernel_info, force_block, s, e):
 
         r_cut = cutoffs[self.kernel_name]
 
+        n_grids = np.prod(self.grid_num)
+
+        if np.any(np.array(self.bounds[1]) <= 0.0):
+            if force_block:
+                return np.zeros((n_grids, (e-s)*3))
+            else:
+                return np.zeros((n_grids, e-s))
+ 
         grids = self.construct_grids()
         coords = np.zeros(
             (grids.shape[0], self.grid_dim * 3), dtype=np.float64
@@ -406,7 +422,6 @@ def _gengrid_inner(self, name, kernel_info, force_block, s, e):
 
         k_v = []
         chunk_size = 32 ** 3
-        n_grids = grids.shape[0]
         if n_grids > chunk_size:
             n_chunk = ceil(n_grids / chunk_size)
         else:
@@ -431,7 +446,7 @@ def _gengrid_inner(self, name, kernel_info, force_block, s, e):
         if len(k_v) > 0:
             k_v = np.vstack(k_v).T
         else:
-            k_v = np.zeros((grids.shape[0], 0))
+            k_v = np.zeros((n_grids, 0))
 
         return k_v
 
@@ -463,7 +478,12 @@ def build_map_container(self):
         """
         build 1-d spline function for mean, 2-d for var
         """
-        self.mean = CubicSpline(self.bounds[0], self.bounds[1], orders=self.grid_num)
+        if np.any(np.array(self.bounds[1]) <= 0.0):
+            bounds = [np.zeros_like(self.bounds[0]), np.ones_like(self.bounds[1])]
+        else:
+            bounds = self.bounds
+
+        self.mean = CubicSpline(bounds[0], bounds[1], orders=self.grid_num)
 
         if self.var_map == "pca":
             if self.svd_rank == "auto":
@@ -473,14 +493,14 @@ def build_map_container(self):
 
             elif isinstance(self.svd_rank, int):
                 self.var = PCASplines(
-                    self.bounds[0],
-                    self.bounds[1],
+                    bounds[0],
+                    bounds[1],
                     orders=self.grid_num,
                     svd_rank=self.svd_rank,
                 )
 
         if self.var_map == "simple":
-            self.var = CubicSpline(self.bounds[0], self.bounds[1], orders=self.grid_num)
+            self.var = CubicSpline(bounds[0], bounds[1], orders=self.grid_num)
 
     def update_bounds(self, GP):
         rebuild_container = False
@@ -518,18 +538,7 @@ def build_map(self, GP):
 
         self.update_bounds(GP)
 
-        if not self.load_grid:
-            y_mean, y_var = self.GenGrid(GP)
-        else:
-            if "mgp_grids" not in os.listdir(self.load_grid):
-                raise FileNotFoundError(
-                    "Please set 'load_grid' as the location of mgp_grids folder"
-                )
-
-            grid_path = f"{self.load_grid}/mgp_grids/{self.bodies}_{self.species_code}"
-            y_mean = np.load(f"{grid_path}_mean.npy")
-            y_var = np.load(f"{grid_path}_var.npy", allow_pickle=True)
-
+        y_mean, y_var = self.GenGrid(GP)
         self.mean.set_values(y_mean)
 
         if self.var_map == "pca" and self.svd_rank == "auto":
@@ -574,12 +583,18 @@ def search_lower_bound(self, GP):
 
         lower_bound = np.min(upper_bound)
         for env in _global_training_data[GP.name]:
+            if len(env.bond_array_2) == 0:
+                continue
+
             min_dist = env.bond_array_2[0][0]
             if min_dist < lower_bound:
                 lower_bound = min_dist
 
         for struc in _global_training_structures[GP.name]:
             for env in struc:
+                if len(env.bond_array_2) == 0:
+                    continue
+
                 min_dist = env.bond_array_2[0][0]
                 if min_dist < lower_bound:
                     lower_bound = min_dist
@@ -592,13 +607,22 @@ def predict(self, lengths, xyzs):
         """
 
         min_dist = np.min(lengths)
-        if min_dist < np.max(self.bounds[0][0]):
+        if min_dist < np.max(self.bounds[0]):
             raise ValueError(
                 self.species,
                 min_dist,
                 f"The minimal distance {min_dist:.3f}"
                 f" is below the mgp lower bound {self.bounds[0]}",
             )
+        
+        max_dist = np.max(lengths)
+        if max_dist > np.min(self.bounds[1]):
+            raise Exception(
+                self.species,
+                max_dist,
+                f"The atomic environment should have cutoff smaller"
+                f" than the GP cutoff"
+            )
 
         lengths = np.array(lengths)
         xyzs = np.array(xyzs)
@@ -638,7 +662,7 @@ def predict(self, lengths, xyzs):
                 vir_i = (
                     f_d[:, b, vir_order[i][0]]
                     * xyzs[:, b, vir_order[i][1]]
-                    * lengths[:, 0]
+                    * lengths[:, b]
                 )
                 vir[i] += np.sum(vir_i)
 
@@ -646,7 +670,7 @@ def predict(self, lengths, xyzs):
         return f, vir, v, e
 
 
-    def write(self, f, write_var):
+    def write(self, f, write_var, permute=False):
         """ 
         Write LAMMPS coefficient file
 
@@ -660,6 +684,7 @@ def write(self, f, write_var):
 
         # write header
         elems = self.species_code.split("_")
+
         a = self.bounds[0]
         b = self.bounds[1]
         order = self.grid_num
@@ -670,13 +695,15 @@ def write(self, f, write_var):
         header += " " + " ".join(map(str, order))
         f.write(header + "\n")
 
-        # write coefficients
+        # write coeffs
         if write_var:
             coefs = self.var.__coeffs__
         else:
             coefs = self.mean.__coeffs__
+
         self.write_flatten_coeff(f, coefs)
 
+
     def write_flatten_coeff(self, f, coefs):
         """
         flatten the coefficient and write it as
diff --git a/flare/mgp/mgp.py b/flare/mgp/mgp.py
index f3e5ceb68..8681dfe54 100644
--- a/flare/mgp/mgp.py
+++ b/flare/mgp/mgp.py
@@ -43,7 +43,7 @@ class MappedGaussianProcess:
         lmp_file_name (str): LAMMPS coefficient file name
         n_cpus (int): Default None. Set to the number of cores needed for
             parallelization. Used in the construction of the map.
-        n_sample (int): Default 100. The batch size for building map. Not used now.
+        n_sample (int): Default 10. The batch size for building map. Not used now.
 
     Examples:
 
@@ -54,9 +54,9 @@ class MappedGaussianProcess:
     For `grid_params`, the following keys and values are allowed
 
     Args:
-        'two_body' (dict, optional): if 2-body is present, set as a dictionary
+        'twobody' (dict, optional): if 2-body is present, set as a dictionary
             of parameters for 2-body mapping. Parameters see below.
-        'three_body' (dict, optional): if 3-body is present, set as a dictionary
+        'threebody' (dict, optional): if 3-body is present, set as a dictionary
             of parameters for 3-body mapping. Parameters see below.
         'load_grid' (str, optional): Default None. the path to the directory
             where the previously generated grids (``grid_*.npy``) are stored.
@@ -97,9 +97,9 @@ def __init__(
         GP: GaussianProcess = None,
         var_map: str = None,
         container_only: bool = True,
-        lmp_file_name: str = "lmp.mgp",
+        lmp_file_name: str = "lmp",
         n_cpus: int = None,
-        n_sample: int = 100,
+        n_sample: int = 10,
     ):
 
         # load all arguments as attributes
@@ -234,7 +234,7 @@ def write_lmp_file(self, lammps_name, write_var=False):
         xbodies = ["twobody", "threebody"]
         for xb in xbodies:
             if xb in self.maps:
-                num = len(self.maps[xb].maps)
+                num = self.maps[xb].num_lmp_maps #len(self.maps[xb].maps)
             else:
                 num = 0
             header += f"{num} "
diff --git a/flare/otf.py b/flare/otf.py
index 7f186b075..d47476d8a 100644
--- a/flare/otf.py
+++ b/flare/otf.py
@@ -1,19 +1,21 @@
 import logging
+import json
 import numpy as np
 import time
+import warnings
 
 from copy import deepcopy
 from datetime import datetime
 from shutil import copyfile
 from typing import List, Tuple, Union
 
+import flare
 import flare.predict as predict
 from flare import struc, gp, env, md
 from flare.dft_interface import dft_software
 from flare.output import Output
-from flare.parameters import Parameters
 from flare.utils.learner import is_std_in_bound
-
+from flare.utils.element_coder import NumpyEncoder
 
 class OTF:
     """Trains a Gaussian process force field on the fly during
@@ -107,47 +109,57 @@ def __init__(
      dft_loc: str = None, dft_input: str = None, dft_output='dft.out',
      dft_kwargs=None,
      store_dft_output: Tuple[Union[str, List[str]], str] = None,
-     # par args
-     n_cpus: int = 1):
+     # other args
+     n_cpus: int = 1, **kwargs):
+
 
+        # set DFT
+        self.dft_loc = dft_loc
         self.dft_input = dft_input
         self.dft_output = dft_output
-        self.dt = dt
-        self.number_of_steps = number_of_steps
-        self.gp = gp
-        self.dft_loc = dft_loc
-        self.std_tolerance = std_tolerance_factor
-        self.skip = skip
         self.dft_step = True
-        self.freeze_hyps = freeze_hyps
-
+        self.dft_count = 0
         if isinstance(force_source, str):
             self.dft_module = dft_software[force_source]
         else:
             self.dft_module = force_source
 
-        # parse input file
-        self.get_structure_from_input(prev_pos_init)
-
+        # set md
+        self.dt = dt
+        self.number_of_steps = number_of_steps
+        self.get_structure_from_input(prev_pos_init) # parse input file
         self.noa = self.structure.positions.shape[0]
-        self.atom_list = list(range(self.noa))
-        self.curr_step = 0
-
-        self.max_atoms_added = max_atoms_added
+        self.rescale_steps = rescale_steps
+        self.rescale_temps = rescale_temps
 
+        # set flare
+        self.gp = gp
         # initialize local energies
         if calculate_energy:
             self.local_energies = np.zeros(self.noa)
         else:
             self.local_energies = None
 
-        # set atom list for initial dft run
-        if init_atoms is None:
+        # set otf
+        self.std_tolerance = std_tolerance_factor
+        self.skip = skip
+        self.max_atoms_added = max_atoms_added
+        self.freeze_hyps = freeze_hyps
+        if init_atoms is None: # set atom list for initial dft run
             self.init_atoms = [int(n) for n in range(self.noa)]
         else:
             self.init_atoms = init_atoms
 
-        self.dft_count = 0
+        self.n_cpus = n_cpus # set number of cpus and npool for DFT runs
+        self.npool = npool
+        self.mpi = mpi
+
+        self.dft_kwargs = dft_kwargs
+        self.store_dft_output = store_dft_output
+
+        # other args
+        self.atom_list = list(range(self.noa))
+        self.curr_step = 0
 
         # Set the prediction function based on user inputs.
         # Force only prediction.
@@ -168,21 +180,12 @@ def __init__(
         else:
             self.pred_func = predict.predict_on_structure_efs
 
-        # set rescale attributes
-        self.rescale_steps = rescale_steps
-        self.rescale_temps = rescale_temps
-
         # set logger
         self.output = Output(output_name, always_flush=True)
         self.output_name = output_name
+        self.gp_name = self.output_name + '_gp.json'
+        self.checkpt_name = self.output_name + '_checkpt.json'
 
-        # set number of cpus and npool for DFT runs
-        self.n_cpus = n_cpus
-        self.npool = npool
-        self.mpi = mpi
-
-        self.dft_kwargs = dft_kwargs
-        self.store_dft_output = store_dft_output
         self.write_model = write_model
 
     def run(self):
@@ -194,9 +197,10 @@ def run(self):
         'Year.Month.Day:Hour:Minute:Second:'.
         """
 
+        optional_dict = {"Restart": self.curr_step}
         self.output.write_header(
             str(self.gp), self.dt, self.number_of_steps, self.structure,
-            self.std_tolerance)
+            self.std_tolerance, optional_dict)
 
         counter = 0
         self.start_time = time.time()
@@ -210,8 +214,6 @@ def run(self):
                 # When DFT is called, ASE energy, forces, and stresses should
                 # get updated.
                 self.initialize_train()
-                self.update_temperature()
-                self.record_state()
 
             # after step 1, try predicting with GP model
             else:
@@ -220,10 +222,8 @@ def run(self):
                 self.compute_properties()
 
                 # get max uncertainty atoms
-                noise_sig = Parameters.get_noise(
-                        self.gp.hyps_mask, self.gp.hyps, constraint=False)
                 std_in_bound, target_atoms = is_std_in_bound(
-                    self.std_tolerance, noise_sig, self.structure,
+                    self.std_tolerance, self.gp.force_noise, self.structure,
                     self.max_atoms_added)
 
                 if not std_in_bound:
@@ -256,11 +256,13 @@ def run(self):
             # TODO: Reinstate velocity rescaling.
             self.md_step()
             self.curr_step += 1
+            self.checkpoint()
 
         self.output.conclude_run()
 
         if self.write_model >= 1:
-            self.gp.write_model(self.output_name+"_model")
+            self.write_gp()
+            self.checkpoint()
 
     def get_structure_from_input(self, prev_pos_init):
         positions, species, cell, masses = \
@@ -275,6 +277,9 @@ def initialize_train(self):
         self.run_dft()
         dft_frcs = deepcopy(self.structure.forces)
 
+        self.update_temperature()
+        self.record_state()
+
         # make initial gp model and predict forces
         self.update_gp(self.init_atoms, dft_frcs)
 
@@ -291,6 +296,9 @@ def md_step(self):
         '''
         md.update_positions(self.dt, self.noa, self.structure)
 
+    def write_gp(self):
+        self.gp.write_model(self.gp_name)
+
     def run_dft(self):
         """Calculates DFT forces on atoms in the current structure.
 
@@ -332,7 +340,8 @@ def run_dft(self):
             for ofile in to_copy:
                 copyfile(ofile, dest+'/'+dt_string+ofile)
 
-    def update_gp(self, train_atoms: List[int], dft_frcs: 'ndarray'):
+    def update_gp(self, train_atoms: List[int], dft_frcs: 'ndarray',
+                  dft_energy: float = None, dft_stress: 'ndarray' = None):
         """
         Updates the current GP model.
 
@@ -345,8 +354,8 @@ def update_gp(self, train_atoms: List[int], dft_frcs: 'ndarray'):
         self.output.add_atom_info(train_atoms, self.structure.stds)
 
         # update gp model
-        self.gp.update_db(self.structure, dft_frcs,
-                          custom_range=train_atoms)
+        self.gp.update_db(self.structure, dft_frcs, custom_range=train_atoms,
+                          energy=dft_energy, stress=dft_stress)
 
         self.gp.set_L_alpha()
 
@@ -354,19 +363,19 @@ def update_gp(self, train_atoms: List[int], dft_frcs: 'ndarray'):
         if (self.dft_count-1) < self.freeze_hyps:
             self.train_gp()
             if self.write_model == 2:
-                self.gp.write_model(self.output_name+"_model")
+                self.write_gp()
         if self.write_model == 3:
-            self.gp.write_model(self.output_name+'_model')
+            self.write_gp()
 
     def train_gp(self):
         """Optimizes the hyperparameters of the current GP model."""
 
         self.gp.train(logger_name=self.output.basename+'hyps')
-        hyps, labels = Parameters.get_hyps(
-                self.gp.hyps_mask, self.gp.hyps, constraint=False,
-                label=True)
+
+        hyps, labels = self.gp.hyps_and_labels
         if labels is None:
             labels = self.gp.hyp_labels
+
         self.output.write_hyps(labels, hyps,
                                self.start_time,
                                self.gp.likelihood, self.gp.likelihood_gradient,
@@ -413,3 +422,53 @@ def record_state(self):
         self.output.write_md_config(
             self.dt, self.curr_step, self.structure, self.temperature,
             self.KE, self.start_time, self.dft_step, self.velocities)
+
+    def as_dict(self):
+        self.dft_module = self.dft_module.__name__
+        out_dict = deepcopy(dict(vars(self)))
+        self.dft_module = eval(self.dft_module)
+
+        out_dict['gp'] = self.gp_name
+        out_dict['structure'] = self.structure.as_dict()
+
+        for key in ['output', 'pred_func']:
+            out_dict.pop(key)
+
+        return out_dict
+
+    @staticmethod
+    def from_dict(in_dict):
+        if in_dict['write_model'] <= 1: # TODO: detect GP version
+            warnings.warn("The GP model might not be the latest")
+       
+        gp_model = gp.GaussianProcess.from_file(in_dict['gp'])
+        in_dict['gp'] = gp_model
+        in_dict['structure'] = struc.Structure.from_dict(in_dict['structure'])
+
+        if 'flare.dft_interface' in in_dict['dft_module']:
+            for dft_name in ['qe', 'cp2k', 'vasp']:
+                if dft_name in in_dict['dft_module']:
+                    in_dict['force_source'] = dft_name
+                    break
+        else: # if force source is a module
+            in_dict['force_source'] = eval(in_dict['dft_module'])
+
+        new_otf = OTF(**in_dict)
+        new_otf.structure = in_dict['structure']
+        new_otf.dft_count = in_dict['dft_count']
+        new_otf.curr_step = in_dict['curr_step']
+        return new_otf
+
+    def checkpoint(self):
+        name = self.checkpt_name
+        if '.json' != name[-5:]:
+            name += '.json'
+        with open(name, 'w') as f:
+            json.dump(self.as_dict(), f, cls=NumpyEncoder)
+
+    @classmethod
+    def from_checkpoint(cls, filename):
+        with open(filename, 'r') as f:
+            otf_model = cls.from_dict(json.loads(f.readline()))
+
+        return otf_model
diff --git a/flare/otf_parser.py b/flare/otf_parser.py
index 9e96cb56d..30602d1dd 100644
--- a/flare/otf_parser.py
+++ b/flare/otf_parser.py
@@ -1,4 +1,4 @@
-import sys
+import sys, subprocess
 import numpy as np
 
 from copy import deepcopy
@@ -13,76 +13,86 @@ def __init__(self, filename, calculate_energy=False):
 
         self.calculate_energy = calculate_energy
 
-        self.header = parse_header_information(filename)
-
-        position_list, force_list, uncertainty_list, velocity_list,\
-            dft_frames, temperatures, times, msds, dft_times, energies = \
-            self.parse_pos_otf(filename)
-
-        self.position_list = position_list
-        self.force_list = force_list
-        self.uncertainty_list = uncertainty_list
-        self.velocity_list = velocity_list
-        self.dft_frames = dft_frames
-        self.temperatures = temperatures
-        self.times = times
-        self.msds = msds
-        self.dft_times = dft_times
+        blocks = split_blocks(filename)
+
+        self.header = parse_header_information(blocks[0])
+        self.noa = self.header["atoms"]
+        self.noh = self.header["n_hyps"]
+
+        self.position_list = []
+        self.cell_list = []
+        self.force_list = []
+        self.stress_list = []
+        self.uncertainty_list = []
+        self.velocity_list = []
+        self.temperatures = []
+        self.dft_frames = []
+        self.dft_times = []
+        self.times = []
+        self.msds = []
+        self.energies = []
+        self.thermostat = {}
+
+        self.gp_position_list = []
+        self.gp_cell_list = []
+        self.gp_force_list = []
+        self.gp_stress_list = []
+        self.gp_uncertainty_list = []
+        self.gp_velocity_list = []
+        self.gp_atom_list = []
+        self.gp_species_list = []
+        self.gp_atom_count = []
+        self.gp_thermostat = {}
+
+        self.gp_hyp_list = [self.header["hyps"]]
+
+        self.mae_list = []
+        self.maf_list = []
+
+        self.parse_pos_otf(blocks[1:])
 
         if self.calculate_energy:
             self.energies = energies
 
-        gp_position_list, gp_force_list, gp_uncertainty_list,\
-            gp_velocity_list, gp_atom_list, gp_hyp_list, \
-            gp_species_list, gp_atom_count = self.extract_gp_info(filename)
+    def make_gp(
+        self, cell=None, call_no=None, hyps=None, init_gp=None, hyp_no=None, **kwargs,
+    ):
 
-        self.gp_position_list = gp_position_list
-        self.gp_force_list = gp_force_list
-        self.gp_uncertainty_list = gp_uncertainty_list
-        self.gp_velocity_list = gp_velocity_list
-        self.gp_atom_list = gp_atom_list
-        self.gp_hyp_list = gp_hyp_list
-        self.gp_species_list = gp_species_list
-        self.gp_atom_count = gp_atom_count
-
-    def make_gp(self, cell=None, call_no=None, hyps=None, init_gp=None,
-                hyp_no=None, **kwargs,):
+        if "restart" in self.header and self.header["restart"] > 0:
+            assert init_gp is not None, "Please input the init_gp as the gp model dumpped"\
+                    "before restarting otf."
 
         if call_no is None:
             call_no = len(self.gp_position_list)
         if hyp_no is None:
-            hyp_no = call_no
+            hyp_no = len(self.gp_hyp_list)  # use the last hyps by default
         if hyps is None:
             # check out the last non-empty element from the list
-            for icall in reversed(range(hyp_no)):
-                if len(self.gp_hyp_list[icall]) > 0:
-                    hyps = self.gp_hyp_list[icall][-1]
-                    break
+            hyps = self.gp_hyp_list[hyp_no - 1]
         if cell is None:
-            cell = self.header['cell']
-
+            cell = self.header["cell"]
 
         if init_gp is None:
             # Use run's values as extracted from header
             # TODO Allow for kernel gradient in header
 
             dictionary = deepcopy(self.header)
-            dictionary['hyps'] = hyps
+            dictionary["hyps"] = hyps
             for k in kwargs:
                 if kwargs[k] is not None:
                     dictionary[k] = kwargs[k]
 
-            gp_model = \
-                GaussianProcess.from_dict(dictionary)
+            gp_model = GaussianProcess.from_dict(dictionary)
         else:
             gp_model = init_gp
             gp_model.hyps = hyps
 
-        for (positions, forces, atoms, _, species) in \
-            zip(self.gp_position_list[:call_no],
-                self.gp_force_list[:call_no],
-                self.gp_atom_list[:call_no], self.gp_hyp_list[:call_no],
-                self.gp_species_list[:call_no]):
+        for (positions, forces, atoms, species) in zip(
+            self.gp_position_list[:call_no],
+            self.gp_force_list[:call_no],
+            self.gp_atom_list[:call_no],
+            self.gp_species_list[:call_no],
+        ):
 
             struc_curr = struc.Structure(cell, species, positions)
 
@@ -96,149 +106,83 @@ def make_gp(self, cell=None, call_no=None, hyps=None, init_gp=None,
     def get_gp_activation(gp_model):
         pass
 
-    def parse_pos_otf(self, filename):
+    def parse_pos_otf(self, blocks):
         """
         Exclusively parses MD run information
         :param filename:
         :return:
         """
-        position_list = []
-        force_list = []
-        uncertainty_list = []
-        velocity_list = []
-        temperatures = []
-        dft_frames = []
-        dft_times = []
-        times = []
-        msds = []
-        energies = []
-
-        with open(filename, 'r') as f:
-            lines = f.readlines()
-
-        n_steps = 0
-
-        for index, line in enumerate(lines):
-            if line.startswith("number of atoms"):
-                at_line = line.split()
-                noa = int(at_line[3])
-
-            # number of hyperparameters
-            if line.startswith("number of hyperparameters"):
-                line_curr = line.split(':')
-                noh = int(line_curr[-1])
-
-            # DFT frame
-            if line.startswith("*-Frame"):
-                dft_frame_line = line.split()
-                dft_frames.append(int(dft_frame_line[1]))
-                dft_time_line = lines[index+1].split()
-                dft_times.append(float(dft_time_line[-2]))
-
-            # MD frame
-            if line.startswith("-Frame"):
-                n_steps += 1
-                time_line = lines[index+1].split()
-                sim_time = float(time_line[2])
-                times.append(sim_time)
-
-                _, positions, forces, uncertainties, velocities = \
-                    parse_snapshot(lines, index, noa, False, noh)
-
-                position_list.append(positions)
-                force_list.append(forces)
-                uncertainty_list.append(uncertainties)
-                velocity_list.append(velocities)
-
-                temp_line = lines[index+4+noa].split()
-                temperatures.append(float(temp_line[-2]))
-
-                if self.calculate_energy:
-                    en_line = lines[index+5+noa].split()
-                    energies.append(float(en_line[2]))
-
-                msds.append(np.mean((positions - position_list[0])**2))
-
-        return position_list, force_list, uncertainty_list, velocity_list,\
-            dft_frames, temperatures, times, msds, dft_times, energies
-
-    def extract_gp_info(self, filename):
-        """
-        Exclusively parses DFT run information
-        :param filename:
-        :return:
-        """
-        species_list = []
-        position_list = []
-        force_list = []
-        uncertainty_list = []
-        velocity_list = []
-        atom_list = []
-        atom_count = []
-        hyp_list = []
-
-        with open(filename, 'r') as f:
-            lines = f.readlines()
-
-        for index, line in enumerate(lines):
-
-            # number of atoms
-            if line.startswith("number of atoms"):
-                line_curr = line.split()
-                noa = int(line_curr[3])
-
-            # number of hyperparameters
-            if line.startswith("number of hyperparameters"):
-                line_curr = line.split()
-                noh = int(line_curr[3])
-
-            if line.startswith("*-"):
-                # keep track of atoms added to training set
-                ind_count = index+1
-                line_check = lines[ind_count]
-                atoms_added = []
-                hyps_added = []
-                while not line_check.startswith("-"):
-
-                    # keep track of atom number
-                    if line_check.startswith("Adding atom"):
-                        line_split = line_check.split()
-                        atom_strings = line_split[2:-4]
-                        for n, atom_string in enumerate(atom_strings):
-                            if n == 0:
-                                atoms_added.append(int(atom_string[1:-1]))
-                            else:
-                                atoms_added.append(int(atom_string[0:-1]))
-
-                    # keep track of hyperparameters
-                    if line_check.startswith("GP hyperparameters:"):
-                        hyps = []
-                        for hyp_line in lines[(ind_count+1):(ind_count+1+noh)]:
-                            hyp_line = hyp_line.split()
-                            hyps.append(float(hyp_line[-1]))
-                        hyps = np.array(hyps)
-                        hyps_added.append(hyps)
-
-                    ind_count += 1
-                    line_check = lines[ind_count]
-                    # catch case where final frame is a DFT call
-                    if line_check.startswith('Run complete'):
-                        break
-
-                hyp_list.append(hyps_added)
-                atom_list.append(atoms_added)
-                atom_count.append(len(atoms_added))
-
-                # add DFT positions and forces
-                line_curr = line.split()
-
-                # TODO: generalize this to account for arbitrary starting list
-                append_atom_lists(species_list, position_list, force_list,
-                                  uncertainty_list, velocity_list,
-                                  lines, index, noa, True, noh)
-
-        return position_list, force_list, uncertainty_list, velocity_list,\
-            atom_list, hyp_list, species_list, atom_count
+        n_steps = len(blocks) - 1
+
+        for block in blocks:
+            for index, line in enumerate(block):
+                # DFT frame
+                if line.startswith("*-Frame"):
+                    dft_frame_line = line.split()
+                    self.dft_frames.append(int(dft_frame_line[1]))
+                    dft_time_line = block[index + 1].split()
+                    self.dft_times.append(float(dft_time_line[-2]))
+
+                    # TODO: generalize this to account for arbitrary starting list
+                    append_atom_lists(
+                        self.gp_species_list,
+                        self.gp_position_list,
+                        self.gp_force_list,
+                        self.gp_uncertainty_list,
+                        self.gp_velocity_list,
+                        block,
+                        index,
+                        self.noa,
+                        True,
+                        self.noh,
+                    )
+
+                    post_frame = block[index + 3 + self.noa :]
+                    extract_global_info(
+                        self.gp_cell_list,
+                        self.gp_stress_list,
+                        self.gp_thermostat,
+                        post_frame,
+                    )
+
+                    extract_gp_info(
+                        post_frame,
+                        self.mae_list,
+                        self.maf_list,
+                        self.gp_atom_list,
+                        self.gp_hyp_list,
+                        self.noh,
+                    )
+
+                # MD frame
+                if line.startswith("-Frame"):
+                    n_steps += 1
+                    time_line = block[index + 1].split()
+                    sim_time = float(time_line[2])
+                    self.times.append(sim_time)
+
+                    # TODO: generalize this to account for arbitrary starting list
+                    append_atom_lists(
+                        [],
+                        self.position_list,
+                        self.force_list,
+                        self.uncertainty_list,
+                        self.velocity_list,
+                        block,
+                        index,
+                        self.noa,
+                        False,
+                        self.noh,
+                    )
+
+                    post_frame = block[index + 3 + self.noa :]
+                    extract_global_info(
+                        self.cell_list, self.stress_list, self.thermostat, post_frame,
+                    )
+
+                    self.msds.append(
+                        np.mean((self.position_list[-1] - self.position_list[0]) ** 2)
+                    )
 
     def output_md_structures(self):
         """
@@ -248,31 +192,163 @@ def output_md_structures(self):
 
         positions = self.position_list
         structures = []
-        cell = self.header['cell']
-        species = self.header['species']
+        cell = self.header["cell"]
+        species = self.header["species"]
         forces = self.force_list
         stds = self.uncertainty_list
         for i in range(len(positions)):
-            cur_struc = struc.Structure(cell=cell, species=species,
-                                        positions=positions[i])
+            cur_struc = struc.Structure(
+                cell=cell, species=species, positions=positions[i]
+            )
             cur_struc.forces = forces[i]
             cur_struc.stds = stds[i]
             structures.append(cur_struc)
         return structures
 
 
-def append_atom_lists(species_list: List[str],
-                      position_list: List[np.ndarray],
-                      force_list: List[np.ndarray],
-                      uncertainty_list: List[np.ndarray],
-                      velocity_list: List[np.ndarray],
-                      lines: List[str], index: int, noa: int,
-                      dft_call: bool, noh: int) -> None:
+def split_blocks(filename):
+    with open(filename, "r") as f:
+        lines = f.readlines()
+        head = 0
+        blocks = []
+        for index, line in enumerate(lines):
+            if line.startswith("---"):
+                blocks.append(lines[head:index])
+                head = index + 1
+    return blocks
+
+
+def parse_header_information(lines) -> dict:
+    """
+    Get information about the run from the header of the file
+    :param outfile:
+    :return:
+    """
+    header_info = {}
+
+    cutoffs_dict = {}
+    for i, line in enumerate(lines):
+        line_lower = line.lower()
+
+        # gp related
+        if "cutoffs" in line_lower:
+            if "{" in line_lower:
+                line = line[len("Cutoffs: ") :]
+                cutoffs = eval(line)
+            else:
+                line = line.split(":")[1].strip()
+                line = line.strip("[").strip("]")
+                line = line.split()
+                cutoffs = []
+                for val in line:
+                    try:
+                        cutoffs.append(float(val))
+                    except:
+                        cutoffs.append(float(val[:-1]))
+            header_info["cutoffs"] = cutoffs
+
+        if "number of hyperparameters" in line_lower:
+            n_hyps = int(line.split(":")[1].strip())
+            header_info["n_hyps"] = n_hyps 
+            
+            # parse hyps
+            new_line = lines[i+1].replace("[", "")
+            new_line = new_line.replace("]", "")
+            assert "hyperparameter" in new_line.lower()
+
+            hyps_array = new_line[new_line.find(":")+1:].split()
+            if len(hyps_array) < n_hyps:
+                next_new_line = lines[i+2].replace("[", "")
+                next_new_line = next_new_line.replace("]", "")
+                hyps_array += next_new_line.split()
+            assert len(hyps_array) == n_hyps
+
+            hyps_array = [float(h.strip()) for h in hyps_array]
+            header_info["hyps"] = np.array(hyps_array)
+
+        if "kernel_name" in line_lower:
+            header_info["kernel_name"] = line.split(":")[1].strip()
+        elif "kernels" in line_lower:
+            line = line.split(":")[1].strip()
+            line = line.strip("[").strip("]")
+            line = line.split()
+            header_info["kernels"] = line
+        elif "kernel" in line_lower:
+            header_info["kernel_name"] = line.split(":")[1].strip()
+
+        for kw in header_dict:
+            get_header_item(line, header_info, kw)
+
+        if "optimization algorithm" in line:
+            header_info["algo"] = str(line.split(":")[1].strip()).upper()
+
+        if "system species" in line_lower:
+            line = line.split(":")[1]
+            line = line.split("'")
+            species = [item for item in line if item.isalpha()]
+            header_info["species_set"] = set(species)
+        if "periodic cell" in line_lower:
+            vectors = []
+            for cell_line in lines[i + 1 : i + 4]:
+                cell_line = cell_line.strip().replace("[", "").replace("]", "")
+                vec = cell_line.split()
+                vector = [float(vec[0]), float(vec[1]), float(vec[2])]
+                vectors.append(vector)
+            header_info["cell"] = np.array(vectors)
+        if "previous positions" in line_lower:
+            struc_spec = []
+            prev_positions = []
+            for pos_line in lines[i + 1 : i + 1 + header_info.get("atoms", 0)]:
+                pos = pos_line.split()
+                struc_spec.append(pos[0])
+                prev_positions.append((float(pos[1]), float(pos[2]), float(pos[3])))
+            header_info["species"] = struc_spec
+            header_info["prev_positions"] = np.array(prev_positions)
+
+    return header_info
+
+
+def get_header_item(line, header_info, kw):
+    if not isinstance(line, str):
+        return
+
+    pattern = header_dict[kw][0]
+    value_type = header_dict[kw][1]
+
+    if header_dict[kw][2]:
+        pattern = pattern.lower()
+        line = line.lower()
+
+    if pattern in line:
+        header_info[kw] = value_type(line.split(":")[1].strip())
+
+
+header_dict = {
+    "restart": ["Restart", int, False],
+    "frames": ["Frames", int, True],
+    "atoms": ["Number of atoms", int, True],
+    "dt": ["Timestep", float, True],
+}
+
+
+def append_atom_lists(
+    species_list: List[str],
+    position_list: List[np.ndarray],
+    force_list: List[np.ndarray],
+    uncertainty_list: List[np.ndarray],
+    velocity_list: List[np.ndarray],
+    lines: List[str],
+    index: int,
+    noa: int,
+    dft_call: bool,
+    noh: int,
+) -> None:
 
     """Update lists containing atom information at each snapshot."""
 
-    species, positions, forces, uncertainties, velocities = \
-        parse_snapshot(lines, index, noa, dft_call, noh)
+    species, positions, forces, uncertainties, velocities = parse_snapshot(
+        lines, index, noa, dft_call, noh
+    )
 
     species_list.append(species)
     position_list.append(positions)
@@ -294,10 +370,9 @@ def parse_snapshot(lines, index, noa, dft_call, noh):
     # Current setting for # of lines to skip after Frame marker
     skip = 3
 
-    for count, frame_line in enumerate(lines[(index+skip):(index+skip+noa)]):
+    for count, frame_line in enumerate(lines[(index + skip) : (index + skip + noa)]):
         # parse frame line
-        spec, position, force, uncertainty, velocity = \
-            parse_frame_line(frame_line)
+        spec, position, force, uncertainty, velocity = parse_frame_line(frame_line)
 
         # update values
         species.append(spec)
@@ -322,102 +397,6 @@ def strip_and_split(line):
     return stripped_line
 
 
-def parse_header_information(outfile: str = 'otf_run.out') -> dict:
-    """
-    Get information about the run from the header of the file
-    :param outfile:
-    :return:
-    """
-    with open(outfile, 'r') as f:
-        lines = f.readlines()
-
-    header_info = {}
-
-    stopreading = None
-
-    for line in lines:
-        if '---' in line or '=' in line:
-            stopreading = lines.index(line)
-            break
-
-    if stopreading is None:
-        raise Exception("OTF File is malformed")
-
-    cutoffs_dict = {}
-    for i, line in enumerate(lines[:stopreading]):
-        line_lower = line.lower()
-
-        # gp related
-        if 'cutoffs' in line_lower:
-            line = line.split(':')[1].strip()
-            line = line.strip('[').strip(']')
-            line = line.split()
-            cutoffs = []
-            for val in line:
-                try:
-                    cutoffs.append(float(val))
-                except:
-                    cutoffs.append(float(val[:-1]))
-            header_info['cutoffs'] = cutoffs
-        elif 'cutoff' in line_lower:
-            line = line.split(':')
-            name = line[0][7:]
-            value = float(line[1])
-            cutoffs_dict[name] = value
-            header_info['cutoffs'] = cutoffs_dict
-
-        if 'kernel_name' in line_lower:
-            header_info['kernel_name'] = line.split(':')[1].strip()
-        elif 'kernels' in line_lower:
-            line = line.split(':')[1].strip()
-            line = line.strip('[').strip(']')
-            line = line.split()
-            header_info['kernels'] = line
-        elif 'kernel' in line_lower:
-            header_info['kernel_name'] = line.split(':')[1].strip()
-
-        if 'number of hyperparameters:' in line_lower:
-            header_info['n_hyps'] = int(line.split(':')[1])
-        if 'optimization algorithm' in line_lower:
-            header_info['algo'] = line.split(':')[1].strip()
-
-        # otf related
-        if 'frames' in line_lower:
-            header_info['frames'] = int(line.split(':')[1])
-        if 'number of atoms' in line_lower:
-            header_info['atoms'] = int(line.split(':')[1])
-        if 'timestep' in line_lower:
-            header_info['dt'] = float(line.split(':')[1])
-        if 'system species' in line_lower:
-            line = line.split(':')[1]
-            line = line.split("'")
-
-            species = [item for item in line if item.isalpha()]
-
-            header_info['species_set'] = set(species)
-        if 'periodic cell' in line_lower:
-            vectors = []
-            for cell_line in lines[i+1:i+4]:
-                cell_line = \
-                    cell_line.strip().replace('[', '').replace(']', '')
-                vec = cell_line.split()
-                vector = [float(vec[0]), float(vec[1]), float(vec[2])]
-                vectors.append(vector)
-            header_info['cell'] = np.array(vectors)
-        if 'previous positions' in line_lower:
-            struc_spec = []
-            prev_positions = []
-            for pos_line in lines[i+1:i+1+header_info.get('atoms', 0)]:
-                pos = pos_line.split()
-                struc_spec.append(pos[0])
-                prev_positions.append((float(pos[1]), float(pos[2]),
-                                       float(pos[3])))
-            header_info['species'] = struc_spec
-            header_info['prev_positions'] = np.array(prev_positions)
-
-    return header_info
-
-
 def parse_frame_line(frame_line):
     """parse a line in otf output.
     :param frame_line: frame line to be parsed
@@ -435,3 +414,78 @@ def parse_frame_line(frame_line):
     velocity = np.array([float(n) for n in frame_line[10:13]])
 
     return spec, position, force, uncertainty, velocity
+
+
+def extract_global_info(
+    cell_list, stress_list, thermostat, block,
+):
+
+    for ind, line in enumerate(block):
+        if "cell" in line:
+            vectors = []
+            for cell_line in block[ind + 1 : ind + 4]:
+                cell_line = cell_line.strip().replace("[", "").replace("]", "")
+                vec = cell_line.split()
+                vector = [float(vec[0]), float(vec[1]), float(vec[2])]
+                vectors.append(vector)
+            cell_list.append(vectors)
+        if "Stress" in line:
+            vectors = []
+            stress_line = block[ind + 2].split()
+            vectors = [float(s) for s in stress_line]
+            stress_list.append(vectors)
+
+        for t in [
+            "Pressure",
+            "Temperature",
+            "Kinetic energy",
+            "Potential energy",
+            "Total energy",
+        ]:
+            get_thermostat(thermostat, t, line)
+
+
+def get_thermostat(thermostat, kw, line):
+    if kw in line:
+        value = float(line.split()[-1])
+        kw = kw.lower()
+        if kw in thermostat:
+            thermostat[kw].append(value)
+        else:
+            thermostat[kw] = [value]
+
+
+def extract_gp_info(block, mae_list, maf_list, atoms_list, hyps_list, noh):
+    """
+    Exclusively parses DFT run information
+    :param filename:
+    :return:
+    """
+    for ind, line in enumerate(block):
+        if "mean absolute error" in line:
+            value = float(line.split()[3])
+            mae_list.append(value)
+        if "mean absolute dft component" in line:
+            value = float(line.split()[4])
+            maf_list.append(value)
+
+        # keep track of atom number
+        if line.startswith("Adding atom"):
+            atoms_added = []
+            line_split = line.split()
+            atom_strings = line_split[2:-4]
+            for n, atom_string in enumerate(atom_strings):
+                if n == 0:
+                    atoms_added.append(int(atom_string[1:-1]))
+                else:
+                    atoms_added.append(int(atom_string[0:-1]))
+            atoms_list.append(atoms_added)
+
+        # keep track of hyperparameters
+        if line.startswith("GP hyperparameters:"):
+            hyps = []
+            for hyp_line in block[(ind + 1) : (ind + 1 + noh)]:
+                hyp_line = hyp_line.split()
+                hyps.append(float(hyp_line[-1]))
+            hyps = np.array(hyps)
+            hyps_list.append(hyps)
diff --git a/flare/output.py b/flare/output.py
index 6614ba3a0..67e165bec 100644
--- a/flare/output.py
+++ b/flare/output.py
@@ -149,7 +149,7 @@ def write_header(self, gp_str: str,
             headerstring += \
                 f'System species: {set(structure.species_labels)}\n'
             headerstring += 'Periodic cell (A): \n'
-            headerstring += str(structure.cell)+'\n'
+            headerstring += str(np.array(structure.cell))+'\n'
 
         if optional:
             for key, value in optional.items():
@@ -170,6 +170,20 @@ def write_header(self, gp_str: str,
         if self.always_flush:
             f.handlers[0].flush()
 
+
+    def write_md_header(self, dt, curr_step, dft_step):
+        string = ''
+        # Mark if a frame had DFT forces with an asterisk
+        if not dft_step:
+            string += '-' * 80 + '\n'
+            string += f"-Frame: {curr_step} "
+        else:
+            string += f"\n*-Frame: {curr_step} "
+
+        string += f'\nSimulation Time: {(dt * curr_step):.3} ps \n'
+        return string
+
+
     def write_md_config(
             self, dt, curr_step, structure, temperature, KE, start_time, dft_step,
             velocities):
@@ -188,16 +202,7 @@ def write_md_config(
         :return:
         """
 
-        string = ''
-
-        # Mark if a frame had DFT forces with an asterisk
-        if not dft_step:
-            string += '-' * 80 + '\n'
-            string += f"-Frame: {curr_step} "
-        else:
-            string += f"\n*-Frame: {curr_step} "
-
-        string += f'\nSimulation Time: {(dt * curr_step):.3} ps \n'
+        string = self.write_md_header(dt, curr_step, dft_step)
 
         # Construct Header line
         n_space = 30
@@ -235,7 +240,7 @@ def write_md_config(
         # Report cell if stress attribute is present.
         if structure.stress is not None:
             string += 'Periodic cell (A): \n'
-            string += str(structure.cell)+'\n\n'
+            string += str(np.array(structure.cell))+'\n\n'
 
         # Report stress tensor.
         pressure = None
@@ -263,23 +268,23 @@ def write_md_config(
         if pressure is not None:
             string += f'Pressure (GPa): {pressure:.6f} \n'
 
-        string += f'Temperature: {temperature:.2f} K \n'
-        string += f'Kinetic energy: {KE:.6f} eV \n'
+        string += f'Temperature (K): {temperature:.2f} \n'
+        string += f'Kinetic energy (eV): {KE:.6f} \n'
 
         # Report potential energy.
         if structure.potential_energy is not None:
             string += \
-                f'Potential energy: {structure.potential_energy:.6f} eV \n'
+                f'Potential energy (eV): {structure.potential_energy:.6f} \n'
 
         # Report potential energy uncertainty.
         if structure.local_energy_stds is not None:
             pot_en_std = np.sqrt(np.sum(structure.local_energy_stds**2))
-            string += f'Uncertainty: {pot_en_std:.6f} eV \n'
+            string += f'Uncertainty (eV): {pot_en_std:.6f} \n'
 
         # Report total energy.
         if structure.potential_energy is not None:
             tot_en = KE + structure.potential_energy
-            string += f'Total energy: {tot_en:.6f} eV \n'
+            string += f'Total energy (eV): {tot_en:.6f} \n'
 
         logger = logging.getLogger(self.basename+'log')
         logger.info(string)
diff --git a/flare/predict.py b/flare/predict.py
index db996907d..02a50d23f 100644
--- a/flare/predict.py
+++ b/flare/predict.py
@@ -179,7 +179,8 @@ def predict_on_structure_par(
     :rtype: (np.ndarray, np.ndarray)
     """
     # Just work in serial in the number of cpus is 1
-    if n_cpus == 1:
+    # or the gp is not parallelized by atoms
+    if (n_cpus is 1) or (not gp.per_atom_par):
         return predict_on_structure(
             structure=structure, gp=gp, n_cpus=n_cpus,
             write_to_structure=write_to_structure,
@@ -274,8 +275,9 @@ def predict_on_structure_efs_par(
  write_to_structure: bool = True, selective_atoms: List[int] = None,
  skipped_atom_value=0):
 
-    # Just work in serial in the number of cpus is 1
-    if n_cpus is 1:
+    # Just work in serial in the number of cpus is 1, 
+    # or the gp is not parallelized by atoms
+    if (n_cpus is 1) or (not gp.per_atom_par):
         return predict_on_structure_efs(
             structure=structure, gp=gp, n_cpus=n_cpus,
             write_to_structure=write_to_structure,
@@ -384,8 +386,6 @@ def predict_on_structure_en(structure: Structure, gp: GaussianProcess,
     forces = np.zeros((structure.nat, 3))
     stds = np.zeros((structure.nat, 3))
     local_energies = np.zeros(structure.nat)
-    forces = np.zeros(shape=(structure.nat, 3))
-    stds = np.zeros(shape=(structure.nat, 3))
 
     if selective_atoms:
         forces.fill(skipped_atom_value)
@@ -437,7 +437,8 @@ def predict_on_structure_par_en(structure: Structure, gp: GaussianProcess,
     :rtype: (np.ndarray, np.ndarray, np.ndarray)
     """
     # Work in serial if the number of cpus is 1
-    if n_cpus == 1:
+    # or the gp is not parallelized by atoms
+    if (n_cpus is 1) or (not gp.per_atom_par):
         predict_on_structure_en(structure, gp,
                                 n_cpus, write_to_structure,
                                 selective_atoms,
@@ -446,8 +447,6 @@ def predict_on_structure_par_en(structure: Structure, gp: GaussianProcess,
     forces = np.zeros((structure.nat, 3))
     stds = np.zeros((structure.nat, 3))
     local_energies = np.zeros(structure.nat)
-    forces = np.zeros(shape=(structure.nat, 3))
-    stds = np.zeros(shape=(structure.nat, 3))
 
     if selective_atoms:
         forces.fill(skipped_atom_value)
diff --git a/flare/struc.py b/flare/struc.py
index 73431af5b..0f9c202ec 100644
--- a/flare/struc.py
+++ b/flare/struc.py
@@ -99,7 +99,7 @@ def __init__(self, cell: 'ndarray', species: Union[List[str], List[int]],
         self.partial_stress_stds = None
         self.stress = None
         self.stress_stds = None
-        self.potential_energy = None
+        self.potential_energy = None # duplicated with self.energy?
 
         self.mass_dict = mass_dict
 
@@ -331,14 +331,53 @@ def from_ase_atoms(atoms: 'ase.Atoms', cell=None) -> 'flare.struc.Structure':
 
         if cell is None:
             cell = np.array(atoms.cell)
-        struc = Structure(cell=cell, positions=atoms.positions,
-                          species=atoms.get_chemical_symbols())
+
+        try:
+            forces = atoms.get_forces()
+        except:
+            forces = None 
+
+        try:
+            stds = atoms.get_uncertainties()
+        except:
+            stds = None
+
+        try:
+            energy = atoms.get_potential_energy()
+        except:
+            energy = None
+
+        try:
+            stress = atoms.get_stress()
+        except:
+            stress = None 
+
+        struc = Structure(
+            cell = cell, 
+            positions = atoms.positions,
+            species = atoms.get_chemical_symbols(),
+            forces = forces,
+            stds = stds,
+            energy = energy,
+        )
+        struc.stress = stress
         return struc
 
     def to_ase_atoms(self) -> 'ase.Atoms':
         from ase import Atoms
-        return Atoms(self.species_labels, positions=self.positions,
-                     cell=self.cell, pbc=True)
+        from ase.calculators.singlepoint import SinglePointCalculator
+
+        atoms =  Atoms(self.species_labels, positions=self.positions,
+                       cell=self.cell, pbc=True)
+
+        results = {}
+        properties = ["forces", "energy", "stress"]
+        for p in properties:
+            results[p] = getattr(self, p)
+        calculator = SinglePointCalculator(atoms, **results)
+        atoms.set_calculator(calculator)
+
+        return atoms
 
     def to_pmg_structure(self):
         """
diff --git a/lammps_plugins/pair_mgp.cpp b/lammps_plugins/pair_mgp.cpp
new file mode 100644
index 000000000..1bab92d9d
--- /dev/null
+++ b/lammps_plugins/pair_mgp.cpp
@@ -0,0 +1,1088 @@
+/* -------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+-------------------------------------------------------------------- */
+
+/* --------------------------------------------------------------------
+   Contributing authors: Lixin Sun (Harvard) Yu Xie (Harvard)
+-------------------------------------------------------------------- */
+
+#include "pair_mgp.h"
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "memory.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "neighbor.h"
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+
+#define MAXLINE 1024
+
+/* ----------------------------------------------------------------- */
+
+PairMGP::PairMGP(LAMMPS *lmp) : Pair(lmp) {
+  // no restart info
+  restartinfo = 0;
+  // give a warning if bonds/angles/dihedrals are used with mmf
+  manybody_flag = 1;
+  single_enable = 1;
+  // not sure this is good
+  reinitflag = 0;
+
+  setflag = NULL;
+  map2b = NULL;
+  map3b = NULL;
+  compute2b = true;
+  compute3b = true;
+
+  elements = NULL;
+  n_2body = 0;
+  n_3body = 0;
+
+  lo_2body = NULL;
+  hi_2body = NULL;
+  lo_3body = NULL;
+  hi_3body = NULL;
+  grid_2body = NULL;
+  grid_3body = NULL;
+  ecoeff_2body = NULL;
+  fcoeff_2body = NULL;
+  ecoeff_3body = NULL;
+  fcoeff_3body = NULL;
+
+  cutsq = NULL;
+  cut2bsq = NULL;
+  cut3bsq = NULL;
+
+  cutshortsq = 0;
+
+  maxshort = 20;
+  neighshort = NULL;
+
+  Ad[0][0] = -1.0 / 6.0;
+  Ad[0][1] = 3.0 / 6.0;
+  Ad[0][2] = -3.0 / 6.0;
+  Ad[0][3] = 1.0 / 6.0;
+  Ad[1][0] = 3.0 / 6.0;
+  Ad[1][1] = -6.0 / 6.0;
+  Ad[1][2] = 0.0 / 6.0;
+  Ad[1][3] = 4.0 / 6.0;
+  Ad[2][0] = -3.0 / 6.0;
+  Ad[2][1] = 3.0 / 6.0;
+  Ad[2][2] = 3.0 / 6.0;
+  Ad[2][3] = 1.0 / 6.0;
+  Ad[3][0] = 1.0 / 6.0;
+  Ad[3][1] = 0.0 / 6.0;
+  Ad[3][2] = 0.0 / 6.0;
+  Ad[3][3] = 0.0 / 6.0;
+  // {-1.0/6.0,  3.0/6.0, -3.0/6.0, 1.0/6.0},
+  // { 3.0/6.0, -6.0/6.0,  0.0/6.0, 4.0/6.0},
+  // {-3.0/6.0,  3.0/6.0,  3.0/6.0, 1.0/6.0},
+  // { 1.0/6.0,  0.0/6.0,  0.0/6.0, 0.0/6.0}
+
+  for (int j = 0; j < 4; j++) {
+    for (int i = 1; i < 4; i++) {
+      dAd[j][i] = Ad[j][i - 1] * (4 - i);
+    }
+  }
+
+  for (int j = 0; j < 4; j++) {
+    for (int i = 1; i < 4; i++) {
+      d2Ad[j][i] = dAd[j][i - 1] * (4 - i);
+    }
+  }
+
+  for (int i = 1; i < 4; i++) {
+    Bd[i] = 3 * Ad[i][0] + 2 * Ad[i][1] + Ad[i][2];
+    Cd[i] = Ad[i][0] + Ad[i][1] + Ad[i][2] + Ad[i][3];
+  }
+}
+
+/* --------------------------------------------------------------------
+   check if allocated, since class can be destructed when incomplete
+-------------------------------------------------------------------- */
+
+PairMGP::~PairMGP() {
+  int me;
+  // why comm->me does not work here?
+  MPI_Comm_rank(world, &me);
+
+  if (copymode)
+    return;
+
+  if (elements) {
+    for (int i = 1; i <= atom->ntypes; i++) {
+      if (elements[i])
+        delete[] elements[i];
+    }
+    delete[] elements;
+  }
+
+  memory->destroy(lo_2body);
+  memory->destroy(hi_2body);
+  memory->destroy(grid_2body);
+
+  memory->destroy(lo_3body);
+  memory->destroy(hi_3body);
+  memory->destroy(grid_3body);
+
+  // more to do
+  if (fcoeff_2body) {
+    for (int i = 0; i < n_2body; i++) {
+      memory->destroy(fcoeff_2body[i]);
+      //   memory->destroy(ecoeff_2body[i]);
+    }
+    memory->sfree(fcoeff_2body);
+    // memory->sfree(ecoeff_2body);
+  }
+
+  if (fcoeff_3body) {
+    for (int i = 0; i < n_3body; i++) {
+      memory->destroy(fcoeff_3body[i]);
+      //     memory->destroy(ecoeff_3body[i]);
+    }
+    memory->sfree(fcoeff_3body);
+    // memory->sfree(ecoeff_3body);
+  }
+
+  if (allocated) {
+    memory->destroy(setflag);
+    memory->destroy(cutsq);
+    memory->destroy(neighshort);
+  }
+  memory->destroy(cut2bsq);
+  memory->destroy(cut3bsq);
+  memory->destroy(map2b);
+  memory->destroy(map3b);
+}
+
+/* ----------------------------------------------------------------- */
+
+void PairMGP::compute(int eflag, int vflag) {
+  int me;
+  MPI_Comm_rank(world, &me);
+
+  int i, j, k, m, inum, jnum, itype, jtype, ktype;
+  double xtmp, ytmp, ztmp, del[3];
+  double evdwl = 0;
+  double rsq, r, p, rhoip, rhojp, z2, z2p, recip, phip, psip, phi;
+  double cutoff;
+  int *ilist, *jlist, *numneigh, **firstneigh;
+  tagint *tag = atom->tag;
+  tagint itag, jtag;
+
+  // viral setting
+  evdwl = 0.0;
+  if (eflag || vflag)
+    ev_setup(eflag, vflag);
+  else
+    evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
+
+  double **x = atom->x;
+  double **f = atom->f;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  // int nall = nlocal + atom->nghost;
+  int ntype_p1 = atom->ntypes + 1;
+
+  // TODO: double check this. the flat should not be used.
+  int newton_pair = force->newton_pair;
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // compute 2-body forces on each atom
+  // loop over neighbors of my atoms
+
+  for (int ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    itag = tag[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    int numshort = 0;
+    for (int jj = 0; jj < jnum; jj++) {
+      double fpair = 0;
+      j = jlist[jj];
+
+      j &= NEIGHMASK;
+      jtype = type[j];
+      jtag = tag[j];
+
+      del[0] = x[j][0] - xtmp;
+      del[1] = x[j][1] - ytmp;
+      del[2] = x[j][2] - ztmp;
+      rsq = del[0] * del[0] + del[1] * del[1] + del[2] * del[2];
+
+      if (compute3b) {
+        // accumulate short list for 3body interactions
+        if (rsq < cutshortsq) {
+          neighshort[numshort++] = j;
+          if (numshort >= maxshort) {
+            maxshort += maxshort / 2;
+            memory->grow(neighshort, maxshort, "pair:neighshort");
+          }
+        }
+      }
+
+      if (compute2b) {
+        int mapid = map2b[itype][jtype];
+        if (mapid != -1) {
+          if (rsq < cut2bsq[mapid]) {
+
+            jtype = type[j];
+            //            if (itag > jtag) {
+            //              if ((itag+jtag) % 2 == 0) continue;
+            //            } else if (itag < jtag) {
+            //              if ((itag+jtag) % 2 == 1) continue;
+            //            } else {
+            //              if (x[j][2] < ztmp) continue;
+            //              if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
+            //              if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] <
+            //              xtmp) continue;
+            //            }
+
+            r = sqrt(rsq);
+
+            // TODO: pay attention to the sign of the force
+
+            // energy and forces are both computed
+            eval_cubic_splines_1d(lo_2body[mapid], hi_2body[mapid],
+                                  grid_2body[mapid], fcoeff_2body[mapid], r,
+                                  &evdwl, &fpair);
+
+            evdwl *= 2;
+            fpair *= 2 / r; // check the prefactor of 2
+
+            double fx, fy, fz;
+            fx = del[0] * fpair;
+            fy = del[1] * fpair;
+            fz = del[2] * fpair;
+
+            f[i][0] += fx;
+            f[i][1] += fy;
+            f[i][2] += fz;
+            //            f[j][0] -= f0;
+            //            f[j][1] -= f1;
+            //            f[j][2] -= f2;
+
+            // viral compute
+            if (evflag)
+              ev_tally_xyz_full(i, evdwl, 0.0, -fx, -fy, -fz, del[0], del[1],
+                                del[2]);
+            //            if (evflag) {
+            //                ev_tally_full(i,evdwl,0.0,-fpair,del[0],del[1],del[2]);
+            //                // if (screen) fprintf(screen, "MGP compute%d: 2b
+            //                forces on %d from %d %lf %lf %lf distance %lf %lf
+            //                %lf\n",
+            //                //         me, tag[i], tag[j], del[0]*fpair,
+            //                del[1]*fpair, del[2]*fpair, del[0], del[1],
+            //                del[2]);
+            //            }
+          }
+        }
+      }
+
+    } // j
+
+    if (compute3b) {
+      // three-body interactions
+      // skip immediately if I-J is not within cutoff
+      // double fxtmp=0, fytmp=0, fztmp=0;
+      double delr1[3], delr2[3], delr12[3];
+      double rsq1, rsq2, rsq12, rij, rik, rjk;
+      double fij[3], fik[3], fjk[3];
+      double fxtmp = 0.0, fytmp = 0.0, fztmp = 0.0;
+      double fjxtmp = 0.0, fjytmp = 0.0, fjztmp = 0.0;
+
+      for (int jj = 0; jj < numshort; jj++) {
+        j = neighshort[jj];
+        jtype = type[j];
+
+        delr1[0] = x[j][0] - xtmp;
+        delr1[1] = x[j][1] - ytmp;
+        delr1[2] = x[j][2] - ztmp;
+        rsq1 = delr1[0] * delr1[0] + delr1[1] * delr1[1] + delr1[2] * delr1[2];
+
+        for (int kk = jj + 1; kk < numshort; kk++) {
+          double ftriplet[3] = {0, 0, 0};
+          k = neighshort[kk];
+          ktype = type[k];
+
+          int mapid1 = map3b[itype][jtype][ktype];
+          int mapid2 = map3b[itype][ktype][jtype];
+          // 3 body coef not found, then continue
+          if (mapid1 == -1 || mapid2 == -1)
+            error->all(FLERR, "MGP coeff: mapid is not found.");
+
+          // TODO: double check whether the cutoffs are consistent
+          cutoff = cut3bsq[mapid1];
+          if (rsq1 >= cutoff)
+            continue;
+
+          delr2[0] = x[k][0] - xtmp;
+          delr2[1] = x[k][1] - ytmp;
+          delr2[2] = x[k][2] - ztmp;
+          rsq2 =
+              delr2[0] * delr2[0] + delr2[1] * delr2[1] + delr2[2] * delr2[2];
+          if (rsq2 >= cutoff)
+            continue;
+
+          delr12[0] = x[k][0] - x[j][0];
+          delr12[1] = x[k][1] - x[j][1];
+          delr12[2] = x[k][2] - x[j][2];
+          rsq12 = delr12[0] * delr12[0] + delr12[1] * delr12[1] +
+                  delr12[2] * delr12[2];
+          if (rsq12 >= cutoff)
+            continue;
+
+          // compute bonds
+          rij = sqrt(rsq1);
+          rik = sqrt(rsq2);
+          rjk = sqrt(rsq12);
+
+          // compute spline
+          eval_cubic_splines_3d(lo_3body[mapid1], hi_3body[mapid1],
+                                grid_3body[mapid1], fcoeff_3body[mapid1], rij,
+                                rik, rjk, &evdwl, ftriplet);
+
+          //           double factor1, factor2, f_d1, f_d2;
+          //           factor1 = 1/r2 - 1/r1 * cos_angle;
+          //           factor2 = 1/r1 - 1/r2 * cos_angle;
+          //
+          //           f_d1 = (ftriplet[0] + ftriplet[2] * factor1) / r1; //
+          //           diveded by length f_d2 = (ftriplet[1] + ftriplet[2] *
+          //           factor2) / r2; // diveded by length
+          double f_ij, f_ik, f_jk;
+
+          f_ij = 3 * ftriplet[0] / rij;
+          f_ik = 3 * ftriplet[1] / rik;
+          //          f_jk = 3 * ftriplet[2] / rjk;
+
+          fij[0] = f_ij * delr1[0]; // delr1, delr2, not unit vector
+          fij[1] = f_ij * delr1[1];
+          fij[2] = f_ij * delr1[2];
+          fik[0] = f_ik * delr2[0];
+          fik[1] = f_ik * delr2[1];
+          fik[2] = f_ik * delr2[2];
+          //          fjk[0] = f_jk * delr12[0];
+          //          fjk[1] = f_jk * delr12[1];
+          //          fjk[2] = f_jk * delr12[2];
+          f[i][0] += (fij[0] + fik[0]);
+          f[i][1] += (fij[1] + fik[1]);
+          f[i][2] += (fij[2] + fik[2]);
+          //          f[j][0] -= fij[0] - fjk[0];
+          //          f[j][0] -= fij[1] - fjk[1];
+          //          f[j][0] -= fij[2] - fjk[2];
+          //          f[k][0] -= fik[0] + fjk[0];
+          //          f[k][1] -= fik[1] + fjk[1];
+          //          f[k][2] -= fik[2] + fjk[2];
+
+          // calculate stress
+          // TO DO, test, the fj, fk sign may be wrong
+          //          if (evflag)
+          //          ev_tally3(i,j,k,evdwl,0.0,fij,fik,delr1,delr2);
+          if (evflag) {
+            ev_tally_full(i,evdwl,0.0,-f_ij,delr1[0],delr1[1],delr1[2]);
+            ev_tally_full(i,evdwl,0.0,-f_ik,delr2[0],delr2[1],delr2[2]);
+
+//            ev_tally_xyz_full(i, evdwl, 0.0, -fij[0], -fij[1], -fij[2],
+//                              delr1[0], delr1[1], delr1[2]);
+//            ev_tally_xyz_full(i, evdwl, 0.0, -fik[0], -fik[1], -fik[2],
+//                              delr2[0], delr2[1], delr2[2]);
+          }
+
+          //          if (evflag) {
+          //           ev_tally_full(i,evdwl,0.0,-f_ij,delr1[0],delr1[1],delr1[2]);
+          //           // check if it should be evdwl3, check prefactor 3
+          //           ev_tally_full(i,evdwl,0.0,-f_ik,delr2[0],delr2[1],delr2[2]);
+          // ev_tally3(i,j,k, evdwl, 0.0, fij, fik, delr1, delr2);
+          // forces on j, force on k, fji, fki
+          // check ev_tally3 default atomic directions!!!!!!!!! refer to
+          // pair_tersoff_table: line 439 if (screen) fprintf(screen, "MGP
+          // compute%d: 3b forces on %d from %d %lf %lf %lf distance %lf %lf %lf
+          // \n",
+          //         me, tag[i], tag[j], fj[0], fj[1], fj[2], delr1[0],
+          //         delr1[1], delr1[2]);
+          // if (screen) fprintf(screen, "MGP compute%d: 3b forces on %d from %d
+          // %lf %lf %lf distance %lf %lf %lf \n",
+          //         me, tag[i], tag[k], fk[0], fk[1], fk[2], delr2[0],
+          //         delr2[1], delr2[2]);
+          //          }
+
+        } // k
+
+      } // j
+    }
+  }
+
+  // TODO, check out this function
+  if (vflag_fdotr)
+    virial_fdotr_compute();
+}
+
+/* --------------------------------------------------------------------
+   allocate all arrays
+-------------------------------------------------------------------- */
+
+void PairMGP::allocate() {
+  int me = comm->me;
+  allocated = 1;
+
+  int np = atom->ntypes + 1;
+  memory->create(setflag, np, np, "pair:setflag");
+  memory->create(cutsq, np, np, "pair:cutsq");
+  memory->create(neighshort, maxshort, "pair:neighshort");
+
+  memset(&setflag[0][0], 0, np * np * sizeof(int));
+  memset(&cutsq[0][0], 0, np * np * sizeof(double));
+}
+
+/* -------------------------------------------------------------------
+   global settings
+-------------------------------------------------------------------- */
+
+void PairMGP::settings(int narg, char ** /*arg*/) {
+  if (narg > 0)
+    error->all(FLERR, "Illegal pair_style command");
+}
+
+/* --------------------------------------------------------------------
+   set coeffs for one or more type pairs
+   read DYNAMO funcfl file
+-------------------------------------------------------------------- */
+
+void PairMGP::coeff(int narg, char **arg) {
+  int me = comm->me;
+
+  int n3 = 3 + atom->ntypes;
+  if (narg < n3)
+    error->all(FLERR, "MGP coeff: Incorrect args for pair coefficients");
+
+  // TO DO, this should be compatible with other potentials?
+  if (strcmp(arg[0], "*") != 0 || strcmp(arg[1], "*") != 0)
+    error->all(FLERR, "MGP coeff: Incorrect args for pair coefficients");
+
+  if (!allocated)
+    allocate();
+
+  // parse pair of atom types
+  elements = new char *[atom->ntypes + 1];
+
+  int n;
+  for (int i = 3; i < n3; i++) {
+    n = strlen(arg[i]);
+    elements[i - 2] = new char[n];
+    strcpy(elements[i - 2], arg[i]);
+    if (me == 0 && screen)
+      fprintf(screen, "MGP coeff: type %d is element %s\n", i - 2,
+              elements[i - 2]);
+  }
+
+  if (narg >= (4 + atom->ntypes)) {
+    if (strcmp(arg[n3], "yes") == 0)
+      compute2b = true;
+    else if (strcmp(arg[n3], "no") == 0)
+      compute2b = false;
+    else {
+      error->all(FLERR, "MGP coeff:Incorrect args for pair coefficients. The "
+                        "argument should be either yes or no");
+    }
+  }
+
+  if (narg > (4 + atom->ntypes)) {
+    if (strcmp(arg[n3 + 1], "yes") == 0)
+      compute3b = true;
+    else if (strcmp(arg[n3 + 1], "no") == 0)
+      compute3b = false;
+    else {
+      error->all(FLERR, "MGP coeff:Incorrect args for pair coefficients. The "
+                        "argument should be either yes or no");
+    }
+  }
+
+  // read_file(arg[2]);
+  if (me == 0)
+    read_file(arg[2]);
+  bcast_table();
+
+  if (me == 0 && screen)
+    fprintf(screen, "MGP coeff: done reading coefficients\n");
+}
+
+/* -------------------------------------------------------------------
+   grab n values from file fp and put them in list
+   values can be several to a line
+   only called by proc 0
+-------------------------------------------------------------------- */
+
+void PairMGP::grab(FILE *fptr, int n, double *list) {
+  char *ptr;
+  char line[MAXLINE];
+
+  int i = 0;
+  while (i < n) {
+    fgets(line, MAXLINE, fptr);
+    ptr = strtok(line, " \t\n\r\f");
+    list[i++] = atof(ptr);
+    while ((ptr = strtok(NULL, " \t\n\r\f")))
+      list[i++] = atof(ptr);
+  }
+}
+
+/* ----------------------------------------------------------------- */
+
+// double PairMGP::single(int i, int j, int itype, int jtype,
+//                       double rsq, double /*factor_coul*/, double
+//                       /*factor_lj*/, double &fforce) {
+//  return 0;
+//}
+//
+
+/* -------------------------------------------------------------------
+   read potential values from a DYNAMO single element funcfl file
+-------------------------------------------------------------------- */
+
+void PairMGP::read_file(char *filename) {
+
+  // variables that need to be set in the class member
+  if (screen)
+    fprintf(screen, "MGP read_file: reading potential parameters...\n");
+
+  FILE *fptr;
+  char line[MAXLINE];
+
+  fptr = force->open_potential(filename);
+  if (fptr == NULL) {
+    char str[128];
+    snprintf(str, 128, "Cannot open MMF potential file %s", filename);
+    error->one(FLERR, str);
+  }
+
+  fgets(line, MAXLINE, fptr); // first line is comment
+  fgets(line, MAXLINE, fptr); // second line is comment
+  fgets(line, MAXLINE, fptr); // third line is comment
+  fgets(line, MAXLINE, fptr);
+  sscanf(line, "%d %d", &n_2body, &n_3body);
+  if (screen)
+    fprintf(screen,
+            "MGP read_file: the file contains %d 2b potential and %d 3b "
+            "potential\n",
+            n_2body, n_3body);
+
+  int ntype_p1 = atom->ntypes + 1;
+  if (n_2body > 0) {
+    memory->create(lo_2body, n_2body, "pair:lo_2body");
+    memory->create(hi_2body, n_2body, "pair:hi_2body");
+    memory->create(grid_2body, n_2body, "pair:grid_2body");
+    memory->create(map2b, ntype_p1, ntype_p1, "pair:map2b");
+    memory->create(cut2bsq, n_2body, "pair:cut2bsq");
+
+    for (int i = 1; i < ntype_p1; i++) {
+      for (int j = 1; j < ntype_p1; j++) {
+        map2b[i][j] = -1;
+      }
+    }
+
+    // ????????????????????/pair: 3body?
+    fcoeff_2body = (double **)memory->smalloc(n_2body * sizeof(double *),
+                                              "pair:fcoeff_3body");
+    // ecoeff_2body = (double **) memory->smalloc(n_2body*sizeof(double
+    // *),"pair:ecoeff_3body");
+  } else {
+    compute2b = false;
+  }
+
+  if (n_3body > 0) {
+    memory->create(lo_3body, n_3body, 3, "pair:lo_3body");
+    memory->create(hi_3body, n_3body, 3, "pair:hi_3body");
+    memory->create(grid_3body, n_3body, 3, "pair:grid_3body");
+    memory->create(map3b, ntype_p1, ntype_p1, ntype_p1, "pair:map3b");
+    memory->create(cut3bsq, n_3body, "pair:cut3bsq");
+    for (int i = 1; i < ntype_p1; i++) {
+      for (int j = 1; j < ntype_p1; j++) {
+        for (int k = 1; k < ntype_p1; k++) {
+          map3b[i][j][k] = -1;
+        }
+      }
+    }
+    fcoeff_3body = (double **)memory->smalloc(n_3body * sizeof(double *),
+                                              "pair:coeff_2body");
+    // ecoeff_3body = (double **) memory->smalloc(n_3body*sizeof(double
+    // *),"pair:ecoeff_2body");
+  } else {
+    compute3b = false;
+  }
+
+  cutmax = 0;
+  for (int idx = 0; idx < n_2body; idx++) {
+    char ele1[10], ele2[10];
+    double a, b;
+    int order;
+    fgets(line, MAXLINE, fptr);
+    sscanf(line, "%s %s %lg %lg %d", &ele1, &ele2, &a, &b, &order);
+
+    bool type1[atom->ntypes + 1];
+    bool type2[atom->ntypes + 1];
+    for (int itype = 1; itype <= atom->ntypes; itype++) {
+      if (strcmp(elements[itype], ele1) == 0)
+        type1[itype] = true;
+      else
+        type1[itype] = false;
+      if (strcmp(elements[itype], ele2) == 0)
+        type2[itype] = true;
+      else
+        type2[itype] = false;
+    }
+
+    double rsq = b * b;
+    cut2bsq[idx] = rsq;
+    for (int itype = 1; itype <= atom->ntypes; itype++) {
+      for (int jtype = 1; jtype <= atom->ntypes; jtype++) {
+        if (type1[itype] && type2[jtype]) {
+          map2b[itype][jtype] = idx;
+          map2b[jtype][itype] = idx;
+          if (compute2b) {
+            setflag[itype][jtype] = 1;
+            setflag[jtype][itype] = 1;
+          }
+          cutsq[itype][jtype] = rsq;
+          cutsq[jtype][itype] = rsq;
+        }
+      }
+    }
+
+    if (cutmax < rsq)
+      cutmax = rsq;
+
+    lo_2body[idx] = a;
+    hi_2body[idx] = b;
+    grid_2body[idx] = order;
+    memory->create(fcoeff_2body[idx], order + 2, "pair:2body_coeff");
+    // memory->create(ecoeff_2body[idx],order+2,"pair:2body_ecoeff");
+
+    grab(fptr, order + 2, fcoeff_2body[idx]);
+    // grab(fptr, order+2, ecoeff_2body[idx]);
+  }
+
+  for (int idx = 0; idx < n_3body; idx++) {
+    if (screen)
+      fprintf(screen, "MGP read_file: reading the %d 3b       potential\n",
+              idx);
+    char ele1[10], ele2[10], ele3[10];
+    double a[3], b[3];
+    int order[3];
+
+    fgets(line, MAXLINE, fptr);
+    sscanf(line, "%s %s %s %lg %lg %lg %lg %lg %lg %d %d %d", &ele1, &ele2,
+           &ele3, &a[0], &a[1], &a[2], &b[0], &b[1], &b[2], &order[0],
+           &order[1], &order[2]);
+
+    bool type1[atom->ntypes + 1];
+    bool type2[atom->ntypes + 1];
+    bool type3[atom->ntypes + 1];
+    for (int itype = 1; itype <= atom->ntypes; itype++) {
+      if (strcmp(elements[itype], ele1) == 0)
+        type1[itype] = true;
+      else
+        type1[itype] = false;
+      if (strcmp(elements[itype], ele2) == 0)
+        type2[itype] = true;
+      else
+        type2[itype] = false;
+      if (strcmp(elements[itype], ele3) == 0)
+        type3[itype] = true;
+      else
+        type3[itype] = false;
+    }
+
+    double rsq = b[0] * b[0];
+    cut3bsq[idx] = rsq;
+    for (int itype = 1; itype <= atom->ntypes; itype++) {
+      for (int jtype = 1; jtype <= atom->ntypes; jtype++) {
+        for (int ktype = 1; ktype <= atom->ntypes; ktype++) {
+          if (type1[itype] && type2[jtype] && type3[ktype]) {
+            map3b[itype][jtype][ktype] = idx;
+            // no permutation sym
+            // map3b[itype][ktype][jtype] = idx;
+            if (compute3b) {
+              setflag[itype][jtype] = 1;
+              setflag[jtype][itype] = 1;
+              setflag[itype][ktype] = 1;
+              setflag[ktype][itype] = 1;
+              setflag[jtype][ktype] = 1;
+              setflag[ktype][jtype] = 1;
+            }
+          }
+        }
+      }
+    }
+
+    if (cutmax < rsq)
+      cutmax = rsq;
+    if (rsq > cutshortsq)
+      cutshortsq = rsq;
+
+    lo_3body[idx][0] = a[0];
+    lo_3body[idx][1] = a[1];
+    lo_3body[idx][2] = a[2];
+    hi_3body[idx][0] = b[0];
+    hi_3body[idx][1] = b[1];
+    hi_3body[idx][2] = b[2];
+    grid_3body[idx][0] = order[0];
+    grid_3body[idx][1] = order[1];
+    grid_3body[idx][2] = order[2];
+
+    int len = (order[0] + 2) * (order[1] + 2) * (order[2] + 2);
+    memory->create(fcoeff_3body[idx], len, "pair:3body_coeff");
+    // memory->create(ecoeff_3body[idx], len, "pair:3body_ecoeff");
+
+    grab(fptr, len, fcoeff_3body[idx]);
+    // grab(fptr, len, ecoeff_3body[idx]);
+  }
+
+  fclose(fptr);
+
+  cutmax = sqrt(cutmax);
+
+  // if (screen) fprintf(screen, "MGP read_file: sanity check on setflag\n");
+  for (int i = 1; i < ntype_p1; i++) {
+    for (int j = 1; j < ntype_p1; j++) {
+      if (setflag[i][j] == 0) {
+        char str[128]; // read files, warning if some potential is not covered
+        snprintf(str, 128,
+                 "MGP read_file: no MGP potential for type %d %d is defined", i,
+                 j);
+        error->all(FLERR, str);
+        // setflag[i][j] == 1; // can be covered by other potentials
+      }
+    }
+  }
+
+  // if (screen) fprintf(screen, "MGP read_file: sanity check on 2b
+  // potential\n");
+  if (compute2b) {
+    for (int i = 1; i < ntype_p1; i++) {
+      for (int j = 1; j < ntype_p1; j++) {
+        if (map2b[i][j] == -1) {
+          char str[128];
+          snprintf(
+              str, 128,
+              "MGP read_file: 2b MGP potential for type %d %d is not defined",
+              i, j);
+          error->warning(FLERR, str);
+        }
+      }
+    }
+  }
+
+  // if (screen) fprintf(screen, "MGP read_file: sanity check on 3b
+  // potential\n");
+  if (compute3b) {
+    for (int i = 1; i < ntype_p1; i++) {
+      for (int j = 1; j < ntype_p1; j++) {
+        for (int k = 1; k < ntype_p1; k++) {
+          if (map3b[i][j][k] == -1) {
+            char str[128];
+            snprintf(str, 128,
+                     "MGP read_file: 3b MGP potential for type %d %d %d are "
+                     "not defined",
+                     i, j, k);
+            error->warning(FLERR, str);
+          }
+        }
+      }
+    }
+  }
+
+  if (screen)
+    fprintf(screen, "MGP read_file: done\n");
+}
+
+void PairMGP::bcast_table() {
+  int me; // = comm->me;
+  MPI_Comm_rank(world, &me);
+
+  MPI_Bcast(&n_2body, 1, MPI_INT, 0, world);
+  MPI_Bcast(&n_3body, 1, MPI_INT, 0, world);
+
+  int ntype_p1 = atom->ntypes + 1;
+  if (me > 0) {
+    if (n_2body > 0) {
+      memory->create(lo_2body, n_2body, "pair:lo_2body");
+      memory->create(hi_2body, n_2body, "pair:hi_2body");
+      memory->create(grid_2body, n_2body, "pair:grid_2body");
+      memory->create(cut2bsq, n_2body, "pair:cut2bsq");
+      memory->create(map2b, ntype_p1, ntype_p1, "pair:map2b");
+      fcoeff_2body = (double **)memory->smalloc(n_2body * sizeof(double *),
+                                                "pair:fcoeff_2body");
+      // ecoeff_2body = (double **) memory->smalloc(n_2body*sizeof(double
+      // *),"pair:ecoeff_2body");
+    }
+    if (n_3body > 0) {
+      memory->create(lo_3body, n_3body, 3, "pair:lo_3body");
+      memory->create(hi_3body, n_3body, 3, "pair:hi_3body");
+      memory->create(grid_3body, n_3body, 3, "pair:grid_3body");
+      memory->create(cut3bsq, n_3body, "pair:cut3bsq");
+      memory->create(map3b, ntype_p1, ntype_p1, ntype_p1, "pair:map3b");
+      fcoeff_3body = (double **)memory->smalloc(n_3body * sizeof(double *),
+                                                "pair:fcoeff_3body");
+      // ecoeff_3body = (double **) memory->smalloc(n_3body*sizeof(double
+      // *),"pair:ecoeff_3body");
+    }
+  }
+
+  MPI_Bcast(cutsq[0], ntype_p1 * ntype_p1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(setflag[0], ntype_p1 * ntype_p1, MPI_INT, 0, world);
+  MPI_Bcast(&cutmax, 1, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&cutshortsq, 1, MPI_DOUBLE, 0, world);
+
+  char temp[100];
+  if (n_2body > 0) {
+    MPI_Bcast(lo_2body, n_2body, MPI_DOUBLE, 0, world);
+    MPI_Bcast(hi_2body, n_2body, MPI_DOUBLE, 0, world);
+    MPI_Bcast(grid_2body, n_2body, MPI_INT, 0, world);
+    MPI_Bcast(cut2bsq, n_2body, MPI_DOUBLE, 0, world);
+    MPI_Bcast(map2b[0], ntype_p1 * ntype_p1, MPI_INT, 0, world);
+    for (int idx = 0; idx < n_2body; idx++) {
+      if (me > 0) {
+        sprintf(temp, "pair:fcoeff_2body%d", idx);
+        memory->create(fcoeff_2body[idx], grid_2body[idx] + 2, temp);
+        // sprintf(temp, "pair:ecoeff_2body%d", idx);
+        // memory->create(fcoeff_2body[idx],grid_2body[idx]+2,temp);
+      }
+      MPI_Bcast(fcoeff_2body[idx], grid_2body[idx] + 2, MPI_DOUBLE, 0, world);
+      // MPI_Bcast(ecoeff_2body[idx],order+2,MPI_DOUBLE,0,world);
+    }
+  }
+
+  if (n_3body > 0) {
+    MPI_Bcast(lo_3body[0], n_3body * 3, MPI_DOUBLE, 0, world);
+    MPI_Bcast(hi_3body[0], n_3body * 3, MPI_DOUBLE, 0, world);
+    MPI_Bcast(grid_3body[0], n_3body * 3, MPI_INT, 0, world);
+    MPI_Bcast(cut3bsq, n_3body, MPI_DOUBLE, 0, world);
+    MPI_Bcast(map3b[0][0], ntype_p1 * ntype_p1 * ntype_p1, MPI_INT, 0, world);
+    for (int idx = 0; idx < n_3body; idx++) {
+      int *order = grid_3body[idx];
+      int len = (order[0] + 2) * (order[1] + 2) * (order[2] + 2);
+      if (me > 0) {
+        sprintf(temp, "pair:fcoeff_3body%d", idx);
+        memory->create(fcoeff_3body[idx], len, temp);
+        // sprintf(temp, "pair:ecoeff_3body%d", idx);
+        // memory->create(fcoeff_3body[idx], len, temp);
+      }
+      MPI_Bcast(fcoeff_3body[idx], len, MPI_DOUBLE, 0, world);
+      // MPI_Bcast(ecoeff_3body[idx],len,MPI_DOUBLE,0,world);
+    }
+  }
+}
+
+/* -------------------------------------------------------------------
+   init specific to this pair style
+-------------------------------------------------------------------- */
+
+void PairMGP::init_style() {
+  if (force->newton_pair == 1) {
+    if (comm->me == 0)
+      error->warning(FLERR, "Pair style MGP requires newton pair off");
+    //          error->warning(FLERR,"Pair style MGP is not using newton pair.
+    //          But it does not need the newton pair flag to be off");
+  }
+
+  // need a full neighbor list
+  int irequest = neighbor->request(this, instance_me);
+  neighbor->requests[irequest]->half = 0;
+  neighbor->requests[irequest]->full = 1;
+}
+
+/* ------------------------------------------------------------------
+ *     init for one type pair i,j and corresponding j,i
+ * ---------------------------------------------------------------- */
+
+double PairMGP::init_one(int i, int j) {
+  if (setflag[i][j] == 0)
+    error->all(FLERR, "All pair coeffs are not set");
+  return cutmax;
+  // return sqrt(cutsq[i][j]);
+}
+
+void PairMGP::eval_cubic_splines_1d(double a, double b, int orders,
+                                    double *coefs, double r, double *val,
+                                    double *dval) {
+  // some coefficients
+  int i, j, ii;
+  double dinv, u, i0, tt;
+  *val = 0;
+  dinv = (orders - 1.0) / (b - a);
+  u = (r - a) * dinv;
+  i0 = floor(u);
+  ii = fmax(fmin(i0, orders - 2), 0);
+  tt = u - ii;
+
+  // interpolation points
+  double tp[4];
+  for (j = 0; j < 4; j++) {
+    tp[j] = pow(tt, 3 - j);
+  }
+
+  // value of cubic spline function
+  double Phi[4], dPhi[4];
+  double dt;
+  int k;
+
+  if (tt < 0) {
+    for (i = 0; i < 4; i++) {
+      Phi[i] = dAd[i][3] * tt + Ad[i][3];
+    }
+  } else if (tt > 1) {
+    dt = tt - 1;
+    for (i = 0; i < 4; i++) {
+      Phi[i] = Bd[i] * dt + Cd[i];
+    }
+  } else {
+    for (i = 0; i < 4; i++) {
+      Phi[i] = 0;
+      for (k = 0; k < 4; k++) {
+        Phi[i] += Ad[i][k] * tp[k];
+      }
+    }
+  }
+
+  // value of derivative of spline
+  for (i = 0; i < 4; i++) {
+    dPhi[i] = 0;
+    for (k = 0; k < 4; k++) {
+      dPhi[i] += dAd[i][k] * tp[k];
+    }
+    dPhi[i] *= dinv;
+  }
+
+  // added by coefficients
+  int N = orders + 2;
+  double pc = 0;
+  double ppc = 0;
+
+  for (j = 0; j < 4; j++) {
+    *val += Phi[j] * coefs[ii + j];
+    *dval += dPhi[j] * coefs[ii + j];
+  }
+}
+
+void PairMGP::eval_cubic_splines_3d(double *a, double *b, int *orders,
+                                    double *coefs, double r1, double r2,
+                                    double a12, double *val, double *dval) {
+  int dim = 3;
+  int i;
+  int j;
+  double point[3] = {r1, r2, a12};
+  double dinv[dim];
+  double u[dim];
+  double i0[dim];
+  int ii[dim];
+  double tt[dim];
+
+  *val = 0;
+  // coefficients
+  for (i = 0; i < dim; i++) {
+    dinv[i] = (orders[i] - 1.0) / (b[i] - a[i]);
+    u[i] = (point[i] - a[i]) * dinv[i];
+    i0[i] = floor(u[i]);
+    ii[i] = fmax(fmin(i0[i], orders[i] - 2), 0);
+    tt[i] = u[i] - ii[i];
+  }
+
+  // points
+  double tp[dim][4];
+  for (i = 0; i < dim; i++) {
+    for (j = 0; j < 4; j++) {
+      tp[i][j] = pow(tt[i], 3 - j);
+    }
+  }
+
+  double Phi[dim][4], dPhi[dim][4];
+  double dt;
+  int k;
+
+  for (j = 0; j < dim; j++) {
+
+    // evaluate spline function
+    if (tt[j] < 0) {
+      for (i = 0; i < 4; i++) {
+        Phi[j][i] = dAd[i][3] * tt[j] + Ad[i][3];
+      }
+    } else if (tt[j] > 1) {
+      dt = tt[j] - 1;
+      for (i = 0; i < 4; i++) {
+        Phi[j][i] = Bd[i] * dt + Cd[i];
+      }
+    } else {
+      for (i = 0; i < 4; i++) {
+        Phi[j][i] = 0;
+        for (k = 0; k < 4; k++) {
+          Phi[j][i] += Ad[i][k] * tp[j][k];
+        }
+      }
+    }
+
+    // evaluate derivatives
+    for (i = 0; i < 4; i++) {
+      dPhi[j][i] = 0;
+      for (k = 0; k < 4; k++) {
+        dPhi[j][i] += dAd[i][k] * tp[j][k];
+      }
+      dPhi[j][i] *= dinv[j];
+    }
+  }
+
+  // added by coefficients
+  int N[dim];
+  for (i = 0; i < dim; i++) {
+    N[i] = orders[i] + 2;
+  }
+  double c, pc, ppc;
+  double dpc, dppc1, dppc2;
+
+  for (i = 0; i < 4; i++) {
+    ppc = 0;
+    dppc1 = 0;
+    dppc2 = 0;
+    for (j = 0; j < 4; j++) {
+      pc = 0;
+      dpc = 0;
+      for (k = 0; k < 4; k++) {
+        c = coefs[((ii[0] + i) * N[1] + ii[1] + j) * N[2] + ii[2] + k];
+        pc += Phi[2][k] * c;
+        dpc += dPhi[2][k] * c;
+      }
+      ppc += Phi[1][j] * pc;
+      dppc1 += dPhi[1][j] * pc;
+      dppc2 += Phi[1][j] * dpc;
+    }
+    *val += Phi[0][i] * ppc;
+    dval[0] += dPhi[0][i] * ppc;
+    dval[1] += Phi[0][i] * dppc1;
+    dval[2] += Phi[0][i] * dppc2;
+  }
+}
diff --git a/lammps_plugins/pair_mgp.h b/lammps_plugins/pair_mgp.h
new file mode 100644
index 000000000..4396ba279
--- /dev/null
+++ b/lammps_plugins/pair_mgp.h
@@ -0,0 +1,110 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(mgp, PairMGP)
+
+#else
+
+#ifndef LMP_PAIR_MGP_H
+#define LMP_PAIR_MGP_H
+
+#include "pair.h"
+#include <cstdio>
+
+namespace LAMMPS_NS {
+
+class PairMGP : public Pair {
+public:
+  PairMGP(class LAMMPS *);
+  virtual ~PairMGP();
+  virtual void compute(int, int);
+  void settings(int, char **);
+  virtual void coeff(int, char **);
+  virtual void init_style();
+  double init_one(int, int);
+  // double single(int, int, int, int, double, double, double, double &);
+  // virtual void *extract(const char *, int &);
+
+protected:
+  int **map2b;  // which element each atom type maps to,
+                // [0,0],[0,1],[1,1],[0,2],...
+  int ***map3b; // which element each atom type maps to
+  char **elements;
+
+  bool compute2b, compute3b;
+  int n_2body, n_3body;
+
+  double *lo_2body, *hi_2body;
+  double **lo_3body, **hi_3body;
+  int *grid_2body, **grid_3body;
+  double **fcoeff_2body, **fcoeff_3body;
+  double **ecoeff_2body, **ecoeff_3body;
+
+  int maxshort;
+  int *neighshort;
+
+  // extra cutoff for 3d and short list
+  double cutmax;
+  double *cut2bsq;
+  double *cut3bsq;
+  double cutshortsq;
+
+  // parameters for spline
+  double Ad[4][4];
+  double dAd[4][4];
+  double d2Ad[4][4];
+  double Bd[4];
+  double Cd[4];
+  double basis[4];
+
+  void allocate();
+
+  void eval_cubic_splines_1d(double, double, int, double *, double, double *,
+                             double *);
+  void eval_cubic_splines_3d(double *, double *, int *, double *, double,
+                             double, double, double *, double *);
+
+  void read_file(char *);
+  void bcast_table();
+  void grab(FILE *, int, double *);
+};
+
+} // namespace LAMMPS_NS
+
+#endif
+#endif
+
+    /* ERROR/WARNING messages:
+
+    E: Illegal ... command
+
+    Self-explanatory.  Check the input script syntax and compare to the
+    documentation for the command.  You can use -echo screen as a
+    command-line option when running LAMMPS to see the offending line.
+
+    E: Incorrect args for pair coefficients
+
+    Self-explanatory.  Check the input script or data file.
+
+    E: Cannot open MGP potential file %s
+
+    The specified MGP potential file cannot be opened.  Check that the
+    path and name are correct.
+
+    E: Invalid MGP potential file
+
+    UNDOCUMENTED
+
+    */
diff --git a/lammps_plugins/pair_mgp_kokkos.cpp b/lammps_plugins/pair_mgp_kokkos.cpp
new file mode 100644
index 000000000..abf6cfc3c
--- /dev/null
+++ b/lammps_plugins/pair_mgp_kokkos.cpp
@@ -0,0 +1,1493 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Stan Moore, original SW (SNL)
+                         Anders Johansson, modified to MGP (Harvard)
+------------------------------------------------------------------------- */
+
+#include "pair_mgp_kokkos.h"
+#include "atom_kokkos.h"
+#include "atom_masks.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "kokkos.h"
+#include "math_const.h"
+#include "memory_kokkos.h"
+#include "neigh_list_kokkos.h"
+#include "neigh_request.h"
+#include "neighbor.h"
+#include "pair_kokkos.h"
+#include <algorithm>
+#include <cmath>
+#include <utility>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define MAXLINE 1024
+#define DELTA 4
+
+/* ---------------------------------------------------------------------- */
+
+template <class DeviceType>
+PairMGPKokkos<DeviceType>::PairMGPKokkos(LAMMPS *lmp) : PairMGP(lmp) {
+  respa_enable = 0;
+
+  atomKK = (AtomKokkos *)atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  datamask_read =
+      X_MASK | F_MASK | TAG_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK;
+  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
+}
+
+/* ----------------------------------------------------------------------
+   check if allocated, since class can be destructed when incomplete
+------------------------------------------------------------------------- */
+
+template <class DeviceType> PairMGPKokkos<DeviceType>::~PairMGPKokkos() {
+  if (!copymode) {
+    memoryKK->destroy_kokkos(k_eatom, eatom);
+    memoryKK->destroy_kokkos(k_vatom, vatom);
+    eatom = NULL;
+    vatom = NULL;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class DeviceType>
+void PairMGPKokkos<DeviceType>::compute(int eflag_in, int vflag_in) {
+  eflag = eflag_in;
+  vflag = vflag_in;
+
+  if (neighflag == FULL)
+    no_virial_fdotr_compute = 1;
+
+  ev_init(eflag, vflag, 0);
+
+  // reallocate per-atom arrays if necessary
+
+  if (eflag_atom) {
+    memoryKK->destroy_kokkos(k_eatom, eatom);
+    memoryKK->create_kokkos(k_eatom, eatom, maxeatom, "pair:eatom");
+    d_eatom = k_eatom.view<DeviceType>();
+  }
+  if (vflag_atom) {
+    memoryKK->destroy_kokkos(k_vatom, vatom);
+    memoryKK->create_kokkos(k_vatom, vatom, maxvatom, 6, "pair:vatom");
+    d_vatom = k_vatom.view<DeviceType>();
+  }
+
+  atomKK->sync(execution_space, datamask_read);
+  if (eflag || vflag)
+    atomKK->modified(execution_space, datamask_modify);
+  else
+    atomKK->modified(execution_space, F_MASK);
+
+  x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  tag = atomKK->k_tag.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  nlocal = atom->nlocal;
+  newton_pair = force->newton_pair;
+  nall = atom->nlocal + atom->nghost;
+
+  const int inum = list->inum;
+  const int ignum = inum + list->gnum;
+  NeighListKokkos<DeviceType> *k_list =
+      static_cast<NeighListKokkos<DeviceType> *>(list);
+  d_ilist = k_list->d_ilist;
+  d_numneigh = k_list->d_numneigh;
+  d_neighbors = k_list->d_neighbors;
+
+  need_dup = lmp->kokkos->need_dup<DeviceType>();
+  if (need_dup) {
+    dup_f = Kokkos::Experimental::create_scatter_view<
+        Kokkos::Experimental::ScatterSum,
+        Kokkos::Experimental::ScatterDuplicated>(f);
+    dup_eatom = Kokkos::Experimental::create_scatter_view<
+        Kokkos::Experimental::ScatterSum,
+        Kokkos::Experimental::ScatterDuplicated>(d_eatom);
+    dup_vatom = Kokkos::Experimental::create_scatter_view<
+        Kokkos::Experimental::ScatterSum,
+        Kokkos::Experimental::ScatterDuplicated>(d_vatom);
+  } else {
+    ndup_f = Kokkos::Experimental::create_scatter_view<
+        Kokkos::Experimental::ScatterSum,
+        Kokkos::Experimental::ScatterNonDuplicated>(f);
+    ndup_eatom = Kokkos::Experimental::create_scatter_view<
+        Kokkos::Experimental::ScatterSum,
+        Kokkos::Experimental::ScatterNonDuplicated>(d_eatom);
+    ndup_vatom = Kokkos::Experimental::create_scatter_view<
+        Kokkos::Experimental::ScatterSum,
+        Kokkos::Experimental::ScatterNonDuplicated>(d_vatom);
+  }
+
+  copymode = 1;
+
+  EV_FLOAT ev;
+  EV_FLOAT ev_all;
+
+  // build short neighbor list
+
+  int max_neighs = d_neighbors.extent(1);
+
+  if ((d_neighbors_short.extent(1) != max_neighs) ||
+      (d_neighbors_short.extent(0) != ignum)) {
+    d_neighbors_short = Kokkos::View<int **, DeviceType>("MGP::neighbors_short",
+                                                         ignum, max_neighs);
+  }
+  if (d_numneigh_short.extent(0) != ignum)
+    d_numneigh_short =
+        Kokkos::View<int *, DeviceType>("MGP::numneighs_short", ignum);
+  Kokkos::parallel_for(
+      Kokkos::RangePolicy<DeviceType, TagPairMGPComputeShortNeigh>(
+          0, neighflag == FULL ? ignum : inum),
+      *this);
+
+  // loop over neighbor list of my atoms
+
+  if (neighflag == HALF) {
+    if (evflag)
+      Kokkos::parallel_reduce(
+          Kokkos::RangePolicy<DeviceType, TagPairMGPComputeHalf<HALF, 1>>(0,
+                                                                          inum),
+          *this, ev);
+    else
+      Kokkos::parallel_for(
+          Kokkos::RangePolicy<DeviceType, TagPairMGPComputeHalf<HALF, 0>>(0,
+                                                                          inum),
+          *this);
+    ev_all += ev;
+  } else if (neighflag == HALFTHREAD) {
+    if (evflag)
+      Kokkos::parallel_reduce(
+          Kokkos::RangePolicy<DeviceType, TagPairMGPComputeHalf<HALFTHREAD, 1>>(
+              0, inum),
+          *this, ev);
+    else
+      Kokkos::parallel_for(
+          Kokkos::RangePolicy<DeviceType, TagPairMGPComputeHalf<HALFTHREAD, 0>>(
+              0, inum),
+          *this);
+    ev_all += ev;
+  } else if (neighflag == FULL) {
+    if (evflag)
+      Kokkos::parallel_reduce(
+          Kokkos::RangePolicy<DeviceType, TagPairMGPComputeFullA<FULL, 1>>(
+              0, inum),
+          *this, ev);
+    else
+      Kokkos::parallel_for(
+          Kokkos::RangePolicy<DeviceType, TagPairMGPComputeFullA<FULL, 0>>(
+              0, inum),
+          *this);
+    ev_all += ev;
+
+    /*
+    if (evflag)
+      Kokkos::parallel_reduce(
+          Kokkos::RangePolicy<DeviceType, TagPairMGPComputeFullB<FULL, 1>>(
+              0, ignum),
+          *this, ev);
+    else
+      Kokkos::parallel_for(
+          Kokkos::RangePolicy<DeviceType, TagPairMGPComputeFullB<FULL, 0>>(
+              0, ignum),
+          *this);
+    ev_all += ev;
+    */
+  }
+
+  if (need_dup)
+    Kokkos::Experimental::contribute(f, dup_f);
+
+  if (eflag_global)
+    eng_vdwl += ev_all.evdwl;
+  if (vflag_global) {
+    virial[0] += ev_all.v[0];
+    virial[1] += ev_all.v[1];
+    virial[2] += ev_all.v[2];
+    virial[3] += ev_all.v[3];
+    virial[4] += ev_all.v[4];
+    virial[5] += ev_all.v[5];
+  }
+
+  if (eflag_atom) {
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_eatom, dup_eatom);
+    k_eatom.template modify<DeviceType>();
+    k_eatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_atom) {
+    if (need_dup)
+      Kokkos::Experimental::contribute(d_vatom, dup_vatom);
+    k_vatom.template modify<DeviceType>();
+    k_vatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_fdotr)
+    pair_virial_fdotr_compute(this);
+
+  copymode = 0;
+
+  // free duplicated memory
+  if (need_dup) {
+    dup_f = decltype(dup_f)();
+    dup_eatom = decltype(dup_eatom)();
+    dup_vatom = decltype(dup_vatom)();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class DeviceType>
+KOKKOS_INLINE_FUNCTION void PairMGPKokkos<DeviceType>::
+operator()(TagPairMGPComputeShortNeigh, const int &ii) const {
+  const int i = d_ilist[ii];
+  const X_FLOAT xtmp = x(i, 0);
+  const X_FLOAT ytmp = x(i, 1);
+  const X_FLOAT ztmp = x(i, 2);
+
+  const int jnum = d_numneigh[i];
+  int inside = 0;
+  for (int jj = 0; jj < jnum; jj++) {
+    int j = d_neighbors(i, jj);
+    j &= NEIGHMASK;
+
+    const X_FLOAT delx = xtmp - x(j, 0);
+    const X_FLOAT dely = ytmp - x(j, 1);
+    const X_FLOAT delz = ztmp - x(j, 2);
+    const F_FLOAT rsq = delx * delx + dely * dely + delz * delz;
+
+    if (rsq < cutmax * cutmax) {
+      d_neighbors_short(i, inside) = j;
+      inside++;
+    }
+  }
+  d_numneigh_short(i) = inside;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class DeviceType>
+template <int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION void PairMGPKokkos<DeviceType>::
+operator()(TagPairMGPComputeHalf<NEIGHFLAG, EVFLAG>, const int &ii,
+           EV_FLOAT &ev) const {
+
+  // The f array is duplicated for OpenMP, atomic for CUDA, and neither for
+  // Serial
+
+  auto v_f =
+      ScatterViewHelper<NeedDup<NEIGHFLAG, DeviceType>::value, decltype(dup_f),
+                        decltype(ndup_f)>::get(dup_f, ndup_f);
+  auto a_f = v_f.template access<AtomicDup<NEIGHFLAG, DeviceType>::value>();
+
+  F_FLOAT delr1[3], delr2[3], delr12[3], fj[3], fk[3], ftriplet[3];
+  F_FLOAT evdwl = 0.0;
+  F_FLOAT fpair = 0.0;
+
+  const int i = d_ilist[ii];
+  const tagint itag = tag[i];
+  const int itype = type[i];
+  const X_FLOAT xtmp = x(i, 0);
+  const X_FLOAT ytmp = x(i, 1);
+  const X_FLOAT ztmp = x(i, 2);
+
+  // two-body interactions, skip half of them
+
+  const int jnum = d_numneigh_short[i];
+
+  F_FLOAT fxtmpi = 0.0;
+  F_FLOAT fytmpi = 0.0;
+  F_FLOAT fztmpi = 0.0;
+
+  if (compute2b) {
+    for (int jj = 0; jj < jnum; jj++) {
+      int j = d_neighbors_short(i, jj);
+      j &= NEIGHMASK;
+      const tagint jtag = tag[j];
+
+      if (itag > jtag) {
+        if ((itag + jtag) % 2 == 0)
+          continue;
+      } else if (itag < jtag) {
+        if ((itag + jtag) % 2 == 1)
+          continue;
+      } else {
+        if (x(j, 2) < ztmp)
+          continue;
+        if (x(j, 2) == ztmp && x(j, 1) < ytmp)
+          continue;
+        if (x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)
+          continue;
+      }
+
+      const int jtype = type[j];
+
+      const X_FLOAT delx = xtmp - x(j, 0);
+      const X_FLOAT dely = ytmp - x(j, 1);
+      const X_FLOAT delz = ztmp - x(j, 2);
+      const F_FLOAT rsq = delx * delx + dely * dely + delz * delz;
+
+      const int mapid = d_map2b(itype, jtype);
+      if (rsq >= d_cut2bsq(mapid))
+        continue;
+
+      twobody(mapid, rsq, fpair, evdwl);
+
+      fxtmpi += delx * fpair;
+      fytmpi += dely * fpair;
+      fztmpi += delz * fpair;
+      a_f(j, 0) -= delx * fpair;
+      a_f(j, 1) -= dely * fpair;
+      a_f(j, 2) -= delz * fpair;
+
+      if (EVFLAG) {
+        if (eflag)
+          ev.evdwl += 2 * evdwl;
+        if (vflag_either || eflag_atom)
+          this->template ev_tally<NEIGHFLAG>(ev, i, j, evdwl, fpair, delx, dely,
+                                             delz);
+      }
+    }
+  }
+
+  if (compute3b) {
+    const int jnumm1 = jnum - 1;
+
+    for (int jj = 0; jj < jnumm1; jj++) {
+      int j = d_neighbors_short(i, jj);
+      j &= NEIGHMASK;
+      const int jtype = type[j];
+      // const int ijparam = d_elem2param(itype, jtype, jtype);
+      delr1[0] = x(j, 0) - xtmp;
+      delr1[1] = x(j, 1) - ytmp;
+      delr1[2] = x(j, 2) - ztmp;
+      const F_FLOAT rsq1 =
+          delr1[0] * delr1[0] + delr1[1] * delr1[1] + delr1[2] * delr1[2];
+      // if (rsq1 >= d_cut2bsq(d_map2b(itype, jtype)))
+      // continue;
+
+      F_FLOAT fxtmpj = 0.0;
+      F_FLOAT fytmpj = 0.0;
+      F_FLOAT fztmpj = 0.0;
+
+      for (int kk = jj + 1; kk < jnum; kk++) {
+        int k = d_neighbors_short(i, kk);
+        k &= NEIGHMASK;
+        const int ktype = type[k];
+        const int mapid1 = d_map3b(itype, jtype, ktype),
+                  mapid2 = d_map3b(itype, ktype, jtype);
+
+        const F_FLOAT cutoff = d_cut3bsq[mapid1];
+
+        if (rsq1 >= cutoff)
+          continue;
+
+        delr2[0] = x(k, 0) - xtmp;
+        delr2[1] = x(k, 1) - ytmp;
+        delr2[2] = x(k, 2) - ztmp;
+        const F_FLOAT rsq2 =
+            delr2[0] * delr2[0] + delr2[1] * delr2[1] + delr2[2] * delr2[2];
+
+        if (rsq2 >= cutoff)
+          continue;
+
+        delr12[0] = x(k, 0) - x(j, 0);
+        delr12[1] = x(k, 1) - x(j, 1);
+        delr12[2] = x(k, 2) - x(j, 2);
+        const F_FLOAT rsq12 = delr12[0] * delr12[0] + delr12[1] * delr12[1] +
+                              delr12[2] * delr12[2];
+        if (rsq12 >= cutoff)
+          continue;
+
+        const F_FLOAT r1 = sqrt(rsq1), r2 = sqrt(rsq2), r12 = sqrt(rsq12);
+
+        F_FLOAT evdwl3;
+        threebody(mapid1, r1, r2, r12, evdwl3, ftriplet);
+
+        F_FLOAT f_d1, f_d2;
+
+        // I don't understand the 1.5
+        f_d1 = -0.5 * ftriplet[0] / r1; // divided by length
+        f_d2 = -0.5 * ftriplet[1] / r2; // divided by length
+
+        fj[0] = f_d1 * delr1[0]; // delr1, delr2, not unit vector
+        fj[1] = f_d1 * delr1[1];
+        fj[2] = f_d1 * delr1[2];
+        fk[0] = f_d2 * delr2[0];
+        fk[1] = f_d2 * delr2[1];
+        fk[2] = f_d2 * delr2[2];
+
+        fxtmpi -= 3 * (fj[0] + fk[0]);
+        fytmpi -= 3 * (fj[1] + fk[1]);
+        fztmpi -= 3 * (fj[2] + fk[2]);
+
+        fxtmpj += 3 * fj[0];
+        fytmpj += 3 * fj[1];
+        fztmpj += 3 * fj[2];
+
+        a_f(k, 0) += 3 * fk[0];
+        a_f(k, 1) += 3 * fk[1];
+        a_f(k, 2) += 3 * fk[2];
+
+        if (EVFLAG) {
+          if (eflag)
+            ev.evdwl += evdwl3;
+          if (vflag_either || eflag_atom)
+            this->template ev_tally3<NEIGHFLAG>(ev, i, j, k, evdwl3, 0.0, fj,
+                                                fk, delr1, delr2);
+        }
+      }
+
+      a_f(j, 0) += fxtmpj;
+      a_f(j, 1) += fytmpj;
+      a_f(j, 2) += fztmpj;
+    }
+  }
+
+  a_f(i, 0) += fxtmpi;
+  a_f(i, 1) += fytmpi;
+  a_f(i, 2) += fztmpi;
+}
+
+template <class DeviceType>
+template <int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION void PairMGPKokkos<DeviceType>::
+operator()(TagPairMGPComputeHalf<NEIGHFLAG, EVFLAG>, const int &ii) const {
+  EV_FLOAT ev;
+  this->template operator()<NEIGHFLAG, EVFLAG>(
+      TagPairMGPComputeHalf<NEIGHFLAG, EVFLAG>(), ii, ev);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class DeviceType>
+template <int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION void PairMGPKokkos<DeviceType>::
+operator()(TagPairMGPComputeFullA<NEIGHFLAG, EVFLAG>, const int &ii,
+           EV_FLOAT &ev) const {
+
+  F_FLOAT delr1[3], delr2[3], delr12[3], fj[3], fk[3], ftriplet[3];
+  F_FLOAT evdwl = 0.0;
+  F_FLOAT fpair = 0.0;
+
+  const int i = d_ilist[ii];
+
+  const tagint itag = tag[i];
+  const int itype = type[i];
+  const X_FLOAT xtmp = x(i, 0);
+  const X_FLOAT ytmp = x(i, 1);
+  const X_FLOAT ztmp = x(i, 2);
+
+  // two-body interactions
+
+  const int jnum = d_numneigh_short[i];
+
+  F_FLOAT fxtmpi = 0.0;
+  F_FLOAT fytmpi = 0.0;
+  F_FLOAT fztmpi = 0.0;
+
+  if (compute2b) {
+    for (int jj = 0; jj < jnum; jj++) {
+      int j = d_neighbors_short(i, jj);
+      j &= NEIGHMASK;
+      const tagint jtag = tag[j];
+
+      const int jtype = type[j];
+
+      const X_FLOAT delx = xtmp - x(j, 0);
+      const X_FLOAT dely = ytmp - x(j, 1);
+      const X_FLOAT delz = ztmp - x(j, 2);
+      const F_FLOAT rsq = delx * delx + dely * dely + delz * delz;
+
+      const int mapid = d_map2b(itype, jtype);
+      if (rsq >= d_cut2bsq(mapid))
+        continue;
+
+      twobody(mapid, rsq, fpair, evdwl);
+
+      fxtmpi += delx * fpair;
+      fytmpi += dely * fpair;
+      fztmpi += delz * fpair;
+
+      if (EVFLAG) {
+        if (eflag)
+          ev.evdwl += evdwl;
+        if (vflag_either || eflag_atom)
+          this->template ev_tally<NEIGHFLAG>(ev, i, j, evdwl, fpair, delx, dely,
+                                             delz);
+      }
+    }
+  }
+
+  if (compute3b) {
+    const int jnumm1 = jnum - 1;
+
+    for (int jj = 0; jj < jnumm1; jj++) {
+      int j = d_neighbors_short(i, jj);
+      j &= NEIGHMASK;
+      const int jtype = type[j];
+      // const int ijparam = d_elem2param(itype, jtype, jtype);
+      delr1[0] = x(j, 0) - xtmp;
+      delr1[1] = x(j, 1) - ytmp;
+      delr1[2] = x(j, 2) - ztmp;
+      const F_FLOAT rsq1 =
+          delr1[0] * delr1[0] + delr1[1] * delr1[1] + delr1[2] * delr1[2];
+
+      // if (rsq1 >= d_cut2bsq(d_map2b(itype, jtype)))
+      // continue;
+
+      for (int kk = jj + 1; kk < jnum; kk++) {
+        int k = d_neighbors_short(i, kk);
+        k &= NEIGHMASK;
+        const int ktype = type[k];
+        const int mapid1 = d_map3b(itype, jtype, ktype),
+                  mapid2 = d_map3b(itype, ktype, jtype);
+        // const int ikparam = d_elem2param(itype, ktype, ktype);
+        // const int ijkparam = d_elem2param(itype, jtype, ktype);
+
+        const F_FLOAT cutoff = d_cut3bsq[mapid1];
+
+        if (rsq1 >= cutoff)
+          continue;
+
+        delr2[0] = x(k, 0) - xtmp;
+        delr2[1] = x(k, 1) - ytmp;
+        delr2[2] = x(k, 2) - ztmp;
+        const F_FLOAT rsq2 =
+            delr2[0] * delr2[0] + delr2[1] * delr2[1] + delr2[2] * delr2[2];
+
+        if (rsq2 >= cutoff)
+          continue;
+
+        delr12[0] = x(k, 0) - x(j, 0);
+        delr12[1] = x(k, 1) - x(j, 1);
+        delr12[2] = x(k, 2) - x(j, 2);
+        const F_FLOAT rsq12 = delr12[0] * delr12[0] + delr12[1] * delr12[1] +
+                              delr12[2] * delr12[2];
+        if (rsq12 >= cutoff)
+          continue;
+
+        const F_FLOAT r1 = sqrt(rsq1), r2 = sqrt(rsq2), r12 = sqrt(rsq12);
+
+        F_FLOAT evdwl3;
+        threebody(mapid1, r1, r2, r12, evdwl3, ftriplet);
+
+        // threebody(d_params[ijparam], d_params[ikparam], d_params[ijkparam],
+        //           rsq1, rsq2, delr1, delr2, fj, fk, eflag, evdwl);
+
+        F_FLOAT f_d1, f_d2;
+
+        f_d1 = -1.5 * ftriplet[0] / r1; // divided by length
+        f_d2 = -1.5 * ftriplet[1] / r2; // divided by length
+
+        fj[0] = f_d1 * delr1[0]; // delr1, delr2, not unit vector
+        fj[1] = f_d1 * delr1[1];
+        fj[2] = f_d1 * delr1[2];
+        fk[0] = f_d2 * delr2[0];
+        fk[1] = f_d2 * delr2[1];
+        fk[2] = f_d2 * delr2[2];
+
+        fxtmpi -= 2 * (fj[0] + fk[0]);
+        fytmpi -= 2 * (fj[1] + fk[1]);
+        fztmpi -= 2 * (fj[2] + fk[2]);
+
+        if (EVFLAG) {
+          if (eflag) {
+            ev.evdwl += evdwl3;
+          }
+          if (vflag_either || eflag_atom) {
+            // this->template ev_tally<NEIGHFLAG>(ev, i, j, evdwl3, f_d1,
+            // delr1[0], delr1[1], delr1[2]);
+            // this->template ev_tally<NEIGHFLAG>(ev, i, k, evdwl3, f_d2,
+            // delr2[0], delr2[1], delr2[2]);
+            this->template ev_tally3<NEIGHFLAG>(ev, i, j, k, evdwl3, 0.0, fj,
+                                                fk, delr1, delr2);
+            // TODO: check this, probably a factor off
+            // ev_tally3_atom(ev, i, evdwl3, 0.0, fj, fk, delr1, delr2);
+          }
+        }
+      }
+    }
+  }
+
+  f(i, 0) += fxtmpi;
+  f(i, 1) += fytmpi;
+  f(i, 2) += fztmpi;
+}
+
+template <class DeviceType>
+template <int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION void PairMGPKokkos<DeviceType>::
+operator()(TagPairMGPComputeFullA<NEIGHFLAG, EVFLAG>, const int &ii) const {
+  EV_FLOAT ev;
+  this->template operator()<NEIGHFLAG, EVFLAG>(
+      TagPairMGPComputeFullA<NEIGHFLAG, EVFLAG>(), ii, ev);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class DeviceType>
+template <int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION void PairMGPKokkos<DeviceType>::
+operator()(TagPairMGPComputeFullB<NEIGHFLAG, EVFLAG>, const int &ii,
+           EV_FLOAT &ev) const {
+
+  return; // DON'T THINK THIS IS NEEDED
+  /*
+
+  F_FLOAT delr1[3], delr2[3], fj[3], fk[3];
+  F_FLOAT evdwl = 0.0;
+
+  const int i = d_ilist[ii];
+
+  const int itype = type[i];
+  const X_FLOAT xtmpi = x(i, 0);
+  const X_FLOAT ytmpi = x(i, 1);
+  const X_FLOAT ztmpi = x(i, 2);
+
+  const int jnum = d_numneigh_short[i];
+
+  F_FLOAT fxtmpi = 0.0;
+  F_FLOAT fytmpi = 0.0;
+  F_FLOAT fztmpi = 0.0;
+
+  if (compute3b) {
+    for (int jj = 0; jj < jnum; jj++) {
+      int j = d_neighbors_short(i, jj);
+      j &= NEIGHMASK;
+      if (j >= nlocal)
+        continue;
+      const int jtype = type[j];
+      const int jiparam = d_elem2param(jtype, itype, itype);
+      const X_FLOAT xtmpj = x(j, 0);
+      const X_FLOAT ytmpj = x(j, 1);
+      const X_FLOAT ztmpj = x(j, 2);
+
+      delr1[0] = xtmpi - xtmpj;
+      delr1[1] = ytmpi - ytmpj;
+      delr1[2] = ztmpi - ztmpj;
+      const F_FLOAT rsq1 =
+          delr1[0] * delr1[0] + delr1[1] * delr1[1] + delr1[2] * delr1[2];
+
+      if (rsq1 >= d_params[jiparam].cutsq)
+        continue;
+
+      const int j_jnum = d_numneigh_short[j];
+
+      for (int kk = 0; kk < j_jnum; kk++) {
+        int k = d_neighbors_short(j, kk);
+        k &= NEIGHMASK;
+        if (k == i)
+          continue;
+        const int ktype = type[k];
+        const int jkparam = d_elem2param(jtype, ktype, ktype);
+        const int jikparam = d_elem2param(jtype, itype, ktype);
+
+        delr2[0] = x(k, 0) - xtmpj;
+        delr2[1] = x(k, 1) - ytmpj;
+        delr2[2] = x(k, 2) - ztmpj;
+        const F_FLOAT rsq2 =
+            delr2[0] * delr2[0] + delr2[1] * delr2[1] + delr2[2] * delr2[2];
+
+        if (rsq2 >= d_params[jkparam].cutsq)
+          continue;
+
+        if (vflag_atom)
+          threebody(d_params[jiparam], d_params[jkparam], d_params[jikparam],
+                    rsq1, rsq2, delr1, delr2, fj, fk, eflag, evdwl);
+        else
+          threebodyj(d_params[jiparam], d_params[jkparam], d_params[jikparam],
+                     rsq1, rsq2, delr1, delr2, fj);
+
+        fxtmpi += fj[0];
+        fytmpi += fj[1];
+        fztmpi += fj[2];
+
+        if (EVFLAG)
+          if (vflag_atom || eflag_atom)
+            ev_tally3_atom(ev, i, evdwl, 0.0, fj, fk, delr1, delr2);
+      }
+    }
+  }
+
+  f(i, 0) += fxtmpi;
+  f(i, 1) += fytmpi;
+  f(i, 2) += fztmpi;
+  */
+}
+
+template <class DeviceType>
+template <int NEIGHFLAG, int EVFLAG>
+KOKKOS_INLINE_FUNCTION void PairMGPKokkos<DeviceType>::
+operator()(TagPairMGPComputeFullB<NEIGHFLAG, EVFLAG>, const int &ii) const {
+  EV_FLOAT ev;
+  this->template operator()<NEIGHFLAG, EVFLAG>(
+      TagPairMGPComputeFullB<NEIGHFLAG, EVFLAG>(), ii, ev);
+}
+
+template <class DeviceType>
+template <typename T, typename V>
+void PairMGPKokkos<DeviceType>::copy_1d(V &d, T *h, int n) {
+  // Kokkos::realloc(d, n);
+  // auto h_view = Kokkos::create_mirror(d);
+  Kokkos::DualView<T *, DeviceType> tmp("pair::tmp", n);
+  auto h_view = tmp.h_view;
+
+  for (int i = 0; i < n; i++) {
+    h_view(i) = h[i];
+  }
+
+  // Kokkos::deep_copy(d, h_view);
+
+  tmp.template modify<LMPHostType>();
+  tmp.template sync<DeviceType>();
+
+  d = tmp.template view<DeviceType>();
+}
+
+template <class DeviceType>
+template <typename T, typename V>
+void PairMGPKokkos<DeviceType>::copy_2d(V &d, T **h, int m, int n) {
+  Kokkos::View<T **, Kokkos::LayoutRight> tmp("pair::tmp", m, n);
+  auto h_view = Kokkos::create_mirror(tmp);
+  // typename Kokkos::DualView<T **, Kokkos::LayoutRight, DeviceType> tmp(
+  //    "pair::tmp", m, n);
+  // auto h_view = tmp.h_view;
+
+  for (int i = 0; i < m; i++) {
+    for (int j = 0; j < n; j++) {
+      h_view(i, j) = h[i][j];
+    }
+  }
+
+  Kokkos::deep_copy(tmp, h_view);
+
+  d = tmp;
+
+  // tmp.template modify<LMPHostType>();
+  // tmp.template sync<DeviceType>();
+
+  // d = tmp.template view<DeviceType>();
+}
+
+template <class DeviceType>
+template <typename T, typename V>
+void PairMGPKokkos<DeviceType>::copy_3d(V &d, T ***h, int m, int n, int o) {
+  Kokkos::View<T ***> tmp("pair::tmp", m, n, o);
+  auto h_view = Kokkos::create_mirror(tmp);
+  // Kokkos::DualView<T ***, DeviceType> tmp("pair::tmp", m, n, o);
+  // auto h_view = tmp.h_view;
+  for (int i = 0; i < m; i++) {
+    for (int j = 0; j < n; j++) {
+      for (int k = 0; k < o; k++) {
+        h_view(i, j, k) = h[i][j][k];
+      }
+    }
+  }
+
+  Kokkos::deep_copy(tmp, h_view);
+
+  d = tmp;
+
+  // tmp.template modify<LMPHostType>();
+  // tmp.template sync<DeviceType>();
+
+  // d = tmp.template view<DeviceType>();
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+------------------------------------------------------------------------- */
+
+template <class DeviceType>
+void PairMGPKokkos<DeviceType>::coeff(int narg, char **arg) {
+  // let CPU implementation read coefficients and set everything up
+  PairMGP::coeff(narg, arg);
+
+  // copy all coefficients etc. to device
+
+  int n = atom->ntypes;
+
+  if (compute2b) {
+    copy_1d(d_cut2bsq, cut2bsq, n_2body);
+    copy_1d(d_lo_2body, lo_2body, n_2body);
+    copy_1d<LMP_FLOAT>(d_hi_2body, hi_2body, n_2body);
+    copy_1d(d_grid_2body, grid_2body, n_2body);
+    copy_2d(d_map2b, map2b, n + 1, n + 1);
+  }
+
+  if (compute3b) {
+    copy_1d(d_cut3bsq, cut3bsq, n_3body);
+    copy_3d<int>(d_map3b, map3b, n + 1, n + 1, n + 1);
+
+    copy_2d<LMP_FLOAT>(d_lo_3body, lo_3body, n_3body, 3);
+    copy_2d<LMP_FLOAT>(d_hi_3body, hi_3body, n_3body, 3);
+
+    copy_2d(d_grid_3body, grid_3body, n_3body, 3);
+  }
+
+  copy_1d(d_Bd, Bd, 4);
+  copy_1d(d_Cd, Cd, 4);
+  copy_1d(d_basis, basis, 4);
+
+  Kokkos::View<LMP_FLOAT **, Kokkos::LayoutRight> k_Ad("Ad", 4, 4),
+      k_dAd("dAd", 4, 4), k_d2Ad("d2Ad", 4, 4);
+  auto h_Ad = Kokkos::create_mirror(k_Ad), h_dAd = Kokkos::create_mirror(k_dAd),
+       h_d2Ad = Kokkos::create_mirror(k_d2Ad);
+
+  for (int i = 0; i < 4; i++) {
+    for (int j = 0; j < 4; j++) {
+      h_Ad(i, j) = Ad[i][j];
+      h_dAd(i, j) = dAd[i][j];
+      h_d2Ad(i, j) = d2Ad[i][j];
+    }
+  }
+
+  Kokkos::deep_copy(k_Ad, h_Ad);
+  Kokkos::deep_copy(k_dAd, h_dAd);
+  Kokkos::deep_copy(k_d2Ad, h_d2Ad);
+  d_Ad = k_Ad;
+  d_dAd = k_dAd;
+  d_d2Ad = k_d2Ad;
+
+  // copy fcoeff_(2|3)body. these are ragged, not rectangular,
+  // but kokkos views should be rectangular. I choose to pad.
+
+  if (compute2b) {
+    int maxlen = *std::max_element(grid_2body, grid_2body + n_2body) + 2;
+    Kokkos::View<LMP_FLOAT **, Kokkos::LayoutRight> k_fcoeff_2body(
+        "pair::fcoeff_2body", n_2body, maxlen);
+    auto h_fcoeff_2body = Kokkos::create_mirror(k_fcoeff_2body);
+
+    for (int i = 0; i < n_2body; i++) {
+      for (int j = 0; j < grid_2body[i] + 2; j++) {
+        h_fcoeff_2body(i, j) = fcoeff_2body[i][j];
+      }
+    }
+    Kokkos::deep_copy(k_fcoeff_2body, h_fcoeff_2body);
+    d_fcoeff_2body = k_fcoeff_2body;
+  }
+
+  if (compute3b) {
+    int maxlen = 0;
+    for (int i = 0; i < n_3body; i++) {
+      int len = (grid_3body[i][0] + 2) * (grid_3body[i][1] + 2) *
+                (grid_3body[i][2] + 2);
+      if (len > maxlen)
+        maxlen = len;
+    }
+    Kokkos::View<LMP_FLOAT **, Kokkos::LayoutRight> k_fcoeff_3body(
+        "pair::fcoeff_3body", n_3body, maxlen);
+    auto h_fcoeff_3body = Kokkos::create_mirror(k_fcoeff_3body);
+    for (int i = 0; i < n_3body; i++) {
+      int len = (grid_3body[i][0] + 2) * (grid_3body[i][1] + 2) *
+                (grid_3body[i][2] + 2);
+      for (int j = 0; j < len; j++) {
+        h_fcoeff_3body(i, j) = fcoeff_3body[i][j];
+      }
+    }
+    Kokkos::deep_copy(k_fcoeff_3body, h_fcoeff_3body);
+    d_fcoeff_3body = k_fcoeff_3body;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+template <class DeviceType> void PairMGPKokkos<DeviceType>::init_style() {
+  PairMGP::init_style();
+
+  // irequest = neigh request made by parent class
+
+  neighflag = lmp->kokkos->neighflag;
+  int irequest = neighbor->nrequest - 1;
+
+  neighbor->requests[irequest]->kokkos_host =
+      Kokkos::Impl::is_same<DeviceType, LMPHostType>::value &&
+      !Kokkos::Impl::is_same<DeviceType, LMPDeviceType>::value;
+  neighbor->requests[irequest]->kokkos_device =
+      Kokkos::Impl::is_same<DeviceType, LMPDeviceType>::value;
+
+  // always request a full neighbor list
+
+  if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
+    neighbor->requests[irequest]->full = 1;
+    neighbor->requests[irequest]->half = 0;
+    if (neighflag == FULL)
+      neighbor->requests[irequest]->ghost = 1;
+    else
+      neighbor->requests[irequest]->ghost = 0;
+  } else {
+    error->all(FLERR, "Cannot use chosen neighbor list style with pair sw/kk");
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class DeviceType> void PairMGPKokkos<DeviceType>::setup_params() {
+  /*
+  PairMGP::setup_params();
+
+  // sync elem2param and params
+
+  tdual_int_3d k_elem2param =
+      tdual_int_3d("pair:elem2param", nelements, nelements, nelements);
+  t_host_int_3d h_elem2param = k_elem2param.h_view;
+
+  tdual_param_1d k_params = tdual_param_1d("pair:params", nparams);
+  t_host_param_1d h_params = k_params.h_view;
+
+  for (int i = 0; i < nelements; i++)
+    for (int j = 0; j < nelements; j++)
+      for (int k = 0; k < nelements; k++)
+        h_elem2param(i, j, k) = elem2param[i][j][k];
+
+  for (int m = 0; m < nparams; m++)
+    h_params[m] = params[m];
+
+  k_elem2param.template modify<LMPHostType>();
+  k_elem2param.template sync<DeviceType>();
+  k_params.template modify<LMPHostType>();
+  k_params.template sync<DeviceType>();
+
+  d_elem2param = k_elem2param.template view<DeviceType>();
+  d_params = k_params.template view<DeviceType>();
+  */
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class DeviceType>
+KOKKOS_INLINE_FUNCTION void
+PairMGPKokkos<DeviceType>::twobody(const int &mapid, const F_FLOAT &rsq,
+                                   F_FLOAT &force, F_FLOAT &energy) const {
+  LMP_FLOAT r = sqrt(rsq);
+
+  LMP_FLOAT a = d_lo_2body(mapid), b = d_hi_2body(mapid);
+  int orders = d_grid_2body(mapid);
+  auto coefs = Kokkos::subview(d_fcoeff_2body, mapid, Kokkos::ALL);
+
+  // printf("proc = %d, mapid = %d, a = %g, b = %g, orders = %d\n", comm->me,
+  //       mapid, a, b, orders);
+  // for (int i = 0; i < orders + 2; i++) {
+  //  printf("%.2f, ", coefs(i));
+  //}
+  // printf("\n");
+
+  force = 0.0;
+  energy = 0.0;
+
+  // some coefficients
+  int i, j, ii, i0; // made i0 int
+  F_FLOAT dinv, u, tt;
+  dinv = (orders - 1.0) / (b - a);
+  u = (r - a) * dinv;
+  i0 = floor(u);
+  ii = max(min(i0, orders - 2), 0); // fmax fmin -> max min
+  tt = u - ii;
+
+  // printf("i0=%d, ii=%d, dinv=%.8f, u=%.8f, tt=%.8f, ", i0, ii, dinv, u, tt);
+
+  // interpolation points
+  F_FLOAT tp[4];
+  for (j = 0; j < 4; j++) {
+    tp[j] = pow(tt, 3 - j);
+  }
+
+  // value of cubic spline function
+  F_FLOAT Phi[4], dPhi[4];
+  F_FLOAT dt;
+  int k;
+
+  if (tt < 0) {
+    for (i = 0; i < 4; i++) {
+      Phi[i] = d_dAd(i, 3) * tt + d_Ad(i, 3);
+    }
+  } else if (tt > 1) {
+    dt = tt - 1;
+    for (i = 0; i < 4; i++) {
+      Phi[i] = d_Bd(i) * dt + d_Cd(i);
+    }
+  } else {
+    for (i = 0; i < 4; i++) {
+      Phi[i] = 0;
+      for (k = 0; k < 4; k++) {
+        Phi[i] += d_Ad(i, k) * tp[k];
+      }
+    }
+  }
+
+  // value of derivative of spline
+  for (i = 0; i < 4; i++) {
+    dPhi[i] = 0;
+    for (k = 0; k < 4; k++) {
+      dPhi[i] += d_dAd(i, k) * tp[k];
+    }
+    dPhi[i] *= dinv;
+  }
+
+  // added by coefficients
+  int N = orders + 2;
+  F_FLOAT pc = 0;
+  F_FLOAT ppc = 0;
+
+  for (j = 0; j < 4; j++) {
+    energy += Phi[j] * coefs(ii + j);
+    force += dPhi[j] * coefs(ii + j);
+    // printf("%.8g ", dPhi[j]);
+  }
+
+  force = -2 * force / r;
+  // printf("mapid = %d, a = %g, b = %g, orders = %d, r = %g, f = %g, e = %g\n",
+  // mapid, a, b, orders, r, force, energy);
+}
+
+template <class DeviceType>
+KOKKOS_INLINE_FUNCTION void PairMGPKokkos<DeviceType>::threebody(
+    const int mapid, const F_FLOAT r1, const F_FLOAT r2, const F_FLOAT r12,
+    F_FLOAT &energy, F_FLOAT (&force)[3]) const {
+  F_FLOAT *a = &d_lo_3body(mapid, 0), *b = &d_hi_3body(mapid, 0);
+  int *orders = &d_grid_3body(mapid, 0);
+  F_FLOAT *coefs = &d_fcoeff_3body(mapid, 0);
+  // auto a = Kokkos::subview(d_lo_3body, mapid, Kokkos::ALL),
+  //      b = Kokkos::subview(d_hi_3body, mapid, Kokkos::ALL);
+  // auto orders = Kokkos::subview(d_grid_3body, mapid, Kokkos::ALL);
+  // auto coefs = Kokkos::subview(d_fcoeff_3body, mapid, Kokkos::ALL);
+
+  energy = 0.0;
+  for (int i = 0; i < 3; i++) {
+    force[i] = 0;
+  }
+
+  const int dim = 3;
+  int i;
+  int j;
+  F_FLOAT point[3] = {r1, r2, r12};
+  F_FLOAT dinv[dim];
+  F_FLOAT u[dim];
+  int i0[dim];
+  int ii[dim];
+  F_FLOAT tt[dim];
+
+  // coefficients
+  for (i = 0; i < dim; i++) {
+    dinv[i] = (orders[i] - 1.0) / (b[i] - a[i]);
+    u[i] = (point[i] - a[i]) * dinv[i];
+    i0[i] = floor(u[i]);
+    ii[i] = max(min(i0[i], orders[i] - 2), 0);
+    tt[i] = u[i] - ii[i];
+  }
+
+  // points
+  F_FLOAT tp[dim][4];
+  for (i = 0; i < dim; i++) {
+    for (j = 0; j < 4; j++) {
+      tp[i][j] = pow(tt[i], 3 - j);
+    }
+  }
+
+  F_FLOAT Phi[dim][4], dPhi[dim][4];
+  F_FLOAT dt;
+  int k;
+
+  for (j = 0; j < dim; j++) {
+
+    // evaluate spline function
+    if (tt[j] < 0) {
+      for (i = 0; i < 4; i++) {
+        Phi[j][i] = d_dAd(i, 3) * tt[j] + d_Ad(i, 3);
+      }
+    } else if (tt[j] > 1) {
+      dt = tt[j] - 1;
+      for (i = 0; i < 4; i++) {
+        Phi[j][i] = d_Bd(i) * dt + d_Cd(i);
+      }
+    } else {
+      for (i = 0; i < 4; i++) {
+        Phi[j][i] = 0;
+        for (k = 0; k < 4; k++) {
+          Phi[j][i] += d_Ad(i, k) * tp[j][k];
+        }
+      }
+    }
+
+    // evaluate derivatives
+    for (i = 0; i < 4; i++) {
+      dPhi[j][i] = 0;
+      for (k = 0; k < 4; k++) {
+        dPhi[j][i] += d_dAd(i, k) * tp[j][k];
+      }
+      dPhi[j][i] *= dinv[j];
+    }
+  }
+
+  // added by coefficients
+  int N[dim];
+  for (i = 0; i < dim; i++) {
+    N[i] = orders[i] + 2;
+  }
+  F_FLOAT c, pc, ppc;
+  F_FLOAT dpc, dppc1, dppc2;
+
+  for (i = 0; i < 4; i++) {
+    ppc = 0;
+    dppc1 = 0;
+    dppc2 = 0;
+    for (j = 0; j < 4; j++) {
+      pc = 0;
+      dpc = 0;
+      for (k = 0; k < 4; k++) {
+        c = coefs[((ii[0] + i) * N[1] + ii[1] + j) * N[2] + ii[2] + k];
+        pc += Phi[2][k] * c;
+        dpc += dPhi[2][k] * c;
+      }
+      ppc += Phi[1][j] * pc;
+      dppc1 += dPhi[1][j] * pc;
+      dppc2 += Phi[1][j] * dpc;
+    }
+    energy += Phi[0][i] * ppc;
+    force[0] += dPhi[0][i] * ppc;
+    force[1] += Phi[0][i] * dppc1;
+    force[2] += Phi[0][i] * dppc2;
+  }
+  // printf("r1 = %g, r2 = %g, r12 = %g, e = %g, f = %.16g %.16g %.16g\n", r1,
+  // r1, a12, energy, force[0], force[1], force[2]);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+template <class DeviceType>
+KOKKOS_INLINE_FUNCTION void PairMGPKokkos<DeviceType>::threebody(
+    const Param &paramij, const Param &paramik, const Param &paramijk,
+    const F_FLOAT &rsq1, const F_FLOAT &rsq2, F_FLOAT *delr1, F_FLOAT *delr2,
+    F_FLOAT *fj, F_FLOAT *fk, const int &eflag, F_FLOAT &eng) const {
+  F_FLOAT r1, rinvsq1, rainv1, gsrainv1, gsrainvsq1, expgsrainv1;
+  F_FLOAT r2, rinvsq2, rainv2, gsrainv2, gsrainvsq2, expgsrainv2;
+  F_FLOAT rinv12, cs, delcs, delcssq, facexp, facrad, frad1, frad2;
+  F_FLOAT facang, facang12, csfacang, csfac1, csfac2;
+
+  r1 = sqrt(rsq1);
+  rinvsq1 = 1.0 / rsq1;
+  rainv1 = 1.0 / (r1 - paramij.cut);
+  gsrainv1 = paramij.sigma_gamma * rainv1;
+  gsrainvsq1 = gsrainv1 * rainv1 / r1;
+  expgsrainv1 = exp(gsrainv1);
+
+  r2 = sqrt(rsq2);
+  rinvsq2 = 1.0 / rsq2;
+  rainv2 = 1.0 / (r2 - paramik.cut);
+  gsrainv2 = paramik.sigma_gamma * rainv2;
+  gsrainvsq2 = gsrainv2 * rainv2 / r2;
+  expgsrainv2 = exp(gsrainv2);
+
+  rinv12 = 1.0 / (r1 * r2);
+  cs = (delr1[0] * delr2[0] + delr1[1] * delr2[1] + delr1[2] * delr2[2]) *
+       rinv12;
+  delcs = cs - paramijk.costheta;
+  delcssq = delcs * delcs;
+
+  facexp = expgsrainv1 * expgsrainv2;
+
+  // facrad = sqrt(paramij.lambda_epsilon*paramik.lambda_epsilon) *
+  //          facexp*delcssq;
+
+  facrad = paramijk.lambda_epsilon * facexp * delcssq;
+  frad1 = facrad * gsrainvsq1;
+  frad2 = facrad * gsrainvsq2;
+  facang = paramijk.lambda_epsilon2 * facexp * delcs;
+  facang12 = rinv12 * facang;
+  csfacang = cs * facang;
+  csfac1 = rinvsq1 * csfacang;
+
+  fj[0] = delr1[0] * (frad1 + csfac1) - delr2[0] * facang12;
+  fj[1] = delr1[1] * (frad1 + csfac1) - delr2[1] * facang12;
+  fj[2] = delr1[2] * (frad1 + csfac1) - delr2[2] * facang12;
+
+  csfac2 = rinvsq2 * csfacang;
+
+  fk[0] = delr2[0] * (frad2 + csfac2) - delr1[0] * facang12;
+  fk[1] = delr2[1] * (frad2 + csfac2) - delr1[1] * facang12;
+  fk[2] = delr2[2] * (frad2 + csfac2) - delr1[2] * facang12;
+
+  if (eflag)
+    eng = facrad;
+}
+*/
+
+/* ---------------------------------------------------------------------- */
+
+template <class DeviceType>
+KOKKOS_INLINE_FUNCTION void PairMGPKokkos<DeviceType>::threebodyj(
+    const Param &paramij, const Param &paramik, const Param &paramijk,
+    const F_FLOAT &rsq1, const F_FLOAT &rsq2, F_FLOAT *delr1, F_FLOAT *delr2,
+    F_FLOAT *fj) const {
+  F_FLOAT r1, rinvsq1, rainv1, gsrainv1, gsrainvsq1, expgsrainv1;
+  F_FLOAT r2, rainv2, gsrainv2, expgsrainv2;
+  F_FLOAT rinv12, cs, delcs, delcssq, facexp, facrad, frad1;
+  F_FLOAT facang, facang12, csfacang, csfac1;
+
+  r1 = sqrt(rsq1);
+  rinvsq1 = 1.0 / rsq1;
+  rainv1 = 1.0 / (r1 - paramij.cut);
+  gsrainv1 = paramij.sigma_gamma * rainv1;
+  gsrainvsq1 = gsrainv1 * rainv1 / r1;
+  expgsrainv1 = exp(gsrainv1);
+
+  r2 = sqrt(rsq2);
+  rainv2 = 1.0 / (r2 - paramik.cut);
+  gsrainv2 = paramik.sigma_gamma * rainv2;
+  expgsrainv2 = exp(gsrainv2);
+
+  rinv12 = 1.0 / (r1 * r2);
+  cs = (delr1[0] * delr2[0] + delr1[1] * delr2[1] + delr1[2] * delr2[2]) *
+       rinv12;
+  delcs = cs - paramijk.costheta;
+  delcssq = delcs * delcs;
+
+  facexp = expgsrainv1 * expgsrainv2;
+
+  // facrad = sqrt(paramij.lambda_epsilon*paramik.lambda_epsilon) *
+  //          facexp*delcssq;
+
+  facrad = paramijk.lambda_epsilon * facexp * delcssq;
+  frad1 = facrad * gsrainvsq1;
+  facang = paramijk.lambda_epsilon2 * facexp * delcs;
+  facang12 = rinv12 * facang;
+  csfacang = cs * facang;
+  csfac1 = rinvsq1 * csfacang;
+
+  fj[0] = delr1[0] * (frad1 + csfac1) - delr2[0] * facang12;
+  fj[1] = delr1[1] * (frad1 + csfac1) - delr2[1] * facang12;
+  fj[2] = delr1[2] * (frad1 + csfac1) - delr2[2] * facang12;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class DeviceType>
+template <int NEIGHFLAG>
+KOKKOS_INLINE_FUNCTION void
+PairMGPKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j,
+                                    const F_FLOAT &epair, const F_FLOAT &fpair,
+                                    const F_FLOAT &delx, const F_FLOAT &dely,
+                                    const F_FLOAT &delz) const {
+  const int VFLAG = vflag_either;
+
+  // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA,
+  // and neither for Serial
+
+  auto v_eatom =
+      ScatterViewHelper<NeedDup<NEIGHFLAG, DeviceType>::value,
+                        decltype(dup_eatom),
+                        decltype(ndup_eatom)>::get(dup_eatom, ndup_eatom);
+  auto a_eatom =
+      v_eatom.template access<AtomicDup<NEIGHFLAG, DeviceType>::value>();
+
+  auto v_vatom =
+      ScatterViewHelper<NeedDup<NEIGHFLAG, DeviceType>::value,
+                        decltype(dup_vatom),
+                        decltype(ndup_vatom)>::get(dup_vatom, ndup_vatom);
+  auto a_vatom =
+      v_vatom.template access<AtomicDup<NEIGHFLAG, DeviceType>::value>();
+
+  if (eflag_atom) {
+    const E_FLOAT epairhalf = 0.5 * epair;
+    a_eatom[i] += epairhalf;
+    if (NEIGHFLAG != FULL)
+      a_eatom[j] += epairhalf;
+  }
+
+  if (VFLAG) {
+    const E_FLOAT v0 = delx * delx * fpair;
+    const E_FLOAT v1 = dely * dely * fpair;
+    const E_FLOAT v2 = delz * delz * fpair;
+    const E_FLOAT v3 = delx * dely * fpair;
+    const E_FLOAT v4 = delx * delz * fpair;
+    const E_FLOAT v5 = dely * delz * fpair;
+
+    if (vflag_global) {
+      if (NEIGHFLAG != FULL) {
+        ev.v[0] += v0;
+        ev.v[1] += v1;
+        ev.v[2] += v2;
+        ev.v[3] += v3;
+        ev.v[4] += v4;
+        ev.v[5] += v5;
+      } else {
+        ev.v[0] += 0.5 * v0;
+        ev.v[1] += 0.5 * v1;
+        ev.v[2] += 0.5 * v2;
+        ev.v[3] += 0.5 * v3;
+        ev.v[4] += 0.5 * v4;
+        ev.v[5] += 0.5 * v5;
+      }
+    }
+
+    if (vflag_atom) {
+      a_vatom(i, 0) += 0.5 * v0;
+      a_vatom(i, 1) += 0.5 * v1;
+      a_vatom(i, 2) += 0.5 * v2;
+      a_vatom(i, 3) += 0.5 * v3;
+      a_vatom(i, 4) += 0.5 * v4;
+      a_vatom(i, 5) += 0.5 * v5;
+
+      if (NEIGHFLAG != FULL) {
+        a_vatom(j, 0) += 0.5 * v0;
+        a_vatom(j, 1) += 0.5 * v1;
+        a_vatom(j, 2) += 0.5 * v2;
+        a_vatom(j, 3) += 0.5 * v3;
+        a_vatom(j, 4) += 0.5 * v4;
+        a_vatom(j, 5) += 0.5 * v5;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally eng_vdwl and virial into global and per-atom accumulators
+   called by SW and hbond potentials, newton_pair is always on
+   virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drji*fj + drki*fk
+ ------------------------------------------------------------------------- */
+
+template <class DeviceType>
+template <int NEIGHFLAG>
+KOKKOS_INLINE_FUNCTION void PairMGPKokkos<DeviceType>::ev_tally3(
+    EV_FLOAT &ev, const int &i, const int &j, int &k, const F_FLOAT &evdwl,
+    const F_FLOAT &ecoul, F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji,
+    F_FLOAT *drki) const {
+  F_FLOAT epairthird, v[6];
+
+  const int VFLAG = vflag_either;
+
+  // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA,
+  // and neither for Serial
+
+  auto v_eatom =
+      ScatterViewHelper<NeedDup<NEIGHFLAG, DeviceType>::value,
+                        decltype(dup_eatom),
+                        decltype(ndup_eatom)>::get(dup_eatom, ndup_eatom);
+  auto a_eatom =
+      v_eatom.template access<AtomicDup<NEIGHFLAG, DeviceType>::value>();
+
+  auto v_vatom =
+      ScatterViewHelper<NeedDup<NEIGHFLAG, DeviceType>::value,
+                        decltype(dup_vatom),
+                        decltype(ndup_vatom)>::get(dup_vatom, ndup_vatom);
+  auto a_vatom =
+      v_vatom.template access<AtomicDup<NEIGHFLAG, DeviceType>::value>();
+
+  if (eflag_atom) {
+    epairthird = THIRD * (evdwl + ecoul);
+    a_eatom[i] += epairthird;
+    if (NEIGHFLAG != FULL) {
+      a_eatom[j] += epairthird;
+      a_eatom[k] += epairthird;
+    }
+  }
+
+  if (VFLAG) {
+    v[0] = drji[0] * fj[0] + drki[0] * fk[0];
+    v[1] = drji[1] * fj[1] + drki[1] * fk[1];
+    v[2] = drji[2] * fj[2] + drki[2] * fk[2];
+    v[3] = drji[0] * fj[1] + drki[0] * fk[1];
+    v[4] = drji[0] * fj[2] + drki[0] * fk[2];
+    v[5] = drji[1] * fj[2] + drki[1] * fk[2];
+
+    if (vflag_global) {
+      ev.v[0] += v[0];
+      ev.v[1] += v[1];
+      ev.v[2] += v[2];
+      ev.v[3] += v[3];
+      ev.v[4] += v[4];
+      ev.v[5] += v[5];
+    }
+
+    if (vflag_atom) {
+      a_vatom(i, 0) += THIRD * v[0];
+      a_vatom(i, 1) += THIRD * v[1];
+      a_vatom(i, 2) += THIRD * v[2];
+      a_vatom(i, 3) += THIRD * v[3];
+      a_vatom(i, 4) += THIRD * v[4];
+      a_vatom(i, 5) += THIRD * v[5];
+
+      if (NEIGHFLAG != FULL) {
+        a_vatom(j, 0) += THIRD * v[0];
+        a_vatom(j, 1) += THIRD * v[1];
+        a_vatom(j, 2) += THIRD * v[2];
+        a_vatom(j, 3) += THIRD * v[3];
+        a_vatom(j, 4) += THIRD * v[4];
+        a_vatom(j, 5) += THIRD * v[5];
+
+        a_vatom(k, 0) += THIRD * v[0];
+        a_vatom(k, 1) += THIRD * v[1];
+        a_vatom(k, 2) += THIRD * v[2];
+        a_vatom(k, 3) += THIRD * v[3];
+        a_vatom(k, 4) += THIRD * v[4];
+        a_vatom(k, 5) += THIRD * v[5];
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally eng_vdwl and virial into global and per-atom accumulators
+   called by SW and hbond potentials, newton_pair is always on
+   virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drji*fj + drki*fk
+ ------------------------------------------------------------------------- */
+
+template <class DeviceType>
+KOKKOS_INLINE_FUNCTION void PairMGPKokkos<DeviceType>::ev_tally3_atom(
+    EV_FLOAT &ev, const int &i, const F_FLOAT &evdwl, const F_FLOAT &ecoul,
+    F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drki) const {
+  F_FLOAT epairthird, v[6];
+
+  const int VFLAG = vflag_either;
+
+  if (eflag_atom) {
+    epairthird = THIRD * (evdwl + ecoul);
+    d_eatom[i] += epairthird;
+  }
+
+  if (VFLAG) {
+    v[0] = drji[0] * fj[0] + drki[0] * fk[0];
+    v[1] = drji[1] * fj[1] + drki[1] * fk[1];
+    v[2] = drji[2] * fj[2] + drki[2] * fk[2];
+    v[3] = drji[0] * fj[1] + drki[0] * fk[1];
+    v[4] = drji[0] * fj[2] + drki[0] * fk[2];
+    v[5] = drji[1] * fj[2] + drki[1] * fk[2];
+
+    if (vflag_atom) {
+      d_vatom(i, 0) += THIRD * v[0];
+      d_vatom(i, 1) += THIRD * v[1];
+      d_vatom(i, 2) += THIRD * v[2];
+      d_vatom(i, 3) += THIRD * v[3];
+      d_vatom(i, 4) += THIRD * v[4];
+      d_vatom(i, 5) += THIRD * v[5];
+    }
+  }
+}
+
+namespace LAMMPS_NS {
+template class PairMGPKokkos<LMPDeviceType>;
+#ifdef KOKKOS_ENABLE_CUDA
+template class PairMGPKokkos<LMPHostType>;
+#endif
+} // namespace LAMMPS_NS
diff --git a/lammps_plugins/pair_mgp_kokkos.h b/lammps_plugins/pair_mgp_kokkos.h
new file mode 100644
index 000000000..e80a59968
--- /dev/null
+++ b/lammps_plugins/pair_mgp_kokkos.h
@@ -0,0 +1,238 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+// clang-format off
+#ifdef PAIR_CLASS
+
+PairStyle(mgp/kk,PairMGPKokkos<LMPDeviceType>)
+PairStyle(mgp/kk/device,PairMGPKokkos<LMPDeviceType>)
+//PairStyle(mgp/kk/host,PairMGPKokkos<LMPHostType>)
+
+#else
+
+#ifndef LMP_PAIR_MGP_KOKKOS_H
+#define LMP_PAIR_MGP_KOKKOS_H
+
+#include "pair_mgp.h"
+#include "pair_kokkos.h"
+// clang-format on
+
+template <int NEIGHFLAG, int EVFLAG> struct TagPairMGPComputeHalf {};
+
+template <int NEIGHFLAG, int EVFLAG> struct TagPairMGPComputeFullA {};
+
+template <int NEIGHFLAG, int EVFLAG> struct TagPairMGPComputeFullB {};
+
+struct TagPairMGPComputeShortNeigh {};
+
+namespace LAMMPS_NS {
+
+template <class DeviceType> class PairMGPKokkos : public PairMGP {
+public:
+  struct Param {
+    double epsilon, sigma;
+    double littlea, lambda, gamma, costheta;
+    double biga, bigb;
+    double powerp, powerq;
+    double tol;
+    double cut, cutsq;
+    double sigma_gamma, lambda_epsilon, lambda_epsilon2;
+    double c1, c2, c3, c4, c5, c6;
+    int ielement, jelement, kelement;
+  };
+  enum { EnabledNeighFlags = FULL };
+  enum { COUL_FLAG = 0 };
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  typedef EV_FLOAT value_type;
+
+  PairMGPKokkos(class LAMMPS *);
+  virtual ~PairMGPKokkos();
+  virtual void compute(int, int);
+  virtual void coeff(int, char **);
+  virtual void init_style();
+
+  template <typename T, typename V> void copy_1d(V &d, T *h, int n);
+
+  template <typename T, typename V> void copy_2d(V &d, T **h, int m, int n);
+
+  template <typename T, typename V>
+  void copy_3d(V &d, T ***h, int m, int n, int o);
+
+  template <int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION void
+  operator()(TagPairMGPComputeHalf<NEIGHFLAG, EVFLAG>, const int &,
+             EV_FLOAT &) const;
+
+  template <int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION void
+  operator()(TagPairMGPComputeHalf<NEIGHFLAG, EVFLAG>, const int &) const;
+
+  template <int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION void
+  operator()(TagPairMGPComputeFullA<NEIGHFLAG, EVFLAG>, const int &,
+             EV_FLOAT &) const;
+
+  template <int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION void
+  operator()(TagPairMGPComputeFullA<NEIGHFLAG, EVFLAG>, const int &) const;
+
+  template <int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION void
+  operator()(TagPairMGPComputeFullB<NEIGHFLAG, EVFLAG>, const int &,
+             EV_FLOAT &) const;
+
+  template <int NEIGHFLAG, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION void
+  operator()(TagPairMGPComputeFullB<NEIGHFLAG, EVFLAG>, const int &) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagPairMGPComputeShortNeigh, const int &) const;
+
+  template <int NEIGHFLAG>
+  KOKKOS_INLINE_FUNCTION void
+  ev_tally(EV_FLOAT &ev, const int &i, const int &j, const F_FLOAT &epair,
+           const F_FLOAT &fpair, const F_FLOAT &delx, const F_FLOAT &dely,
+           const F_FLOAT &delz) const;
+
+  template <int NEIGHFLAG>
+  KOKKOS_INLINE_FUNCTION void
+  ev_tally3(EV_FLOAT &ev, const int &i, const int &j, int &k,
+            const F_FLOAT &evdwl, const F_FLOAT &ecoul, F_FLOAT *fj,
+            F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drki) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void ev_tally3_atom(EV_FLOAT &ev, const int &i, const F_FLOAT &evdwl,
+                      const F_FLOAT &ecoul, F_FLOAT *fj, F_FLOAT *fk,
+                      F_FLOAT *drji, F_FLOAT *drki) const;
+
+protected:
+  typedef Kokkos::DualView<int ***, DeviceType> tdual_int_3d;
+  typedef typename tdual_int_3d::t_dev_const_randomread t_int_3d_randomread;
+  typedef typename tdual_int_3d::t_host t_host_int_3d;
+
+  typedef Kokkos::DualView<int **, DeviceType> tdual_int_2d;
+  typedef typename tdual_int_2d::t_dev_const_randomread t_int_2d_randomread;
+
+  using RA = Kokkos::MemoryTraits<Kokkos::RandomAccess>;
+  Kokkos::View<const int **, Kokkos::LayoutRight, RA> d_map2b;
+  // t_int_2d_randomread d_map2b;
+  Kokkos::View<const int ***, RA> d_map3b;
+
+  t_int_3d_randomread d_elem2param;
+  typename AT::t_int_1d_randomread d_map;
+
+  typename AT::t_float_1d_randomread d_cut2bsq, d_cut3bsq, d_lo_2body,
+      d_hi_2body;
+  Kokkos::View<LMP_FLOAT **, Kokkos::LayoutRight, RA> d_lo_3body, d_hi_3body,
+      d_fcoeff_2body, d_fcoeff_3body;
+  typename AT::t_int_1d_randomread d_grid_2body;
+  Kokkos::View<int **, Kokkos::LayoutRight, RA> d_grid_3body;
+
+  typename AT::t_float_1d_randomread d_Bd, d_Cd, d_basis;
+  Kokkos::View<LMP_FLOAT **, Kokkos::LayoutRight, RA> d_Ad, d_dAd, d_d2Ad;
+
+  typedef Kokkos::DualView<Param *, DeviceType> tdual_param_1d;
+  typedef typename tdual_param_1d::t_dev t_param_1d;
+  typedef typename tdual_param_1d::t_host t_host_param_1d;
+
+  t_param_1d d_params;
+
+  virtual void setup_params();
+
+  KOKKOS_INLINE_FUNCTION
+  void twobody(const int &, const F_FLOAT &, F_FLOAT &, F_FLOAT &) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void threebody(const int mapid, const F_FLOAT r1, const F_FLOAT r2,
+                 const F_FLOAT r12, F_FLOAT &energy, F_FLOAT (&force)[3]) const;
+  // void threebody(const Param &, const Param &, const Param &, const F_FLOAT
+  // &, const F_FLOAT &, F_FLOAT *, F_FLOAT *, F_FLOAT *, F_FLOAT *, const int
+  // &, F_FLOAT &) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void threebodyj(const Param &, const Param &, const Param &, const F_FLOAT &,
+                  const F_FLOAT &, F_FLOAT *, F_FLOAT *, F_FLOAT *) const;
+
+  typename AT::t_x_array_randomread x;
+  typename AT::t_f_array f;
+  typename AT::t_tagint_1d tag;
+  typename AT::t_int_1d_randomread type;
+
+  DAT::tdual_efloat_1d k_eatom;
+  DAT::tdual_virial_array k_vatom;
+  typename AT::t_efloat_1d d_eatom;
+  typename AT::t_virial_array d_vatom;
+
+  int need_dup;
+  Kokkos::Experimental::ScatterView<
+      F_FLOAT * [3], typename DAT::t_f_array::array_layout, DeviceType,
+      Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>
+      dup_f;
+  Kokkos::Experimental::ScatterView<
+      E_FLOAT *, typename DAT::t_efloat_1d::array_layout, DeviceType,
+      Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>
+      dup_eatom;
+  Kokkos::Experimental::ScatterView<
+      F_FLOAT * [6], typename DAT::t_virial_array::array_layout, DeviceType,
+      Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>
+      dup_vatom;
+  Kokkos::Experimental::ScatterView<
+      F_FLOAT * [3], typename DAT::t_f_array::array_layout, DeviceType,
+      Kokkos::Experimental::ScatterSum,
+      Kokkos::Experimental::ScatterNonDuplicated>
+      ndup_f;
+  Kokkos::Experimental::ScatterView<
+      E_FLOAT *, typename DAT::t_efloat_1d::array_layout, DeviceType,
+      Kokkos::Experimental::ScatterSum,
+      Kokkos::Experimental::ScatterNonDuplicated>
+      ndup_eatom;
+  Kokkos::Experimental::ScatterView<
+      F_FLOAT * [6], typename DAT::t_virial_array::array_layout, DeviceType,
+      Kokkos::Experimental::ScatterSum,
+      Kokkos::Experimental::ScatterNonDuplicated>
+      ndup_vatom;
+
+  typename AT::t_int_1d_randomread d_type2frho;
+  typename AT::t_int_2d_randomread d_type2rhor;
+  typename AT::t_int_2d_randomread d_type2z2r;
+
+  typename AT::t_neighbors_2d d_neighbors;
+  typename AT::t_int_1d_randomread d_ilist;
+  typename AT::t_int_1d_randomread d_numneigh;
+  // NeighListKokkos<DeviceType> k_list;
+
+  int neighflag, newton_pair;
+  int nlocal, nall, eflag, vflag;
+
+  int inum;
+  Kokkos::View<int **, DeviceType> d_neighbors_short;
+  Kokkos::View<int *, DeviceType> d_numneigh_short;
+
+  friend void pair_virial_fdotr_compute<PairMGPKokkos>(PairMGPKokkos *);
+};
+KOKKOS_INLINE_FUNCTION int min(int i, int j) { return i < j ? i : j; }
+KOKKOS_INLINE_FUNCTION int max(int i, int j) { return i > j ? i : j; }
+
+} // namespace LAMMPS_NS
+
+#endif
+#endif
+
+    /* ERROR/WARNING messages:
+
+    E: Cannot use chosen neighbor list style with pair mgp/kk
+
+    Self-explanatory.
+
+    */
diff --git a/tests/test_ase_otf.py b/tests/test_ase_otf.py
index bbb8108c6..e31f14255 100644
--- a/tests/test_ase_otf.py
+++ b/tests/test_ase_otf.py
@@ -4,83 +4,100 @@
 import numpy as np
 
 from flare import otf, kernels
+from flare.otf_parser import OtfAnalysis
 from flare.gp import GaussianProcess
 from flare.mgp import MappedGaussianProcess
 from flare.ase.calculator import FLARE_Calculator
 from flare.ase.otf import ASE_OTF
 from flare.utils.parameter_helper import ParameterHelper
-# from flare.ase.logger import OTFLogger
 
+from ase.constraints import FixAtoms
 from ase import units
-from ase.md.velocitydistribution import (MaxwellBoltzmannDistribution,
-                                         Stationary, ZeroRotation)
+from ase.md.velocitydistribution import (
+    MaxwellBoltzmannDistribution,
+    Stationary,
+    ZeroRotation,
+)
 from ase.spacegroup import crystal
 from ase.calculators.espresso import Espresso
 from ase import io
 
+
 def read_qe_results(self):
 
-    out_file = self.label + '.pwo'
+    out_file = self.label + ".pwo"
 
     # find out slurm job id
-    qe_slurm_dat = open('qe_slurm.dat').readlines()[0].split()
+    qe_slurm_dat = open("qe_slurm.dat").readlines()[0].split()
     qe_slurm_id = qe_slurm_dat[3]
 
     # mv scf.pwo to scp+nsteps.pwo
-    if ('forces' not in self.results.keys()) and (out_file in os.listdir()):
-        subprocess.call(['mv', out_file, f'{self.label}{self.nsteps}.pwo'])
-    
+    if ("forces" not in self.results.keys()) and (out_file in os.listdir()):
+        subprocess.call(["mv", out_file, f"{self.label}{self.nsteps}.pwo"])
+
     # sleep until the job is finished
-    job_list = subprocess.check_output(['showq', '-p', 'kozinsky']).decode('utf-8')
+    job_list = subprocess.check_output(["showq", "-p", "kozinsky"]).decode("utf-8")
     while qe_slurm_id in job_list:
         time.sleep(10)
-        job_list = subprocess.check_output(['showq', '-p', 'kozinsky']).decode('utf-8')
+        job_list = subprocess.check_output(["showq", "-p", "kozinsky"]).decode("utf-8")
 
     output = io.read(out_file)
     self.calc = output.calc
     self.results = output.calc.results
 
-md_list = ['VelocityVerlet', 'NVTBerendsen', 'NPTBerendsen', 'NPT', 'Langevin']
 
-@pytest.fixture(scope='module')
+md_list = ["VelocityVerlet", "NVTBerendsen", "NPTBerendsen", "NPT", "Langevin"]
+number_of_steps = 3
+
+@pytest.fixture(scope="module")
 def md_params():
 
-    md_dict = {'temperature': 500}
+    md_dict = {"temperature": 500}
+    print(md_list)
+
     for md_engine in md_list:
-        if md_engine == 'VelocityVerlet':
+        for f in glob.glob(md_engine + "*"):
+            os.remove(f)
+
+        if md_engine == "VelocityVerlet":
             md_dict[md_engine] = {}
         else:
-            md_dict[md_engine] = {'temperature': md_dict['temperature']}
+            md_dict[md_engine] = {"temperature": md_dict["temperature"]}
 
-    md_dict['NVTBerendsen'].update({'taut': 0.5e3 * units.fs})
-    md_dict['NPT'].update({'externalstress': 0, 'ttime': 25, 'pfactor': 3375})
-    md_dict['Langevin'].update({'friction': 0.02})
+    md_dict["NVTBerendsen"].update({"taut": 0.5e3 * units.fs})
+    md_dict["NPT"].update({"externalstress": 0, "ttime": 25, "pfactor": None})
+    md_dict["Langevin"].update({"friction": 0.02})
 
     yield md_dict
     del md_dict
 
 
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
 def super_cell():
 
     from ase.spacegroup import crystal
-    a = 3.855
+
+    a = 10.0
     alpha = 90
-    atoms = crystal(['H', 'He'],
-                    basis=[(0, 0, 0), (0.5, 0.5, 0.5)],
-                    size=(2, 1, 1),
-                    cellpar=[a, a, a, alpha, alpha, alpha])
+    atoms = crystal(
+        ["H", "He"],
+        basis=[(0, 0, 0), (0.5, 0.5, 0.5)],
+        size=(2, 1, 1),
+        cellpar=[a, a, a, alpha, alpha, alpha],
+    )
 
     # jitter positions to give nonzero force on first frame
     for atom_pos in atoms.positions:
         for coord in range(3):
-            atom_pos[coord] += (2*np.random.random()-1) * 0.5
+            atom_pos[coord] += (2 * np.random.random() - 1) * 0.5
+
+    atoms.set_constraint(FixAtoms(indices=[0]))
 
     yield atoms
     del atoms
 
 
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
 def flare_calc():
     flare_calc_dict = {}
     for md_engine in md_list:
@@ -88,42 +105,39 @@ def flare_calc():
         # ---------- create gaussian process model -------------------
 
         # set up GP hyperparameters
-        kernels = ['twobody', 'threebody'] # use 2+3 body kernel
-        parameters = {'cutoff_twobody': 5.0,
-                      'cutoff_threebody': 3.5}
-        pm = ParameterHelper(
-            kernels = kernels,
-            random = True,
-            parameters=parameters
-        )
+        kernels = ["twobody", "threebody"]  # use 2+3 body kernel
+        parameters = {"cutoff_twobody": 5.0, "cutoff_threebody": 3.5}
+        pm = ParameterHelper(kernels=kernels, random=True, parameters=parameters)
 
         hm = pm.as_dict()
-        hyps = hm['hyps']
-        cut = hm['cutoffs']
-        print('hyps', hyps)
+        hyps = hm["hyps"]
+        cut = hm["cutoffs"]
+        print("hyps", hyps)
 
         gp_model = GaussianProcess(
-            kernels = kernels,
-            component = 'sc', # single-component. For multi-comp, use 'mc'
-            hyps = hyps,
-            cutoffs = cut,
-            hyp_labels = ['sig2','ls2','sig3','ls3','noise'],
-            opt_algorithm = 'L-BFGS-B',
-            n_cpus = 1
+            kernels=kernels,
+            component="sc",  # single-component. For multi-comp, use 'mc'
+            hyps=hyps,
+            cutoffs=cut,
+            hyp_labels=["sig2", "ls2", "sig3", "ls3", "noise"],
+            opt_algorithm="L-BFGS-B",
+            n_cpus=1,
         )
 
         # ----------- create mapped gaussian process ------------------
-        grid_params = {'twobody':   {'grid_num': [64]},
-                       'threebody': {'grid_num': [16, 16, 16]}}
+        grid_params = {
+            "twobody": {"grid_num": [64]},
+            "threebody": {"grid_num": [16, 16, 16]},
+        }
 
-        mgp_model = MappedGaussianProcess(grid_params = grid_params,
-                                          unique_species = [1, 2],
-                                          n_cpus = 1,
-                                          var_map = 'pca')
+        mgp_model = MappedGaussianProcess(
+            grid_params=grid_params, unique_species=[1, 2], n_cpus=1, var_map="pca"
+        )
 
         # ------------ create ASE's flare calculator -----------------------
-        flare_calculator = FLARE_Calculator(gp_model, mgp_model=mgp_model,
-                                            par=True, use_mapping=True)
+        flare_calculator = FLARE_Calculator(
+            gp_model, mgp_model=mgp_model, par=True, use_mapping=True
+        )
 
         flare_calc_dict[md_engine] = flare_calculator
         print(md_engine)
@@ -131,53 +145,54 @@ def flare_calc():
     del flare_calc_dict
 
 
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
 def qe_calc():
     from ase.calculators.lj import LennardJones
+
     dft_calculator = LennardJones()
 
     yield dft_calculator
     del dft_calculator
 
 
-@pytest.mark.parametrize('md_engine', md_list)
+@pytest.mark.parametrize("md_engine", md_list)
 def test_otf_md(md_engine, md_params, super_cell, flare_calc, qe_calc):
     np.random.seed(12345)
 
     flare_calculator = flare_calc[md_engine]
     # set up OTF MD engine
-    otf_params = {'init_atoms': [0, 1, 2, 3],
-                  'output_name': md_engine,
-                  'std_tolerance_factor': 2,
-                  'max_atoms_added' : len(super_cell.positions),
-                  'freeze_hyps': 10}
-#                  'use_mapping': flare_calculator.use_mapping}
+    otf_params = {
+        "init_atoms": [0, 1, 2, 3],
+        "output_name": md_engine,
+        "std_tolerance_factor": 2,
+        "max_atoms_added": len(super_cell.positions),
+        "freeze_hyps": 10,
+    }
+    #                  'use_mapping': flare_calculator.use_mapping}
 
     md_kwargs = md_params[md_engine]
 
     # intialize velocity
-    temperature = md_params['temperature']
+    temperature = md_params["temperature"]
     MaxwellBoltzmannDistribution(super_cell, temperature * units.kB)
     Stationary(super_cell)  # zero linear momentum
     ZeroRotation(super_cell)  # zero angular momentum
 
     super_cell.set_calculator(flare_calculator)
-    test_otf = ASE_OTF(super_cell,
-                       timestep = 1 * units.fs,
-                       number_of_steps = 3,
-                       dft_calc = qe_calc,
-                       md_engine = md_engine,
-                       md_kwargs = md_kwargs,
-                       **otf_params)
+    test_otf = ASE_OTF(
+        super_cell,
+        timestep=1 * units.fs,
+        number_of_steps=number_of_steps,
+        dft_calc=qe_calc,
+        md_engine=md_engine,
+        md_kwargs=md_kwargs,
+        trajectory=md_engine+"_otf.traj",
+        **otf_params,
+    )
 
     # TODO: test if mgp matches gp
     # TODO: see if there's difference between MD timestep & OTF timestep
 
-    # set up logger
-#    otf_logger = OTFLogger(test_otf, super_cell,
-#        logfile=md_engine+'.log', mode="w", data_in_logfile=True)
-#    test_otf.attach(otf_logger, interval=1)
-
     test_otf.run()
 
     for f in glob.glob("scf*.pw*"):
@@ -190,9 +205,30 @@ def test_otf_md(md_engine, md_params, super_cell, flare_calc, qe_calc):
         shutil.rmtree(f, ignore_errors=True)
     for f in glob.glob("out"):
         shutil.rmtree(f, ignore_errors=True)
-
     for f in os.listdir("./"):
-        if md_engine in f or 'lmp.mgp' in f:
+        if ".mgp" in f or ".var" in f:
             os.remove(f)
-        if 'slurm' in f:
+        if "slurm" in f:
             os.remove(f)
+
+@pytest.mark.parametrize("md_engine", md_list)
+def test_load_checkpoint(md_engine):
+    new_otf = ASE_OTF.from_checkpoint(md_engine + "_checkpt.json")
+    assert new_otf.curr_step == number_of_steps
+    new_otf.number_of_steps = new_otf.number_of_steps + 2
+    new_otf.run()
+
+@pytest.mark.parametrize("md_engine", md_list)
+def test_otf_parser(md_engine):
+    output_name = f"{md_engine}.out"
+    otf_traj = OtfAnalysis(output_name)
+    try:
+        replicated_gp = otf_traj.make_gp()
+    except:
+        init_flare = FLARE_Calculator.from_file(md_engine + "_flare.json")
+        replicated_gp = otf_traj.make_gp(init_gp=init_flare.gp_model)
+
+    print("ase otf traj parsed")
+
+    for f in glob.glob(md_engine + "*"):
+        os.remove(f)
diff --git a/tests/test_lmp.py b/tests/test_lmp.py
deleted file mode 100644
index b335b6b22..000000000
--- a/tests/test_lmp.py
+++ /dev/null
@@ -1,178 +0,0 @@
-import numpy as np
-import time
-import pytest
-import os, pickle, re, shutil
-
-from flare import struc, env, gp
-from flare import otf_parser
-from flare.ase.calculator import FLARE_Calculator
-from flare.ase.atoms import FLARE_Atoms
-from flare.mgp import MappedGaussianProcess
-from flare.lammps import lammps_calculator
-from flare.utils.element_coder import _Z_to_mass, _element_to_Z
-from ase.calculators.lammpsrun import LAMMPS
-
-from tests.fake_gp import get_gp, get_random_structure
-
-curr_path = os.getcwd()
-force_block_only = True
-
-def clean():
-    for f in os.listdir("./"):
-        if re.search(r"grid.*npy", f):
-            os.remove(f)
-        if re.search("kv3", f):
-            os.rmdir(f)
-
-
-# ASSUMPTION: You have a Lammps executable with the mgp pair style with $lmp
-# as the corresponding environment variable.
-@pytest.mark.skipif(not os.environ.get('lmp',
-                          False), reason='lmp not found '
-                                  'in environment: Please install LAMMPS '
-                                  'and set the $lmp env. '
-                                  'variable to point to the executatble.')
-@pytest.fixture(scope='module')
-def gp_model():
-    np.random.seed(0)
-    # TO DO, should be 2+3 eventually
-    gauss = get_gp('2', 'mc', False, cellabc=[1, 1, 1],
-                   force_only=force_block_only, noa=5)
-    gauss.parallel = True
-    gauss.n_cpus = 2
-    yield gauss
-    del gauss
-
-
-@pytest.fixture(scope='module')
-def mgp_model(gp_model):
-    """
-    test the init function
-    """
-
-    grid_params={}
-    if 'twobody' in gp_model.kernels:
-        grid_params['twobody']={'grid_num': [64],
-                                'lower_bound':[0.1]}
-    if 'threebody' in gp_model.kernels:
-        grid_params['threebody']={'grid_num': [16]*3,
-                                  'lower_bound':[0.1]*3}
-    species_list = [1, 2, 3]
-    lammps_location = f'tmp_lmp'
-    mapped_model = MappedGaussianProcess(grid_params=grid_params, unique_species=species_list, n_cpus=1,
-                lmp_file_name=lammps_location, var_map='simple')
-
-    # import flare.mgp.mapxb
-    # flare.mgp.mapxb.global_use_grid_kern = False
-
-    mapped_model.build_map(gp_model)
-
-    yield mapped_model
-    del mapped_model
-
-
-@pytest.fixture(scope='module')
-def ase_calculator(gp_model, mgp_model):
-    """
-    test the mapping for mc_simple kernel
-    """
-    cal = FLARE_Calculator(gp_model, mgp_model, par=False, use_mapping=True)
-    yield cal
-    del cal
-
-
-@pytest.fixture(scope='module')
-def lmp_calculator(gp_model, mgp_model):
-
-    species = gp_model.training_statistics['species']
-    specie_symbol_list = " ".join(species)
-    masses=[f"{i} {_Z_to_mass[_element_to_Z[species[i]]]}" for i in range(len(species))]
-
-    # set up input params
-    label = 'tmp_lmp'
-    by = 'yes' if 'twobody' in gp_model.kernels else 'no'
-    ty = 'yes' if 'threebody' in gp_model.kernels else 'no'
-    parameters = {'command': os.environ.get('lmp'), # set up executable for ASE
-                  'newton': 'off',
-                  'pair_style': 'mgp',
-                  'pair_coeff': [f'* * {label}.mgp {specie_symbol_list} {by} {ty}'],
-                  'mass': masses}
-    files = [f'{label}.mgp']
-
-    # create ASE calc
-    lmp_calc = LAMMPS(label=f'tmp{label}', keep_tmp_files=True, tmp_dir='./tmp/',
-            parameters=parameters, files=files, specorder=species)
-    yield lmp_calc
-    del lmp_calc
-
-
-@pytest.mark.skipif(not os.environ.get('lmp',
-                          False), reason='lmp not found '
-                                  'in environment: Please install LAMMPS '
-                                  'and set the $lmp env. '
-                                  'variable to point to the executatble.')
-def test_lmp_predict(gp_model, mgp_model, ase_calculator, lmp_calculator):
-    """
-    test the lammps implementation
-    """
-
-    currdir = os.getcwd()
-
-    label = 'tmp_lmp'
-
-    for f in os.listdir("./"):
-        if label in f:
-            os.remove(f)
-        if f in ['log.lammps']:
-            os.remove(f)
-    clean()
-
-    lammps_location = mgp_model.lmp_file_name
-
-    # lmp file is automatically written now every time MGP is constructed
-    mgp_model.write_lmp_file(lammps_location)
-
-    # create test structure
-    np.random.seed(1)
-    cell = np.diag(np.array([1, 1, 1])) * 4
-    nenv = 10
-    unique_species = gp_model.training_statistics['species']
-    cutoffs = gp_model.cutoffs
-    struc_test, f = get_random_structure(cell, unique_species, nenv)
-
-    # build ase atom from struc
-    ase_atoms_flare = struc_test.to_ase_atoms()
-    ase_atoms_flare = FLARE_Atoms.from_ase_atoms(ase_atoms_flare)
-    ase_atoms_flare.set_calculator(ase_calculator)
-
-    ase_atoms_lmp = struc_test.to_ase_atoms()
-    ase_atoms_lmp.set_calculator(lmp_calculator)
-
-    try:
-        lmp_en = ase_atoms_lmp.get_potential_energy()
-        flare_en = ase_atoms_flare.get_potential_energy()
-
-        lmp_stress = ase_atoms_lmp.get_stress()
-        flare_stress = ase_atoms_flare.get_stress()
-
-        lmp_forces = ase_atoms_lmp.get_forces()
-        flare_forces = ase_atoms_flare.get_forces()
-    except Exception as e:
-        os.chdir(currdir)
-        print(e)
-        raise e
-
-    os.chdir(currdir)
-
-    # check that lammps agrees with mgp to within 1 meV/A
-    print("energy", lmp_en, flare_en)
-    assert np.isclose(lmp_en, flare_en, atol=1e-3).all()
-    print("force", lmp_forces, flare_forces)
-    assert np.isclose(lmp_forces, flare_forces, atol=1e-3).all()
-    print("stress", lmp_stress, flare_stress)
-    assert np.isclose(lmp_stress, flare_stress, atol=1e-3).all()
-
-    # for f in os.listdir('./'):
-    #     if (label in f) or (f in ['log.lammps']):
-    #         os.remove(f)
-
diff --git a/tests/test_mgp.py b/tests/test_mgp.py
index a883a8d22..f3d69e69b 100644
--- a/tests/test_mgp.py
+++ b/tests/test_mgp.py
@@ -13,15 +13,18 @@
 from flare.parameters import Parameters
 from flare.mgp import MappedGaussianProcess
 from flare.lammps import lammps_calculator
-from flare.utils.element_coder import _Z_to_mass, _Z_to_element
+from flare.utils.element_coder import _Z_to_mass, _Z_to_element, _element_to_Z
+from flare.ase.calculator import FLARE_Calculator
+from flare.ase.atoms import FLARE_Atoms
+from ase.calculators.lammpsrun import LAMMPS
 
 from .fake_gp import get_gp, get_random_structure
 from .mgp_test import clean, compare_triplet, predict_atom_diag_var
 
 body_list = ['2', '3']
-multi_list = [False, True]
+multi_list = [True, False]
 force_block_only = False
-
+curr_path = os.getcwd()
 
 @pytest.mark.skipif(not os.environ.get('lmp',
                           False), reason='lmp not found '
@@ -56,6 +59,37 @@ def all_mgp():
     yield allmgp_dict
     del allmgp_dict
 
+
+@pytest.fixture(scope='module')
+def all_lmp():
+
+    all_lmp_dict = {}
+    species = ['H', 'He'] 
+    specie_symbol_list = " ".join(species)
+    masses = [f"{i} {_Z_to_mass[_element_to_Z[species[i]]]}" for i in range(len(species))]
+    parameters = {'command': os.environ.get('lmp'), # set up executable for ASE
+                  'newton': 'off',
+                  'pair_style': 'mgp',
+                  'mass': masses}
+
+    # set up input params
+    for bodies in body_list:
+        for multihyps in multi_list:
+            # create ASE calc
+            label = f'{bodies}{multihyps}'
+            files = [f'{label}.mgp']
+            by = 'yes' if bodies == '2' else 'no'
+            ty = 'yes' if bodies == '3' else 'no'
+            parameters['pair_coeff'] = [f'* * {label}.mgp {specie_symbol_list} {by} {ty}']
+
+            lmp_calc = LAMMPS(label=label, keep_tmp_files=True, tmp_dir='./tmp/',
+                    parameters=parameters, files=files, specorder=species)
+            all_lmp_dict[f'{bodies}{multihyps}'] = lmp_calc
+
+    yield all_lmp_dict
+    del all_lmp_dict
+
+
 @pytest.mark.parametrize('bodies', body_list)
 @pytest.mark.parametrize('multihyps', multi_list)
 def test_init(bodies, multihyps, all_mgp, all_gp):
@@ -70,7 +104,7 @@ def test_init(bodies, multihyps, all_mgp, all_gp):
     # grid parameters
     grid_params = {}
     if ('2' in bodies):
-        grid_params['twobody'] = {'grid_num': [128], 'lower_bound': [0.02]}
+        grid_params['twobody'] = {'grid_num': [160], 'lower_bound': [0.02]}
     if ('3' in bodies):
         grid_params['threebody'] = {'grid_num': [31, 32, 33], 'lower_bound':[0.02]*3}
 
@@ -265,77 +299,63 @@ def test_predict(all_gp, all_mgp, bodies, multihyps):
                                   'variable to point to the executatble.')
 @pytest.mark.parametrize('bodies', body_list)
 @pytest.mark.parametrize('multihyps', multi_list)
-def test_lmp_predict(all_gp, all_mgp, bodies, multihyps):
+def test_lmp_predict(all_lmp, all_gp, all_mgp, bodies, multihyps):
     """
     test the lammps implementation
     """
 
-    pytest.skip()
+    #pytest.skip()
 
     prefix = f'{bodies}{multihyps}'
 
-    mgp_model = all_mgp[f'{bodies}{multihyps}']
-    gp_model = all_gp[f'{bodies}{multihyps}']
-    lammps_location = mgp_model.lmp_file_name
-
+    mgp_model = all_mgp[prefix]
+    gp_model = all_gp[prefix]
+    lmp_calculator = all_lmp[prefix]
+    ase_calculator = FLARE_Calculator(gp_model, mgp_model, par=False, use_mapping=True)
+    
     # create test structure
-    cell = 5*np.eye(3)
+    np.random.seed(1)
+    cell = np.diag(np.array([1, 1, 1])) * 4
     nenv = 10
-    unique_species = gp_model.training_data[0].species
+    unique_species = gp_model.training_statistics['species']
     cutoffs = gp_model.cutoffs
     struc_test, f = get_random_structure(cell, unique_species, nenv)
-    atom_num = 1
-    test_envi = env.AtomicEnvironment(struc_test, atom_num, cutoffs, cutoffs_mask=gp_model.hyps_mask)
-
-    all_species=list(set(struc_test.coded_species))
-    atom_types = list(np.arange(len(all_species))+1)
-    atom_masses=[_Z_to_mass[spec] for spec in all_species]
-    atom_species = [ all_species.index(spec)+1 for spec in struc_test.coded_species]
-    specie_symbol_list = " ".join([_Z_to_element[spec] for spec in all_species])
-
-    # create data file
-    data_file_name = f'{prefix}.data'
-    data_text = lammps_calculator.lammps_dat(struc_test, atom_types,
-                                             atom_masses, atom_species)
-    lammps_calculator.write_text(data_file_name, data_text)
-
-    # create lammps input
-    by = 'no'
-    ty = 'no'
-    if '2' in bodies:
-        by = 'yes'
-    if '3' in bodies:
-        ty = 'yes'
-
-    style_string = 'mgp'
-
-    coeff_string = f'* * {lammps_location}.mgp {specie_symbol_list} {by} {ty}'
-    std_string = f'{lammps_location}.var {specie_symbol_list} {by} {ty}'
-    lammps_executable = os.environ.get('lmp')
-    dump_file_name = f'{prefix}.dump'
-    input_file_name = f'{prefix}.in'
-    output_file_name = f'{prefix}.out'
-    input_text = \
-        lammps_calculator.generic_lammps_input(data_file_name, style_string,
-                                               coeff_string, dump_file_name,
-                                               newton=False, std_string=std_string)
-    lammps_calculator.write_text(input_file_name, input_text)
-
-    lammps_calculator.run_lammps(lammps_executable, input_file_name,
-                                 output_file_name)
-
-    pred_std = True
-    lammps_forces, lammps_stds = lammps_calculator.lammps_parser(dump_file_name, std=pred_std)
-    mgp_pred = mgp_model.predict(test_envi)
 
-    # check that lammps agrees with gp to within 1 meV/A
-    for i in range(3):
-        print("isclose? diff:", lammps_forces[atom_num, i]-mgp_pred[0][i], "mgp value", mgp_pred[0][i])
-        assert np.isclose(lammps_forces[atom_num, i], mgp_pred[0][i], rtol=1e-2)
+    # build ase atom from struc
+    ase_atoms_flare = struc_test.to_ase_atoms()
+    ase_atoms_flare = FLARE_Atoms.from_ase_atoms(ase_atoms_flare)
+    ase_atoms_flare.set_calculator(ase_calculator)
+
+    ase_atoms_lmp = deepcopy(struc_test).to_ase_atoms()
+    ase_atoms_lmp.set_calculator(lmp_calculator)
+
+    try:
+        lmp_en = ase_atoms_lmp.get_potential_energy()
+        flare_en = ase_atoms_flare.get_potential_energy()
+
+        lmp_stress = ase_atoms_lmp.get_stress()
+        flare_stress = ase_atoms_flare.get_stress()
+
+        lmp_forces = ase_atoms_lmp.get_forces()
+        flare_forces = ase_atoms_flare.get_forces()
+    except Exception as e:
+        os.chdir(curr_path)
+        print(e)
+        raise e
+
+    os.chdir(curr_path)
+
+    # check that lammps agrees with mgp to within 1 meV/A
+    print("energy", lmp_en-flare_en, flare_en)
+    assert np.isclose(lmp_en, flare_en, atol=1e-3)
+    print("force", lmp_forces-flare_forces, flare_forces)
+    assert np.isclose(lmp_forces, flare_forces, atol=1e-3).all()
+    print("stress", lmp_stress-flare_stress, flare_stress)
+    assert np.isclose(lmp_stress, flare_stress, atol=1e-3).all()
 
     # check the lmp var
-    mgp_std = np.sqrt(mgp_pred[1])
-    print("isclose? diff:", lammps_stds[atom_num]-mgp_std, "mgp value", mgp_std)
-    assert np.isclose(lammps_stds[atom_num], mgp_std, rtol=1e-2)
+    # mgp_std = np.sqrt(mgp_pred[1])
+    # print("isclose? diff:", lammps_stds[atom_num]-mgp_std, "mgp value", mgp_std)
+    # assert np.isclose(lammps_stds[atom_num], mgp_std, rtol=1e-2)
 
     clean(prefix=prefix)
diff --git a/tests/test_otf.py b/tests/test_otf.py
index c0f8ff3d8..fecfae28b 100644
--- a/tests/test_otf.py
+++ b/tests/test_otf.py
@@ -5,29 +5,45 @@
 import numpy as np
 
 from flare.otf import OTF
+from flare.otf_parser import OtfAnalysis
 from flare.gp import GaussianProcess
 from flare.struc import Structure
 
 
-cmd = {'cp2k':'CP2K_COMMAND', 'qe':'PWSCF_COMMAND'}
-software_list = ['cp2k', 'qe']
+cmd = {"cp2k": "CP2K_COMMAND", "qe": "PWSCF_COMMAND"}
+software_list = ["cp2k", "qe"]
 example_list = [1, 2]
-name_list = {1:'h2', 2:'al'}
+name_list = {1: "h2", 2: "al"}
 
-print('running test_otf.py')
-print('current working directory:')
+dt = 0.0001
+number_of_steps = 3
+
+print("running test_otf.py")
+print("current working directory:")
 print(os.getcwd())
 
+
 def get_gp(par=False, per_atom_par=False, n_cpus=1):
     hyps = np.array([1, 1, 1, 1, 1])
-    hyp_labels = ['Signal Std 2b', 'Length Scale 2b',
-                  'Signal Std 3b', 'Length Scale 3b',
-                  'Noise Std']
-    cutoffs = {'twobody':4, 'threebody':4}
-    return GaussianProcess(\
-            kernel_name='23mc', hyps=hyps, cutoffs=cutoffs,
-            hyp_labels=hyp_labels, maxiter=50, par=par,
-            per_atom_par=per_atom_par, n_cpus=n_cpus)
+    hyp_labels = [
+        "Signal Std 2b",
+        "Length Scale 2b",
+        "Signal Std 3b",
+        "Length Scale 3b",
+        "Noise Std",
+    ]
+    cutoffs = {"twobody": 4, "threebody": 4}
+    return GaussianProcess(
+        kernel_name="23mc",
+        hyps=hyps,
+        cutoffs=cutoffs,
+        hyp_labels=hyp_labels,
+        maxiter=50,
+        par=par,
+        per_atom_par=per_atom_par,
+        n_cpus=n_cpus,
+    )
+
 
 def cleanup(software="qe", casename="h2_otf_cp2k"):
 
@@ -38,13 +54,13 @@ def cleanup(software="qe", casename="h2_otf_cp2k"):
     for f in glob.glob(f"*{software}.out"):
         os.remove(f)
 
-    if (software == 'qe'):
+    if software == "qe":
         for f in glob.glob(f"pwscf.wfc*"):
             os.remove(f)
         for f in glob.glob(f"*pwscf.out"):
             os.remove(f)
         for f in os.listdir("./"):
-            if f in ['pwscf.save']:
+            if f in ["pwscf.save"]:
                 shutil.rmtree(f)
     else:
         for f in os.listdir("./"):
@@ -52,65 +68,66 @@ def cleanup(software="qe", casename="h2_otf_cp2k"):
                 os.remove(f)
 
 
-@pytest.mark.parametrize('software', software_list)
-@pytest.mark.parametrize('example', example_list)
+@pytest.mark.parametrize("software", software_list)
+@pytest.mark.parametrize("example", example_list)
 def test_otf(software, example):
     """
     Test that an otf run can survive going for more steps
     :return:
     """
 
-    print('running test_otf.py')
-    print('current working directory:')
+    print("running test_otf.py")
+    print("current working directory:")
     print(os.getcwd())
 
-    outdir = f'test_outputs_{software}'
+    outdir = f"test_outputs_{software}"
     if os.path.isdir(outdir):
         shutil.rmtree(outdir)
 
-    if (not os.environ.get(cmd[software], False)):
-        pytest.skip(f'{cmd[software]} not found in environment:'
-                    f' Please install the code '
-                    f' and set the {cmd[software]} env. '
-                    'variable to point to the executable.')
+    if not os.environ.get(cmd[software], False):
+        pytest.skip(
+            f"{cmd[software]} not found in environment:"
+            f" Please install the code "
+            f" and set the {cmd[software]} env. "
+            "variable to point to the executable."
+        )
 
-    dft_input = f'{software}.in'
-    dft_output = f'{software}.out'
-    shutil.copy(f'./test_files/{software}_input_{example}.in', dft_input)
+    dft_input = f"{software}.in"
+    dft_output = f"{software}.out"
+    shutil.copy(f"./test_files/{software}_input_{example}.in", dft_input)
 
-    dt = 0.0001
-    number_of_steps = 5
     dft_loc = os.environ.get(cmd[software])
     std_tolerance_factor = -0.1
 
-    casename=name_list[example]
+    casename = name_list[example]
 
     gp = get_gp()
 
-    otf = OTF(dt=dt, number_of_steps=number_of_steps,
-              gp=gp, write_model=3,
-              std_tolerance_factor=std_tolerance_factor,
-              init_atoms=[0],
-              calculate_energy=True, max_atoms_added=1,
-              freeze_hyps=1, skip=1,
-              force_source=software,
-              dft_input=dft_input, dft_loc=dft_loc,
-              dft_output=dft_output,
-              output_name=f'{casename}_otf_{software}',
-              store_dft_output=([dft_output, dft_input], '.'))
+    otf = OTF(
+        dt=dt,
+        number_of_steps=number_of_steps,
+        gp=gp,
+        write_model=3,
+        std_tolerance_factor=std_tolerance_factor,
+        init_atoms=[0],
+        calculate_energy=True,
+        max_atoms_added=1,
+        freeze_hyps=1,
+        skip=1,
+        force_source=software,
+        dft_input=dft_input,
+        dft_loc=dft_loc,
+        dft_output=dft_output,
+        output_name=f"{casename}_otf_{software}",
+        store_dft_output=([dft_output, dft_input], "."),
+    )
 
     otf.run()
 
-    if not os.path.isdir(outdir):
-         os.mkdir(outdir)
-    for f in os.listdir("./"):
-        if f'{casename}_otf_{software}' in f:
-            shutil.move(f, outdir)
-    cleanup(software, f'{casename}_otf_{software}')
 
-@pytest.mark.parametrize('software', software_list)
-@pytest.mark.parametrize('per_atom_par', [True, False])
-@pytest.mark.parametrize('n_cpus', [2])
+@pytest.mark.parametrize("software", software_list)
+@pytest.mark.parametrize("per_atom_par", [True, False])
+@pytest.mark.parametrize("n_cpus", [2])
 def test_otf_par(software, per_atom_par, n_cpus):
     """
     Test that an otf run can survive going for more steps
@@ -118,30 +135,29 @@ def test_otf_par(software, per_atom_par, n_cpus):
     """
 
     example = 1
-    outdir = f'test_outputs_{software}'
+    outdir = f"test_outputs_{software}"
     if os.path.isdir(outdir):
         shutil.rmtree(outdir)
 
-    print('running test_otf.py')
-    print('current working directory:')
+    print("running test_otf.py")
+    print("current working directory:")
     print(os.getcwd())
 
-    if (not os.environ.get(cmd[software], False)):
-        pytest.skip(f'{cmd[software]} not found in environment:'
-                    f' Please install the code '
-                    f' and set the {cmd[software]} env. '
-                    'variable to point to the executable.')
-    if (software == 'cp2k'):
-        if (not "popt" in os.environ.get(cmd[software])):
-            pytest.skip(f'cp2k is serial version'
-                        f' skipping the parallel test')
-
-    dft_input = f'{software}.in'
-    dft_output = f'{software}.out'
-    shutil.copy(f'./test_files/{software}_input_{example}.in', dft_input)
-
-    dt = 0.0001
-    number_of_steps = 5
+    if not os.environ.get(cmd[software], False):
+        pytest.skip(
+            f"{cmd[software]} not found in environment:"
+            f" Please install the code "
+            f" and set the {cmd[software]} env. "
+            "variable to point to the executable."
+        )
+    if software == "cp2k":
+        if not "popt" in os.environ.get(cmd[software]):
+            pytest.skip(f"cp2k is serial version" f" skipping the parallel test")
+
+    dft_input = f"{software}.in"
+    dft_output = f"{software}.out"
+    shutil.copy(f"./test_files/{software}_input_{example}.in", dft_input)
+
     dft_loc = os.environ.get(cmd[software])
     std_tolerance_factor = -0.1
 
@@ -149,22 +165,100 @@ def test_otf_par(software, per_atom_par, n_cpus):
 
     gp = get_gp(par=True, n_cpus=n_cpus, per_atom_par=per_atom_par)
 
-    otf = OTF(dt=dt, number_of_steps=number_of_steps, gp=gp,
-              std_tolerance_factor=std_tolerance_factor, init_atoms=[0],
-              calculate_energy=True, max_atoms_added=1,
-              n_cpus=n_cpus,
-              freeze_hyps=1, skip=1,
-              mpi="mpi", force_source=software,
-              dft_input=dft_input, dft_loc=dft_loc,
-              dft_output=dft_output,
-              output_name=f'{casename}_otf_{software}',
-              store_dft_output=([dft_output, dft_input], '.'))
+    otf = OTF(
+        dt=dt,
+        number_of_steps=number_of_steps,
+        gp=gp,
+        write_model=3,
+        std_tolerance_factor=std_tolerance_factor,
+        init_atoms=[0],
+        calculate_energy=True,
+        max_atoms_added=1,
+        n_cpus=n_cpus,
+        freeze_hyps=1,
+        skip=1,
+        mpi="mpi",
+        force_source=software,
+        dft_input=dft_input,
+        dft_loc=dft_loc,
+        dft_output=dft_output,
+        output_name=f"{casename}_otf_{software}",
+        store_dft_output=([dft_output, dft_input], "."),
+    )
 
     otf.run()
 
+@pytest.mark.parametrize("software", software_list)
+def test_otf_parser(software):
+
+    if not os.environ.get(cmd[software], False):
+        pytest.skip(
+            f"{cmd[software]} not found in environment:"
+            f" Please install the code "
+            f" and set the {cmd[software]} env. "
+            "variable to point to the executable."
+        )
+
+    example = 1
+    casename = name_list[example]
+    output_name = f"{casename}_otf_{software}.out"
+    otf_traj = OtfAnalysis(output_name)
+    replicated_gp = otf_traj.make_gp()
+
+
+@pytest.mark.parametrize("software", software_list)
+def test_load_checkpoint(software):
+
+    if not os.environ.get(cmd[software], False):
+        pytest.skip(
+            f"{cmd[software]} not found in environment:"
+            f" Please install the code "
+            f" and set the {cmd[software]} env. "
+            "variable to point to the executable."
+        )
+
+    if software == "cp2k":
+        pytest.skip()
+
+    example = 1
+    casename = name_list[example]
+    log_name = f"{casename}_otf_{software}"
+    new_otf = OTF.from_checkpoint(log_name + "_checkpt.json")
+    print("loaded from checkpoint", log_name)
+    assert new_otf.curr_step == number_of_steps
+    new_otf.number_of_steps = new_otf.number_of_steps + 2
+    new_otf.run()
+
+
+@pytest.mark.parametrize("software", software_list)
+def test_otf_parser_from_checkpt(software):
+
+    if not os.environ.get(cmd[software], False):
+        pytest.skip(
+            f"{cmd[software]} not found in environment:"
+            f" Please install the code "
+            f" and set the {cmd[software]} env. "
+            "variable to point to the executable."
+        )
+
+    if software == "cp2k":
+        pytest.skip()
+
+    example = 1
+    casename = name_list[example]
+    log_name = f"{casename}_otf_{software}"
+    output_name = f"{log_name}.out"
+    otf_traj = OtfAnalysis(output_name)
+    try:
+        replicated_gp = otf_traj.make_gp()
+    except:
+        init_gp = GaussianProcess.from_file(log_name + "_gp.json")
+        replicated_gp = otf_traj.make_gp(init_gp=init_gp)
+
+    outdir = f"test_outputs_{software}"
     if not os.path.isdir(outdir):
-         os.mkdir(outdir)
+        os.mkdir(outdir)
     for f in os.listdir("./"):
-        if f'{casename}_otf_{software}' in f:
+        if f"{casename}_otf_{software}" in f:
             shutil.move(f, outdir)
-    cleanup(software, f'{casename}_otf_{software}')
+    cleanup(software, f"{casename}_otf_{software}")
diff --git a/tests/test_parse_otf.py b/tests/test_parse_otf.py
index cd495b707..61c165aa7 100644
--- a/tests/test_parse_otf.py
+++ b/tests/test_parse_otf.py
@@ -8,47 +8,48 @@
 
 def test_parse_header():
 
-    os.system('cp test_files/sample_slab_otf.out .')
+    os.system("cp test_files/sample_slab_otf.out .")
 
-    header_dict = OtfAnalysis('sample_slab_otf.out').header
+    header_dict = OtfAnalysis("sample_slab_otf.out").header
 
-    assert header_dict['frames'] == 5000
-    assert header_dict['atoms'] == 28
-    assert header_dict['species_set'] == {'Al'}
-    assert header_dict['dt'] == .001
-    assert header_dict['kernel_name'] == 'two_plus_three_body'
-    assert header_dict['n_hyps'] == 5
-    assert header_dict['algo'] == 'BFGS'
-    assert np.equal(header_dict['cell'],
-                    np.array([[8.59135,  0.,       0.],
-                              [4.29567,  7.44033,  0.],
-                              [0., 0., 26.67654]])).all()
+    assert header_dict["frames"] == 5000
+    assert header_dict["atoms"] == 28
+    assert header_dict["species_set"] == {"Al"}
+    assert header_dict["dt"] == 0.001
+    assert header_dict["kernel_name"] == "two_plus_three_body"
+    assert header_dict["n_hyps"] == 5
+    assert header_dict["algo"] == "BFGS"
+    assert np.equal(
+        header_dict["cell"],
+        np.array([[8.59135, 0.0, 0.0], [4.29567, 7.44033, 0.0], [0.0, 0.0, 26.67654]]),
+    ).all()
+
+    os.system("rm sample_slab_otf.out")
 
-    os.system('rm sample_slab_otf.out')
 
 def test_gp_parser():
     """
     Test the capability of otf parser to read GP/DFT info
     :return:
     """
-    os.system('cp test_files/sample_slab_otf.out .')
-    parsed = OtfAnalysis('sample_slab_otf.out')
-    assert (parsed.gp_species_list == [['Al']*28] * 4)
+    os.system("cp test_files/sample_slab_otf.out .")
+    parsed = OtfAnalysis("sample_slab_otf.out")
+    assert parsed.gp_species_list == [["Al"] * 28] * 4
 
     gp_positions = parsed.gp_position_list
     assert len(gp_positions) == 4
 
     pos1 = 1.50245891
     pos2 = 10.06179079
-    assert(pos1 == gp_positions[0][-1][1])
-    assert(pos2 == gp_positions[-1][0][2])
+    assert pos1 == gp_positions[0][-1][1]
+    assert pos2 == gp_positions[-1][0][2]
 
     force1 = 0.29430943
     force2 = -0.02350709
-    assert(force1 == parsed.gp_force_list[0][-1][1])
-    assert(force2 == parsed.gp_force_list[-1][0][2])
+    assert force1 == parsed.gp_force_list[0][-1][1]
+    assert force2 == parsed.gp_force_list[-1][0][2]
 
-    os.system('rm sample_slab_otf.out')
+    os.system("rm sample_slab_otf.out")
 
 
 def test_md_parser():
@@ -56,19 +57,20 @@ def test_md_parser():
     Test the capability of otf parser to read MD info
     :return:
     """
-    os.system('cp test_files/sample_slab_otf.out .')
-    parsed = OtfAnalysis('sample_slab_otf.out')
+    os.system("cp test_files/sample_slab_otf.out .")
+    parsed = OtfAnalysis("sample_slab_otf.out")
 
     pos1 = 10.09769665
-    assert(pos1 == parsed.position_list[0][0][2])
-    assert(len(parsed.position_list[0]) == 28)
+    assert pos1 == parsed.position_list[0][0][2]
+    assert len(parsed.position_list[0]) == 28
+
+    os.system("rm sample_slab_otf.out")
 
-    os.system('rm sample_slab_otf.out')
 
 def test_output_md_structures():
 
-    os.system('cp test_files/sample_slab_otf.out .')
-    parsed = OtfAnalysis('sample_slab_otf.out')
+    os.system("cp test_files/sample_slab_otf.out .")
+    parsed = OtfAnalysis("sample_slab_otf.out")
 
     positions = parsed.position_list
     forces = parsed.force_list
@@ -78,7 +80,7 @@ def test_output_md_structures():
     assert np.isclose(structures[-1].positions, positions[-1]).all()
     assert np.isclose(structures[-1].forces, forces[-1]).all()
 
-    os.system('rm sample_slab_otf.out')
+    os.system("rm sample_slab_otf.out")
 
 
 def test_replicate_gp():
@@ -89,8 +91,8 @@ def test_replicate_gp():
     :return:
     """
 
-    os.system('cp test_files/sample_h2_otf.out .')
-    parsed = OtfAnalysis('sample_h2_otf.out')
+    os.system("cp test_files/sample_h2_otf.out .")
+    parsed = OtfAnalysis("sample_h2_otf.out")
 
     positions = parsed.position_list
     forces = parsed.force_list
@@ -114,4 +116,4 @@ def test_replicate_gp():
         pred_for, pred_stds = predict_on_structure(structure, gp_model)
         assert np.isclose(structure.forces, pred_for, atol=1e-6).all()
         assert np.isclose(structure.stds, pred_stds, atol=1e-6).all()
-    os.system('rm sample_slab_otf.out')
+    os.system("rm sample_slab_otf.out")
diff --git a/tests/test_struc.py b/tests/test_struc.py
index 93827dfa6..76f668e87 100644
--- a/tests/test_struc.py
+++ b/tests/test_struc.py
@@ -6,8 +6,10 @@
 from flare.struc import Structure
 from json import loads, dumps
 from flare.utils.element_coder import Z_to_element, NumpyEncoder
+
 try:
     import pymatgen.core.structure as pmgstruc
+
     _test_pmg = True
 except ImportError:
     _test_pmg = False
@@ -16,9 +18,7 @@
 
 
 def test_random_structure_setup():
-    struct, forces = get_random_structure(cell=np.eye(3),
-                                          unique_species=[1, 2],
-                                          noa=2)
+    struct, forces = get_random_structure(cell=np.eye(3), unique_species=[1, 2], noa=2)
 
     assert np.equal(struct.cell, np.eye(3)).all()
     assert len(struct.positions) == 2
@@ -35,18 +35,15 @@ def test_prev_positions_arg():
         prev_positions.append(np.random.uniform(-1, 1, 3))
 
     test_structure1 = Structure(cell, species, positions)
-    test_structure2 = Structure(cell, species, positions,
-                                prev_positions=positions)
-    test_structure3 = Structure(cell, species, positions,
-                                prev_positions=prev_positions)
+    test_structure2 = Structure(cell, species, positions, prev_positions=positions)
+    test_structure3 = Structure(cell, species, positions, prev_positions=prev_positions)
 
     assert np.equal(test_structure1.positions, test_structure2.positions).all()
-    assert np.equal(test_structure1.prev_positions,
-                    test_structure2.prev_positions).all()
-    assert np.equal(test_structure2.positions,
-                    test_structure2.prev_positions).all()
-    assert not np.equal(test_structure3.positions,
-                        test_structure3.prev_positions).all()
+    assert np.equal(
+        test_structure1.prev_positions, test_structure2.prev_positions
+    ).all()
+    assert np.equal(test_structure2.positions, test_structure2.prev_positions).all()
+    assert not np.equal(test_structure3.positions, test_structure3.prev_positions).all()
 
 
 def test_raw_to_relative():
@@ -55,17 +52,20 @@ def test_raw_to_relative():
     cell = np.random.rand(3, 3)
     noa = 10
     positions = np.random.rand(noa, 3)
-    species = ['Al'] * len(positions)
+    species = ["Al"] * len(positions)
 
     test_struc = Structure(cell, species, positions)
-    rel_vals = test_struc.raw_to_relative(test_struc.positions,
-                                          test_struc.cell_transpose,
-                                          test_struc.cell_dot_inverse)
+    rel_vals = test_struc.raw_to_relative(
+        test_struc.positions, test_struc.cell_transpose, test_struc.cell_dot_inverse
+    )
 
     ind = np.random.randint(0, noa)
-    assert (np.isclose(positions[ind], rel_vals[ind, 0] * test_struc.vec1 +
-                       rel_vals[ind, 1] * test_struc.vec2 +
-                       rel_vals[ind, 2] * test_struc.vec3).all())
+    assert np.isclose(
+        positions[ind],
+        rel_vals[ind, 0] * test_struc.vec1
+        + rel_vals[ind, 1] * test_struc.vec2
+        + rel_vals[ind, 2] * test_struc.vec3,
+    ).all()
 
 
 def test_wrapped_coordinates():
@@ -74,27 +74,26 @@ def test_wrapped_coordinates():
 
     cell = np.random.rand(3, 3)
     positions = np.random.rand(10, 3)
-    species = ['Al'] * len(positions)
+    species = ["Al"] * len(positions)
 
     test_struc = Structure(cell, species, positions)
 
     wrap_diff = test_struc.positions - test_struc.wrapped_positions
-    wrap_rel = test_struc.raw_to_relative(wrap_diff,
-                                          test_struc.cell_transpose,
-                                          test_struc.cell_dot_inverse)
+    wrap_rel = test_struc.raw_to_relative(
+        wrap_diff, test_struc.cell_transpose, test_struc.cell_dot_inverse
+    )
 
-    assert (np.isclose(np.round(wrap_rel) - wrap_rel,
-                       np.zeros(positions.shape)).all())
+    assert np.isclose(np.round(wrap_rel) - wrap_rel, np.zeros(positions.shape)).all()
 
 
 @pytest.fixture
 def varied_test_struc():
-    struc = Structure(np.eye(3), species=[1, 2, 2, 3, 3, 4, 4, 4,
-                                          4, 3],
-                      positions=np.array([np.random.uniform(-1, 1, 3) for i
-                                          in range(10)]))
-    struc.forces = np.array([np.random.uniform(-1, 1, 3) for _ in
-                             range(10)])
+    struc = Structure(
+        np.eye(3),
+        species=[1, 2, 2, 3, 3, 4, 4, 4, 4, 3],
+        positions=np.array([np.random.uniform(-1, 1, 3) for i in range(10)]),
+    )
+    struc.forces = np.array([np.random.uniform(-1, 1, 3) for _ in range(10)])
     return struc
 
 
@@ -109,7 +108,7 @@ def test_to_from_methods(varied_test_struc):
     test_dict = varied_test_struc.as_dict()
 
     assert isinstance(test_dict, dict)
-    assert (test_dict['forces'] == varied_test_struc.forces).all()
+    assert (test_dict["forces"] == varied_test_struc.forces).all()
 
     new_struc_1 = Structure.from_dict(test_dict)
     new_struc_2 = Structure.from_dict(loads(varied_test_struc.as_str()))
@@ -125,69 +124,107 @@ def test_rep_methods(varied_test_struc):
     assert isinstance(str(varied_test_struc), str)
 
     thestr = str(varied_test_struc)
-    assert 'Structure with 10 atoms of types {' in thestr
+    assert "Structure with 10 atoms of types {" in thestr
+
 
 def test_struc_from_ase():
     from ase import Atoms
-    uc = Atoms(['Pd' for i in range(10)]+['Ag' for i in range(10)],
-               positions=np.random.rand(20, 3),
-               cell=np.random.rand(3, 3))
+    from ase.calculators.singlepoint import SinglePointCalculator
+
+    results = {
+        "forces": np.random.randn(20, 3),
+        "energy": np.random.rand(),
+        "stress": np.random.randn(6),
+    }
+
+    uc = Atoms(
+        ["Pd" for i in range(10)] + ["Ag" for i in range(10)],
+        positions=np.random.rand(20, 3),
+        cell=np.random.rand(3, 3),
+    )
+
+    calculator = SinglePointCalculator(uc, **results)
+    uc.set_calculator(calculator)
+
     new_struc = Structure.from_ase_atoms(uc)
+
     assert np.all(new_struc.species_labels == uc.get_chemical_symbols())
     assert np.all(new_struc.positions == uc.get_positions())
     assert np.all(new_struc.cell == uc.get_cell())
+    assert np.all(new_struc.forces == results["forces"])
+    assert np.all(new_struc.energy == results["energy"])
+    assert np.all(new_struc.stress == results["stress"])
+
 
 def test_struc_to_ase():
-    from ase import Atoms
-    uc = Structure(species=['Pd' for i in range(10)]+['Ag' for i in range(10)],
-                   positions=np.random.rand(20, 3),
-                   cell=np.random.rand(3, 3))
+    uc = Structure(
+        species=["Pd" for i in range(10)] + ["Ag" for i in range(10)],
+        positions=np.random.rand(20, 3),
+        cell=np.random.rand(3, 3),
+    )
+
+    uc.forces = np.random.randn(20, 3)
+    uc.energy = np.random.rand()
+    uc.stress = np.random.randn(6)
+
     new_atoms = Structure.to_ase_atoms(uc)
     assert np.all(uc.species_labels == new_atoms.get_chemical_symbols())
     assert np.all(uc.positions == new_atoms.get_positions())
     assert np.all(uc.cell == new_atoms.get_cell())
+    assert np.all(new_atoms.get_forces() == uc.forces)
+    assert np.all(new_atoms.get_potential_energy() == uc.energy)
+    assert np.all(new_atoms.get_stress() == uc.stress)
 
 
-@pytest.mark.skipif(not _test_pmg,reason='Pymatgen not present in available '
-                                        'packages.')
+@pytest.mark.skipif(
+    not _test_pmg, reason="Pymatgen not present in available " "packages."
+)
 def test_from_pmg_structure():
 
-    pmg_struc = pmgstruc.Structure(lattice= np.eye(3),
-                                   species=['H'],
-                                   coords=[[.25, .5, 0]],
-                                   site_properties={
-                                       'force': [np.array((1., 1.,  1.))],
-                                        'std':[np.array((1., 1., 1.))]},
-                                   coords_are_cartesian=True)
+    pmg_struc = pmgstruc.Structure(
+        lattice=np.eye(3),
+        species=["H"],
+        coords=[[0.25, 0.5, 0]],
+        site_properties={
+            "force": [np.array((1.0, 1.0, 1.0))],
+            "std": [np.array((1.0, 1.0, 1.0))],
+        },
+        coords_are_cartesian=True,
+    )
 
     new_struc = Structure.from_pmg_structure(pmg_struc)
 
     assert len(new_struc) == 1
 
-    assert np.equal(new_struc.positions, np.array([.25, .5, 0])).all()
+    assert np.equal(new_struc.positions, np.array([0.25, 0.5, 0])).all()
     assert new_struc.coded_species == [1]
-    assert new_struc.species_labels[0] == 'H'
-    assert np.equal(new_struc.forces, np.array([1., 1., 1.])).all()
-
-    pmg_struc = pmgstruc.Structure(lattice= np.diag([1.2,0.8,1.5]),
-                                   species=['H'],
-                                   coords=[[.25, .5, 0]],
-                                   site_properties={
-                                       'force': [np.array((1., 1.,  1.))],
-                                        'std':[np.array((1., 1., 1.))]},
-                                   coords_are_cartesian=True)
+    assert new_struc.species_labels[0] == "H"
+    assert np.equal(new_struc.forces, np.array([1.0, 1.0, 1.0])).all()
+
+    pmg_struc = pmgstruc.Structure(
+        lattice=np.diag([1.2, 0.8, 1.5]),
+        species=["H"],
+        coords=[[0.25, 0.5, 0]],
+        site_properties={
+            "force": [np.array((1.0, 1.0, 1.0))],
+            "std": [np.array((1.0, 1.0, 1.0))],
+        },
+        coords_are_cartesian=True,
+    )
 
     new_struc = Structure.from_pmg_structure(pmg_struc)
 
     assert len(new_struc) == 1
 
-    assert np.equal(new_struc.positions, np.array([.25, .5, 0])).all()
+    assert np.equal(new_struc.positions, np.array([0.25, 0.5, 0])).all()
     assert new_struc.coded_species == [1]
-    assert new_struc.species_labels[0] == 'H'
-    assert np.equal(new_struc.forces, np.array([1., 1., 1.])).all()
+    assert new_struc.species_labels[0] == "H"
+    assert np.equal(new_struc.forces, np.array([1.0, 1.0, 1.0])).all()
+
 
-@pytest.mark.skipif(not _test_pmg,reason='Pymatgen not present in available '
-                                        'packages.')
+@pytest.mark.skipif(
+    not _test_pmg, reason="Pymatgen not present in available " "packages."
+)
 def test_to_pmg_structure(varied_test_struc):
 
     new_struc = Structure.to_pmg_structure(varied_test_struc)
@@ -195,91 +232,89 @@ def test_to_pmg_structure(varied_test_struc):
     assert np.equal(new_struc.cart_coords, varied_test_struc.positions).all()
     assert (new_struc.atomic_numbers == varied_test_struc.coded_species).all()
 
+
 def test_to_xyz(varied_test_struc):
 
-    simple_str = varied_test_struc.to_xyz(extended_xyz=False,
-                print_stds=False, print_forces=False, print_max_stds=False)
+    simple_str = varied_test_struc.to_xyz(
+        extended_xyz=False, print_stds=False, print_forces=False, print_max_stds=False
+    )
 
-    simple_str_by_line = simple_str.split('\n')
+    simple_str_by_line = simple_str.split("\n")
 
-    assert len(simple_str_by_line)-2 == len(varied_test_struc)
+    assert len(simple_str_by_line) - 2 == len(varied_test_struc)
 
     for i, atom_line in enumerate(simple_str_by_line[2:-1]):
         split_line = atom_line.split()
-        assert split_line[0] == \
-               Z_to_element(int(varied_test_struc.species_labels[i]))
+        assert split_line[0] == Z_to_element(int(varied_test_struc.species_labels[i]))
         for j in range(3):
-            assert float(split_line[1+j]) == varied_test_struc.positions[i][j]
-
+            assert float(split_line[1 + j]) == varied_test_struc.positions[i][j]
 
-    complex_str = varied_test_struc.to_xyz(True,True,True,True)
-    complex_str_by_line = complex_str.split('\n')
+    complex_str = varied_test_struc.to_xyz(True, True, True, True)
+    complex_str_by_line = complex_str.split("\n")
 
-    assert len(complex_str_by_line)-2 == len(varied_test_struc)
+    assert len(complex_str_by_line) - 2 == len(varied_test_struc)
 
     for i, atom_line in enumerate(complex_str_by_line[2:-1]):
         split_line = atom_line.split()
-        assert split_line[0] == \
-               Z_to_element(int(varied_test_struc.species_labels[i]))
-        for j in range(1,4):
-            assert float(split_line[j]) == varied_test_struc.positions[i][j-1]
-        for j in range(4,7):
-            assert float(split_line[j]) == varied_test_struc.stds[i][j-4]
-        for j in range(7,10):
-            assert float(split_line[j]) == varied_test_struc.forces[i][j-7]
+        assert split_line[0] == Z_to_element(int(varied_test_struc.species_labels[i]))
+        for j in range(1, 4):
+            assert float(split_line[j]) == varied_test_struc.positions[i][j - 1]
+        for j in range(4, 7):
+            assert float(split_line[j]) == varied_test_struc.stds[i][j - 4]
+        for j in range(7, 10):
+            assert float(split_line[j]) == varied_test_struc.forces[i][j - 7]
         assert float(split_line[10]) == np.max(varied_test_struc.stds[i])
 
 
 def test_file_load():
-    struct1, forces = get_random_structure(cell=np.eye(3),
-                                           unique_species=[1, 2],
-                                           noa=2)
-    struct2, forces = get_random_structure(cell=np.eye(3),
-                                           unique_species=[1, 2],
-                                           noa=2)
-
-    with open("test_write.json",'w') as f:
+    struct1, forces = get_random_structure(cell=np.eye(3), unique_species=[1, 2], noa=2)
+    struct2, forces = get_random_structure(cell=np.eye(3), unique_species=[1, 2], noa=2)
+
+    with open("test_write.json", "w") as f:
         f.write(dumps(struct1.as_dict(), cls=NumpyEncoder))
 
     with pytest.raises(NotImplementedError):
-        Structure.from_file(file_name='test_write.json', format='xyz')
+        Structure.from_file(file_name="test_write.json", format="xyz")
 
-    struct1a = Structure.from_file('test_write.json')
+    struct1a = Structure.from_file("test_write.json")
     assert dumpcompare(struct1.as_dict(), struct1a.as_dict())
-    os.remove('test_write.json')
+    os.remove("test_write.json")
 
-    with open("test_write_set.json", 'w') as f:
-        f.write(dumps(struct1.as_dict(), cls=NumpyEncoder)+'\n')
-        f.write(dumps(struct2.as_dict(), cls=NumpyEncoder)+'\n')
+    with open("test_write_set.json", "w") as f:
+        f.write(dumps(struct1.as_dict(), cls=NumpyEncoder) + "\n")
+        f.write(dumps(struct2.as_dict(), cls=NumpyEncoder) + "\n")
 
-    structs = Structure.from_file('test_write_set.json')
+    structs = Structure.from_file("test_write_set.json")
     for struc in structs:
         assert isinstance(struc, Structure)
     assert len(structs) == 2
     assert dumpcompare(structs[0].as_dict(), struct1.as_dict())
     assert dumpcompare(structs[1].as_dict(), struct2.as_dict())
 
-    os.remove('test_write_set.json')
+    os.remove("test_write_set.json")
 
     with pytest.raises(FileNotFoundError):
-        Structure.from_file('fhqwhgads')
+        Structure.from_file("fhqwhgads")
 
-    vasp_struct = Structure.from_file('./test_files/test_POSCAR')
+    vasp_struct = Structure.from_file("./test_files/test_POSCAR")
     assert isinstance(vasp_struct, Structure)
     assert len(vasp_struct) == 6
 
+
 def test_is_valid():
     """
     Try a trivial case of 1-len structure and then one above and below
     tolerance
     :return:
     """
-    test_struc = Structure(cell=np.eye(3), species=['H'],
-                           positions=np.array([[0, 0, 0]]))
+    test_struc = Structure(
+        cell=np.eye(3), species=["H"], positions=np.array([[0, 0, 0]])
+    )
 
     assert test_struc.is_valid()
 
-    test_struc = Structure(cell=np.eye(3), species=['H', 'H'],
-                           positions=[[0, 0, 0], [.3, 0, 0]])
+    test_struc = Structure(
+        cell=np.eye(3), species=["H", "H"], positions=[[0, 0, 0], [0.3, 0, 0]]
+    )
     assert not test_struc.is_valid()
-    assert test_struc.is_valid(tolerance=.25)
+    assert test_struc.is_valid(tolerance=0.25)