From 1f67348fb801588bcb85f9f803f8f53bedbcccf0 Mon Sep 17 00:00:00 2001
From: "kodiakhq[bot]" <49736102+kodiakhq[bot]@users.noreply.github.com>
Date: Fri, 24 Feb 2023 18:28:08 +0000
Subject: [PATCH] Make trajectories more reproducible during checkpointing
 (#4677)

Fixes #4657

Description of changes:
- explain which factors affect reproducibility in checkpointed simulations in the user guide
- re-purpose the save/load samples to help measuring force jumps during checkpointing
- make the P3M family of algorithms more deterministic by avoiding re-tuning during checkpointing
- improve docstrings of the MMM1D family of algorithms
---
 doc/sphinx/io.rst                             | 19 +++++++++++++++++++
 samples/load_checkpoint.py                    | 11 +++++++----
 samples/save_checkpoint.py                    |  7 +++++++
 src/python/espressomd/electrostatics.py       | 17 +++++++++++------
 .../electrostatics/CoulombP3M.hpp             |  2 +-
 .../electrostatics/CoulombP3MGPU.hpp          |  2 +-
 .../magnetostatics/DipolarP3M.hpp             |  2 +-
 .../scripts/samples/test_load_checkpoint.py   |  5 +++++
 .../scripts/samples/test_save_checkpoint.py   |  3 ++-
 9 files changed, 54 insertions(+), 14 deletions(-)

diff --git a/doc/sphinx/io.rst b/doc/sphinx/io.rst
index 02f7f3e3e2f..47f30093605 100644
--- a/doc/sphinx/io.rst
+++ b/doc/sphinx/io.rst
@@ -141,6 +141,25 @@ Be aware of the following limitations:
   versions. It may therefore not be possible to load a checkpoint in a
   different environment than where it was written.
 
+* To be fully deterministic when loading from a checkpoint with an active
+  thermostat, the first step of the integration should be called with the flag
+  ``reuse_forces=True``, e.g. ``system.integrator.run(2, reuse_forces=True)``.
+  This is because loading a checkpoint reinitializes the system and enforces
+  a recalculation of the forces. However, this computes the forces from the
+  velocities at the current time step and not at the previous half time step.
+  Please note that long-range actors can make trajectories non-reproducible.
+  For example, lattice-Boltzmann introduces errors of the order of 1e-15 with
+  binary checkpoint files, or 1e-7 with ASCII checkpoint files. In addition,
+  several electrostatic and magnetostatic actors automatically introduce
+  a deviation of the order of 1e-7, either due to floating-point rounding
+  errors (:class:`~espressomd.electrostatics.P3MGPU`), or due to re-tuning
+  using the most recent system state (:class:`~espressomd.electrostatics.MMM1D`,
+  :class:`~espressomd.electrostatics.MMM1DGPU`).
+  When in doubt, you can easily verify the absence of a "force jump" when
+  loading from a checkpoint by replacing the electrostatics actor with your
+  combination of features in files :file:`samples/save_checkpoint.py` and
+  :file:`samples/load_checkpoint.py` and running them sequentially.
+
 For additional methods of the checkpointing class, see
 :class:`espressomd.checkpointing.Checkpoint`.
 
diff --git a/samples/load_checkpoint.py b/samples/load_checkpoint.py
index 1a30dbb4096..de96e0fc530 100644
--- a/samples/load_checkpoint.py
+++ b/samples/load_checkpoint.py
@@ -30,6 +30,7 @@
 import espressomd
 import espressomd.electrostatics
 import espressomd.checkpointing
+import numpy as np
 
 required_features = ["P3M", "WCA"]
 espressomd.assert_features(required_features)
@@ -69,7 +70,6 @@
 print("\n### p3m test ###")
 print(f"p3m.get_params() = {p3m.get_params()}")
 
-
 # test registered objects
 # all objects that are registered when writing a checkpoint are
 # automatically registered after loading this checkpoint
@@ -77,8 +77,11 @@
 print(
     f"checkpoint.get_registered_objects() = {checkpoint.get_registered_objects()}")
 
-
-# integrate system
+# integrate system while re-using forces (and velocities at half time step)
 print("Integrating...")
+system.integrator.run(2, reuse_forces=True)
 
-system.integrator.run(1000)
+# measure deviation from reference forces (trajectory must be deterministic)
+forces_ref = np.loadtxt("mycheckpoint/forces.npy")
+forces_diff = np.abs(system.part.all().f - forces_ref)
+print(f"max deviation from reference forces = {np.max(forces_diff):.2e}")
diff --git a/samples/save_checkpoint.py b/samples/save_checkpoint.py
index a7743d5eaa5..052a3c0f3fb 100644
--- a/samples/save_checkpoint.py
+++ b/samples/save_checkpoint.py
@@ -79,4 +79,11 @@
 # let's also register the p3m reference for easy access later
 checkpoint.register("p3m")
 
+# get velocities at half time step (for thermostat reproducibility)
+system.integrator.run(1)
+
 checkpoint.save()
+
+# write reference forces to file
+system.integrator.run(2)
+np.savetxt("mycheckpoint/forces.npy", np.copy(system.part.all().f))
diff --git a/src/python/espressomd/electrostatics.py b/src/python/espressomd/electrostatics.py
index 3b795f58ad2..d634e8817d8 100644
--- a/src/python/espressomd/electrostatics.py
+++ b/src/python/espressomd/electrostatics.py
@@ -396,11 +396,13 @@ class MMM1D(ElectrostaticInteraction):
         Maximal pairwise error.
     far_switch_radius : :obj:`float`, optional
         Radius where near-field and far-field calculation are switched.
-    bessel_cutoff : :obj:`int`, optional
-    tune : :obj:`bool`, optional
-        Specify whether to automatically tune or not. Defaults to ``True``.
-    timings : :obj:`int`
+    verbose : :obj:`bool`, optional
+        If ``False``, disable log output during tuning.
+    timings : :obj:`int`, optional
         Number of force calculations during tuning.
+    check_neutrality : :obj:`bool`, optional
+        Raise a warning if the system is not electrically neutral when
+        set to ``True`` (default).
 
     """
     _so_name = "Coulomb::CoulombMMM1D"
@@ -445,8 +447,11 @@ class MMM1DGPU(ElectrostaticInteraction):
     far_switch_radius : :obj:`float`, optional
         Radius where near-field and far-field calculation are switched
     bessel_cutoff : :obj:`int`, optional
-    tune : :obj:`bool`, optional
-        Specify whether to automatically tune or not. Defaults to ``True``.
+    timings : :obj:`int`, optional
+        Number of force calculations during tuning.
+    check_neutrality : :obj:`bool`, optional
+        Raise a warning if the system is not electrically neutral when
+        set to ``True`` (default).
     """
     _so_name = "Coulomb::CoulombMMM1DGpu"
     _so_creation_policy = "GLOBAL"
diff --git a/src/script_interface/electrostatics/CoulombP3M.hpp b/src/script_interface/electrostatics/CoulombP3M.hpp
index 14e0b3e7643..c3e4576d7b9 100644
--- a/src/script_interface/electrostatics/CoulombP3M.hpp
+++ b/src/script_interface/electrostatics/CoulombP3M.hpp
@@ -75,7 +75,7 @@ class CoulombP3M : public Actor<CoulombP3M, ::CoulombP3M> {
   void do_construct(VariantMap const &params) override {
     m_tune = get_value<bool>(params, "tune");
     context()->parallel_try_catch([&]() {
-      auto p3m = P3MParameters{get_value<bool>(params, "tune"),
+      auto p3m = P3MParameters{!get_value_or<bool>(params, "is_tuned", !m_tune),
                                get_value<double>(params, "epsilon"),
                                get_value<double>(params, "r_cut"),
                                get_value<Utils::Vector3i>(params, "mesh"),
diff --git a/src/script_interface/electrostatics/CoulombP3MGPU.hpp b/src/script_interface/electrostatics/CoulombP3MGPU.hpp
index fec2219523c..c786a37edae 100644
--- a/src/script_interface/electrostatics/CoulombP3MGPU.hpp
+++ b/src/script_interface/electrostatics/CoulombP3MGPU.hpp
@@ -76,7 +76,7 @@ class CoulombP3MGPU : public Actor<CoulombP3MGPU, ::CoulombP3MGPU> {
   void do_construct(VariantMap const &params) override {
     m_tune = get_value<bool>(params, "tune");
     context()->parallel_try_catch([&]() {
-      auto p3m = P3MParameters{get_value<bool>(params, "tune"),
+      auto p3m = P3MParameters{!get_value_or<bool>(params, "is_tuned", !m_tune),
                                get_value<double>(params, "epsilon"),
                                get_value<double>(params, "r_cut"),
                                get_value<Utils::Vector3i>(params, "mesh"),
diff --git a/src/script_interface/magnetostatics/DipolarP3M.hpp b/src/script_interface/magnetostatics/DipolarP3M.hpp
index 6923ded75b6..9565db44dec 100644
--- a/src/script_interface/magnetostatics/DipolarP3M.hpp
+++ b/src/script_interface/magnetostatics/DipolarP3M.hpp
@@ -74,7 +74,7 @@ class DipolarP3M : public Actor<DipolarP3M, ::DipolarP3M> {
   void do_construct(VariantMap const &params) override {
     m_tune = get_value<bool>(params, "tune");
     context()->parallel_try_catch([&]() {
-      auto p3m = P3MParameters{get_value<bool>(params, "tune"),
+      auto p3m = P3MParameters{!get_value_or<bool>(params, "is_tuned", !m_tune),
                                get_value<double>(params, "epsilon"),
                                get_value<double>(params, "r_cut"),
                                get_value<Utils::Vector3i>(params, "mesh"),
diff --git a/testsuite/scripts/samples/test_load_checkpoint.py b/testsuite/scripts/samples/test_load_checkpoint.py
index 1d31cfb5f82..c3e6d9bb9ce 100644
--- a/testsuite/scripts/samples/test_load_checkpoint.py
+++ b/testsuite/scripts/samples/test_load_checkpoint.py
@@ -16,6 +16,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 import unittest as ut
+import numpy as np
 import importlib_wrapper
 
 
@@ -32,6 +33,10 @@ def test_file_generation(self):
                          {'myvar', 'system', 'p3m'})
         self.assertEqual(sample.myvar, "some script variable (updated value)")
 
+    def test_trajectory_reproducibility(self):
+        self.assertTrue(sample.p3m.is_tuned)
+        np.testing.assert_array_less(sample.forces_diff, 1e-16)
+
 
 if __name__ == "__main__":
     ut.main()
diff --git a/testsuite/scripts/samples/test_save_checkpoint.py b/testsuite/scripts/samples/test_save_checkpoint.py
index 1edc132d64e..e4b09ab33ec 100644
--- a/testsuite/scripts/samples/test_save_checkpoint.py
+++ b/testsuite/scripts/samples/test_save_checkpoint.py
@@ -28,9 +28,10 @@ class Sample(ut.TestCase):
     system = sample.system
 
     def test_file_generation(self):
-        # test .checkpoint files exist
         filepath = pathlib.Path("mycheckpoint") / "0.checkpoint"
         self.assertTrue(filepath.exists(), f"File {filepath} not created")
+        filepath = pathlib.Path("mycheckpoint") / "forces.npy"
+        self.assertTrue(filepath.exists(), f"File {filepath} not created")
 
 
 if __name__ == "__main__":