From 251d06c5bd581ad12e1b8b3c25a197f6df7b3f6c Mon Sep 17 00:00:00 2001 From: gvdoord Date: Wed, 22 Jun 2022 22:01:08 +0200 Subject: [PATCH] Multi-replica fits via trvl-mask layers + lru caches in validphys --- conda-recipe/meta.yaml | 3 +- .../NNPDF40_nnlo_as_01180_1000.yml | 2 +- .../backends/keras_backend/operations.py | 6 + n3fit/src/n3fit/checks.py | 5 - n3fit/src/n3fit/layers/losses.py | 53 +++++-- n3fit/src/n3fit/layers/mask.py | 19 ++- n3fit/src/n3fit/model_gen.py | 130 ++++++++---------- n3fit/src/n3fit/model_trainer.py | 44 ++++-- n3fit/src/n3fit/performfit.py | 35 ++--- n3fit/src/n3fit/stopping.py | 2 +- validphys2/src/validphys/commondata.py | 3 + validphys2/src/validphys/config.py | 6 +- validphys2/src/validphys/core.py | 3 +- validphys2/src/validphys/covmats.py | 31 ++--- validphys2/src/validphys/filters.py | 31 ++--- validphys2/src/validphys/n3fit_data.py | 12 +- validphys2/src/validphys/n3fit_data_utils.py | 1 + validphys2/src/validphys/utils.py | 41 ++++++ 18 files changed, 248 insertions(+), 179 deletions(-) diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 877431f21d..28889cbd1a 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -55,7 +55,8 @@ requirements: - pineappl >=0.6.2 - eko >=0.14.1 - fiatlux - - curio >=1.0 # reportengine uses it but it's not in its dependencies + - frozendict # needed for caching of data loading + - curio >=1.0 # reportengine uses it but it's not in its dependencies test: requires: diff --git a/n3fit/runcards/reproduce_nnpdf40/NNPDF40_nnlo_as_01180_1000.yml b/n3fit/runcards/reproduce_nnpdf40/NNPDF40_nnlo_as_01180_1000.yml index da30fc1b2e..4e2295470f 100644 --- a/n3fit/runcards/reproduce_nnpdf40/NNPDF40_nnlo_as_01180_1000.yml +++ b/n3fit/runcards/reproduce_nnpdf40/NNPDF40_nnlo_as_01180_1000.yml @@ -163,4 +163,4 @@ integrability: ############################################################ debug: false -maxcores: 4 +maxcores: 4 \ No newline at end of file diff --git a/n3fit/src/n3fit/backends/keras_backend/operations.py b/n3fit/src/n3fit/backends/keras_backend/operations.py index f965152f42..1087620815 100644 --- a/n3fit/src/n3fit/backends/keras_backend/operations.py +++ b/n3fit/src/n3fit/backends/keras_backend/operations.py @@ -211,6 +211,12 @@ def flatten(x): return tf.reshape(x, (-1,)) +@tf.function +def reshape(x, shape): + """reshape tensor x""" + return tf.reshape(x, shape) + + def boolean_mask(*args, **kwargs): """ Applies a boolean mask to a tensor diff --git a/n3fit/src/n3fit/checks.py b/n3fit/src/n3fit/checks.py index 885785a268..089427750f 100644 --- a/n3fit/src/n3fit/checks.py +++ b/n3fit/src/n3fit/checks.py @@ -380,11 +380,6 @@ def check_consistent_parallel(parameters, parallel_models, same_trvl_per_replica """ if not parallel_models: return - if not same_trvl_per_replica: - raise CheckError( - "Replicas cannot be run in parallel with different training/validation " - " masks, please set `same_trvl_per_replica` to True in the runcard" - ) if parameters.get("layer_type") != "dense": raise CheckError("Parallelization has only been tested with layer_type=='dense'") diff --git a/n3fit/src/n3fit/layers/losses.py b/n3fit/src/n3fit/layers/losses.py index 4315919907..9797b0929d 100644 --- a/n3fit/src/n3fit/layers/losses.py +++ b/n3fit/src/n3fit/layers/losses.py @@ -8,6 +8,7 @@ """ import numpy as np + from n3fit.backends import MetaLayer from n3fit.backends import operations as op @@ -37,12 +38,11 @@ class LossInvcovmat(MetaLayer): True """ - def __init__(self, invcovmat, y_true, mask=None, covmat=None, 
**kwargs): + def __init__(self, invcovmat, y_true, mask=None, covmat=None, diag=False, **kwargs): # If we have a diagonal matrix, padd with 0s and hope it's not too heavy on memory - if len(invcovmat.shape) == 1: - invcovmat = np.diag(invcovmat) self._invcovmat = op.numpy_to_tensor(invcovmat) self._covmat = covmat + self._diag = diag self._y_true = op.numpy_to_tensor(y_true) self._ndata = y_true.shape[-1] if mask is None or all(mask): @@ -56,9 +56,7 @@ def build(self, input_shape): """Transform the inverse covmat and the mask into weights of the layers""" init = MetaLayer.init_constant(self._invcovmat) - self.kernel = self.builder_helper( - "invcovmat", (self._ndata, self._ndata), init, trainable=False - ) + self.kernel = self.builder_helper("invcovmat", self._invcovmat.shape, init, trainable=False) mask_shape = (1, 1, self._ndata) if self._mask is None: init_mask = MetaLayer.init_constant(np.ones(mask_shape)) @@ -71,7 +69,10 @@ def add_covmat(self, covmat): Note, however, that the _covmat attribute of the layer will still refer to the original data covmat """ - new_covmat = np.linalg.inv(self._covmat + covmat) + if self._diag: + new_covmat = np.invert(self._covmat + covmat) + else: + new_covmat = np.linalg.inv(self._covmat + covmat) self.kernel.assign(new_covmat) def update_mask(self, new_mask): @@ -82,13 +83,45 @@ def call(self, y_pred, **kwargs): tmp_raw = self._y_true - y_pred # TODO: most of the time this is a y * I multiplication and can be skipped # benchmark how much time (if any) is lost in this in actual fits for the benefit of faster kfolds + # import pdb; pdb.set_trace() tmp = op.op_multiply([tmp_raw, self.mask]) + if self._diag: + return LossInvcovmat.contract_covmat_diag(self.kernel, tmp) + else: + return LossInvcovmat.contract_covmat(self.kernel, tmp) + + @staticmethod + def contract_covmat(kernel, tmp): + if tmp.shape[1] == 1: + # einsum is not well suited for CPU, so use tensordot if not multimodel + if len(kernel.shape) == 3: + right_dot = op.tensor_product(kernel[0, ...], tmp[0, 0, :], axes=1) + res = op.tensor_product(tmp[0, :, :], right_dot, axes=1) + else: + right_dot = op.tensor_product(kernel, tmp[0, 0, :], axes=1) + res = op.tensor_product(tmp[0, :, :], right_dot, axes=1) + else: + if len(kernel.shape) == 3: + res = op.einsum("bri, rij, brj -> r", tmp, kernel, tmp) + else: + res = op.einsum("bri, ij, brj -> r", tmp, kernel, tmp) + return res + + @staticmethod + def contract_covmat_diag(kernel, tmp): if tmp.shape[1] == 1: # einsum is not well suited for CPU, so use tensordot if not multimodel - right_dot = op.tensor_product(self.kernel, tmp[0, 0, :], axes=1) - res = op.tensor_product(tmp[0, :, :], right_dot, axes=1) + if len(kernel.shape) == 2: + right_dot = op.tensor_product(kernel[0, :], tmp[0, 0, :], axes=1) + res = op.tensor_product(tmp[0, :, :], right_dot, axes=1) + else: + right_dot = op.tensor_product(kernel, tmp[0, 0, :], axes=1) + res = op.tensor_product(tmp[0, :, :], right_dot, axes=1) else: - res = op.einsum("bri, ij, brj -> r", tmp, self.kernel, tmp) + if len(kernel.shape) == 2: + res = op.einsum("bri, ri, bri -> r", tmp, kernel, tmp) + else: + res = op.einsum("bri, i, bri -> r", tmp, kernel, tmp) return res diff --git a/n3fit/src/n3fit/layers/mask.py b/n3fit/src/n3fit/layers/mask.py index 877df3b502..3aca2ec2d0 100644 --- a/n3fit/src/n3fit/layers/mask.py +++ b/n3fit/src/n3fit/layers/mask.py @@ -1,3 +1,5 @@ +from numpy import count_nonzero + from n3fit.backends import MetaLayer from n3fit.backends import operations as op @@ -19,14 +21,20 @@ class 
Mask(MetaLayer): c: float constant multiplier for every output axis: int - axis in which to apply the mask + axis in which to apply the mask. Currently, + only the last axis gives the correct output shape """ def __init__(self, bool_mask=None, c=None, axis=None, **kwargs): if bool_mask is None: self.mask = None + self.last_dim = -1 else: self.mask = op.numpy_to_tensor(bool_mask, dtype=bool) + if len(bool_mask.shape) == 1: + self.last_dim = count_nonzero(bool_mask) + else: + self.last_dim = count_nonzero(bool_mask[0, ...]) self.c = c self.axis = axis super().__init__(**kwargs) @@ -34,14 +42,15 @@ def __init__(self, bool_mask=None, c=None, axis=None, **kwargs): def build(self, input_shape): if self.c is not None: initializer = MetaLayer.init_constant(value=self.c) - self.kernel = self.builder_helper( - "mask", (1,), initializer, trainable=False - ) + self.kernel = self.builder_helper("mask", (1,), initializer, trainable=False) super(Mask, self).build(input_shape) def call(self, ret): if self.mask is not None: - ret = op.boolean_mask(ret, self.mask, axis=self.axis) + flat_res = op.boolean_mask(ret, self.mask, axis=self.axis) + output_shape = [-1 if d is None else d for d in ret.get_shape()] + output_shape[-1] = self.last_dim + ret = op.reshape(flat_res, shape=output_shape) if self.c is not None: ret = ret * self.kernel return ret diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py index fc180f392f..9b59585fc7 100644 --- a/n3fit/src/n3fit/model_gen.py +++ b/n3fit/src/n3fit/model_gen.py @@ -32,6 +32,7 @@ AddPhoton, FkRotation, FlavourToEvolution, + Mask, ObsRotation, Preprocessing, losses, @@ -56,6 +57,7 @@ class ObservableWrapper: name: str observables: list + trvl_mask_layer: Mask dataset_xsizes: list invcovmat: np.array = None covmat: np.array = None @@ -70,7 +72,12 @@ def _generate_loss(self, mask=None): was initialized with""" if self.invcovmat is not None: loss = losses.LossInvcovmat( - self.invcovmat, self.data, mask, covmat=self.covmat, name=self.name + self.invcovmat, + self.data, + mask, + covmat=self.covmat, + name=self.name, + diag=(self.rotation is not None), ) elif self.positivity: loss = losses.LossPositivity(name=self.name, c=self.multiplier) @@ -95,10 +102,15 @@ def _generate_experimental_layer(self, pdf): else: output_layers = [obs(pdf) for obs in self.observables] - # Finally concatenate all observables (so that experiments are one single entitiy) - ret = op.concatenate(output_layers, axis=2) + # Finally concatenate all observables (so that experiments are one single entity) + ret = op.concatenate(output_layers) + if self.rotation is not None: ret = self.rotation(ret) + + if self.trvl_mask_layer is not None: + ret = self.trvl_mask_layer(ret) + return ret def __call__(self, pdf_layer, mask=None): @@ -108,7 +120,14 @@ def __call__(self, pdf_layer, mask=None): def observable_generator( - spec_dict, positivity_initial=1.0, integrability=False + spec_dict, + mask_array=None, + training_data=None, + validation_data=None, + invcovmat_tr=None, + invcovmat_vl=None, + positivity_initial=1.0, + integrability=False, ): # pylint: disable=too-many-locals """ This function generates the observable models for each experiment. 
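
A minimal standalone sketch (toy shapes and values, plain TensorFlow rather than the n3fit operation wrappers) of what the reworked Mask.call above does: tf.boolean_mask flattens the masked axis, so the result is reshaped back to a static (batch, replicas, n_kept) shape that the downstream loss layers can rely on. This assumes every replica masks the same number of points, which is what the per-replica training/validation masks are expected to provide.

    import numpy as np
    import tensorflow as tf

    # Hypothetical (replicas, ndata) training mask: each replica keeps 2 of 3 points
    bool_mask = np.array([[True, False, True], [False, True, True]])
    n_kept = np.count_nonzero(bool_mask[0])      # same count for every replica by construction

    obs = tf.random.uniform((1, 2, 3))           # (batch, replicas, ndata) observable output
    flat = tf.boolean_mask(obs, bool_mask, axis=1)   # boolean_mask collapses the shape to (1, 4)
    masked = tf.reshape(flat, (-1, bool_mask.shape[0], n_kept))
    print(masked.shape)                          # (1, 2, 2)
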
@@ -157,9 +176,7 @@ def observable_generator( spec_name = spec_dict["name"] dataset_xsizes = [] model_inputs = [] - model_obs_tr = [] - model_obs_vl = [] - model_obs_ex = [] + model_observables = [] # The first step is to compute the observable for each of the datasets for dataset in spec_dict["datasets"]: # Get the generic information of the dataset @@ -180,50 +197,14 @@ def observable_generator( # list of validphys.coredata.FKTableData objects # these will then be used to check how many different pdf inputs are needed # (and convolutions if given the case) - - if spec_dict["positivity"]: - # Positivity (and integrability, which is a special kind of positivity...) - # enters only at the "training" part of the models - obs_layer_tr = Obs_Layer( - dataset.fktables_data, - dataset.training_fktables(), - operation_name, - name=f"dat_{dataset_name}", - ) - obs_layer_ex = obs_layer_vl = None - elif spec_dict.get("data_transformation_tr") is not None: - # Data transformation needs access to the full array of output data - obs_layer_ex = Obs_Layer( - dataset.fktables_data, - dataset.fktables(), - operation_name, - name=f"exp_{dataset_name}", - ) - obs_layer_tr = obs_layer_vl = obs_layer_ex - else: - obs_layer_tr = Obs_Layer( - dataset.fktables_data, - dataset.training_fktables(), - operation_name, - name=f"dat_{dataset_name}", - ) - obs_layer_ex = Obs_Layer( - dataset.fktables_data, - dataset.fktables(), - operation_name, - name=f"exp_{dataset_name}", - ) - obs_layer_vl = Obs_Layer( - dataset.fktables_data, - dataset.validation_fktables(), - operation_name, - name=f"val_{dataset_name}", - ) + obs_layer = Obs_Layer( + dataset.fktables_data, dataset.fktables(), operation_name, name=f"dat_{dataset_name}" + ) # If the observable layer found that all input grids are equal, the splitting will be None # otherwise the different xgrids need to be stored separately # Note: for pineappl grids, obs_layer_tr.splitting should always be None - if obs_layer_tr.splitting is None: + if obs_layer.splitting is None: xgrid = dataset.fktables_data[0].xgrid model_inputs.append(xgrid) dataset_xsizes.append(len(xgrid)) @@ -232,9 +213,7 @@ def observable_generator( model_inputs += xgrids dataset_xsizes.append(sum([len(i) for i in xgrids])) - model_obs_tr.append(obs_layer_tr) - model_obs_vl.append(obs_layer_vl) - model_obs_ex.append(obs_layer_ex) + model_observables.append(obs_layer) # Check whether all xgrids of all observables in this experiment are equal # if so, simplify the model input @@ -245,11 +224,25 @@ def observable_generator( # Reshape all inputs arrays to be (1, nx) model_inputs = np.concatenate(model_inputs).reshape(1, -1) - full_nx = sum(dataset_xsizes) + # Make the mask layers... 
+ if mask_array is not None: + tr_mask_layer = Mask(mask_array, axis=1, name=f"trmask_{spec_name}") + vl_mask_layer = Mask(~mask_array, axis=1, name=f"vlmask_{spec_name}") + else: + tr_mask_layer = None + vl_mask_layer = None + + # Make rotations of the final data (if any) + if spec_dict.get("data_transformation") is not None: + obsrot = ObsRotation(spec_dict.get("data_transformation")) + else: + obsrot = None + if spec_dict["positivity"]: out_positivity = ObservableWrapper( spec_name, - model_obs_tr, + model_observables, + tr_mask_layer, dataset_xsizes, multiplier=positivity_initial, positivity=not integrability, @@ -259,38 +252,33 @@ def observable_generator( layer_info = { "inputs": model_inputs, "output_tr": out_positivity, - "experiment_xsize": full_nx, + "experiment_xsize": sum(dataset_xsizes), } # For positivity we end here return layer_info - # Generate the loss function and rotations of the final data (if any) - if spec_dict.get("data_transformation_tr") is not None: - obsrot_tr = ObsRotation(spec_dict.get("data_transformation_tr")) - obsrot_vl = ObsRotation(spec_dict.get("data_transformation_vl")) - else: - obsrot_tr = None - obsrot_vl = None - out_tr = ObservableWrapper( spec_name, - model_obs_tr, + model_observables, + tr_mask_layer, dataset_xsizes, - invcovmat=spec_dict["invcovmat"], - data=spec_dict["expdata"], - rotation=obsrot_tr, + invcovmat=invcovmat_tr, + data=training_data, + rotation=obsrot, ) out_vl = ObservableWrapper( f"{spec_name}_val", - model_obs_vl, + model_observables, + vl_mask_layer, dataset_xsizes, - invcovmat=spec_dict["invcovmat_vl"], - data=spec_dict["expdata_vl"], - rotation=obsrot_vl, + invcovmat=invcovmat_vl, + data=validation_data, + rotation=obsrot, ) out_exp = ObservableWrapper( f"{spec_name}_exp", - model_obs_ex, + model_observables, + None, dataset_xsizes, invcovmat=spec_dict["invcovmat_true"], covmat=spec_dict["covmat"], @@ -303,7 +291,7 @@ def observable_generator( "output": out_exp, "output_tr": out_tr, "output_vl": out_vl, - "experiment_xsize": full_nx, + "experiment_xsize": sum(dataset_xsizes), } return layer_info diff --git a/n3fit/src/n3fit/model_trainer.py b/n3fit/src/n3fit/model_trainer.py index acbcc8b3cd..23b6fb12b1 100644 --- a/n3fit/src/n3fit/model_trainer.py +++ b/n3fit/src/n3fit/model_trainer.py @@ -143,15 +143,13 @@ def __init__( list with the replicas ids to be fitted """ # Save all input information - self.exp_info = exp_info - if pos_info is None: - pos_info = [] + self.exp_info = list(exp_info) self.pos_info = pos_info self.integ_info = integ_info if self.integ_info is not None: - self.all_info = exp_info + pos_info + integ_info + self.all_info = self.exp_info[0] + pos_info + integ_info else: - self.all_info = exp_info + pos_info + self.all_info = self.exp_info[0] + pos_info self.flavinfo = flavinfo self.fitbasis = fitbasis self._nn_seeds = nnseeds @@ -219,6 +217,7 @@ def __init__( "posdatasets": [], } self.experimental = {"output": [], "expdata": [], "ndata": 0, "model": None, "folds": []} + self.tr_masks = [] self._fill_the_dictionaries() @@ -268,7 +267,7 @@ def _fill_the_dictionaries(self): - ``name``: names of the experiment - ``ndata``: number of experimental points """ - for exp_dict in self.exp_info: + for index, exp_dict in enumerate(self.exp_info[0]): self.training["expdata"].append(exp_dict["expdata"]) self.validation["expdata"].append(exp_dict["expdata_vl"]) self.experimental["expdata"].append(exp_dict["expdata_true"]) @@ -293,6 +292,7 @@ def _fill_the_dictionaries(self): 
self.training["posdatasets"].append(pos_dict["name"]) self.validation["expdata"].append(pos_dict["expdata"]) self.validation["posdatasets"].append(pos_dict["name"]) + if self.integ_info is not None: for integ_dict in self.integ_info: self.training["expdata"].append(integ_dict["expdata"]) @@ -483,7 +483,7 @@ def _reset_observables(self): self.experimental[key] = [] ############################################################################ - # # Parametizable functions # + # # Parameterizable functions # # # # The functions defined in this block accept a 'params' dictionary which # # defines the fit and the behaviours of the Neural Networks # @@ -524,11 +524,25 @@ def _generate_observables( log.info("Generating layers") # Now we need to loop over all dictionaries (First exp_info, then pos_info and integ_info) - for exp_dict in self.exp_info: + for index, exp_dict in enumerate(self.exp_info[0]): if not self.mode_hyperopt: log.info("Generating layers for experiment %s", exp_dict["name"]) - exp_layer = model_gen.observable_generator(exp_dict) + # Stacked tr-vl mask array for all replicas for this dataset + replica_masks = np.stack([e[index]["trmask"] for e in self.exp_info]) + training_data = np.stack([e[index]["expdata"].flatten() for e in self.exp_info]) + validation_data = np.stack([e[index]["expdata_vl"].flatten() for e in self.exp_info]) + invcovmat = np.stack([e[index]["invcovmat"] for e in self.exp_info]) + invcovmat_vl = np.stack([e[index]["invcovmat_vl"] for e in self.exp_info]) + + exp_layer = model_gen.observable_generator( + exp_dict, + mask_array=replica_masks, + training_data=training_data, + validation_data=validation_data, + invcovmat_tr=invcovmat, + invcovmat_vl=invcovmat_vl, + ) # Save the input(s) corresponding to this experiment self.input_list.append(exp_layer["inputs"]) @@ -549,8 +563,18 @@ def _generate_observables( pos_initial, pos_multiplier = _LM_initial_and_multiplier( all_pos_initial, all_pos_multiplier, max_lambda, positivity_steps ) + replica_masks = np.stack([pos_dict["trmask"] for i in range(len(self.exp_info))]) + training_data = np.stack( + [pos_dict["expdata"].flatten() for i in range(len(self.exp_info))] + ) - pos_layer = model_gen.observable_generator(pos_dict, positivity_initial=pos_initial) + pos_layer = model_gen.observable_generator( + pos_dict, + positivity_initial=pos_initial, + mask_array=replica_masks, + training_data=training_data, + validation_data=training_data, + ) # The input list is still common self.input_list.append(pos_layer["inputs"]) diff --git a/n3fit/src/n3fit/performfit.py b/n3fit/src/n3fit/performfit.py index 55e4811287..3df0109e91 100644 --- a/n3fit/src/n3fit/performfit.py +++ b/n3fit/src/n3fit/performfit.py @@ -160,31 +160,21 @@ def performfit( replicas, replica_experiments, nnseeds = zip(*replicas_nnseed_fitting_data_dict) # Parse the experiments so that the output data contain information for all replicas # as the only different from replica to replica is the experimental training/validation data - all_experiments = copy.deepcopy(replica_experiments[0]) - for i_exp in range(len(all_experiments)): - training_data = [] - validation_data = [] - for i_rep in range(n_models): - training_data.append(replica_experiments[i_rep][i_exp]['expdata']) - validation_data.append(replica_experiments[i_rep][i_exp]['expdata_vl']) - all_experiments[i_exp]['expdata'] = np.concatenate(training_data, axis=0) - all_experiments[i_exp]['expdata_vl'] = np.concatenate(validation_data, axis=0) log.info( - "Starting parallel fits from replica %d to %d", - 
replicas[0], - replicas[0] + n_models - 1, + "Starting parallel fits from replica %d to %d", replicas[0], replicas[0] + n_models - 1 ) - replicas_info = [(replicas, all_experiments, nnseeds)] + replicas_info = [(replicas, replica_experiments, nnseeds)] else: + # Cases 1 and 2 above are a special case of 3 where the replica idx and the seed should + # be a list of just one element replicas_info = replicas_nnseed_fitting_data_dict + for i, info_tuple in enumerate(replicas_info): + replica_idxs = info_tuple[0] + nnseeds = info_tuple[2] + replicas_info[i] = (tuple([replica_idxs]), [info_tuple[1]], tuple([nnseeds])) for replica_idxs, exp_info, nnseeds in replicas_info: - if not parallel_models or n_models == 1: - # Cases 1 and 2 above are a special case of 3 where the replica idx and the seed should - # be a list of just one element - replica_idxs = [replica_idxs] - nnseeds = [nnseeds] - log.info("Starting replica fit %d", replica_idxs[0]) + log.info("Starting replica fit " + str(replica_idxs)) # Generate a ModelTrainer object # this object holds all necessary information to train a PDF (up to the NN definition) @@ -270,12 +260,7 @@ def performfit( q0 = theoryid.get_description().get("Q0") pdf_instances = [N3PDF(pdf_model, fit_basis=basis, Q=q0) for pdf_model in pdf_models] writer_wrapper = WriterWrapper( - replica_idxs, - pdf_instances, - stopping_object, - all_chi2s, - theoryid, - final_time, + replica_idxs, pdf_instances, stopping_object, all_chi2s, theoryid, final_time ) writer_wrapper.write_data(replica_path, output_path.name, save) diff --git a/n3fit/src/n3fit/stopping.py b/n3fit/src/n3fit/stopping.py index 8884e7f01b..8f8cbc5a67 100644 --- a/n3fit/src/n3fit/stopping.py +++ b/n3fit/src/n3fit/stopping.py @@ -384,7 +384,7 @@ def e_best_chi2(self): @property def stop_epoch(self): """Epoch in which the fit is stopped""" - return self._history.final_epoch + 1 + return 0 if self._history.final_epoch is None else self._history.final_epoch + 1 @property def positivity_status(self): diff --git a/validphys2/src/validphys/commondata.py b/validphys2/src/validphys/commondata.py index b67bc3ab4f..5c0b534d75 100644 --- a/validphys2/src/validphys/commondata.py +++ b/validphys2/src/validphys/commondata.py @@ -6,10 +6,13 @@ :py:mod:`validphys.coredata` """ +import functools + from reportengine import collect from validphys.commondataparser import load_commondata +@functools.lru_cache def loaded_commondata_with_cuts(commondata, cuts): """Load the commondata and apply cuts. 
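
As an aside on the caching strategy: the lru_cache added above on loaded_commondata_with_cuts relies on its arguments being hashable, while the providers patched below (produce_rules, get_cuts_for_dataset) also receive dicts and lists and therefore need the freeze_args decorator defined later in validphys/utils.py. A simplified, self-contained illustration of that pattern (toy function and data, not part of the patch) could look like this:

    import functools
    from frozendict import frozendict

    def freeze_args(func):
        """Simplified stand-in for the freeze_args added in validphys.utils."""
        @functools.wraps(func)
        def wrapped(*args, **kwargs):
            def immute(obj):
                # Turn mutable containers into hashable equivalents
                if isinstance(obj, dict):
                    return frozendict({k: immute(v) for k, v in obj.items()})
                if isinstance(obj, (list, tuple)):
                    return tuple(immute(i) for i in obj)
                return obj
            args = tuple(immute(a) for a in args)
            kwargs = {k: immute(v) for k, v in kwargs.items()}
            return func(*args, **kwargs)
        return wrapped

    @freeze_args
    @functools.lru_cache
    def count_rules(added_filter_rules):         # toy stand-in for produce_rules
        return len(added_filter_rules)

    rules = [{"dataset": "X", "rule": "x > 0.1"}]    # hypothetical rule payload
    print(count_rules(rules))                    # computed once
    print(count_rules(rules))                    # second call is served from the lru cache
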
diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 0add44c70c..e1fb850fd4 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -15,6 +15,7 @@ import numbers import pathlib +from frozendict import frozendict import pandas as pd from reportengine import configparser, report @@ -46,6 +47,7 @@ from validphys.paramfits.config import ParamfitsConfig from validphys.plotoptions import get_info import validphys.scalevariations +from validphys.utils import freeze_args log = logging.getLogger(__name__) @@ -1247,6 +1249,8 @@ def parse_default_filter_rules_recorded_spec_(self, spec): def parse_added_filter_rules(self, rules: (list, type(None)) = None): return rules + @freeze_args + @functools.lru_cache def produce_rules( self, theoryid, @@ -1286,7 +1290,7 @@ def produce_rules( if added_filter_rules: for i, rule in enumerate(added_filter_rules): - if not isinstance(rule, dict): + if not (isinstance(rule, dict) or isinstance(rule, frozendict)): raise ConfigError(f"added rule {i} is not a dict") try: rule_list.append( diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index ef50faf3ff..2f4b3f3351 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -489,8 +489,9 @@ def load_cfactors(self): return [[parse_cfactor(c.open("rb")) for c in cfacs] for cfacs in self.cfactors] + @functools.lru_cache() def load_with_cuts(self, cuts): - """Load the fktable and apply cuts inmediately. Returns a FKTableData""" + """Load the fktable and apply cuts immediately. Returns a FKTableData""" return load_fktable(self).with_cuts(cuts) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index c41e90bca7..d90973418b 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -1,6 +1,7 @@ """Module for handling logic and manipulation of covariance and correlation matrices on different levels of abstraction """ +import functools import logging import numpy as np @@ -226,6 +227,7 @@ def dataset_inputs_covmat_from_systematics( @check_cuts_considered +@functools.lru_cache def dataset_t0_predictions(dataset, t0set): """Returns the t0 predictions for a ``dataset`` which are the predictions calculated using the central member of ``pdf``. Note that if ``pdf`` has @@ -372,10 +374,7 @@ def dataset_inputs_t0_exp_covmat_separate( return covmat -def dataset_inputs_total_covmat_separate( - dataset_inputs_exp_covmat_separate, - loaded_theory_covmat, -): +def dataset_inputs_total_covmat_separate(dataset_inputs_exp_covmat_separate, loaded_theory_covmat): """ Function to compute the covmat to be used for the sampling by make_replica. In this case the t0 prescription is not used for the experimental covmat and the multiplicative @@ -409,10 +408,7 @@ def dataset_inputs_exp_covmat_separate( return covmat -def dataset_inputs_t0_total_covmat( - dataset_inputs_t0_exp_covmat, - loaded_theory_covmat, -): +def dataset_inputs_t0_total_covmat(dataset_inputs_t0_exp_covmat, loaded_theory_covmat): """ Function to compute the covmat to be used for the sampling by make_replica and for the chi2 by fitting_data_dict. 
In this case the t0 prescription is used for the experimental covmat @@ -447,10 +443,7 @@ def dataset_inputs_t0_exp_covmat( return covmat -def dataset_inputs_total_covmat( - dataset_inputs_exp_covmat, - loaded_theory_covmat, -): +def dataset_inputs_total_covmat(dataset_inputs_exp_covmat, loaded_theory_covmat): """ Function to compute the covmat to be used for the sampling by make_replica and for the chi2 by fitting_data_dict. In this case the t0 prescription is not used for the experimental covmat @@ -588,7 +581,7 @@ def sqrt_covmat(covariance_matrix): dimensions = covariance_matrix.shape if covariance_matrix.size == 0: - return np.zeros((0,0)) + return np.zeros((0, 0)) elif dimensions[0] != dimensions[1]: raise ValueError( "The input covariance matrix should be square but " @@ -740,9 +733,9 @@ def pdferr_plus_covmat(dataset, pdf, covmat_t0_considered): elif pdf.error_type == 'hessian': rescale_fac = pdf._rescale_factor() hessian_eigenvectors = th.error_members - + # see core.HessianStats - X = (hessian_eigenvectors[:,0::2] - hessian_eigenvectors[:,1::2])*0.5 + X = (hessian_eigenvectors[:, 0::2] - hessian_eigenvectors[:, 1::2]) * 0.5 # need to rescale the Hessian eigenvectors in case the eigenvector confidence interval is not 68% X = X / rescale_fac pdf_cov = X @ X.T @@ -956,10 +949,4 @@ def _dataset_inputs_covmat_t0_considered(dataset_inputs_covmat_t0_considered, fi datasets_covmat = collect('covariance_matrix', ('data',)) -datasets_covariance_matrix = collect( - 'covariance_matrix', - ( - 'experiments', - 'experiment', - ), -) +datasets_covariance_matrix = collect('covariance_matrix', ('experiments', 'experiment')) diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 96099a4a22..d94a9e4372 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -3,6 +3,7 @@ """ from collections.abc import Mapping +import functools from importlib.resources import read_text import logging import re @@ -13,6 +14,7 @@ from reportengine.compat import yaml from validphys.commondatawriter import write_commondata_to_file, write_systype_to_file import validphys.cuts +from validphys.utils import freeze_args log = logging.getLogger(__name__) @@ -178,9 +180,7 @@ def _filter_real_data(filter_path, data): return total_data_points, total_cut_data_points -def _filter_closure_data( - filter_path, data, fakepdf, fakenoise, filterseed, data_index, sep_mult -): +def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, data_index, sep_mult): """ This function is accessed within a closure test only, that is, the fakedata namespace has to be True (If fakedata = False, the _filter_real_data function @@ -393,14 +393,7 @@ class Rule: numpy_functions = {"sqrt": np.sqrt, "log": np.log, "fabs": np.fabs} - def __init__( - self, - initial_data: dict, - *, - defaults: dict, - theory_parameters: dict, - loader=None, - ): + def __init__(self, initial_data: dict, *, defaults: dict, theory_parameters: dict, loader=None): self.dataset = None self.process_type = None self._local_variables_code = {} @@ -451,13 +444,7 @@ def __init__( self.rule_string = self.rule self.defaults = defaults self.theory_params = theory_parameters - ns = { - *self.numpy_functions, - *self.defaults, - *self.variables, - "idat", - "central_value", - } + ns = {*self.numpy_functions, *self.defaults, *self.variables, "idat", "central_value"} for k, v in self.local_variables.items(): try: self._local_variables_code[k] = lcode = compile( @@ -534,11 +521,7 @@ def __call__(self, 
dataset, idat): return eval( self.rule, self.numpy_functions, - { - **{"idat": idat, "central_value": central_value}, - **self.defaults, - **ns, - }, + {**{"idat": idat, "central_value": central_value}, **self.defaults, **ns}, ) except Exception as e: # pragma: no cover raise FatalRuleError(f"Error when applying rule {self.rule_string!r}: {e}") from e @@ -561,6 +544,8 @@ def _make_point_namespace(self, dataset, idat) -> dict: return ns +@freeze_args +@functools.lru_cache def get_cuts_for_dataset(commondata, rules) -> list: """Function to generate a list containing the index of all experimental points that passed kinematic diff --git a/validphys2/src/validphys/n3fit_data.py b/validphys2/src/validphys/n3fit_data.py index 38f946248d..5cba76bf0b 100644 --- a/validphys2/src/validphys/n3fit_data.py +++ b/validphys2/src/validphys/n3fit_data.py @@ -74,7 +74,7 @@ def __iter__(self): yield m -def tr_masks(data, replica_trvlseed): +def tr_masks(data, replica_trvlseed, parallel_models=False): """Generate the boolean masks used to split data into training and validation points. Returns a list of 1-D boolean arrays, one for each dataset. Each array has length equal to N_data, the datapoints which @@ -82,6 +82,9 @@ def tr_masks(data, replica_trvlseed): tr_data = data[tr_mask] + The single_datapoints_toss flag signals whether single-point datasets + should be always included in the training set only (True), or randomly + selected. The former is required for parallel replica fits. """ nameseed = int(hashlib.sha256(str(data).encode()).hexdigest(), 16) % 10**8 nameseed += replica_trvlseed @@ -98,8 +101,10 @@ def tr_masks(data, replica_trvlseed): trmax = int(ndata * frac) if trmax == 0: # If that number is 0, then get 1 point with probability frac - trmax = int(rng.random() < frac) - mask = np.concatenate([np.ones(trmax, dtype=bool), np.zeros(ndata - trmax, dtype=bool)]) + trmax = int(rng.random() < frac) if not parallel_models else 1 + mask = np.concatenate( + [np.ones(trmax, dtype=bool), np.zeros(ndata - trmax, dtype=bool)] + ) rng.shuffle(mask) trmask_partial.append(mask) return _TrMasks(str(data), replica_trvlseed, trmask_partial) @@ -311,6 +316,7 @@ def fitting_data_dict( "folds": folds, "data_transformation_tr": dt_trans_tr, "data_transformation_vl": dt_trans_vl, + "data_transformation": dt_trans, } return dict_out diff --git a/validphys2/src/validphys/n3fit_data_utils.py b/validphys2/src/validphys/n3fit_data_utils.py index fe908d73cb..5575ebfcaf 100644 --- a/validphys2/src/validphys/n3fit_data_utils.py +++ b/validphys2/src/validphys/n3fit_data_utils.py @@ -8,6 +8,7 @@ loading their fktables (and applying any necessary cuts). """ import dataclasses +import functools from itertools import zip_longest import numpy as np diff --git a/validphys2/src/validphys/utils.py b/validphys2/src/validphys/utils.py index e78fc866bc..9bb3fa28ea 100644 --- a/validphys2/src/validphys/utils.py +++ b/validphys2/src/validphys/utils.py @@ -5,14 +5,55 @@ @author: Zahari Kassabov """ import contextlib +import functools import pathlib import shutil import tempfile +from typing import Any, Mapping, Sequence +from frozendict import frozendict import numpy as np from validobj import ValidationError, parse_input +# Since typing.Hashable doesn't check recursively you actually +# have to try hashing it. +def is_hashable(obj): + try: + hash(obj) + return True + except: + return False + + +def immute(obj: Any): + # So that we don't infinitely recurse since frozenset and tuples + # are Sequences. 
+ if is_hashable(obj): + return obj + elif isinstance(obj, Mapping): + return frozendict(obj) + elif isinstance(obj, Sequence): + return tuple([immute(i) for i in obj]) + else: + raise ValueError("Object is not hashable") + + +def freeze_args(func): + """Transform mutable dictionary + Into immutable + Useful to be compatible with cache + """ + + @functools.wraps(func) + def wrapped(*args, **kwargs): + args = tuple([immute(arg) for arg in args]) + kwargs = {k: immute(v) for k, v in kwargs.items()} + return func(*args, **kwargs) + + return wrapped + + def parse_yaml_inp(inp, spec, path): """Helper function to parse yaml using the `validobj` library and print useful error messages in case of a parsing error.
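
A quick usage sketch for the helpers above (toy rule data, hypothetical values): immute turns lists into tuples and dicts into frozendicts so the result can serve as an lru_cache key, which is what freeze_args relies on when wrapping produce_rules and get_cuts_for_dataset.

    rules = [{"dataset": "ATLASTTBARTOT", "rule": "x > 1e-5"}]   # hypothetical added_filter_rules entry
    frozen = immute(rules)                      # -> tuple of frozendict, safe to hash
    assert is_hashable(frozen)
    assert hash(frozen) == hash(immute(rules))  # equal inputs give equal cache keys
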