Merge pull request #1910 from NNPDF/other_thcovmat_plots
Extend th covmat plots
andreab1997 authored Feb 9, 2024
2 parents d41bfcb + cc29211 commit ec76404
Showing 11 changed files with 1,109 additions and 97 deletions.
36 changes: 11 additions & 25 deletions validphys2/src/validphys/covmats.py
@@ -372,10 +372,7 @@ def dataset_inputs_t0_exp_covmat_separate(
    return covmat


-def dataset_inputs_total_covmat_separate(
-    dataset_inputs_exp_covmat_separate,
-    loaded_theory_covmat,
-):
+def dataset_inputs_total_covmat_separate(dataset_inputs_exp_covmat_separate, loaded_theory_covmat):
    """
    Function to compute the covmat to be used for the sampling by make_replica.
    In this case the t0 prescription is not used for the experimental covmat and the multiplicative
@@ -409,10 +406,7 @@ def dataset_inputs_exp_covmat_separate(
    return covmat


-def dataset_inputs_t0_total_covmat(
-    dataset_inputs_t0_exp_covmat,
-    loaded_theory_covmat,
-):
+def dataset_inputs_t0_total_covmat(dataset_inputs_t0_exp_covmat, loaded_theory_covmat):
    """
    Function to compute the covmat to be used for the sampling by make_replica and for the chi2
    by fitting_data_dict. In this case the t0 prescription is used for the experimental covmat
@@ -447,10 +441,7 @@ def dataset_inputs_t0_exp_covmat(
    return covmat


-def dataset_inputs_total_covmat(
-    dataset_inputs_exp_covmat,
-    loaded_theory_covmat,
-):
+def dataset_inputs_total_covmat(dataset_inputs_exp_covmat, loaded_theory_covmat):
    """
    Function to compute the covmat to be used for the sampling by make_replica and for the chi2
    by fitting_data_dict. In this case the t0 prescription is not used for the experimental covmat
@@ -529,7 +520,7 @@ def generate_exp_covmat(


def sqrt_covmat(covariance_matrix):
-    """Function that computes the square root of the covariance matrix.
+    r"""Function that computes the square root of the covariance matrix.

    Parameters
    ----------
@@ -588,7 +579,7 @@ def sqrt_covmat(covariance_matrix):
    dimensions = covariance_matrix.shape

    if covariance_matrix.size == 0:
-        return np.zeros((0,0))
+        return np.zeros((0, 0))
    elif dimensions[0] != dimensions[1]:
        raise ValueError(
            "The input covariance matrix should be square but "
@@ -740,9 +731,9 @@ def pdferr_plus_covmat(dataset, pdf, covmat_t0_considered):
    elif pdf.error_type == 'hessian':
        rescale_fac = pdf._rescale_factor()
        hessian_eigenvectors = th.error_members

        # see core.HessianStats
-        X = (hessian_eigenvectors[:,0::2] - hessian_eigenvectors[:,1::2])*0.5
+        X = (hessian_eigenvectors[:, 0::2] - hessian_eigenvectors[:, 1::2]) * 0.5
        # need to rescale the Hessian eigenvectors in case the eigenvector confidence interval is not 68%
        X = X / rescale_fac
        pdf_cov = X @ X.T
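To see the symmetric-difference construction in isolation, here is a self-contained sketch; the array shapes and the random stand-in for th.error_members are assumptions made purely for illustration:

import numpy as np

ndata, n_members = 6, 8              # 8 error members = 4 (plus, minus) pairs
rng = np.random.default_rng(0)
hessian_eigenvectors = rng.normal(size=(ndata, n_members))
rescale_fac = 1.0                    # differs from 1 when the eigenvector CL is not 68%

# Half the difference between each "plus" (even) and "minus" (odd) column
X = (hessian_eigenvectors[:, 0::2] - hessian_eigenvectors[:, 1::2]) * 0.5
X = X / rescale_fac
pdf_cov = X @ X.T                    # (ndata, ndata) PDF covariance matrix
assert pdf_cov.shape == (ndata, ndata)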
@@ -913,7 +904,7 @@ def dataspecs_datasets_covmat_differences_table(dataspecs_speclabel, dataspecs_c
    return df


-def _covmat_t0_considered(covmat_t0_considered, fitthcovmat, dataset):
+def _covmat_t0_considered(covmat_t0_considered, fitthcovmat, dataset_input):
    """Helper function so we can dispatch the full
    covariance matrix, having considered both ``use_t0``
    and ``use_pdferr``
@@ -922,10 +913,11 @@ def _covmat_t0_considered(covmat_t0_considered, fitthcovmat, dataset):
        # exploit `reorder_thcovmat_as_expcovmat` to take only the part of the covmat for the relevant dataset
        return (
            covmat_t0_considered
-            + reorder_thcovmat_as_expcovmat(fitthcovmat, [dataset]).values
+            + reorder_thcovmat_as_expcovmat(fitthcovmat, [dataset_input]).values
        )
    return covmat_t0_considered
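The dispatch above relies on reorder_thcovmat_as_expcovmat to align the stored theory covmat with the ordering of the experimental covmat before the two are summed. A minimal pandas sketch of that alignment step, with made-up dataset names and index layout (the real helper is not reimplemented here):

import numpy as np
import pandas as pd

datasets = ["ATLAS_Z", "CMS_W"]                           # experimental ordering
stored = pd.Index(["CMS_W", "ATLAS_Z"], name="dataset")   # theory covmat ordering
thcovmat = pd.DataFrame(np.diag([0.2, 0.1]), index=stored, columns=stored)

# Reorder rows and columns to the experimental ordering, then sum
reordered = thcovmat.reindex(index=datasets, columns=datasets)
exp_covmat = np.diag([1.0, 2.0])
total_covmat = exp_covmat + reordered.values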


def _dataset_inputs_covmat_t0_considered(dataset_inputs_covmat_t0_considered, fitthcovmat, data):
    """Helper function so we can dispatch the full
    covariance matrix across dataset_inputs, having considered both ``use_t0``
@@ -956,10 +948,4 @@ def _dataset_inputs_covmat_t0_considered(dataset_inputs_covmat_t0_considered, fitthcovmat, data):

datasets_covmat = collect('covariance_matrix', ('data',))

-datasets_covariance_matrix = collect(
-    'covariance_matrix',
-    (
-        'experiments',
-        'experiment',
-    ),
-)
+datasets_covariance_matrix = collect('covariance_matrix', ('experiments', 'experiment'))
39 changes: 39 additions & 0 deletions validphys2/src/validphys/pdfgrids.py
@@ -375,6 +375,45 @@ def distance_grids(pdfs, xplotting_grids, normalize_to: (int, str, type(None)) =
    return newgrids


@check_pdf_normalize_to
def pull_grids(pdfs, xplotting_grids, normalize_to: (int, str, type(None)) = None):
    """Return an object containing the value of the pull between the two PDFs at the specified
    values of x and flavour.
    The parameter ``normalize_to`` identifies the reference PDF set with respect to which the
    pull is computed.
    This method returns pull grids where the relative pull between both PDF sets, defined as
    the difference between central values in units of the combined standard deviation of the
    two PDFs, is computed. At least one grid will be identical to zero.
    """

    gr2_stats = xplotting_grids[normalize_to].grid_values
    cv2 = gr2_stats.central_value()
    sg2 = gr2_stats.std_error()

    newgrids = []
    for grid, pdf in zip(xplotting_grids, pdfs):
        if pdf == pdfs[normalize_to]:
            # Zero the PDF we are normalizing against
            pdf_zero = pdf.stats_class(np.zeros_like(gr2_stats.data[0:1]))
            newgrid = grid.copy_grid(grid_values=pdf_zero)
            newgrids.append(newgrid)
            continue

        g_stats = grid.grid_values
        cv1 = g_stats.central_value()
        sg1 = g_stats.std_error()

        # Wrap the pull into a Stats (1, flavours, points)
        pull = Stats([np.sqrt((cv1 - cv2) ** 2 / (sg1**2 + sg2**2))])

        newgrid = grid.copy_grid(grid_values=pull)
        newgrids.append(newgrid)

    return newgrids
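In formula form, the pull stored in these grids for flavour f at a point (x, Q), matching the np.sqrt((cv1 - cv2) ** 2 / (sg1**2 + sg2**2)) line above, is

    P_f(x, Q) = \frac{\left| c^{(1)}_f(x, Q) - c^{(2)}_f(x, Q) \right|}{\sqrt{\sigma^{(1)}_f(x, Q)^2 + \sigma^{(2)}_f(x, Q)^2}}

where c^{(i)}_f and \sigma^{(i)}_f are the central value and standard deviation of PDF set i; the square root of the squared difference is simply the absolute value of the difference.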


pull_grids_list = collect(pull_grids, ('pdfs_list',))


@check_pdf_normalize_to
def variance_distance_grids(pdfs, xplotting_grids, normalize_to: (int, str, type(None)) = None):
"""Return an object containing the value of the variance distance PDF at the specified values
Expand Down
112 changes: 107 additions & 5 deletions validphys2/src/validphys/pdfplots.py
@@ -274,6 +274,112 @@ def plot_pdf_uncertainties(
    yield from UncertaintyPDFPlotter(pdfs, xplotting_grids, xscale, normalize_to, ymin, ymax)


class PullPDFPlotter(metaclass=abc.ABCMeta):
    """Auxiliary class which groups multiple pulls in one plot.
    pdfs_list is a list of dictionaries, each containing the two PDFs to be used
    for the pull.
    pull_grids_list is the list of pull grids computed for the PDF pairs described
    by pdfs_list.
    """

    def __init__(self, pdfs_list, pull_grids_list, xscale, normalize_to, ymin, ymax):
        self.pdfs_list = pdfs_list
        self.pull_grids_list = pull_grids_list
        self._xscale = xscale
        self.normalize_to = normalize_to
        self.ymin = ymin
        self.ymax = ymax
        self.firstgrid = pull_grids_list[0][0]

    def legend(self, flstate):
        return flstate.ax.legend()

    def get_ylabel(self):
        return "Pull"

    @property
    def Q(self):
        return self.firstgrid.Q

    @property
    def xscale(self):
        if self._xscale is None:
            return scale_from_grid(self.firstgrid)
        return self._xscale

    def get_title(self, flstate):
        return '$%s$' % flstate.parton_name + f', Q={self.Q : .1f} GeV'

    def draw(self, pdfs, grid, flstate):
        ax = flstate.ax
        flindex = flstate.flindex
        color = ax._get_lines.get_next_color()

        # The grid for the pull is (1, flavours, points);
        # take only the flavour we are interested in
        gv = grid.select_flavour(flindex).grid_values.data.squeeze()

        ax.plot(grid.xgrid, gv, color=color, label=f'{pdfs[0].label}-{pdfs[1].label}')

        return gv

    def plot_call(self):
        basis = self.firstgrid.basis
        for flindex, fl in enumerate(self.firstgrid.flavours):
            fig, ax = plotutils.subplots()
            parton_name = basis.elementlabel(fl)
            flstate = FlavourState(flindex=flindex, fl=fl, fig=fig, ax=ax, parton_name=parton_name)
            ax.set_title(self.get_title(flstate))

            all_vals = []
            for pdf, grids in zip(self.pdfs_list, self.pull_grids_list):
                limits = self.draw([pdf['pdfs'][0], pdf['pdfs'][1]], grids[1], flstate)
                if limits is not None:
                    all_vals.append(np.atleast_2d(limits))

            # Note these two lines do not commute!
            ax.set_xscale(self.xscale)
            plotutils.frame_center(ax, self.firstgrid.xgrid, np.concatenate(all_vals))
            if self.ymin is not None:
                ax.set_ylim(ymin=self.ymin)
            if self.ymax is not None:
                ax.set_ylim(ymax=self.ymax)

            ax.set_xlabel('$x$')
            ax.set_xlim(self.firstgrid.xgrid[0])

            ax.set_ylabel(self.get_ylabel())

            ax.set_axisbelow(True)

            self.legend(flstate)
            yield fig, parton_name

    def __call__(self):
        for fig, parton_name in self.plot_call():
            ax = fig.get_axes()[0]
            ymin, _ = ax.get_ylim()
            ax.set_ylim(max(0, ymin), None)
            yield fig, parton_name


@figuregen
@check_scale('xscale', allow_none=True)
def plot_pdf_pulls(
    pdfs_list,
    pull_grids_list,
    xscale: (str, type(None)) = None,
    normalize_to: (int, str, type(None)) = None,
    ymin=None,
    ymax=None,
):
    """Plot the pull between each pair of PDFs in pdfs_list as a function of x,
    with all pairs grouped on the same figure, one figure per flavour."""
    yield from PullPDFPlotter(pdfs_list, pull_grids_list, xscale, normalize_to, ymin, ymax)()
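For orientation, a hypothetical runcard fragment that could drive this action. The per-entry pdfs key is what plot_call reads (pdf['pdfs'][0], pdf['pdfs'][1]); the overall layout, the Q key and the PDF set names are assumptions of this sketch rather than something fixed by the diff:

pdfs_list:
  - pdfs: [NNPDF40_nnlo_as_01180, NNPDF40_nlo_as_01180]
  - pdfs: [NNPDF40_nnlo_as_01180, NNPDF31_nnlo_as_0118]

Q: 100  # GeV; scale at which the x-grids are evaluated

actions_:
  - plot_pdf_pulls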


class AllFlavoursPlotter(PDFPlotter):
    """Auxiliary class which groups multiple PDF flavours in one plot."""

@@ -799,11 +905,7 @@ def plot_lumi1d(
        handles.append(handle)
        labels.append(f"{pdf.label} {label_add}")

-    ax.legend(
-        handles,
-        labels,
-        handler_map={plotutils.HandlerSpec: plotutils.ComposedHandler()},
-    )
+    ax.legend(handles, labels, handler_map={plotutils.HandlerSpec: plotutils.ComposedHandler()})

    ax.set_ylabel(ylabel)
    ax.set_xlabel("$m_{X}$ (GeV)")
32 changes: 28 additions & 4 deletions validphys2/src/validphys/results.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
"""
results.py
@@ -103,12 +102,15 @@ def sqrtcovmat(self):
    def name(self):
        return self._dataset.name


class ThPredictionsResult(StatsResult):
    """Class holding theory prediction, inherits from StatsResult
    When created with `from_convolution`, it keeps track of the PDF for which it was computed
    """

-    def __init__(self, dataobj, stats_class, datasetnames=None, label=None, pdf=None, theoryid=None):
+    def __init__(
+        self, dataobj, stats_class, datasetnames=None, label=None, pdf=None, theoryid=None
+    ):
        self.stats_class = stats_class
        self.label = label
        self._datasetnames = datasetnames
@@ -129,7 +131,7 @@ def make_label(pdf, dataset):
        elif hasattr(th, "label"):
            label = th.label
        else:
-            label = "%s@<Theory %s>" % (pdf, th.id)
+            label = "{}@<Theory {}>".format(pdf, th.id)
        return label

    @classmethod
@@ -291,9 +293,19 @@ def procs_data_values(proc_result_table):
    return data_central_values


def procs_data_values_experiment(proc_result_table_experiment):
    """Like groups_data_values but grouped by experiment."""
    data_central_values = proc_result_table_experiment["data_central"]
    return data_central_values


groups_results = collect("dataset_inputs_results", ("group_dataset_inputs_by_metadata",))

-procs_results = collect("dataset_inputs_results", ("group_dataset_inputs_by_process",))
+procs_results = collect("dataset_inputs_results_central", ("group_dataset_inputs_by_process",))

procs_results_experiment = collect(
    "dataset_inputs_results_central", ("group_dataset_inputs_by_experiment",)
)


def group_result_table_no_table(groups_results, groups_index):
Expand Down Expand Up @@ -334,6 +346,11 @@ def proc_result_table(proc_result_table_no_table):
return proc_result_table_no_table


@table
def proc_result_table_experiment(procs_results_experiment, experiments_index):
    return group_result_table_no_table(procs_results_experiment, experiments_index)


experiment_result_table = collect("group_result_table", ("group_dataset_inputs_by_experiment",))


@@ -567,6 +584,13 @@ def results_with_scale_variations(results, theory_covmat_dataset):
    return (data_result, theory_error_result)


def dataset_inputs_results_central(
    data, pdf: PDF, dataset_inputs_covariance_matrix, dataset_inputs_sqrt_covmat
):
    """Like `dataset_inputs_results` but for a group of datasets and replica0."""
    return results_central(data, pdf, dataset_inputs_covariance_matrix, dataset_inputs_sqrt_covmat)


def dataset_inputs_results(
    data, pdf: PDF, dataset_inputs_covariance_matrix, dataset_inputs_sqrt_covmat
):
@@ -21,3 +21,7 @@
'n3lo 3pt hadronic': ['(0, 0, 0, 0)','(1, 0, 0, 0)','(2, 0, 0, 0)','(3, 0, 0, 0)','(4, 0, 0, 0)','(5, 0, 0, 0)','(6, 0, 0, 0)','(7, 0, 0, 0)','(8, 0, 0, 0)','(9, 0, 0, 0)','(10, 0, 0, 0)','(11, 0, 0, 0)','(12, 0, 0, 0)','(13, 0, 0, 0)','(14, 0, 0, 0)','(15, 0, 0, 0)','(16, 0, 0, 0)','(17, 0, 0, 0)','(18, 0, 0, 0)','(19, 0, 0, 0)','(0, 1, 0, 0)','(0, 2, 0, 0)','(0, 3, 0, 0)','(0, 4, 0, 0)','(0, 5, 0, 0)','(0, 6, 0, 0)','(0, 7, 0, 0)','(0, 8, 0, 0)','(0, 9, 0, 0)','(0, 10, 0, 0)','(0, 11, 0, 0)','(0, 12, 0, 0)','(0, 13, 0, 0)','(0, 14, 0, 0)','(0, 15, 0, 0)','(0, 16, 0, 0)','(0, 17, 0, 0)','(0, 18, 0, 0)','(0, 19, 0, 0)','(0, 20, 0, 0)','(0, 21, 0, 0)','(0, 0, 1, 0)','(0, 0, 2, 0)','(0, 0, 3, 0)','(0, 0, 4, 0)','(0, 0, 5, 0)','(0, 0, 6, 0)','(0, 0, 7, 0)','(0, 0, 8, 0)','(0, 0, 9, 0)','(0, 0, 10, 0)','(0, 0, 11, 0)','(0, 0, 12, 0)','(0, 0, 13, 0)','(0, 0, 14, 0)','(0, 0, 15, 0)','(0, 0, 0, 1)','(0, 0, 0, 2)','(0, 0, 0, 3)','(0, 0, 0, 4)','(0, 0, 0, 5)','(0, 0, 0, 6)', '(1, 0.5 hadronic)', '(1, 2 hadronic)','(-1, -1)','(1, 1)']
# N3LO 7 point scale variations
'n3lo 7 point': ['(0, 0, 0, 0)', '(2, 1)', '(0.5, 1)', '(1, 2)', '(1, 0.5)', '(2, 2)', '(0.5, 0.5)']
+# N3LO 3 point hadronic renormalization scale variations
+'n3lo 3r point hadronic': ['(0, 0, 0, 0)', '(1, 0.5 hadronic)', '(1, 2 hadronic)']
+# N3LO 3 point missing renormalization scale variations
+'n3lo 3r point missing': ['(0, 0, 0, 0)', '(1, 0.5 missing)', '(1, 2 missing)']