diff --git a/n3fit/src/n3fit/io/writer.py b/n3fit/src/n3fit/io/writer.py index a68cc33404..a988bf25a8 100644 --- a/n3fit/src/n3fit/io/writer.py +++ b/n3fit/src/n3fit/io/writer.py @@ -11,6 +11,7 @@ from reportengine.compat import yaml import validphys import n3fit +from n3fit import vpinterface class WriterWrapper: @@ -57,9 +58,9 @@ def write_data(self, replica_path_set, fitname, tr_chi2, vl_chi2, true_chi2): chi2 of the replica to the central experimental data """ # Compute the arclengths - arc_lengths = self.pdf_object.compute_arclength() + arc_lengths = vpinterface.compute_arclength(self.pdf_object) # Compute the integrability numbers - integrability_numbers = self.pdf_object.integrability_numbers() + integrability_numbers = vpinterface.integrability_numbers(self.pdf_object) # Construct the chi2exp file allchi2_lines = self.stopping_object.chi2exps_str() # Construct the preproc file (the information is only in the json file) @@ -140,8 +141,8 @@ def jsonfit(replica_status, pdf_object, tr_chi2, vl_chi2, true_chi2, stop_epoch, all_info["erf_vl"] = vl_chi2 all_info["chi2"] = true_chi2 all_info["pos_state"] = replica_status.positivity_status - all_info["arc_lengths"] = pdf_object.compute_arclength().tolist() - all_info["integrability"] = pdf_object.integrability_numbers().tolist() + all_info["arc_lengths"] = vpinterface.compute_arclength(pdf_object).tolist() + all_info["integrability"] = vpinterface.integrability_numbers(pdf_object).tolist() all_info["timing"] = timing # Versioning info all_info["version"] = version() diff --git a/n3fit/src/n3fit/performfit.py b/n3fit/src/n3fit/performfit.py index c2acd8a712..ff3a846196 100644 --- a/n3fit/src/n3fit/performfit.py +++ b/n3fit/src/n3fit/performfit.py @@ -264,14 +264,15 @@ def performfit( replica_path_set = replica_path / f"replica_{replica_number}" # Create a pdf instance - pdf_instance = N3PDF(pdf_model, fit_basis=basis) + q0 = theoryid.get_description().get("Q0") + pdf_instance = N3PDF(pdf_model, fit_basis=basis, Q=q0) # Generate the writer wrapper writer_wrapper = WriterWrapper( replica_number, pdf_instance, stopping_object, - theoryid.get_description().get("Q0") ** 2, + q0**2, final_time, ) diff --git a/n3fit/src/n3fit/tests/test_vpinterface.py b/n3fit/src/n3fit/tests/test_vpinterface.py index 255a2acb82..6b9edf0d15 100644 --- a/n3fit/src/n3fit/tests/test_vpinterface.py +++ b/n3fit/src/n3fit/tests/test_vpinterface.py @@ -6,8 +6,7 @@ from hypothesis import given, settings, example from hypothesis.strategies import integers from validphys.pdfgrids import xplotting_grid, distance_grids -from validphys.core import MCStats -from n3fit.vpinterface import N3PDF +from n3fit.vpinterface import N3PDF, integrability_numbers, compute_arclength from n3fit.model_gen import pdfNN_layer_generator @@ -40,20 +39,18 @@ def test_N3PDF(members, layers): xsize = np.random.randint(2, 20) xx = np.random.rand(xsize) n3pdf = generate_n3pdf(layers=layers, members=members) - assert len(n3pdf) == members + 1 - assert n3pdf.stats_class == MCStats - assert n3pdf.load() is n3pdf + assert len(n3pdf) == members w = n3pdf.get_nn_weights() assert len(w) == members assert len(w[0]) == 16 + (layers + 1) * 2 # 16=8*2 preprocessing ret = n3pdf(xx) assert ret.shape == (members, xsize, 14) - int_numbers = n3pdf.integrability_numbers() + int_numbers = integrability_numbers(n3pdf) if members == 1: assert int_numbers.shape == (5,) else: assert int_numbers.shape == (members, 5) - assert n3pdf.compute_arclength().shape == (5,) + assert compute_arclength(n3pdf).shape == (5,) # Try to get a plotting grid res = xplotting_grid(n3pdf, 1.6, xx) assert res.grid_values.data.shape == (members, 8, xsize) diff --git a/n3fit/src/n3fit/vpinterface.py b/n3fit/src/n3fit/vpinterface.py index 98ccd92bed..55e0665c0d 100644 --- a/n3fit/src/n3fit/vpinterface.py +++ b/n3fit/src/n3fit/vpinterface.py @@ -24,6 +24,7 @@ import numpy.linalg as la from validphys.core import PDF, MCStats from validphys.pdfbases import ALL_FLAVOURS, check_basis +from validphys.lhapdfset import LHAPDFSet from validphys.arclength import integrability_number, arc_lengths log = logging.getLogger(__name__) @@ -46,94 +47,39 @@ ] -class N3PDF(PDF): - """ - Creates a N3PDF object, extension of the validphys PDF object to perform calculation - with a n3fit generated model. +class N3Stats(MCStats): + """The PDFs from n3fit are MC PDFs + however, since there is no grid, the CV has to be computed manually""" - Parameters - ---------- - pdf_models: :py:class:`n3fit.backends.MetaModel` (or list thereof) - PDF trained with n3fit, x -> f(x)_{i} where i are the flavours in the evol basis - fit_basis: list(dict) - basis of the training, used for reporting - name: str - name of the N3PDF object - """ + def error_members(self): + return self.data - def __init__(self, pdf_models, fit_basis=None, name="n3fit"): - self.fit_basis = fit_basis - self.basis = check_basis("evolution", EVOL_LIST)["basis"] + def central_value(self): + return np.mean(self.data, axis=0) - if isinstance(pdf_models, Iterable): - self._models = pdf_models - else: - self._models = [pdf_models] - - super().__init__(name) - # The number of members will be minimum 2, needed for legacy compatibility - # Note that the member 0 does not exist as an actual model as it corresponds - # to the average of all others - self._info = {"ErrorType": "replicas", "NumMembers": len(self._models) + 1} - @property - def stats_class(self): - """The stats class of N3PDF is always MCStats""" - return MCStats +class N3LHAPDFSet(LHAPDFSet): + """Extension of LHAPDFSet using n3fit models""" - def load(self): - """Many vp functions ask for a LHAPDF pdf - from nnpdflib, this class fakes it until a time in which vp is free from C++ - """ - return self - - def get_member_model(self, replica): - """Return the member corresponding to the given replica""" - return self._models[replica - 1] - - def get_nn_weights(self): - """Outputs all weights of the NN as numpy.ndarrays""" - return [model.get_weights() for model in self._models] + def __init__(self, name, pdf_models, Q=1.65): + log.debug("Creating LHAPDF-like n3fit PDF") + self._error_type = "replicas" + self._name = name + self._lhapdf_set = pdf_models + self._flavors = None + self._fitting_q = Q + self.basis = check_basis("evolution", EVOL_LIST)["basis"] - def get_preprocessing_factors(self, replica=None): - """Loads the preprocessing alpha and beta arrays from the PDF trained model. - If a ``fit_basis`` given in the format of ``n3fit`` runcards is given it will be used - to generate a new dictionary with the names, the exponent and whether they are trainable - otherwise outputs a Nx2 array where [:,0] are alphas and [:,1] betas - """ - # If no replica is explicitly requested, get the preprocessing layer for the first model - if replica is None: - replica = 1 - preprocessing_layers = self.get_member_model(replica).get_layer_re(r"pdf_prepro_\d") - if len(preprocessing_layers) != 1: - # We really don't want to fail at this point, but print a warning at least... - log.warning("More than one preprocessing layer found within the model!") - preprocessing_layer = preprocessing_layers[0] - - alphas_and_betas = None - if self.fit_basis is not None: - output_dictionaries = [] - for d in self.fit_basis: - flavour = d["fl"] - alpha = preprocessing_layer.get_weight_by_name(f"alpha_{flavour}") - beta = preprocessing_layer.get_weight_by_name(f"beta_{flavour}") - if alpha is not None: - alpha = float(alpha.numpy()) - if beta is not None: - beta = float(beta.numpy()) - output_dictionaries.append( - { - "fl": flavour, - "smallx": alpha, - "largex": beta, - "trainable": d.get("trainable", True), - } - ) - alphas_and_betas = output_dictionaries - return alphas_and_betas + def xfxQ(self, x, Q, n, fl): + """Return the value of the PDF member for the given value in x""" + if Q != self._fitting_q: + log.warning( + "Querying N3LHAPDFSet at a value of Q=%f different from %f", Q, self._fitting_q + ) + return self.grid_values([fl], [x]).squeeze()[n] def __call__(self, xarr, flavours=None, replica=None): - """Uses the internal model to produce pdf values. + """Uses the internal model to produce pdf values for the grid The output is on the evolution basis. Parameters @@ -159,12 +105,12 @@ def __call__(self, xarr, flavours=None, replica=None): if replica is None or replica == 0: # We need generate output values for all replicas - result = np.concatenate([m.predict([mod_xgrid]) for m in self._models], axis=0) + result = np.concatenate([m.predict([mod_xgrid]) for m in self._lhapdf_set], axis=0) if replica == 0: # We want _only_ the central value result = np.mean(result, axis=0, keepdims=True) else: - result = self.get_member_model(replica).predict([mod_xgrid]) + result = self._lhapdf_set[replica - 1].predict([mod_xgrid]) if flavours != "n3fit": # Ensure that the result has its flavour in the basis-defined order @@ -211,62 +157,161 @@ def grid_values(self, flavours, xarr, qmat=None): ret = ret.repeat(lq, -1) return ret - # Utilities - def integrability_numbers(self, q0=1.65, flavours=None): - """Compute the integrability numbers for the current PDF - using the corresponding validphys action - Parameters - ---------- - q0: float - energy at which the integrability is computed - flavours: list - flavours for which the integrability is computed +class N3PDF(PDF): + """ + Creates a N3PDF object, extension of the validphys PDF object to perform calculation + with a n3fit generated model. - Returns - ------- - np.array(float) - Value for the integrability for each of the flavours + Parameters + ---------- + pdf_models: :py:class:`n3fit.backends.MetaModel` (or list thereof) + PDF trained with n3fit, x -> f(x)_{i} where i are the flavours in the evol basis + fit_basis: list(dict) + basis of the training, used for reporting + name: str + name of the N3PDF object + """ - Example - ------- - >>> from n3fit.vpinterface import N3PDF - >>> from n3fit.model_gen import pdfNN_layer_generator - >>> fake_fl = [{'fl' : i, 'largex' : [0,1], 'smallx': [1,2]} for i in ['u', 'ubar', 'd', 'dbar', 'c', 'cbar', 's', 'sbar']] - >>> pdf_model = pdfNN_layer_generator(nodes=[8], activations=['linear'], seed=0, flav_info=fake_fl) - >>> n3pdf = N3PDF(pdf_model) - >>> res = n3pdf.integrability_numbers() - """ - if flavours is None: - flavours = ["V", "T3", "V3", "T8", "V8"] - return integrability_number(self, [q0], flavours=flavours) + def __init__(self, pdf_models, fit_basis=None, name="n3fit", Q=1.65): + self.fit_basis = fit_basis - def compute_arclength(self, q0=1.65, basis="evolution", flavours=None): + if isinstance(pdf_models, Iterable): + self._models = pdf_models + else: + self._models = [pdf_models] + + super().__init__(name) + self._stats_class = N3Stats + self._lhapdf_set = N3LHAPDFSet(self.name, self._models, Q=Q) + # Since there is no info file, create a fake `_info` dictionary + self._info = {"ErrorType": "replicas", "NumMembers": len(self._models)} + + def load(self): + """If the function needs an LHAPDF object, return a N3LHAPDFSet""" + return self._lhapdf_set + + def get_nn_weights(self): + """Outputs all weights of the NN as numpy.ndarrays""" + return [model.get_weights() for model in self._models] + + def get_preprocessing_factors(self, replica=None): + """Loads the preprocessing alpha and beta arrays from the PDF trained model. + If a ``fit_basis`` given in the format of ``n3fit`` runcards is given it will be used + to generate a new dictionary with the names, the exponent and whether they are trainable + otherwise outputs a Nx2 array where [:,0] are alphas and [:,1] betas """ - Given the layer with the fit basis computes the arc length - using the corresponding validphys action + # If no replica is explicitly requested, get the preprocessing layer for the first model + if replica is None: + replica = 1 + # Replicas start counting in 1 so: + preprocessing_layers = self._models[replica - 1].get_layer_re(r"pdf_prepro_\d") + if len(preprocessing_layers) != 1: + # We really don't want to fail at this point, but print a warning at least... + log.warning("More than one preprocessing layer found within the model!") + preprocessing_layer = preprocessing_layers[0] + + alphas_and_betas = None + if self.fit_basis is not None: + output_dictionaries = [] + for d in self.fit_basis: + flavour = d["fl"] + alpha = preprocessing_layer.get_weight_by_name(f"alpha_{flavour}") + beta = preprocessing_layer.get_weight_by_name(f"beta_{flavour}") + if alpha is not None: + alpha = float(alpha.numpy()) + if beta is not None: + beta = float(beta.numpy()) + output_dictionaries.append( + { + "fl": flavour, + "smallx": alpha, + "largex": beta, + "trainable": d.get("trainable", True), + } + ) + alphas_and_betas = output_dictionaries + return alphas_and_betas + + def __call__(self, xarr, flavours=None, replica=None): + """Uses the internal model to produce pdf values for the grid + The output is on the evolution basis. Parameters ---------- - pdf_function: function - pdf function has received by the writer or ``pdf_model`` - q0: float - energy at which the arc length is computed - basis: str - basis in which to compute the arc length + xarr: numpy.ndarray + x-points with shape (xgrid_size,) (size-1 dimensions are removed) flavours: list - output flavours + list of flavours to output + replica: int + replica whose value must be returned (by default return all members) + replica 0 corresponds to the central value - Example + Returns ------- - >>> from n3fit.vpinterface import N3PDF - >>> from n3fit.model_gen import pdfNN_layer_generator - >>> fake_fl = [{'fl' : i, 'largex' : [0,1], 'smallx': [1,2]} for i in ['u', 'ubar', 'd', 'dbar', 'c', 'cbar', 's', 'sbar']] - >>> pdf_model = pdfNN_layer_generator(nodes=[8], activations=['linear'], seed=0, flav_info=fake_fl) - >>> n3pdf = N3PDF(pdf_model) - >>> res = n3pdf.compute_arclength() + numpy.ndarray + (xgrid_size, flavours) pdf result """ - if flavours is None: - flavours = ["sigma", "gluon", "V", "V3", "V8"] - ret = arc_lengths(self, [q0], basis, flavours) - return ret.stats.central_value() + return self._lhapdf_set(xarr, flavours=flavours, replica=replica) + + +# Utilities and wrapper to avoid having to pass around unnecessary information +def integrability_numbers(n3pdf, q0=1.65, flavours=None): + """Compute the integrability numbers for the current PDF + using the corresponding validphys action + + Parameters + ---------- + q0: float + energy at which the integrability is computed + flavours: list + flavours for which the integrability is computed + + Returns + ------- + np.array(float) + Value for the integrability for each of the flavours + + Example + ------- + >>> from n3fit.vpinterface import N3PDF + >>> from n3fit.model_gen import pdfNN_layer_generator + >>> fake_fl = [{'fl' : i, 'largex' : [0,1], 'smallx': [1,2]} for i in ['u', 'ubar', 'd', 'dbar', 'c', 'cbar', 's', 'sbar']] + >>> pdf_model = pdfNN_layer_generator(nodes=[8], activations=['linear'], seed=0, flav_info=fake_fl) + >>> n3pdf = N3PDF(pdf_model) + >>> res = n3pdf.integrability_numbers() + """ + if flavours is None: + flavours = ["V", "T3", "V3", "T8", "V8"] + return integrability_number(n3pdf, [q0], flavours=flavours) + + +def compute_arclength(self, q0=1.65, basis="evolution", flavours=None): + """ + Given the layer with the fit basis computes the arc length + using the corresponding validphys action + + Parameters + ---------- + pdf_function: function + pdf function has received by the writer or ``pdf_model`` + q0: float + energy at which the arc length is computed + basis: str + basis in which to compute the arc length + flavours: list + output flavours + + Example + ------- + >>> from n3fit.vpinterface import N3PDF + >>> from n3fit.model_gen import pdfNN_layer_generator + >>> fake_fl = [{'fl' : i, 'largex' : [0,1], 'smallx': [1,2]} for i in ['u', 'ubar', 'd', 'dbar', 'c', 'cbar', 's', 'sbar']] + >>> pdf_model = pdfNN_layer_generator(nodes=[8], activations=['linear'], seed=0, flav_info=fake_fl) + >>> n3pdf = N3PDF(pdf_model) + >>> res = n3pdf.compute_arclength() + """ + if flavours is None: + flavours = ["sigma", "gluon", "V", "V3", "V8"] + ret = arc_lengths(self, [q0], basis, flavours) + return ret.stats.central_value() diff --git a/validphys2/src/validphys/arclength.py b/validphys2/src/validphys/arclength.py index f380d9fb36..8a27efba22 100644 --- a/validphys2/src/validphys/arclength.py +++ b/validphys2/src/validphys/arclength.py @@ -41,8 +41,6 @@ def arc_lengths( npoints = 199 # 200 intervals seg_min = [1e-6, 1e-4, 1e-2] seg_max = [1e-4, 1e-2, 1.0] - # Note that we discard replica 0 for MC PDFs, see core.PDF.grid_values_index - # for more details res = np.zeros((pdf.get_members(), len(flavours))) # Integrate the separate segments for a, b in zip(seg_min, seg_max): diff --git a/validphys2/src/validphys/convolution.py b/validphys2/src/validphys/convolution.py index f35e872fda..5203cb8e77 100644 --- a/validphys2/src/validphys/convolution.py +++ b/validphys2/src/validphys/convolution.py @@ -147,9 +147,7 @@ def predictions(dataset, pdf): A dataframe corresponding to the hadronic prediction for each data point for the PDF members. The index of the dataframe corresponds to the selected data points, based on the dataset :ref:`cuts `. The - columns correspond to the selected PDF members in the LHAPDF set, which - depend on the PDF error type (see - :py:meth:`validphys.core.PDF.grid_values_index`) + columns correspond to the selected PDF members in the LHAPDF set. Examples -------- @@ -219,9 +217,7 @@ def fk_predictions(loaded_fk, pdf): point for the PDF members. The index of the dataframe corresponds to the selected data points (use :py:meth:`validphys.coredata.FKTableData.with_cuts` to filter out - points). The columns correspond to the selected PDF members in the - LHAPDF set, which depend on the PDF error type (see - :py:meth:`validphys.core.PDF.grid_values_index`) + points). The columns correspond to the selected PDF members in the LHAPDF set. Notes ----- @@ -351,7 +347,7 @@ def hadron_predictions(loaded_fk, pdf): """Implementation of :py:func:`fk_predictions` for hadronic observables.""" gv = functools.partial(evolution.grid_values, pdf=pdf) res = _gv_hadron_predictions(loaded_fk, gv) - res.columns = pdf.grid_values_index + res.columns = range(pdf.get_members()) return res @@ -380,7 +376,7 @@ def gv2(*args, **kwargs): return 2 * replica_values - central_value res = _gv_hadron_predictions(loaded_fk, gv1, gv2) - res.columns = pdf.grid_values_index + res.columns = range(pdf.get_members()) return res @@ -388,7 +384,7 @@ def dis_predictions(loaded_fk, pdf): """Implementation of :py:func:`fk_predictions` for DIS observables.""" gv = functools.partial(evolution.grid_values, pdf=pdf) res = _gv_dis_predictions(loaded_fk, gv) - res.columns = pdf.grid_values_index + res.columns = range(pdf.get_members()) return res diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index 85536b7e23..8681856129 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -226,36 +226,10 @@ def legacy_load(self): return libNNPDF_LHAPDFSet(self.name, et) - @property - def grid_values_index(self): - """A range object describing which members are selected in a - ``pdf.load().grid_values`` operation. This is ``range(1, - len(pdf))`` for Monte Carlo sets, because replica 0 is not selected - and ``range(0, len(pdf))`` for hessian sets. - - - Returns - ------- - index : range - A range object describing the proper indexing. - - Notes - ----- - The range object can be used efficiently as a Pandas index. - """ - if self.error_type == "replicas": - return range(1, len(self)) - elif self.error_type in ("hessian", "symmhessian"): - return range(0, len(self)) - else: - raise RuntimeError(f"Unknown error type: {self.stats_class}") - def get_members(self): """Return the number of members selected in ``pdf.load().grid_values`` - operation. See :py:meth:`PDF.grid_values_index` for details on differences - between types of PDF sets. """ - return len(self.grid_values_index) + return len(self) kinlabels_latex = CommonData.kinLabel_latex.asdict() @@ -777,10 +751,10 @@ def __init__(self, data): self.data = np.atleast_2d(data) def central_value(self): - raise NotImplementedError() + return self.data[0] def error_members(self): - raise NotImplementedError() + return self.data[1:] def std_error(self): raise NotImplementedError() @@ -807,16 +781,12 @@ def errorbarstd(self): class MCStats(Stats): """Result obtained from a Monte Carlo sample""" - - def central_value(self): - return np.mean(self.data, axis=0) - def std_error(self): # ddof == 1 to match libNNPDF behaviour - return np.std(self.data, ddof=1, axis=0) + return np.std(self.error_members(), ddof=1, axis=0) def moment(self, order): - return np.mean(np.power(self.data-self.central_value(),order), axis=0) + return np.mean(np.power(self.error_members()-self.central_value(),order), axis=0) def errorbar68(self): #Use nanpercentile here because we can have e.g. 0/0==nan normalization @@ -828,9 +798,6 @@ def errorbar68(self): def sample_values(self, size): return np.random.choice(self, size=size) - def error_members(self): - return self.data - class SymmHessianStats(Stats): """Compute stats in the 'symetric' hessian format: The first index (0) @@ -842,15 +809,9 @@ def __init__(self, data, rescale_factor=1): super().__init__(data) self.rescale_factor = rescale_factor - def central_value(self): - return self.data[0] - def errorbar68(self): return self.errorbarstd() - def error_members(self): - return self.data[1:] - def std_error(self): data = self.data diffsq = (data[0] - data[1:])**2 diff --git a/validphys2/src/validphys/correlations.py b/validphys2/src/validphys/correlations.py index 7b1f6b8920..2841d2bb4b 100644 --- a/validphys2/src/validphys/correlations.py +++ b/validphys2/src/validphys/correlations.py @@ -9,17 +9,17 @@ from reportengine import collect +from validphys.core import Stats from validphys.checks import check_pdf_is_montecarlo - #This would be a good candidate to be optimized to calculate everything in one #pass over x, -def _basic_obs_pdf_correlation(pdf_stats, obs_stats): +def _basic_obs_pdf_correlation(pdf_val, obs_val): """Calculate the correlation between pdfs and observables. - The expected format is are Stats instances of: + The expected format is two arrays - obs_stats: (nbins x nreplicas), as returned from thresult. - pdf_stats: (nreplicas x nf x nx), as returned from xplotting_grid.grid_values + obs_val: (nbin x nreplicas) as returned from thresults.error_members + pdf_val: (nreplicas x nf x nf) as returned from xplotting_grid.grid_values.error_members The returned array contains the PDF correlation between the value of the obsevable and the PDF at the corresponding point in (fl,x) @@ -27,8 +27,8 @@ def _basic_obs_pdf_correlation(pdf_stats, obs_stats): (nbins x nf x nx), compatible with grid_values, upon changing replicas->bins. """ - x = pdf_stats.error_members() - pdf_stats.central_value() - y = obs_stats.error_members() - obs_stats.central_value() + x = pdf_val - np.mean(pdf_val, axis=0) + y = (obs_val - np.mean(obs_val, axis=-1, keepdims=True)).T #We want to compute: #sum(x*y)/(norm(x)*norm(y)) @@ -45,32 +45,37 @@ def _basic_obs_pdf_correlation(pdf_stats, obs_stats): def _basic_obs_obs_correlation(obs1, obs2): """Calculate the correlation between two observables. The expected format is - Stats instances of: - - obs1: (nreplicas, nbins1) - obs2: (nreplicas, nbins2) + arrays instances of: + + obs1: (nbins, nreplicas) + obs2: (nbins, nreplicas) The result is (nbins1 , nbins2), a mareix containing the correlation coefficients between the two sets. """ - x = (obs1.error_members() - obs1.central_value()).T - y = obs2.error_members() - obs2.central_value() + x = obs1 - np.mean(obs1, axis=1, keepdims=True) + y = (obs2 - np.mean(obs2, axis=1, keepdims=True)).T return x@y/np.outer(la.norm(x,axis=1),la.norm(y,axis=0)) +@check_pdf_is_montecarlo def obs_pdf_correlations(pdf, results, xplotting_grid): """Return the correlations between each point in a dataset and the PDF values on a grid of (x,f) points in a format similar to `xplotting_grid`.""" _, th = results - corrs = pdf.stats_class(_basic_obs_pdf_correlation(xplotting_grid.grid_values, th.stats)) + # Wrap the result in a standard Stats class + # since the result is (npoints, flavours, ndata) and has nothing to do with the PDF replicas + pdf_val = xplotting_grid.grid_values.error_members() + obs_val = th.error_members + corrs = Stats(_basic_obs_pdf_correlation(pdf_val, obs_val)) return xplotting_grid.copy_grid(grid_values=corrs) corrpair_results = collect("results", ["corrpair"]) corrpair_datasets = collect("dataset", ["corrpair"]) - +@check_pdf_is_montecarlo def obs_obs_correlations(pdf, corrpair_results): """Return the theoretical correlation matrix between a pair of observables.""" (_, th1), (_, th2) = corrpair_results - return _basic_obs_obs_correlation(th1.stats, th2.stats) + return _basic_obs_obs_correlation(th1.error_members, th2.error_members) diff --git a/validphys2/src/validphys/dataplots.py b/validphys2/src/validphys/dataplots.py index 725fb3ec65..3a7460ab05 100644 --- a/validphys2/src/validphys/dataplots.py +++ b/validphys2/src/validphys/dataplots.py @@ -871,7 +871,7 @@ def plot_smpdf(pdf, dataset, obs_pdf_correlations, mark_threshold:float=0.9): basis = obs_pdf_correlations.basis - fullgrid = obs_pdf_correlations.grid_values.error_members() + fullgrid = obs_pdf_correlations.grid_values.data fls = obs_pdf_correlations.flavours x = obs_pdf_correlations.xgrid diff --git a/validphys2/src/validphys/deltachi2.py b/validphys2/src/validphys/deltachi2.py index 2ba7297458..c1e8aff2fd 100644 --- a/validphys2/src/validphys/deltachi2.py +++ b/validphys2/src/validphys/deltachi2.py @@ -144,8 +144,13 @@ def pos_neg_xplotting_grids(delta_chi2_hessian, xplotting_grid): pos_mask = np.append(True, positive_eigenvalue_mask) neg_mask = np.append(True, ~positive_eigenvalue_mask) - pos_xplotting_grid = xplotting_grid.mask_replicas(pos_mask) - neg_xplotting_grid = xplotting_grid.mask_replicas(neg_mask) + pos_grid = xplotting_grid.grid_values.data[pos_mask] + neg_grid = xplotting_grid.grid_values.data[neg_mask] + + # Wrap everything back into the original stats class + stats_class = xplotting_grid.grid_values.__class__ + pos_xplotting_grid = xplotting_grid.copy_grid(stats_class(pos_grid)) + neg_xplotting_grid = xplotting_grid.copy_grid(stats_class(neg_grid)) return [xplotting_grid, pos_xplotting_grid, neg_xplotting_grid] diff --git a/validphys2/src/validphys/gridvalues.py b/validphys2/src/validphys/gridvalues.py index 3b77f8bd3a..d038faa7f4 100644 --- a/validphys2/src/validphys/gridvalues.py +++ b/validphys2/src/validphys/gridvalues.py @@ -75,12 +75,6 @@ def grid_values(pdf:PDF, flmat, xmat, qmat): grid_values[replica][flavour][x][Q] - Notes - ---- - This uses libnnpdf, and therefore follows the convention to throw away - replica 0 for Monte Carlo ensembles (so index 0 corresponds to replica 1). - Use ``pdf.grid_values_index`` to index the result properly. - See Also -------- :py:meth:`validphys.pdfbases.Basis.grid_values` offers a higher level diff --git a/validphys2/src/validphys/lhapdfset.py b/validphys2/src/validphys/lhapdfset.py index 2dfa4f1bd2..0ab1f0f866 100644 --- a/validphys2/src/validphys/lhapdfset.py +++ b/validphys2/src/validphys/lhapdfset.py @@ -7,15 +7,13 @@ The ``.members`` and ``.central_member`` of the ``LHAPDFSet`` are LHAPDF objects (the typical output from ``mkPDFs``) and can be used normally. - For MC PDFs the ``central_member`` is the average of all replicas (members 1-100) - while for Hessian PDFfs the ``central_member`` is also ``members[0]`` Examples -------- >>> from validphys.lhapdfset import LHAPDFSet >>> pdf = LHAPDFSet("NNPDF40_nnlo_as_01180", "replicas") >>> len(pdf.members) - 100 + 101 >>> pdf.central_member.alphasQ(91.19) 0.11800 >>> pdf.members[0].xfxQ2(0.5, 15625) @@ -43,9 +41,6 @@ class LHAPDFSet: Once instantiated this class will load the PDF set from LHAPDF. If it is a T0 set only the CV will be loaded. - - For Monte Carlo sets the central value (member 0) is by default not included when taking - the results for all members (i.e., when using ``grid_values``). """ def __init__(self, name, error_type): @@ -70,15 +65,11 @@ def n_members(self): @property def members(self): - """Return the members of the set, this depends on the error type: - t0: returns only member 0 - MC: skip member 0 - Hessian: return all + """Return the members of the set + the special error type t0 returns only member 0 """ if self.is_t0: return self._lhapdf_set[0:1] - if self._error_type == "replicas": - return self._lhapdf_set[1:] return self._lhapdf_set @property diff --git a/validphys2/src/validphys/pdfgrids.py b/validphys2/src/validphys/pdfgrids.py index 97596d2602..b89d98e4eb 100644 --- a/validphys2/src/validphys/pdfgrids.py +++ b/validphys2/src/validphys/pdfgrids.py @@ -55,6 +55,11 @@ class XPlottingGrid: grid_values: Stats scale: str + def __post_init__(self): + """Enforce grid_values being a Stats instance""" + if not isinstance(self.grid_values, Stats): + raise ValueError("`XPlottingGrid` grid_values can only be instances of `Stats`") + def select_flavour(self, flindex): """Return a new grid for one single flavour""" if isinstance(flindex, str): @@ -66,17 +71,8 @@ def select_flavour(self, flindex): gv = self.grid_values.__class__(new_grid) return dataclasses.replace(self, grid_values=gv, flavours=[flstr]) - def mask_replicas(self, mask): - """Return a copy of XPlottingGrid with the mask applied to the replicas""" - new_grid = self.grid_values.data[mask] - gv = self.grid_values.__class__(new_grid) - return dataclasses.replace(self, grid_values=gv) - - def copy_grid(self, grid_values=None): + def copy_grid(self, grid_values): """Create a copy of the grid with potentially a different set of values""" - if not isinstance(grid_values, Stats): - log.warning("XPlottingGrid being called with a numpy grid, should be using Stats instead!") - grid_values = self.grid_values.__class__(grid_values) return dataclasses.replace(self, grid_values=grid_values) diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index e8e304e8e7..e46ce9bf2e 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -547,8 +547,7 @@ def phi_data(abs_chi2_data): 1410.8849 """ alldata, central, npoints = abs_chi2_data - cv = float(alldata.central_value()) - return (np.sqrt((cv - central) / npoints), npoints) + return (np.sqrt((alldata.error_members().mean() - central) / npoints), npoints) def dataset_inputs_phi_data(dataset_inputs_abs_chi2_data): diff --git a/validphys2/src/validphys/sumrules.py b/validphys2/src/validphys/sumrules.py index 702ef350fd..5196feea16 100644 --- a/validphys2/src/validphys/sumrules.py +++ b/validphys2/src/validphys/sumrules.py @@ -11,24 +11,23 @@ import numpy as np import pandas as pd -import scipy.integrate as integrate +from scipy.integrate import quad from reportengine.table import table from reportengine.checks import check_positive from reportengine.floatformatting import format_error_value_columns from validphys.core import PDF -from validphys.pdfbases import ALL_FLAVOURS, parse_flarr -from validphys.lhapdfset import LHAPDFSet +from validphys.pdfbases import parse_flarr -def _momentum_sum_rule_integrand(x, lpdf:LHAPDFSet, irep, Q): - return sum([lpdf.xfxQ(x, Q=Q, n=irep, fl=fl) for fl in ALL_FLAVOURS]) +def _momentum_sum_rule_integrand(x, lpdf, Q): + xqvals = lpdf.xfxQ(x, Q) + return sum([xqvals[f] for f in lpdf.flavors()]) def _make_momentum_fraction_integrand(fldict): """Make a suitable integrand function, which takes x to be integrated over - and fixed loaded PDF, replica number and Q that computes the momentum - fraction based on ``fldict``. + and a PDF member and Q that computes the momentum fraction based on ``fldict``. The keys of ``fldict`` are free form values corresponding to PDG parton ids (that end up being passed by :py:func:`validphys.pdfbases.parse_flarr` and @@ -49,9 +48,9 @@ def _make_momentum_fraction_integrand(fldict): # Do this outside to aid integration time fldict = {parse_flarr([k])[0]: v for k, v in fldict.items()} - def f(x, lpdf, irep, Q): + def f(x, lpdf, Q): return sum( - multiplier * lpdf.xfxQ(x, Q=Q, n=irep, fl=flavour) + multiplier * lpdf.xfxQ(x, Q)[flavour] for flavour, multiplier in fldict.items() ) @@ -59,8 +58,7 @@ def f(x, lpdf, irep, Q): def _make_pdf_integrand(fldict): """Make a suitable integrand function, which takes x to be integrated over - and fixed loaded PDF, replica number and Q that computes the integrand of - the PDFs based on ``fldict``. + and a PDF member and Q that computes the integrand of the PDFs based on ``fldict``. The keys of ``fldict`` are free form values corresponfing to PDG parton ids (that end up being passed :py:func:`validphys.pdfbases.parse_flarr` and @@ -81,10 +79,10 @@ def _make_pdf_integrand(fldict): # Do this outsde to aid integration time fldict = {parse_flarr([k])[0]: v for k, v in fldict.items()} - def f(x, lpdf, irep, Q): + def f(x, lpdf, Q): return ( sum( - multiplier * lpdf.xfxQ(x, Q=Q, n=irep, fl=flavour) + multiplier * lpdf.xfxQ(x, Q)[flavour] for flavour, multiplier in fldict.items() ) / x @@ -123,24 +121,22 @@ def f(x, lpdf, irep, Q): } +def _integral(rule_f, pdf_member, Q, config=None): + """Integrate `rule_f` for a given `pdf_member` at a given energy + separating the regions of integration. Uses quad. + """ + if config is None: + config = {"limit":1000, "epsabs": 1e-4, "epsrel": 1e-4} + res = 0.0 + lims = [(1e-9, 1e-5), (1e-5, 1e-3), (1e-3, 1)] + for lim in lims: + res += quad(rule_f, *lim, args=(pdf_member, Q), **config)[0] + return res + + def _sum_rules(rules_dict, lpdf, Q): """Compute a SumRulesGrid from the loaded PDF, at Q""" - nmembers = lpdf.n_members - #TODO: Write this in something fast - #If nothing else, at least allocate and store the result contiguously - res = np.zeros((len(rules_dict), nmembers)) - integrands = rules_dict.values() - def integral(f, a, b, irep): - #We increase the limit to capture the log scale fluctuations - return integrate.quad(f, a, b, args=(lpdf, irep, Q), - limit=1000, - epsabs=1e-4, epsrel=1e-4)[0] - - for irep in range(nmembers): - for r, f in enumerate(integrands): - res[r,irep] = (integral(f, 1e-9, 1e-5, irep) + - integral(f, 1e-5, 1e-3, irep) + integral(f, 1e-3, 1, irep)) - return {k: v for k,v in zip(rules_dict, res)} + return {k: [_integral(r, m, Q) for m in lpdf.members] for k,r in rules_dict.items()} @check_positive('Q') @@ -152,6 +148,7 @@ def sum_rules(pdf:PDF, Q:numbers.Real): lpdf = pdf.load() return _sum_rules(KNOWN_SUM_RULES, lpdf, Q) + @check_positive('Q') def central_sum_rules(pdf:PDF, Q:numbers.Real): """Compute the sum rules for the central member, at the scale Q""" @@ -159,7 +156,6 @@ def central_sum_rules(pdf:PDF, Q:numbers.Real): return _sum_rules(KNOWN_SUM_RULES, lpdf, Q) - @check_positive('Q') def unknown_sum_rules(pdf: PDF, Q: numbers.Real): """Compute the following integrals diff --git a/validphys2/src/validphys/tests/baseline/test_dataspecschi2.png b/validphys2/src/validphys/tests/baseline/test_dataspecschi2.png index 0115b2daa3..103835fd4d 100644 Binary files a/validphys2/src/validphys/tests/baseline/test_dataspecschi2.png and b/validphys2/src/validphys/tests/baseline/test_dataspecschi2.png differ diff --git a/validphys2/src/validphys/tests/regressions/test_datasetchi2.csv b/validphys2/src/validphys/tests/regressions/test_datasetchi2.csv index 7f00493b6c..73d8e50cc8 100644 --- a/validphys2/src/validphys/tests/regressions/test_datasetchi2.csv +++ b/validphys2/src/validphys/tests/regressions/test_datasetchi2.csv @@ -1,6 +1,6 @@ test test ndata $\chi^2/ndata$ group dataset -NMC NMC 204 1.6064258130123148 -ATLAS ATLASTTBARTOT 3 1.9377358549444097 -CMS CMSZDIFF12 28 1.8519163102572869 +NMC NMC 204 1.6064258192222478 +ATLAS ATLASTTBARTOT 3 1.9383178404814991 +CMS CMSZDIFF12 28 1.8520435691568384 diff --git a/validphys2/src/validphys/tests/regressions/test_replicachi2data.csv b/validphys2/src/validphys/tests/regressions/test_replicachi2data.csv index a669de4f65..0db9efa215 100644 --- a/validphys2/src/validphys/tests/regressions/test_replicachi2data.csv +++ b/validphys2/src/validphys/tests/regressions/test_replicachi2data.csv @@ -1,6 +1,6 @@ name Total NMC ATLAS CMS npoints 236 204 4 28 -0 1.6331414605961554 1.6064258130123148 1.4642155397440857 1.851916310257288 +0 1.6331643952670938 1.6064258192222478 1.4646775563260181 1.8520435691568402 1 1.7334090365532264 1.734962043323776 1.109948798376749 1.8111600212501475 2 1.6877884536672854 1.6453401923939792 2.065149379329199 1.9431456535639562 3 1.6987177907128304 1.6814300483245213 1.2167235538564396 1.8935276619499943 diff --git a/validphys2/src/validphys/tests/regressions/test_sum_rules_MC.csv b/validphys2/src/validphys/tests/regressions/test_sum_rules_MC.csv index 1493b61173..e90c460b2e 100644 --- a/validphys2/src/validphys/tests/regressions/test_sum_rules_MC.csv +++ b/validphys2/src/validphys/tests/regressions/test_sum_rules_MC.csv @@ -1,15 +1,15 @@ mean std min max Central value -momentum 1.0000359397024803 5.384464994542894e-07 1.000034619691424 1.0000372097117414 1.0000359463817983 -uvalence 1.9997929722405798 0.00028226750040615813 1.9987658750095758 2.000123119074607 1.9997934800108916 -dvalence 0.9997171906346781 0.00028320184172691307 0.9986900055519239 1.0000492249686646 0.9997173761604233 -svalence 0.00024972763167990374 0.0003322524099365173 -0.00042330789669951796 0.00144823410093127 0.0002510958414509573 +momentum 1.0000359402582584 5.358395010408105e-07 1.0000346238340416 1.0000372086565177 1.0000359458599606 +uvalence 1.9997926747574069 0.0002809483175437137 1.998765120611357 2.0001202779127008 1.9997937994497308 +dvalence 0.9997164295681845 0.00028118151759189136 0.9986902415578234 1.000046111974624 0.9997173113369415 +svalence 0.00025025303862949585 0.0003303956500365186 -0.00042417491773433236 0.0014490597353980783 0.0002507254062854847 u momentum fraction 0.2555 0.0021 -1.0 -1.0 -1.0 -ubar momentum fraction 3.387E-2 7.5E-4 -1.0 -1.0 -1.0 +ubar momentum fraction 0.03387 0.00074 -1.0 -1.0 -1.0 d momentum fraction 0.1278 0.0021 -1.0 -1.0 -1.0 dbar momentum fraction 0.0422 0.0012 -1.0 -1.0 -1.0 -s momentum fraction 0.0316 0.0020 -1.0 -1.0 -1.0 +s momentum fraction 0.0316 0.002 -1.0 -1.0 -1.0 sbar momentum fraction 0.0253 0.0012 -1.0 -1.0 -1.0 cp momentum fraction 0.0286 0.0017 -1.0 -1.0 -1.0 g momentum fraction 0.4466 0.0017 -1.0 -1.0 -1.0 T3 0.79 0.23 -1.0 -1.0 -1.0 -T8 4.73 0.70 -1.0 -1.0 -1.0 +T8 4.73 0.7 -1.0 -1.0 -1.0 diff --git a/validphys2/src/validphys/tests/regressions/test_sum_rules_hessian.csv b/validphys2/src/validphys/tests/regressions/test_sum_rules_hessian.csv index 2cbd36e8e6..04168d5838 100644 --- a/validphys2/src/validphys/tests/regressions/test_sum_rules_hessian.csv +++ b/validphys2/src/validphys/tests/regressions/test_sum_rules_hessian.csv @@ -1,8 +1,8 @@ mean std min max Central value -momentum 1.0000359343288223 3.262768320141462e-08 1.0000357928883208 1.0000360043344905 1.0000359428250625 -uvalence 1.999794587352472 3.771059740749569e-05 1.9996273397296627 1.9998792759145556 1.999804641339146 -dvalence 0.999719265014927 3.893093441227586e-05 0.9995458470781201 0.9998028245378152 0.9997279363904001 -svalence 0.00021669265424246442 4.864440927527348e-05 3.0026081505308416e-05 0.000370776050729299 0.0002126701429466457 +momentum 1.0000359346945835 3.249002390618474e-08 1.0000357949386856 1.000036007099919 1.000035943610909 +uvalence 1.9997947975241677 3.774745222364767e-05 1.9996255039155293 1.9998795897064394 1.9998052948252174 +dvalence 0.9997193904389039 3.913237872100671e-05 0.9995455646219639 0.9998033688013618 0.9997281952294002 +svalence 0.00021668009787772745 4.881864767300481e-05 2.8212616664191614e-05 0.00037062725233145966 0.00021200815810076823 u momentum fraction 0.2554 0.00025 -1.0 -1.0 -1.0 ubar momentum fraction 0.03386 0.0001 -1.0 -1.0 -1.0 d momentum fraction 0.1277 0.00029 -1.0 -1.0 -1.0 diff --git a/validphys2/src/validphys/tests/test_core.py b/validphys2/src/validphys/tests/test_core.py index 5dfefc705d..17ca8b3c4c 100644 --- a/validphys2/src/validphys/tests/test_core.py +++ b/validphys2/src/validphys/tests/test_core.py @@ -16,9 +16,8 @@ def test_pdf(pdf_name): assert pdf.isinstalled error_type = pdf.error_type if error_type == "replicas": - assert pdf.get_members() == (len(pdf)-1) assert pdf.error_conf_level is None else: - assert pdf.get_members() == len(pdf) assert isinstance(pdf.error_conf_level, (int, float)) + assert pdf.get_members() == len(pdf) assert pdf.name == pdf._plotname == pdf_name == str(pdf)