From 0d129822e9d8ca2941da6ce8be8174218ce08ef2 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Mon, 14 Feb 2022 13:06:11 +0100 Subject: [PATCH 1/5] use error_members instead of rawdata --- validphys2/src/validphys/calcutils.py | 4 +-- .../validphys/closuretest/closure_results.py | 6 ++--- .../src/validphys/closuretest/multiclosure.py | 8 +++--- .../validphys/closuretest/multiclosure_pdf.py | 2 +- .../closuretest/multiclosure_pseudodata.py | 4 +-- validphys2/src/validphys/correlations.py | 20 +++++++-------- validphys2/src/validphys/covmats.py | 2 +- validphys2/src/validphys/dataplots.py | 2 +- validphys2/src/validphys/deltachi2.py | 12 ++++----- validphys2/src/validphys/pdfgrids.py | 4 +-- validphys2/src/validphys/pdfplots.py | 6 ++--- validphys2/src/validphys/replica_selector.py | 4 +-- validphys2/src/validphys/results.py | 25 +++++++++++++------ .../src/validphys/tests/test_closuretest.py | 1 + 14 files changed, 53 insertions(+), 47 deletions(-) diff --git a/validphys2/src/validphys/calcutils.py b/validphys2/src/validphys/calcutils.py index 2601e084d6..f020623b0b 100644 --- a/validphys2/src/validphys/calcutils.py +++ b/validphys2/src/validphys/calcutils.py @@ -67,8 +67,8 @@ def calc_chi2(sqrtcov, diffs): return np.einsum('i...,i...->...', vec,vec) def all_chi2(results): - """Return the chi² for all elements in the result. Note that the - interpretation of the result will depend on the PDF error type""" + """Return the chi² for all elements in the result, regardless of the Stats class + Note that the interpretation of the result will depend on the PDF error type""" data_result, th_result = results diffs = th_result.rawdata - data_result.central_value[:,np.newaxis] return calc_chi2(sqrtcov=data_result.sqrtcovmat, diffs=diffs) diff --git a/validphys2/src/validphys/closuretest/closure_results.py b/validphys2/src/validphys/closuretest/closure_results.py index 857720fe4a..cfda6ae555 100644 --- a/validphys2/src/validphys/closuretest/closure_results.py +++ b/validphys2/src/validphys/closuretest/closure_results.py @@ -122,7 +122,7 @@ def bootstrap_bias_experiment( """ dt_ct, th_ct = dataset_inputs_results ((_, th_ul),) = underlying_dataset_inputs_results - th_ct_boot_cv = bootstrap_values(th_ct.rawdata, bootstrap_samples) + th_ct_boot_cv = bootstrap_values(th_ct.error_members, bootstrap_samples) boot_diffs = th_ct_boot_cv - th_ul.central_value[:, np.newaxis] boot_bias = calc_chi2(dt_ct.sqrtcovmat, boot_diffs) / len(dt_ct) return boot_bias @@ -191,7 +191,7 @@ def variance_dataset(results, fit, use_fitcommondata): """ dt, th = results - diff = th.central_value[:, np.newaxis] - th.rawdata + diff = th.central_value[:, np.newaxis] - th.error_members var_unnorm = calc_chi2(dt.sqrtcovmat, diff).mean() return VarianceData(var_unnorm, len(th)) @@ -210,7 +210,7 @@ def bootstrap_variance_experiment(dataset_inputs_results, bootstrap_samples=500) normalised to the number of data in the experiment. """ dt_ct, th_ct = dataset_inputs_results - diff = th_ct.central_value[:, np.newaxis] - th_ct.rawdata + diff = th_ct.central_value[:, np.newaxis] - th_ct.error_members var_unnorm_boot = bootstrap_values( diff, bootstrap_samples, diff --git a/validphys2/src/validphys/closuretest/multiclosure.py b/validphys2/src/validphys/closuretest/multiclosure.py index c48388b238..052bea56e8 100644 --- a/validphys2/src/validphys/closuretest/multiclosure.py +++ b/validphys2/src/validphys/closuretest/multiclosure.py @@ -130,7 +130,7 @@ def fits_dataset_bias_variance( """ closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader # The dimentions here are (fit, data point, replica) - reps = np.asarray([th.rawdata[:, :_internal_max_reps] for th in closures_th]) + reps = np.asarray([th.error_members[:, :_internal_max_reps] for th in closures_th]) # take mean across replicas - since we might have changed no. of reps centrals = reps.mean(axis=2) # place bins on first axis @@ -226,7 +226,7 @@ def dataset_xi(internal_multiclosure_dataset_loader): """ closures_th, law_th, covmat, _ = internal_multiclosure_dataset_loader - replicas = np.asarray([th.rawdata for th in closures_th]) + replicas = np.asarray([th.error_members for th in closures_th]) centrals = np.mean(replicas, axis=-1) underlying = law_th.central_value @@ -341,7 +341,7 @@ def _bootstrap_multiclosure_fits( # construct proxy fits theory predictions for fit_th in fit_boot_th: rep_boot_index = rng.choice(n_rep_max, size=n_rep, replace=use_repeats) - boot_ths.append(BootstrappedTheoryResult(fit_th.rawdata[:, rep_boot_index])) + boot_ths.append(BootstrappedTheoryResult(fit_th.error_members[:, rep_boot_index])) return (boot_ths, *input_tuple) @@ -741,7 +741,7 @@ def dataset_fits_bias_replicas_variance_samples( """ closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader # The dimentions here are (fit, data point, replica) - reps = np.asarray([th.rawdata[:, :_internal_max_reps] for th in closures_th]) + reps = np.asarray([th.error_members[:, :_internal_max_reps] for th in closures_th]) # take mean across replicas - since we might have changed no. of reps centrals = reps.mean(axis=2) # place bins on first axis diff --git a/validphys2/src/validphys/closuretest/multiclosure_pdf.py b/validphys2/src/validphys/closuretest/multiclosure_pdf.py index a8d0f5f3f8..040c17b2a8 100644 --- a/validphys2/src/validphys/closuretest/multiclosure_pdf.py +++ b/validphys2/src/validphys/closuretest/multiclosure_pdf.py @@ -90,7 +90,7 @@ def xi_grid_values(xi_pdfgrids): glu_sin_grid, nonsin_grid = xi_pdfgrids # grid values have shape: replica, flavour, x # concatenate along flavour - return np.concatenate((glu_sin_grid.grid_values.data, nonsin_grid.grid_values.data), axis=1) + return np.concatenate((glu_sin_grid.grid_values.error_members(), nonsin_grid.grid_values.error_members()), axis=1) def underlying_xi_grid_values( diff --git a/validphys2/src/validphys/closuretest/multiclosure_pseudodata.py b/validphys2/src/validphys/closuretest/multiclosure_pseudodata.py index 22b2c78a1a..c2ea5a405f 100644 --- a/validphys2/src/validphys/closuretest/multiclosure_pseudodata.py +++ b/validphys2/src/validphys/closuretest/multiclosure_pseudodata.py @@ -126,8 +126,8 @@ def experiments_closure_pseudodata_estimators_table( fits_erep_delta_chi2 = [] for i_fit, fit_th in enumerate(closures_th): # some of these could be done outside of loop, but easier to do here. - th_replicas = fit_th.rawdata - th_central = np.mean(th_replicas, axis=-1) + th_replicas = fit_th.error_members + th_central = fit_th.central_value dt_replicas = fits_reps_pseudo[i_fit].xs(exp_name, axis=0, level=0).to_numpy() dt_central = fits_cv.xs(exp_name, axis=0, level=0).iloc[:, i_fit].to_numpy() diff --git a/validphys2/src/validphys/correlations.py b/validphys2/src/validphys/correlations.py index 2e6132c3cf..7b1f6b8920 100644 --- a/validphys2/src/validphys/correlations.py +++ b/validphys2/src/validphys/correlations.py @@ -27,8 +27,8 @@ def _basic_obs_pdf_correlation(pdf_stats, obs_stats): (nbins x nf x nx), compatible with grid_values, upon changing replicas->bins. """ - x = pdf_stats.data - pdf_stats.central_value() - y = obs_stats.data - obs_stats.central_value() + x = pdf_stats.error_members() - pdf_stats.central_value() + y = obs_stats.error_members() - obs_stats.central_value() #We want to compute: #sum(x*y)/(norm(x)*norm(y)) @@ -53,26 +53,24 @@ def _basic_obs_obs_correlation(obs1, obs2): The result is (nbins1 , nbins2), a mareix containing the correlation coefficients between the two sets. """ - x = (obs1.data - obs1.central_value()).T - y = (obs2.data - obs2.central_value()) + x = (obs1.error_members() - obs1.central_value()).T + y = obs2.error_members() - obs2.central_value() return x@y/np.outer(la.norm(x,axis=1),la.norm(y,axis=0)) -#TODO: Implement for other error types. Do not use the rawdata. -@check_pdf_is_montecarlo def obs_pdf_correlations(pdf, results, xplotting_grid): """Return the correlations between each point in a dataset and the PDF values on a grid of (x,f) points in a format similar to `xplotting_grid`.""" - _ , th = results + _, th = results corrs = pdf.stats_class(_basic_obs_pdf_correlation(xplotting_grid.grid_values, th.stats)) return xplotting_grid.copy_grid(grid_values=corrs) -corrpair_results = collect('results', ['corrpair']) -corrpair_datasets = collect('dataset', ['corrpair']) +corrpair_results = collect("results", ["corrpair"]) +corrpair_datasets = collect("dataset", ["corrpair"]) + -@check_pdf_is_montecarlo def obs_obs_correlations(pdf, corrpair_results): """Return the theoretical correlation matrix between a pair of observables.""" - (_,th1), (_,th2) = corrpair_results + (_, th1), (_, th2) = corrpair_results return _basic_obs_obs_correlation(th1.stats, th2.stats) diff --git a/validphys2/src/validphys/covmats.py b/validphys2/src/validphys/covmats.py index d4d389ea6b..1d4c73c424 100644 --- a/validphys2/src/validphys/covmats.py +++ b/validphys2/src/validphys/covmats.py @@ -531,7 +531,7 @@ def pdferr_plus_covmat(dataset, pdf, covmat_t0_considered): True """ th = ThPredictionsResult.from_convolution(pdf, dataset) - pdf_cov = np.cov(th.rawdata, rowvar=True) + pdf_cov = np.cov(th.error_members, rowvar=True) return pdf_cov + covmat_t0_considered diff --git a/validphys2/src/validphys/dataplots.py b/validphys2/src/validphys/dataplots.py index b9bfd1def3..925f974b37 100644 --- a/validphys2/src/validphys/dataplots.py +++ b/validphys2/src/validphys/dataplots.py @@ -871,7 +871,7 @@ def plot_smpdf(pdf, dataset, obs_pdf_correlations, mark_threshold:float=0.9): basis = obs_pdf_correlations.basis - fullgrid = obs_pdf_correlations.grid_values.data + fullgrid = obs_pdf_correlations.grid_values.error_members() fls = obs_pdf_correlations.flavours x = obs_pdf_correlations.xgrid diff --git a/validphys2/src/validphys/deltachi2.py b/validphys2/src/validphys/deltachi2.py index bba69a87e5..665b9680f7 100644 --- a/validphys2/src/validphys/deltachi2.py +++ b/validphys2/src/validphys/deltachi2.py @@ -140,7 +140,7 @@ def pos_neg_xplotting_grids(delta_chi2_hessian, xplotting_grid): """ positive_eigenvalue_mask = delta_chi2_hessian >= 0 - # include replica 0 in both new grids + # include replica 0 in both new grids, thus use the raw .data pos_mask = np.append(True, positive_eigenvalue_mask) neg_mask = np.append(True, ~positive_eigenvalue_mask) @@ -210,12 +210,10 @@ def draw(self, pdf, grid, flstate): labels = flstate.labels handles = flstate.handles - # pick all replicas grid_values for the flavour flindex. stats_class is a method - # of the PDF class, which returns the stats calculator (object) for the pdf error type. - # Basically stats is an object which says what is the type class of the replicas: - # MCStats, HessianStats, SymmHessianStats. In this way validphys use the right methods - # to compute statistical values. - stats = pdf.stats_class(grid.grid_values.data[:, flindex, :]) + # Create a copy of the `Stats` instance of the grid + # with only the flavours we are interested in + gv = grid.grid_values.data + stats = grid.grid_values.copy_grid(grid_values=gv[:, flindex, :]) # Ignore spurious normalization warnings with warnings.catch_warnings(): diff --git a/validphys2/src/validphys/pdfgrids.py b/validphys2/src/validphys/pdfgrids.py index ae1eabd976..5b97e2ed6a 100644 --- a/validphys2/src/validphys/pdfgrids.py +++ b/validphys2/src/validphys/pdfgrids.py @@ -92,9 +92,9 @@ def xplotting_grid(pdf:PDF, Q:(float,int), xgrid=None, basis:(str, Basis)='flavo raise TypeError(f"Invalid xgrid {xgrid!r}") gv = basis.grid_values(pdf, flavours, xgrid, Q) #Eliminante Q axis - gv = pdf.stats_class(gv.reshape(gv.shape[:-1])) + stats_gv = pdf.stats_class(gv.reshape(gv.shape[:-1])) - res = XPlottingGrid(Q, basis, flavours, xgrid, gv, scale) + res = XPlottingGrid(Q, basis, flavours, xgrid, stats_gv, scale) return res xplotting_grids = collect(xplotting_grid, ('pdfs',)) diff --git a/validphys2/src/validphys/pdfplots.py b/validphys2/src/validphys/pdfplots.py index 3b668dba45..fe8ba91af5 100644 --- a/validphys2/src/validphys/pdfplots.py +++ b/validphys2/src/validphys/pdfplots.py @@ -186,7 +186,7 @@ def draw(self, pdf, grid, flstate): ax = flstate.ax next_prop = next(ax._get_lines.prop_cycler) color = next_prop['color'] - gv = grid.grid_values.data[:,flstate.flindex,:] + gv = grid.grid_values.error_members()[:,flstate.flindex,:] ax.plot(grid.xgrid, gv.T, alpha=0.2, linewidth=0.5, color=color, zorder=1) stats = pdf.stats_class(gv) @@ -223,7 +223,7 @@ def get_ylabel(self, parton_name): def draw(self, pdf, grid, flstate): ax = flstate.ax flindex = flstate.flindex - gv = grid.grid_values.data[:,flindex,:] + gv = grid.grid_values.error_members()[:,flindex,:] stats = pdf.stats_class(gv) res = stats.std_error() @@ -330,7 +330,7 @@ def draw(self, pdf, grid, flstate): next_prop = next(pcycler) color = next_prop['color'] - gv = grid.grid_values.data[flindex,:] + gv = grid.grid_values.error_members()[flindex,:] ax.plot(grid.xgrid, gv, color=color, label='$%s$' % flstate.parton_name) return gv diff --git a/validphys2/src/validphys/replica_selector.py b/validphys2/src/validphys/replica_selector.py index 08a82bdf1c..0bdafaf895 100644 --- a/validphys2/src/validphys/replica_selector.py +++ b/validphys2/src/validphys/replica_selector.py @@ -169,7 +169,7 @@ def not_outlier_mask( """Return a boolean mask with the replicas that are never outside of the given percentile in the constrained region""" lim = unconstrained_region_index - gv = xplotting_grid.grid_values.data[:, :, lim:] + gv = xplotting_grid.grid_values.error_members()[:, :, lim:] delta = nsigma * np.std(gv, axis=0) mean = np.mean(gv, axis=0) return ((gv >= mean - delta) & (gv <= mean+delta)).all(axis=2).all(axis=1) @@ -309,7 +309,7 @@ def draw(self, pdf, grid, flstate): #PDFs, and instead would do something different next_prop = next(ax._get_lines.prop_cycler) color = next_prop['color'] - gv = grid.grid_values.data[filt, flstate.flindex, :] + gv = grid.grid_values.error_members()[filt, flstate.flindex, :] ax.plot( grid.xgrid, gv.T, diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index 87f95450cd..01531a9e65 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -56,6 +56,11 @@ def rawdata(self): """Returns the raw data with shape (Npoints, Npdf)""" return self.stats.data.T + @property + def error_members(self): + """Returns the error members with shape (Npoints, Npdf)""" + return self.stats.error_members().T + @property def central_value(self): return self.stats.central_value() @@ -71,6 +76,7 @@ def __len__(self): class DataResult(StatsResult): """Holds the relevant information from a given dataset""" + def __init__(self, dataobj, covmat, sqrtcovmat): self._central_value = dataobj.get_cv() stats = Stats(self._central_value) @@ -101,8 +107,8 @@ def sqrtcovmat(self): class ThPredictionsResult(StatsResult): - """Class holding theory prediction, inherits from StatsResult - """ + """Class holding theory prediction, inherits from StatsResult""" + def __init__(self, dataobj, stats_class, label=None): self.stats_class = stats_class self.label = label @@ -135,8 +141,10 @@ def from_convolution(cls, pdf, dataset): try: th_predictions = pd.concat([predictions(d, pdf) for d in datasets]) except PredictionsRequireCutsError as e: - raise PredictionsRequireCutsError("Predictions from FKTables always require cuts, " - "if you want to use the fktable intrinsic cuts set `use_cuts: 'internal'`") from e + raise PredictionsRequireCutsError( + "Predictions from FKTables always require cuts, " + "if you want to use the fktable intrinsic cuts set `use_cuts: 'internal'`" + ) from e label = cls.make_label(pdf, dataset) @@ -158,6 +166,7 @@ def from_convolution(cls, pdf, posset): procs_data = collect("data", ("group_dataset_inputs_by_process",)) + def groups_index(groups_data): """Return a pandas.MultiIndex with levels for group, dataset and point respectively, the group is determined by a key in the dataset metadata, and @@ -228,7 +237,7 @@ def group_result_table_no_table(groups_results, groups_index): ): replicas = ( ("rep_%05d" % (i + 1), th_rep) - for i, th_rep in enumerate(th.rawdata[index, :]) + for i, th_rep in enumerate(th.error_members[index, :]) ) result_records.append( @@ -583,7 +592,7 @@ def dataset_inputs_bootstrap_phi_data(dataset_inputs_results, bootstrap_samples= For more information on how phi is calculated see `phi_data` """ dt, th = dataset_inputs_results - diff = np.array(th.rawdata - dt.central_value[:, np.newaxis]) + diff = np.array(th.error_members - dt.central_value[:, np.newaxis]) phi_resample = bootstrap_values( diff, bootstrap_samples, @@ -603,7 +612,7 @@ def dataset_inputs_bootstrap_chi2_central( a different value can be specified in the runcard. """ dt, th = dataset_inputs_results - diff = np.array(th.rawdata - dt.central_value[:, np.newaxis]) + diff = np.array(th.error_members - dt.central_value[:, np.newaxis]) cchi2 = lambda x, y: calc_chi2(y, x.mean(axis=1)) chi2_central_resample = bootstrap_values( diff, @@ -698,7 +707,7 @@ def positivity_predictions_data_result(pdf, posdataset): def count_negative_points(possets_predictions): """Return the number of replicas with negative predictions for each bin in the positivity observable.""" - return np.sum([(r.rawdata < 0).sum(axis=0) for r in possets_predictions], axis=0) + return np.sum([(r.error_members < 0).sum(axis=0) for r in possets_predictions], axis=0) chi2_stat_labels = { diff --git a/validphys2/src/validphys/tests/test_closuretest.py b/validphys2/src/validphys/tests/test_closuretest.py index 68fc91dc6e..fd89fb06ee 100644 --- a/validphys2/src/validphys/tests/test_closuretest.py +++ b/validphys2/src/validphys/tests/test_closuretest.py @@ -14,6 +14,7 @@ class TestResult: def __init__(self, central_value, rawdata=None): self.central_value = central_value self.rawdata = rawdata + self.error_members = rawdata self.ndata = len(central_value) self.sqrtcovmat = np.identity(self.ndata) From 0365995c0648253fe96b68ed7cec3c44dce03c8a Mon Sep 17 00:00:00 2001 From: juacrumar Date: Mon, 14 Feb 2022 18:39:19 +0100 Subject: [PATCH 2/5] change data to error_member as well --- validphys2/src/validphys/arclength.py | 2 +- validphys2/src/validphys/dataplots.py | 2 +- validphys2/src/validphys/deltachi2.py | 2 +- validphys2/src/validphys/paramfits/plots.py | 2 +- validphys2/src/validphys/pdfplots.py | 4 +++- validphys2/src/validphys/results.py | 9 ++------- validphys2/src/validphys/reweighting.py | 4 ++-- validphys2/src/validphys/tests/test_pyfkdata.py | 3 +++ 8 files changed, 14 insertions(+), 14 deletions(-) diff --git a/validphys2/src/validphys/arclength.py b/validphys2/src/validphys/arclength.py index 3484c5742e..97e5f5d8cc 100644 --- a/validphys2/src/validphys/arclength.py +++ b/validphys2/src/validphys/arclength.py @@ -49,7 +49,7 @@ def arc_lengths( # Finite diff. step-size, x-grid eps = (b - a) / npoints ixgrid = xgrid(a, b, "linear", npoints) - # PDFs evaluated on grid + # PDFs evaluated on grid, use the entire thing, the stats class will chose later xfgrid = xplotting_grid(pdf, Q, ixgrid, basis, flavours).grid_values.data * ixgrid[1] fdiff = np.diff(xfgrid) / eps # Compute forward differences res += integrate.simps(1 + np.square(fdiff), ixgrid[1][1:]) diff --git a/validphys2/src/validphys/dataplots.py b/validphys2/src/validphys/dataplots.py index 925f974b37..725fb3ec65 100644 --- a/validphys2/src/validphys/dataplots.py +++ b/validphys2/src/validphys/dataplots.py @@ -68,7 +68,7 @@ def _chi2_distribution_plots(chi2_data, stats, pdf, plot_type): ax.set_xlabel(r"Replica $\chi^2$") if plot_type == "hist": - ax.hist(alldata.data, label=label, zorder=100) + ax.hist(alldata.error_members(), label=label, zorder=100) elif plot_type == "kde": # We need the squeeze here to change shape from (x, 1) to (x,) ax = plotutils.kde_plot(alldata.data.squeeze(), label=label) diff --git a/validphys2/src/validphys/deltachi2.py b/validphys2/src/validphys/deltachi2.py index 665b9680f7..7bcd028ce1 100644 --- a/validphys2/src/validphys/deltachi2.py +++ b/validphys2/src/validphys/deltachi2.py @@ -213,7 +213,7 @@ def draw(self, pdf, grid, flstate): # Create a copy of the `Stats` instance of the grid # with only the flavours we are interested in gv = grid.grid_values.data - stats = grid.grid_values.copy_grid(grid_values=gv[:, flindex, :]) + stats = grid(grid_values=gv[:, flindex, :]).grid_values # Ignore spurious normalization warnings with warnings.catch_warnings(): diff --git a/validphys2/src/validphys/paramfits/plots.py b/validphys2/src/validphys/paramfits/plots.py index c78caae70c..77f6a3060f 100644 --- a/validphys2/src/validphys/paramfits/plots.py +++ b/validphys2/src/validphys/paramfits/plots.py @@ -778,7 +778,7 @@ def plot_total_as_distribution_dataspecs( dataspecs_parabolic_as_determination_for_total, dataspecs_speclabel): #Remember that *_for_total is a len 1 list, so take the first element. - kde_plot(dist[0].data, ax=ax, label=label) + kde_plot(dist[0].error_members(), ax=ax, label=label) ax.set_xlabel(r"$\alpha_S$") ax.legend() return fig diff --git a/validphys2/src/validphys/pdfplots.py b/validphys2/src/validphys/pdfplots.py index fe8ba91af5..1e13305d4a 100644 --- a/validphys2/src/validphys/pdfplots.py +++ b/validphys2/src/validphys/pdfplots.py @@ -410,7 +410,9 @@ def draw(self, pdf, grid, flstate): hatchit = flstate.hatchit labels = flstate.labels handles = flstate.handles - stats = pdf.stats_class(grid.grid_values.data[:,flindex,:]) + # Take only the flavours we are interested in + gv = grid.grid_values.data + stats = grid.copy_grid(grid_values=gv[:, flindex, :]).grid_values pcycler = ax._get_lines.prop_cycler #This is ugly but can't think of anything better diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index 01531a9e65..f8857749df 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -547,7 +547,7 @@ def phi_data(abs_chi2_data): 1410.8849 """ alldata, central, npoints = abs_chi2_data - return (np.sqrt((alldata.data.mean() - central) / npoints), npoints) + return (np.sqrt((alldata.central_value() - central) / npoints), npoints) def dataset_inputs_phi_data(dataset_inputs_abs_chi2_data): @@ -624,11 +624,6 @@ def dataset_inputs_bootstrap_chi2_central( return chi2_central_resample -def _chs_per_replica(chs): - th, _, l = chs - return th.data.ravel() / l - - @table def predictions_by_kinematics_table(results, kinematics_table_notable): """Return a table combining the output of @@ -1041,7 +1036,7 @@ def total_chi2_data_from_experiments(experiments_chi2_data, pdf): ) # we sum data, not error_members here because we feed it back into the stats - # class, some stats class error_members cuts off the CV + # class, the stats class error_members cuts off the CV if needed data_sum = np.sum( [exp_chi2_data.replica_result.data for exp_chi2_data in experiments_chi2_data], axis=0 diff --git a/validphys2/src/validphys/reweighting.py b/validphys2/src/validphys/reweighting.py index 1471ff319e..515805d42a 100644 --- a/validphys2/src/validphys/reweighting.py +++ b/validphys2/src/validphys/reweighting.py @@ -50,7 +50,7 @@ def nnpdf_weights_numerator(chi2_data_for_reweighting_experiments): """Compute the numerator of the NNPDF weights. This is useful for P(α), which uses a different normalization.""" total_ndata = 0 - chi2s = np.zeros_like(chi2_data_for_reweighting_experiments[0][0].data) + chi2s = np.zeros_like(chi2_data_for_reweighting_experiments[0][0].error_members()) for data in chi2_data_for_reweighting_experiments: res, _, ndata = data total_ndata += ndata @@ -102,7 +102,7 @@ def reweighting_stats(pdf, nnpdf_weights, p_alpha_study): return pd.Series(result, index=result.keys(), name='Reweighting stats') def _get_p_alpha_val(alpha, chi2_data_for_reweighting_experiments): - new_chi2 = [((type(res)(res.data/alpha**2)), central, ndata) + new_chi2 = [((type(res)(res.error_members()/alpha**2)), central, ndata) for (res,central,ndata) in chi2_data_for_reweighting_experiments] diff --git a/validphys2/src/validphys/tests/test_pyfkdata.py b/validphys2/src/validphys/tests/test_pyfkdata.py index 37c45720c9..aed335d2e2 100644 --- a/validphys2/src/validphys/tests/test_pyfkdata.py +++ b/validphys2/src/validphys/tests/test_pyfkdata.py @@ -68,6 +68,7 @@ def test_predictions(pdf_name): ds = l.check_dataset(**daset, theoryid=THEORYID) preds = predictions(ds, pdf) core_predictions = ThPredictionsResult.from_convolution(pdf, ds) + # Uses rawdata since we want to check all members for which we computed the convolution assert_allclose(preds.values, core_predictions.rawdata, atol=1e-8, rtol=1e-3) # Now check that the stats class does the right thing with the data cv_predictions = central_predictions(ds, pdf).squeeze() @@ -75,6 +76,8 @@ def test_predictions(pdf_name): # rtol to 1e-2 due to DYE906R and DYE906_D for MC sets # TODO: check whether this tolerance can be decreased when using double precision assert_allclose(cv_predictions, stats_predictions.central_value(), rtol=1e-2) + assert_allclose(core_predictions.error_members, stats_predictions.error_members().T, rtol=1e-3) + assert_allclose(core_predictions.central_value, stats_predictions.central_value(), rtol=1e-2) @pytest.mark.parametrize("pdf_name", [PDF, HESSIAN_PDF]) From 95857f01ccd087cacb46624080f2122488d79562 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Tue, 15 Feb 2022 10:37:54 +0100 Subject: [PATCH 3/5] fix phi so it still returns a float --- validphys2/src/validphys/results.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py index f8857749df..5060ac9455 100644 --- a/validphys2/src/validphys/results.py +++ b/validphys2/src/validphys/results.py @@ -541,13 +541,14 @@ def dataset_inputs_abs_chi2_data(dataset_inputs_results): def phi_data(abs_chi2_data): """Calculate phi using values returned by `abs_chi2_data`. - Returns tuple of (phi, numpoints) + Returns tuple of (float, int): (phi, numpoints) For more information on how phi is calculated see Eq.(24) in 1410.8849 """ alldata, central, npoints = abs_chi2_data - return (np.sqrt((alldata.central_value() - central) / npoints), npoints) + cv = float(alldata.central_value()) + return (np.sqrt((cv - central) / npoints), npoints) def dataset_inputs_phi_data(dataset_inputs_abs_chi2_data): From df094d5b8fbc1ac3744b01433552e83d0a81a386 Mon Sep 17 00:00:00 2001 From: juacrumar Date: Tue, 15 Feb 2022 13:04:38 +0100 Subject: [PATCH 4/5] add functions to xplotting grid to select flavours and/or replicas --- n3fit/src/n3fit/tests/test_vpinterface.py | 4 ++-- validphys2/src/validphys/deltachi2.py | 9 +++----- validphys2/src/validphys/pdfgrids.py | 27 ++++++++++++++++++----- validphys2/src/validphys/pdfplots.py | 17 +++++++------- 4 files changed, 36 insertions(+), 21 deletions(-) diff --git a/n3fit/src/n3fit/tests/test_vpinterface.py b/n3fit/src/n3fit/tests/test_vpinterface.py index 26e77ce2cd..255a2acb82 100644 --- a/n3fit/src/n3fit/tests/test_vpinterface.py +++ b/n3fit/src/n3fit/tests/test_vpinterface.py @@ -68,7 +68,7 @@ def test_vpinterface(): res_2 = xplotting_grid(fit_2, 1.6, xgrid) distances = distance_grids([fit_1, fit_2], [res_1, res_2], 0) assert len(distances) == 2 - assert distances[0].grid_values.data.shape == (8, 40) - assert distances[1].grid_values.data.shape == (8, 40) + assert distances[0].grid_values.data.shape == (1, 8, 40) + assert distances[1].grid_values.data.shape == (1, 8, 40) np.testing.assert_allclose(distances[0].grid_values.data, 0.0) assert not np.allclose(distances[1].grid_values.data, 0.0) diff --git a/validphys2/src/validphys/deltachi2.py b/validphys2/src/validphys/deltachi2.py index 7bcd028ce1..2ba7297458 100644 --- a/validphys2/src/validphys/deltachi2.py +++ b/validphys2/src/validphys/deltachi2.py @@ -140,15 +140,12 @@ def pos_neg_xplotting_grids(delta_chi2_hessian, xplotting_grid): """ positive_eigenvalue_mask = delta_chi2_hessian >= 0 - # include replica 0 in both new grids, thus use the raw .data + # The masks do not include replica 0, add it in both grids pos_mask = np.append(True, positive_eigenvalue_mask) neg_mask = np.append(True, ~positive_eigenvalue_mask) - pos_grid = xplotting_grid.grid_values.data[pos_mask] - neg_grid = xplotting_grid.grid_values.data[neg_mask] - - pos_xplotting_grid = xplotting_grid.copy_grid(grid_values=pos_grid) - neg_xplotting_grid = xplotting_grid.copy_grid(grid_values=neg_grid) + pos_xplotting_grid = xplotting_grid.mask_replicas(pos_mask) + neg_xplotting_grid = xplotting_grid.mask_replicas(neg_mask) return [xplotting_grid, pos_xplotting_grid, neg_xplotting_grid] diff --git a/validphys2/src/validphys/pdfgrids.py b/validphys2/src/validphys/pdfgrids.py index 5b97e2ed6a..97596d2602 100644 --- a/validphys2/src/validphys/pdfgrids.py +++ b/validphys2/src/validphys/pdfgrids.py @@ -55,6 +55,23 @@ class XPlottingGrid: grid_values: Stats scale: str + def select_flavour(self, flindex): + """Return a new grid for one single flavour""" + if isinstance(flindex, str): + flstr = flindex + flindex = self.flavours.index(flindex) + else: + flstr = self.flavours[flindex] + new_grid = self.grid_values.data[:, flindex] + gv = self.grid_values.__class__(new_grid) + return dataclasses.replace(self, grid_values=gv, flavours=[flstr]) + + def mask_replicas(self, mask): + """Return a copy of XPlottingGrid with the mask applied to the replicas""" + new_grid = self.grid_values.data[mask] + gv = self.grid_values.__class__(new_grid) + return dataclasses.replace(self, grid_values=gv) + def copy_grid(self, grid_values=None): """Create a copy of the grid with potentially a different set of values""" if not isinstance(grid_values, Stats): @@ -263,7 +280,7 @@ def distance_grids(pdfs, xplotting_grids, normalize_to:(int,str,type(None))=None if pdf == pdfs[normalize_to]: # Zero the PDF we are normalizing against - pdf_zero = pdf.stats_class(np.zeros_like(gr2_stats.data[0])) + pdf_zero = pdf.stats_class(np.zeros_like(gr2_stats.data[0:1])) newgrid = grid.copy_grid(grid_values=pdf_zero) newgrids.append(newgrid) continue @@ -273,8 +290,8 @@ def distance_grids(pdfs, xplotting_grids, normalize_to:(int,str,type(None))=None sg1 = g_stats.std_error() N1 = pdf.get_members() - # the distance definition - distance = pdf.stats_class(np.sqrt((cv1-cv2)**2/(sg1**2/N1+sg2**2/N2))) + # Wrap the distance into a Stats (1, flavours, points) + distance = Stats([np.sqrt((cv1-cv2)**2/(sg1**2/N1+sg2**2/N2))]) newgrid = grid.copy_grid(grid_values=distance) newgrids.append(newgrid) @@ -316,8 +333,8 @@ def variance_distance_grids(pdfs, xplotting_grids, normalize_to:(int,str,type(No N1 = pdf.get_members() s1 = (mo1-(N1-3)/(N1-1)*sg1**4)/N1 - # the distance definition - variance_distance = pdf.stats_class(np.sqrt((sg1**2-sg2**2)**2/(s1+s2))) + # Wrap the distance into a Stats (1, flavours, points) + variance_distance = Stats([np.sqrt((sg1**2-sg2**2)**2/(s1+s2))]) newgrid = grid.copy_grid(grid_values=variance_distance) newgrids.append(newgrid) diff --git a/validphys2/src/validphys/pdfplots.py b/validphys2/src/validphys/pdfplots.py index 1e13305d4a..b75a3dacec 100644 --- a/validphys2/src/validphys/pdfplots.py +++ b/validphys2/src/validphys/pdfplots.py @@ -186,10 +186,11 @@ def draw(self, pdf, grid, flstate): ax = flstate.ax next_prop = next(ax._get_lines.prop_cycler) color = next_prop['color'] - gv = grid.grid_values.error_members()[:,flstate.flindex,:] + flavour_grid = grid.select_flavour(flstate.flindex) + stats = flavour_grid.grid_values + gv = stats.data ax.plot(grid.xgrid, gv.T, alpha=0.2, linewidth=0.5, color=color, zorder=1) - stats = pdf.stats_class(gv) ax.plot(grid.xgrid, stats.central_value(), color=color, linewidth=2, label=pdf.label) @@ -223,8 +224,7 @@ def get_ylabel(self, parton_name): def draw(self, pdf, grid, flstate): ax = flstate.ax flindex = flstate.flindex - gv = grid.grid_values.error_members()[:,flindex,:] - stats = pdf.stats_class(gv) + stats = grid.select_flavour(flindex).grid_values res = stats.std_error() @@ -330,7 +330,10 @@ def draw(self, pdf, grid, flstate): next_prop = next(pcycler) color = next_prop['color'] - gv = grid.grid_values.error_members()[flindex,:] + # The grid for the distance is (1, flavours, points) + # take only the flavour we are interested in + gv = grid.select_flavour(flindex).grid_values.data.squeeze() + ax.plot(grid.xgrid, gv, color=color, label='$%s$' % flstate.parton_name) return gv @@ -406,13 +409,11 @@ def setup_flavour(self, flstate): def draw(self, pdf, grid, flstate): ax = flstate.ax - flindex = flstate.flindex hatchit = flstate.hatchit labels = flstate.labels handles = flstate.handles # Take only the flavours we are interested in - gv = grid.grid_values.data - stats = grid.copy_grid(grid_values=gv[:, flindex, :]).grid_values + stats = grid.select_flavour(flstate.flindex).grid_values pcycler = ax._get_lines.prop_cycler #This is ugly but can't think of anything better From 420ccc1dbc75cfde032542c52ce68dd0ea2f048a Mon Sep 17 00:00:00 2001 From: "Juan M. Cruz-Martinez" Date: Wed, 16 Feb 2022 15:16:49 +0100 Subject: [PATCH 5/5] Apply suggestions from code review --- validphys2/src/validphys/arclength.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validphys2/src/validphys/arclength.py b/validphys2/src/validphys/arclength.py index 97e5f5d8cc..f380d9fb36 100644 --- a/validphys2/src/validphys/arclength.py +++ b/validphys2/src/validphys/arclength.py @@ -49,7 +49,7 @@ def arc_lengths( # Finite diff. step-size, x-grid eps = (b - a) / npoints ixgrid = xgrid(a, b, "linear", npoints) - # PDFs evaluated on grid, use the entire thing, the stats class will chose later + # PDFs evaluated on grid, use the entire thing, the Stats class will chose later xfgrid = xplotting_grid(pdf, Q, ixgrid, basis, flavours).grid_values.data * ixgrid[1] fdiff = np.diff(xfgrid) / eps # Compute forward differences res += integrate.simps(1 + np.square(fdiff), ixgrid[1][1:])