From 0d050bf5f4bbe83ecb321f783e2803202bf263c2 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Wed, 16 Oct 2024 10:35:45 +0100 Subject: [PATCH 01/12] added inconsistent_data_settings parser --- validphys2/src/validphys/config.py | 33 ++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 1f8f7fd7f4..b35f299e87 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -477,6 +477,39 @@ def parse_dataset_input(self, dataset: Mapping): variant=variant, ) + def parse_inconsistent_data_settings(self, settings): + """ + Parse + """ + known_keys = { + "ADD", + "MULT", + "CORR", + "UNCORR", + "SPECIAL", + "inconsistent_datasets", + "sys_rescaling_factor", + } + + kdiff = settings.keys() - known_keys + for k in kdiff: + log.warning( + ConfigError(f"Key '{k}' in inconsistent_data_settings not known.", k, known_keys) + ) + + ict_data_settings = {} + + ict_data_settings["ADD"] = settings.get("ADD", False) + ict_data_settings["MULT"] = settings.get("MULT", False) + ict_data_settings["CORR"] = settings.get("CORR", False) + ict_data_settings["UNCORR"] = settings.get("UNCORR", False) + ict_data_settings["SPECIAL"] = settings.get("SPECIAL", False) + + ict_data_settings["inconsistent_datasets"] = settings.get("inconsistent_datasets", []) + ict_data_settings["sys_rescaling_factor"] = settings.get("sys_rescaling_factor", 1) + + return ict_data_settings + def parse_use_fitcommondata(self, do_use: bool): """Use the commondata files in the fit instead of those in the data directory.""" From 5d0993b63622c6579890bc6c6c878d313fe77f5c Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Wed, 16 Oct 2024 14:35:25 +0100 Subject: [PATCH 02/12] added module with InconsistentCommonData class --- .../inconsistent_ct.py | 162 ++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 
validphys2/src/validphys/closuretest/inconsistent_closuretest/inconsistent_ct.py diff --git a/validphys2/src/validphys/closuretest/inconsistent_closuretest/inconsistent_ct.py b/validphys2/src/validphys/closuretest/inconsistent_closuretest/inconsistent_ct.py new file mode 100644 index 0000000000..e8ff4f0b88 --- /dev/null +++ b/validphys2/src/validphys/closuretest/inconsistent_closuretest/inconsistent_ct.py @@ -0,0 +1,162 @@ +""" +This module contains the InconsistentCommonData class which is meant to have all the +methods needed in order to introduce an inconsistency within a Closure Test. +""" +import dataclasses +from validphys.coredata import CommonData +import pandas as pd + + +@dataclasses.dataclass(eq=False) +class InconsistentCommonData(CommonData): + """ + Class that inherits all of the methods + of coredata.CommonData class. + + This class is meant to have all the + methods needed in order to introduce + an inconsistency within a Closure Test. + """ + + setname: str + ndata: int + commondataproc: str + nkin: int + nsys: int + commondata_table: pd.DataFrame = dataclasses.field(repr=False) + systype_table: pd.DataFrame = dataclasses.field(repr=False) + systematics_table: pd.DataFrame = dataclasses.field(init=None, repr=False) + + def with_MULT_sys(self, mult_sys): + """ + returns an InconsistentCommonData instance + with MULT systematics replaced by mult_sys + + Parameters + ---------- + mult_sys : pd.DataFrame() + all MULT columns of + InconsistentCommonData.commondata_table + """ + table = self.commondata_table.copy() + table["MULT"] = mult_sys + return dataclasses.replace(self, commondata_table=table) + + def with_ADD_sys(self, add_sys): + """ + returns an InconsistentCommonData instance + with ADD systematics replaced by add_sys + + Parameters + ---------- + add_sys : pd.DataFrame() + all ADD columns of + InconsistentCommonData.commondata_table + """ + table = self.commondata_table.copy() + table["ADD"] = add_sys + return dataclasses.replace(self, 
commondata_table=table) + + def rescale_sys(self, type_err, CORR, UNCORR, SPECIAL, sys_rescaling_factor): + """ + rescale the sys (MULT or ADD) by constant factor, sys_rescaling_factor, + a distinction is done between CORR, UNCORR and SPECIAL systematics + + Parameters + ---------- + + type_err : str + e.g. 'MULT' or 'ADD' + + CORR : bool + + UNCORR : bool + + SPECIAL : bool + + sys_rescaling_factor : float, int + + Returns + ------- + pd.DataFrame corresponding to the rescaled MULT systematics + """ + # avoid circular import error + from validphys.covmats import INTRA_DATASET_SYS_NAME + + err_table = self.systematics_table.loc[:, [type_err]].copy() + # get indices of CORR / UNCORR sys + systype_corr = self.systype_table[ + (self.systype_table["type"] == type_err) + & (self.systype_table["name"].isin(["CORR", "THEORYCORR"])) + ] + + systype_uncorr = self.systype_table[ + (self.systype_table["type"] == type_err) + & (self.systype_table["name"].isin(["UNCORR", "THEORYUNCORR"])) + ] + + # get indices of special (intra datasets) correlations + systype_special = self.systype_table[ + (self.systype_table["type"] == type_err) + & (~self.systype_table["name"].isin(INTRA_DATASET_SYS_NAME)) + ] + + # rescale systematics + if CORR: + err_table.iloc[:, systype_corr.index - 1] *= sys_rescaling_factor + if UNCORR: + err_table.iloc[:, systype_uncorr.index - 1] *= sys_rescaling_factor + if SPECIAL: + err_table.iloc[:, systype_special.index - 1] *= sys_rescaling_factor + + return err_table + + def process_commondata( + self, ADD, MULT, CORR, UNCORR, SPECIAL, inconsistent_datasets, sys_rescaling_factor + ): + """ + returns a commondata instance + with modified systematics. + Note that if commondata.setname + is not within the inconsistent_datasets or if both ADD and + MULT are False, then the commondata object + will not be modified. 
+ + Parameters + ---------- + + ADD : bool + + MULT : bool + + CORR : bool + + UNCORR : bool + + SPECIAL : bool + + inconsistent_datasets : list + list of the datasets for which an inconsistency should be introduced + + sys_rescaling_factor : float, int + + Returns + ------- + validphys.inconsistent_ct.InconsistentCommonData + """ + new_commondata = self + + if not self.setname in inconsistent_datasets: + return self + + if MULT: + new_commondata = new_commondata.with_MULT_sys( + self.rescale_sys("MULT", CORR, UNCORR, SPECIAL, sys_rescaling_factor) + ) + + if ADD: + new_commondata = new_commondata.with_ADD_sys( + self.rescale_sys("ADD", CORR, UNCORR, SPECIAL, sys_rescaling_factor) + ) + + return new_commondata From afe592e725dce0db03dfbd92bc9fd8fbc08f6184 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Thu, 17 Oct 2024 14:35:41 +0100 Subject: [PATCH 03/12] added explicit node for filtering of inconsistent fakedata --- validphys2/src/validphys/config.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index b35f299e87..45f0e3ec0f 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -479,7 +479,7 @@ def parse_dataset_input(self, dataset: Mapping): def parse_inconsistent_data_settings(self, settings): """ - Parse + Parse the inconsistent data settings from the yaml file. """ known_keys = { "ADD", @@ -1749,7 +1749,9 @@ def produce_scale_variation_theories(self, theoryid, point_prescription): return {"theoryids": NSList(theoryids, nskey="theoryid")} @configparser.explicit_node - def produce_filter_data(self, fakedata: bool = False, theorycovmatconfig=None): + def produce_filter_data( + self, fakedata: bool = False, theorycovmatconfig=None, inconsistent_fakedata: bool = False + ): """Set the action used to filter the data to filter either real or closure data. 
If the closure data filter is being used and if the theory covariance matrix is not being closure tested then filter @@ -1768,6 +1770,9 @@ def produce_filter_data(self, fakedata: bool = False, theorycovmatconfig=None): "Generating closure test data which samples from the theory " "covariance matrix has not been implemented yet." ) + elif inconsistent_fakedata: + return validphys.filters.filter_inconsistent_closure_data_by_experiment + return validphys.filters.filter_closure_data_by_experiment @configparser.explicit_node From 67370479e960083feab09159dcc9a19f5bf57ad0 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Thu, 17 Oct 2024 14:36:37 +0100 Subject: [PATCH 04/12] added _filter_inconsistent_closure_data to filters --- validphys2/src/validphys/filters.py | 120 ++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index 944d11c5cb..a526299dd8 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -17,6 +17,8 @@ import validphys.cuts from validphys.process_options import PROCESSES from validphys.utils import generate_path_filtered_data +from validphys.closuretest.inconsistent_closuretest.inconsistent_ct import InconsistentCommonData + log = logging.getLogger(__name__) @@ -217,6 +219,45 @@ def filter_closure_data_by_experiment( return res +def filter_inconsistent_closure_data_by_experiment( + filter_path, + experiments_data, + fakepdf, + fakenoise, + filterseed, + data_index, + sep_mult, + inconsistent_data_settings, +): + """ + Like :py:func:`filter_closure_data` except filters data by experiment. + + This function just peforms a ``for`` loop over ``experiments``, the reason + we don't use ``reportengine.collect`` is that it can permute the order + in which closure data is generate, which means that the pseudodata is + not reproducible. 
+ + """ + + res = [] + for exp in experiments_data: + experiment_index = data_index[data_index.isin([exp.name], level=0)] + res.append( + _filter_inconsistent_closure_data( + filter_path, + exp, + fakepdf, + fakenoise, + filterseed, + experiment_index, + sep_mult, + inconsistent_data_settings, + ) + ) + + return res + + def filter_real_data(filter_path, data): """Filter real data, cutting any points which do not pass the filter rules.""" log.info('Filtering real data.') @@ -354,6 +395,85 @@ def _filter_closure_data(filter_path, data, fakepdf, fakenoise, filterseed, data return total_data_points, total_cut_data_points +def _filter_inconsistent_closure_data( + filter_path, + data, + fakepdf, + fakenoise, + filterseed, + data_index, + sep_mult, + inconsistent_data_settings, +): + """ + TODO + """ + total_data_points = 0 + total_cut_data_points = 0 + + # circular import generated @ core.py + from validphys.pseudodata import level0_commondata_wc, make_level1_data + + closure_data = level0_commondata_wc(data, fakepdf) + + # Keep track of the original commondata, since it is what will be used to export + # the data afterwards + all_raw_commondata = {} + + for dataset in data.datasets: + # == print number of points passing cuts, make dataset directory and write FKMASK ==# + path = filter_path / dataset.name + nfull, ncut = _write_ds_cut_data(path, dataset) + total_data_points += nfull + total_cut_data_points += ncut + all_raw_commondata[dataset.name] = dataset.commondata.load() + + if fakenoise: + # ======= Level 1 closure test =======# + + closure_data = make_level1_data(data, closure_data, filterseed, data_index, sep_mult) + + closure_data = [ + InconsistentCommonData( + setname=cd.setname, + ndata=cd.ndata, + commondataproc=cd.commondataproc, + nkin=cd.nkin, + nsys=cd.nsys, + commondata_table=cd.commondata_table, + systype_table=cd.systype_table, + ) + for cd in closure_data + ] + + closure_data = [cd.process_commondata(**inconsistent_data_settings) for cd in 
closure_data] + + log.info("Writing Level1 data") + + else: + log.info("Writing Level0 data") + + # ====== write commondata and systype files ======# + for cd in closure_data: + # Write the full dataset, not only the points that pass the filter + data_path, unc_path = generate_path_filtered_data(filter_path.parent, cd.setname) + data_path.parent.mkdir(exist_ok=True, parents=True) + + raw_cd = all_raw_commondata[cd.setname] + + data_range = np.arange(1, 1 + raw_cd.ndata) + + # Now put the closure data into the raw original commondata + new_cv = cd.central_values.reindex(data_range, fill_value=0.0).values + output_cd = raw_cd.with_central_value(new_cv) + + # And export it to file + output_cd.export_data(data_path.open("w", encoding="utf-8")) + output_cd.export_uncertainties(unc_path.open("w", encoding="utf-8")) + + return total_data_points, total_cut_data_points + + def check_t0pdfset(t0pdfset): """T0 pdf check""" t0pdfset.load() From f967e4bd9bc34652d6fa1501cbc7991cbeaf5d9c Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Thu, 17 Oct 2024 15:19:32 +0100 Subject: [PATCH 05/12] added test for inconsistent coredata commondata class --- .../validphys/tests/test_inconsistent_ct.py | 190 ++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 validphys2/src/validphys/tests/test_inconsistent_ct.py diff --git a/validphys2/src/validphys/tests/test_inconsistent_ct.py b/validphys2/src/validphys/tests/test_inconsistent_ct.py new file mode 100644 index 0000000000..b3abc58a3d --- /dev/null +++ b/validphys2/src/validphys/tests/test_inconsistent_ct.py @@ -0,0 +1,190 @@ +""" +Module to test the InconsistentCommonData class. +""" + +from numpy.testing import assert_allclose + +from validphys.tests.conftest import SINGLE_DATASET +from validphys.closuretest.inconsistent_closuretest.inconsistent_ct import InconsistentCommonData + + +def load_cd(): + """ + Load a commondata instance and an inconsistent commondata instance. 
+ """ + # avoid circular import + from validphys.api import API + + cd = API.commondata(**{"dataset_input": {**SINGLE_DATASET}}).load() + + inconsys_cd = InconsistentCommonData( + setname=cd.setname, + ndata=cd.ndata, + commondataproc=cd.commondataproc, + nkin=cd.nkin, + nsys=cd.nsys, + commondata_table=cd.commondata_table, + systype_table=cd.systype_table, + ) + return cd, inconsys_cd + + +def test_with_MULT_sys(): + """ + test if MULT commondata_table is + replaced correctly by + dataclasses.replace(self, commondata_table = new_table) + """ + cd, inconsys_cd = load_cd() + + mult_sys_tab = 3 * cd.commondata_table["MULT"].to_numpy() + + inc_mult_sys_tab = inconsys_cd.with_MULT_sys(mult_sys_tab).commondata_table["MULT"].to_numpy() + + assert_allclose(mult_sys_tab, inc_mult_sys_tab) + + +def test_with_ADD_sys(): + """ + test if ADD commondata_table is + replaced correctly by + dataclasses.replace(self, commondata_table = new_table) + """ + cd, inconsys_cd = load_cd() + mult_sys_tab = 3 * cd.commondata_table["ADD"].to_numpy() + + inc_mult_sys_tab = inconsys_cd.with_ADD_sys(mult_sys_tab).commondata_table["ADD"].to_numpy() + + assert_allclose(mult_sys_tab, inc_mult_sys_tab) + + +def test_rescale_sys_CORR_MULT(): + """ + Check whether rescaling of + CORR MULT uncertainties works + as expected + """ + cd, inconsys_cd = load_cd() + + rescaling_factor = 2.0 + type_err = "MULT" + new_icd = inconsys_cd.with_MULT_sys( + inconsys_cd.rescale_sys( + type_err=type_err, + CORR=True, + UNCORR=False, + SPECIAL=False, + sys_rescaling_factor=rescaling_factor, + ) + ) + + # get indices of CORR sys + systype_corr = cd.systype_table[ + (cd.systype_table["treatment"] == type_err) + & (~cd.systype_table["name"].isin(["UNCORR", "THEORYUNCORR"])) + ] + + tab2 = rescaling_factor * cd.systematics_table.iloc[:, systype_corr.index - 1].to_numpy() + + tab1 = new_icd.systematics_table.iloc[:, systype_corr.index - 1] + + assert_allclose(tab1, tab2) + + +def test_rescale_sys_CORR_ADD(): + """ + Check 
whether rescaling of + CORR ADD uncertainties works + as expected + """ + cd, inconsys_cd = load_cd() + + rescaling_factor = 2.0 + type_err = "ADD" + new_icd = inconsys_cd.with_ADD_sys( + inconsys_cd.rescale_sys( + type_err, CORR=True, UNCORR=False, SPECIAL=False, sys_rescaling_factor=rescaling_factor + ) + ) + + # get indices of CORR sys + systype_corr = cd.systype_table[ + (cd.systype_table["treatment"] == type_err) + & (~cd.systype_table["name"].isin(["UNCORR", "THEORYUNCORR"])) + ] + + tab2 = rescaling_factor * cd.systematics_table.iloc[:, systype_corr.index - 1].to_numpy() + + tab1 = new_icd.systematics_table.iloc[:, systype_corr.index - 1] + + assert_allclose(tab1, tab2) + + +def test_process_commondata(): + """ + Check whether process_commondata + leaves the commondata instance + unchanged when told to do so. + """ + cd, inconsys_cd = load_cd() + new_icd = inconsys_cd.process_commondata( + ADD=False, MULT=False, CORR=False, UNCORR=False, SPECIAL=False, inconsistent_datasets=[SINGLE_DATASET['dataset']], sys_rescaling_factor=1 + ) + tab1 = new_icd.commondata_table.drop(['process'], axis=1).to_numpy() + tab2 = inconsys_cd.commondata_table.drop(['process'], axis=1).to_numpy() + + assert_allclose(tab1, tab2) + + +def test_process_commondata_CORR_MULT(): + """ + Check whether rescaling of + CORR MULT uncertainties works + as expected with process_commondata + method + """ + cd, inconsys_cd = load_cd() + type_err = "MULT" + rescaling_factor = 2.0 + new_icd = inconsys_cd.process_commondata( + ADD=False, MULT=True, CORR=True, UNCORR=False, SPECIAL=False, inconsistent_datasets=[SINGLE_DATASET['dataset']], sys_rescaling_factor=rescaling_factor + ) + + # get indices of CORR sys + systype_corr = cd.systype_table[ + (cd.systype_table["treatment"] == type_err) + & (~cd.systype_table["name"].isin(["UNCORR", "THEORYUNCORR"])) + ] + + tab2 = rescaling_factor * cd.systematics_table.iloc[:, systype_corr.index - 1].to_numpy() + + tab1 = new_icd.systematics_table.iloc[:, 
systype_corr.index - 1] + + assert_allclose(tab1, tab2) + + +def test_process_commondata_CORR_ADD(): + """ + Check whether rescaling of + CORR ADD uncertainties works + as expected with process_commondata + method + """ + cd, inconsys_cd = load_cd() + type_err = "ADD" + rescaling_factor = 2.0 + new_icd = inconsys_cd.process_commondata( + ADD=True, MULT=False, CORR=True, UNCORR=False, SPECIAL=False, inconsistent_datasets=[SINGLE_DATASET['dataset']], sys_rescaling_factor=rescaling_factor + ) + + # get indices of CORR sys + systype_corr = cd.systype_table[ + (cd.systype_table["treatment"] == type_err) + & (~cd.systype_table["name"].isin(["UNCORR", "THEORYUNCORR"])) + ] + + tab2 = rescaling_factor * cd.systematics_table.iloc[:, systype_corr.index - 1].to_numpy() + + tab1 = new_icd.systematics_table.iloc[:, systype_corr.index - 1] + + assert_allclose(tab1, tab2) From 42a082bef99a4daa659f70b0b36032eb16d2660e Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Thu, 17 Oct 2024 15:21:46 +0100 Subject: [PATCH 06/12] renamed type err to treatment err --- .../inconsistent_ct.py | 23 +++++---- .../validphys/tests/test_inconsistent_ct.py | 48 ++++++++++++++----- 2 files changed, 49 insertions(+), 22 deletions(-) diff --git a/validphys2/src/validphys/closuretest/inconsistent_closuretest/inconsistent_ct.py b/validphys2/src/validphys/closuretest/inconsistent_closuretest/inconsistent_ct.py index e8ff4f0b88..feed3e0d2d 100644 --- a/validphys2/src/validphys/closuretest/inconsistent_closuretest/inconsistent_ct.py +++ b/validphys2/src/validphys/closuretest/inconsistent_closuretest/inconsistent_ct.py @@ -2,6 +2,7 @@ This module contains the InconsistentCommonData class which is meant to have all the methods needed in order to introduce an inconsistency within a Closure Test. 
""" + import dataclasses from validphys.coredata import CommonData import pandas as pd @@ -57,7 +58,7 @@ def with_ADD_sys(self, add_sys): table["ADD"] = add_sys return dataclasses.replace(self, commondata_table=table) - def rescale_sys(self, type_err, CORR, UNCORR, SPECIAL, sys_rescaling_factor): + def rescale_sys(self, treatment_err, CORR, UNCORR, SPECIAL, sys_rescaling_factor): """ rescale the sys (MULT or ADD) by constant factor, sys_rescaling_factor, a distinction is done between CORR, UNCORR and SPECIAL systematics @@ -65,7 +66,7 @@ def rescale_sys(self, type_err, CORR, UNCORR, SPECIAL, sys_rescaling_factor): Parameters ---------- - type_err : str + treatment_err : str e.g. 'MULT' or 'ADD' CORR : bool @@ -83,31 +84,35 @@ def rescale_sys(self, type_err, CORR, UNCORR, SPECIAL, sys_rescaling_factor): # avoid circular import error from validphys.covmats import INTRA_DATASET_SYS_NAME - err_table = self.systematics_table.loc[:, [type_err]].copy() + # err_table = self.systematics_table.loc[:, [treatment_err]].copy() # get indices of CORR / UNCORR sys systype_corr = self.systype_table[ - (self.systype_table["type"] == type_err) + (self.systype_table["treatment"] == treatment_err) & (self.systype_table["name"].isin(["CORR", "THEORYCORR"])) ] systype_uncorr = self.systype_table[ - (self.systype_table["type"] == type_err) + (self.systype_table["treatment"] == treatment_err) & (self.systype_table["name"].isin(["UNCORR", "THEORYUNCORR"])) ] # get indices of special (intra datasets) correlations systype_special = self.systype_table[ - (self.systype_table["type"] == type_err) + (self.systype_table["treatment"] == treatment_err) & (~self.systype_table["name"].isin(INTRA_DATASET_SYS_NAME)) ] # rescale systematics + if CORR: - err_table.iloc[:, systype_corr.index - 1] *= sys_rescaling_factor + err_table = self.systematics_table.iloc[:, systype_corr.index - 1] + err_table *= sys_rescaling_factor if UNCORR: - err_table.iloc[:, systype_uncorr.index - 1] *= sys_rescaling_factor + 
err_table = self.systematics_table.iloc[:, systype_uncorr.index - 1] + err_table *= sys_rescaling_factor if SPECIAL: - err_table.iloc[:, systype_special.index - 1] *= sys_rescaling_factor + err_table = self.systematics_table.iloc[:, systype_special.index - 1] + err_table *= sys_rescaling_factor return err_table diff --git a/validphys2/src/validphys/tests/test_inconsistent_ct.py b/validphys2/src/validphys/tests/test_inconsistent_ct.py index b3abc58a3d..8d3287b075 100644 --- a/validphys2/src/validphys/tests/test_inconsistent_ct.py +++ b/validphys2/src/validphys/tests/test_inconsistent_ct.py @@ -67,10 +67,10 @@ def test_rescale_sys_CORR_MULT(): cd, inconsys_cd = load_cd() rescaling_factor = 2.0 - type_err = "MULT" + treatment_err = "MULT" new_icd = inconsys_cd.with_MULT_sys( inconsys_cd.rescale_sys( - type_err=type_err, + treatment_err=treatment_err, CORR=True, UNCORR=False, SPECIAL=False, @@ -80,7 +80,7 @@ def test_rescale_sys_CORR_MULT(): # get indices of CORR sys systype_corr = cd.systype_table[ - (cd.systype_table["treatment"] == type_err) + (cd.systype_table["treatment"] == treatment_err) & (~cd.systype_table["name"].isin(["UNCORR", "THEORYUNCORR"])) ] @@ -100,16 +100,20 @@ def test_rescale_sys_CORR_ADD(): cd, inconsys_cd = load_cd() rescaling_factor = 2.0 - type_err = "ADD" + treatment_err = "ADD" new_icd = inconsys_cd.with_ADD_sys( inconsys_cd.rescale_sys( - type_err, CORR=True, UNCORR=False, SPECIAL=False, sys_rescaling_factor=rescaling_factor + treatment_err, + CORR=True, + UNCORR=False, + SPECIAL=False, + sys_rescaling_factor=rescaling_factor, ) ) # get indices of CORR sys systype_corr = cd.systype_table[ - (cd.systype_table["treatment"] == type_err) + (cd.systype_table["treatment"] == treatment_err) & (~cd.systype_table["name"].isin(["UNCORR", "THEORYUNCORR"])) ] @@ -128,7 +132,13 @@ def test_process_commondata(): """ cd, inconsys_cd = load_cd() new_icd = inconsys_cd.process_commondata( - ADD=False, MULT=False, CORR=False, UNCORR=False, SPECIAL=False, 
inconsistent_datasets=[SINGLE_DATASET['dataset']], sys_rescaling_factor=1 + ADD=False, + MULT=False, + CORR=False, + UNCORR=False, + SPECIAL=False, + inconsistent_datasets=[SINGLE_DATASET['dataset']], + sys_rescaling_factor=1, ) tab1 = new_icd.commondata_table.drop(['process'], axis=1).to_numpy() tab2 = inconsys_cd.commondata_table.drop(['process'], axis=1).to_numpy() @@ -144,15 +154,21 @@ def test_process_commondata_CORR_MULT(): method """ cd, inconsys_cd = load_cd() - type_err = "MULT" + treatment_err = "MULT" rescaling_factor = 2.0 new_icd = inconsys_cd.process_commondata( - ADD=False, MULT=True, CORR=True, UNCORR=False, SPECIAL=False, inconsistent_datasets=[SINGLE_DATASET['dataset']], sys_rescaling_factor=rescaling_factor + ADD=False, + MULT=True, + CORR=True, + UNCORR=False, + SPECIAL=False, + inconsistent_datasets=[SINGLE_DATASET['dataset']], + sys_rescaling_factor=rescaling_factor, ) # get indices of CORR sys systype_corr = cd.systype_table[ - (cd.systype_table["treatment"] == type_err) + (cd.systype_table["treatment"] == treatment_err) & (~cd.systype_table["name"].isin(["UNCORR", "THEORYUNCORR"])) ] @@ -171,15 +187,21 @@ def test_process_commondata_CORR_ADD(): method """ cd, inconsys_cd = load_cd() - type_err = "ADD" + treatment_err = "ADD" rescaling_factor = 2.0 new_icd = inconsys_cd.process_commondata( - ADD=True, MULT=False, CORR=True, UNCORR=False, SPECIAL=False, inconsistent_datasets=[SINGLE_DATASET['dataset']], sys_rescaling_factor=rescaling_factor + ADD=True, + MULT=False, + CORR=True, + UNCORR=False, + SPECIAL=False, + inconsistent_datasets=[SINGLE_DATASET['dataset']], + sys_rescaling_factor=rescaling_factor, ) # get indices of CORR sys systype_corr = cd.systype_table[ - (cd.systype_table["treatment"] == type_err) + (cd.systype_table["treatment"] == treatment_err) & (~cd.systype_table["name"].isin(["UNCORR", "THEORYUNCORR"])) ] From ab96658cf6df43dbd99c9cf5cc4740bbd916435b Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Thu, 17 Oct 2024 
15:33:14 +0100 Subject: [PATCH 07/12] add log warning for ict and avoid circular import error --- validphys2/src/validphys/config.py | 1 + validphys2/src/validphys/filters.py | 7 +++- .../validphys/tests/test_inconsistent_ct.py | 42 +++++++------------ 3 files changed, 22 insertions(+), 28 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 45f0e3ec0f..96b015c6c0 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -1771,6 +1771,7 @@ def produce_filter_data( "covariance matrix has not been implemented yet." ) elif inconsistent_fakedata: + log.warning("Using filter for inconsistent closure data") return validphys.filters.filter_inconsistent_closure_data_by_experiment return validphys.filters.filter_closure_data_by_experiment diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index a526299dd8..cb45bfb68a 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -17,7 +17,6 @@ import validphys.cuts from validphys.process_options import PROCESSES from validphys.utils import generate_path_filtered_data -from validphys.closuretest.inconsistent_closuretest.inconsistent_ct import InconsistentCommonData log = logging.getLogger(__name__) @@ -406,7 +405,7 @@ def _filter_inconsistent_closure_data( inconsistent_data_settings, ): """ - TODO + Same as _filter_closure_data, but for inconsistent closure tests. 
""" total_data_points = 0 total_cut_data_points = 0 @@ -433,6 +432,10 @@ def _filter_inconsistent_closure_data( closure_data = make_level1_data(data, closure_data, filterseed, data_index, sep_mult) + from validphys.closuretest.inconsistent_closuretest.inconsistent_ct import ( + InconsistentCommonData, + ) + closure_data = [ InconsistentCommonData( setname=cd.setname, diff --git a/validphys2/src/validphys/tests/test_inconsistent_ct.py b/validphys2/src/validphys/tests/test_inconsistent_ct.py index 8d3287b075..25e2433be0 100644 --- a/validphys2/src/validphys/tests/test_inconsistent_ct.py +++ b/validphys2/src/validphys/tests/test_inconsistent_ct.py @@ -6,27 +6,20 @@ from validphys.tests.conftest import SINGLE_DATASET from validphys.closuretest.inconsistent_closuretest.inconsistent_ct import InconsistentCommonData +from validphys.api import API -def load_cd(): - """ - Load a commondata instance and an inconsistent commondata instance. - """ - # avoid circular import - from validphys.api import API - - cd = API.commondata(**{"dataset_input": {**SINGLE_DATASET}}).load() - - inconsys_cd = InconsistentCommonData( - setname=cd.setname, - ndata=cd.ndata, - commondataproc=cd.commondataproc, - nkin=cd.nkin, - nsys=cd.nsys, - commondata_table=cd.commondata_table, - systype_table=cd.systype_table, - ) - return cd, inconsys_cd +cd = API.commondata(**{"dataset_input": {**SINGLE_DATASET}}).load() + +inconsys_cd = InconsistentCommonData( + setname=cd.setname, + ndata=cd.ndata, + commondataproc=cd.commondataproc, + nkin=cd.nkin, + nsys=cd.nsys, + commondata_table=cd.commondata_table, + systype_table=cd.systype_table, +) def test_with_MULT_sys(): @@ -35,7 +28,6 @@ def test_with_MULT_sys(): replaced correctly by dataclasses.replace(self, commondata_table = new_table) """ - cd, inconsys_cd = load_cd() mult_sys_tab = 3 * cd.commondata_table["MULT"].to_numpy() @@ -50,7 +42,7 @@ def test_with_ADD_sys(): replaced correctly by dataclasses.replace(self, commondata_table = new_table) """ - cd, 
inconsys_cd = load_cd() + mult_sys_tab = 3 * cd.commondata_table["ADD"].to_numpy() inc_mult_sys_tab = inconsys_cd.with_ADD_sys(mult_sys_tab).commondata_table["ADD"].to_numpy() @@ -64,7 +56,6 @@ def test_rescale_sys_CORR_MULT(): CORR MULT uncertainties works as expected """ - cd, inconsys_cd = load_cd() rescaling_factor = 2.0 treatment_err = "MULT" @@ -97,7 +88,6 @@ def test_rescale_sys_CORR_ADD(): CORR ADD uncertainties works as expected """ - cd, inconsys_cd = load_cd() rescaling_factor = 2.0 treatment_err = "ADD" @@ -130,7 +120,7 @@ def test_process_commondata(): leaves the commondata instance unchanged when told to do so. """ - cd, inconsys_cd = load_cd() + new_icd = inconsys_cd.process_commondata( ADD=False, MULT=False, @@ -153,7 +143,7 @@ def test_process_commondata_CORR_MULT(): as expected with process_commondata method """ - cd, inconsys_cd = load_cd() + treatment_err = "MULT" rescaling_factor = 2.0 new_icd = inconsys_cd.process_commondata( @@ -186,7 +176,7 @@ def test_process_commondata_CORR_ADD(): as expected with process_commondata method """ - cd, inconsys_cd = load_cd() + treatment_err = "ADD" rescaling_factor = 2.0 new_icd = inconsys_cd.process_commondata( From ca164796260607fc959f1ae8ff99cb044f7bfe17 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Fri, 18 Oct 2024 16:55:30 +0100 Subject: [PATCH 08/12] changed names to treatmnet and names)uncertainties --- validphys2/src/validphys/config.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 96b015c6c0..a183546a70 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -482,11 +482,8 @@ def parse_inconsistent_data_settings(self, settings): Parse the inconsistent data settings from the yaml file. 
""" known_keys = { - "ADD", - "MULT", - "CORR", - "UNCORR", - "SPECIAL", + "treatment_names", + "names_uncertainties", "inconsistent_datasets", "sys_rescaling_factor", } @@ -499,11 +496,8 @@ def parse_inconsistent_data_settings(self, settings): ict_data_settings = {} - ict_data_settings["ADD"] = settings.get("ADD", False) - ict_data_settings["MULT"] = settings.get("MULT", False) - ict_data_settings["CORR"] = settings.get("CORR", False) - ict_data_settings["UNCORR"] = settings.get("UNCORR", False) - ict_data_settings["SPECIAL"] = settings.get("SPECIAL", False) + ict_data_settings["treatment_names"] = settings.get("treatment_names", []) + ict_data_settings["names_uncertainties"] = settings.get("names_uncertainties", []) ict_data_settings["inconsistent_datasets"] = settings.get("inconsistent_datasets", []) ict_data_settings["sys_rescaling_factor"] = settings.get("sys_rescaling_factor", 1) From 29ef8585731bb5304ca0a3ca4ab8cc1b1515941a Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Fri, 18 Oct 2024 16:56:37 +0100 Subject: [PATCH 09/12] adapted inconsistent cd class to write on systematics table --- .../inconsistent_ct.py | 183 +++++++++--------- 1 file changed, 92 insertions(+), 91 deletions(-) diff --git a/validphys2/src/validphys/closuretest/inconsistent_closuretest/inconsistent_ct.py b/validphys2/src/validphys/closuretest/inconsistent_closuretest/inconsistent_ct.py index feed3e0d2d..6a1801b3b4 100644 --- a/validphys2/src/validphys/closuretest/inconsistent_closuretest/inconsistent_ct.py +++ b/validphys2/src/validphys/closuretest/inconsistent_closuretest/inconsistent_ct.py @@ -26,98 +26,108 @@ class InconsistentCommonData(CommonData): nsys: int commondata_table: pd.DataFrame = dataclasses.field(repr=False) systype_table: pd.DataFrame = dataclasses.field(repr=False) - systematics_table: pd.DataFrame = dataclasses.field(init=None, repr=False) + systematics_table: pd.DataFrame = dataclasses.field(default=None, repr=False) - def with_MULT_sys(self, mult_sys): 
+ # def systematic_errors(self, central_values=None): + # """ + # Overrides the systematic_errors method of the CommonData class + # in order to return the systematics_table attribute. + # """ + # return self.systematics_table + + def select_systype_table_indices(self, treatment_names, names_uncertainties): """ - returns an InconsistentCommonData instance - with MULT systematics replaced by mult_sys + Returns the indices of the systype_table that correspond to the + names_uncertainties list. Parameters ---------- - mult_sys : pd.DataFrame() - all MULT columns of - InconsistentCommonData.commondata_table - """ - table = self.commondata_table.copy() - table["MULT"] = mult_sys - return dataclasses.replace(self, commondata_table=table) + treatment_names : list + list of the names of the treatments that should be selected + possible values are: MULT, ADD - def with_ADD_sys(self, add_sys): - """ - returns an InconsistentCommonData instance - with ADD systematics replaced by add_sys + names_uncertainties : list + list of the names of the uncertainties that should be selected + possible values are: CORR, UNCORR, THEORYCORR, THEORYUNCORR, SPECIAL + SPECIAL is used for intra-dataset systematics - Parameters - ---------- - add_sys : pd.DataFrame() - all ADD columns of - InconsistentCommonData.commondata_table + Returns + ------- + systype_tab.index : pd.Index """ - table = self.commondata_table.copy() - table["ADD"] = add_sys - return dataclasses.replace(self, commondata_table=table) + # check that names_uncertainties only contains either CORR, UNCORR, THEORYCORR, THEORYUNCORR or SPECIAL + # if not raise an error + if not all( + name in ["CORR", "UNCORR", "THEORYCORR", "THEORYUNCORR", "SPECIAL"] + for name in names_uncertainties + ): + raise ValueError( + "names_uncertainties should only contain either CORR, UNCORR, THEORYCORR, THEORYUNCORR or SPECIAL" + ) - def rescale_sys(self, treatment_err, CORR, UNCORR, SPECIAL, sys_rescaling_factor): - """ - rescale the sys (MULT or 
ADD) by constant factor, sys_rescaling_factor, - a distinction is done between CORR, UNCORR and SPECIAL systematics + # if "SPECIAL", then we need to select the intra-dataset systematics + if "SPECIAL" in names_uncertainties: + names_uncertainties.remove("SPECIAL") - Parameters - ---------- + # avoid circular import error + from validphys.covmats import INTRA_DATASET_SYS_NAME + + # note: | operator allows to extend the condition so as to also include the names_uncertainties + systype_tab = self.systype_table[ + (self.systype_table["treatment"].isin(treatment_names)) + & ( + ~self.systype_table["name"].isin(INTRA_DATASET_SYS_NAME) + | self.systype_table["name"].isin(names_uncertainties) + ) + ] - treatment_err : str - e.g. 'MULT' or 'ADD' + else: + systype_tab = self.systype_table[ + (self.systype_table["treatment"].isin(treatment_names)) + & (self.systype_table["name"].isin(names_uncertainties)) + ] - CORR : bool + return systype_tab.index - UNCORR : bool + def rescale_systematics(self, treatment_names, names_uncertainties, sys_rescaling_factor): + """ + Rescale the columns of the systematics_table that are included in the + the names_uncertainties list. 
And return the rescaled systematics_table - SPECIAL : bool + Parameters + ---------- + treatment_names : list + list of the names of the treatments that should be rescaled + possible values are: MULT, ADD - sys_rescaling_factor : float, int + names_uncertainties : list + list of the names of the uncertainties that should be rescaled + possible values are: CORR, UNCORR, THEORYCORR, THEORYUNCORR, SPECIAL + SPECIAL is used for intra-dataset systematics + + sys_rescaling_factor : float + factor by which the systematics should be rescaled Returns ------- - pd.DataFrame corresponding to the rescaled MULT systematics + self.systematics_table : pd.DataFrame """ - # avoid circular import error - from validphys.covmats import INTRA_DATASET_SYS_NAME - - # err_table = self.systematics_table.loc[:, [treatment_err]].copy() - # get indices of CORR / UNCORR sys - systype_corr = self.systype_table[ - (self.systype_table["treatment"] == treatment_err) - & (self.systype_table["name"].isin(["CORR", "THEORYCORR"])) - ] - - systype_uncorr = self.systype_table[ - (self.systype_table["treatment"] == treatment_err) - & (self.systype_table["name"].isin(["UNCORR", "THEORYUNCORR"])) - ] - - # get indices of special (intra datasets) correlations - systype_special = self.systype_table[ - (self.systype_table["treatment"] == treatment_err) - & (~self.systype_table["name"].isin(INTRA_DATASET_SYS_NAME)) - ] - - # rescale systematics - - if CORR: - err_table = self.systematics_table.iloc[:, systype_corr.index - 1] - err_table *= sys_rescaling_factor - if UNCORR: - err_table = self.systematics_table.iloc[:, systype_uncorr.index - 1] - err_table *= sys_rescaling_factor - if SPECIAL: - err_table = self.systematics_table.iloc[:, systype_special.index - 1] - err_table *= sys_rescaling_factor - - return err_table + + sys_table = self.systematics_table.copy() + + # select the columns of the systematics_table that should be rescaled + systype_idx = self.select_systype_table_indices( + 
treatment_names=treatment_names, names_uncertainties=names_uncertainties + ) + + # rescale columns of the systematics_table that are included in the index systype_idx + + sys_table.iloc[:, systype_idx - 1] *= sys_rescaling_factor + + return sys_table def process_commondata( - self, ADD, MULT, CORR, UNCORR, SPECIAL, inconsistent_datasets, sys_rescaling_factor + self, treatment_names, names_uncertainties, sys_rescaling_factor, inconsistent_datasets ): """ returns a commondata instance @@ -129,22 +139,20 @@ def process_commondata( Parameters ---------- + treatment_names : list + list of the names of the treatments that should be rescaled + possible values are: MULT, ADD - ADD : bool - - MULT : bool - - CORR : bool + names_uncertainties : list + list of the names of the uncertainties that should be rescaled + possible values are: CORR, UNCORR, THEORYCORR, THEORYUNCORR, SPECIAL + SPECIAL is used for intra-dataset systematics - UNCORR : bool - - SPECIAL : bool + sys_rescaling_factor : float, int inconsistent_datasets : list list of the datasets for which an inconsistency should be introduced - sys_rescaling_factor : float, int - Returns ------- validphys.inconsistent_ct.InconsistentCommonData @@ -153,15 +161,8 @@ def process_commondata( if not self.setname in inconsistent_datasets: return self - - if MULT: - new_commondata = new_commondata.with_MULT_sys( - self.rescale_sys("MULT", CORR, UNCORR, SPECIAL, sys_rescaling_factor) - ) - - if ADD: - new_commondata = new_commondata.with_ADD_sys( - self.rescale_sys("ADD", CORR, UNCORR, SPECIAL, sys_rescaling_factor) - ) + new_commondata.systematics_table = self.rescale_systematics( + treatment_names, names_uncertainties, sys_rescaling_factor + ) return new_commondata From 58b4e75032d5bdedfba3d040f1784eb143c98960 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Fri, 18 Oct 2024 22:16:10 +0100 Subject: [PATCH 10/12] override systematic_errors method with a property and added setter to property --- .../inconsistent_ct.py 
| 79 +++++++++++++++---- 1 file changed, 63 insertions(+), 16 deletions(-) diff --git a/validphys2/src/validphys/closuretest/inconsistent_closuretest/inconsistent_ct.py b/validphys2/src/validphys/closuretest/inconsistent_closuretest/inconsistent_ct.py index 6a1801b3b4..93f047aae4 100644 --- a/validphys2/src/validphys/closuretest/inconsistent_closuretest/inconsistent_ct.py +++ b/validphys2/src/validphys/closuretest/inconsistent_closuretest/inconsistent_ct.py @@ -3,6 +3,7 @@ methods needed in order to introduce an inconsistency within a Closure Test. """ +import yaml import dataclasses from validphys.coredata import CommonData import pandas as pd @@ -27,18 +28,29 @@ class InconsistentCommonData(CommonData): commondata_table: pd.DataFrame = dataclasses.field(repr=False) systype_table: pd.DataFrame = dataclasses.field(repr=False) systematics_table: pd.DataFrame = dataclasses.field(default=None, repr=False) + _systematic_errors: any = dataclasses.field(default=None, init=False) - # def systematic_errors(self, central_values=None): - # """ - # Overrides the systematic_errors method of the CommonData class - # in order to return the systematics_table attribute. - # """ - # return self.systematics_table + @property + def systematic_errors(self): + """ + Overrides the systematic_errors method of the CommonData class. + + This is done in order to allow the systematic_errors to be a property + and hence to be able to assign values to it (setter). + """ + if self._systematic_errors is None: + return super().systematic_errors() + return self._systematic_errors + + @systematic_errors.setter + def systematic_errors(self, value): + # Define the setter to allow assignment to systematic_errors + self._systematic_errors = value def select_systype_table_indices(self, treatment_names, names_uncertainties): """ - Returns the indices of the systype_table that correspond to the - names_uncertainties list. 
+ Is used to get the indices of the systype_table that correspond to the + intersection of the treatment_names and names_uncertainties lists. Parameters ---------- @@ -67,8 +79,6 @@ def select_systype_table_indices(self, treatment_names, names_uncertainties): # if "SPECIAL", then we need to select the intra-dataset systematics if "SPECIAL" in names_uncertainties: - names_uncertainties.remove("SPECIAL") - # avoid circular import error from validphys.covmats import INTRA_DATASET_SYS_NAME @@ -77,7 +87,9 @@ def select_systype_table_indices(self, treatment_names, names_uncertainties): (self.systype_table["treatment"].isin(treatment_names)) & ( ~self.systype_table["name"].isin(INTRA_DATASET_SYS_NAME) - | self.systype_table["name"].isin(names_uncertainties) + | self.systype_table["name"].isin( + [name for name in names_uncertainties if name != "SPECIAL"] + ) ) ] @@ -91,8 +103,8 @@ def select_systype_table_indices(self, treatment_names, names_uncertainties): def rescale_systematics(self, treatment_names, names_uncertainties, sys_rescaling_factor): """ - Rescale the columns of the systematics_table that are included in the - the names_uncertainties list. And return the rescaled systematics_table + Rescale the columns of the systematic_errors property that are included in the + names_uncertainties list. And return the rescaled table.
Parameters ---------- @@ -113,7 +125,7 @@ def rescale_systematics(self, treatment_names, names_uncertainties, sys_rescalin self.systematics_table : pd.DataFrame """ - sys_table = self.systematics_table.copy() + sys_table = self.systematic_errors.copy() # select the columns of the systematics_table that should be rescaled systype_idx = self.select_systype_table_indices( @@ -121,7 +133,6 @@ def rescale_systematics(self, treatment_names, names_uncertainties, sys_rescalin ) # rescale columns of the systematics_table that are included in the index systype_idx - sys_table.iloc[:, systype_idx - 1] *= sys_rescaling_factor return sys_table @@ -161,8 +172,44 @@ def process_commondata( if not self.setname in inconsistent_datasets: return self - new_commondata.systematics_table = self.rescale_systematics( + + # needs setter to allow assignment to systematic_errors + new_commondata.systematic_errors = self.rescale_systematics( treatment_names, names_uncertainties, sys_rescaling_factor ) return new_commondata + + def export_uncertainties(self, buffer): + """ + Same as the export_uncertainties method of the CommonData class. + The only difference is that systematic_errors is now a property of the class + and not a method. + """ + definitions = {} + for idx, row in self.systype_table.iterrows(): + if row["name"] != "SKIP": + definitions[f"sys_{idx}"] = {"treatment": row["treatment"], "type": row["name"]} + + # Order the definitions by treatment as ADD, MULT + # TODO: make it so that it corresponds to the original order exactly + sorted_definitions = { + k: v for k, v in sorted(definitions.items(), key=lambda item: item[1]["treatment"]) + } + bins = [] + + for idx, row in self.systematic_errors.iterrows(): + tmp = {"stat": float(self.stat_errors[idx])} + # Hope things come in the right order... 
+ for key_name, val in zip(sorted_definitions, row): + tmp[key_name] = float(val) + + bins.append(tmp) + + sorted_definitions["stat"] = { + "description": "Uncorrelated statistical uncertainties", + "treatment": "ADD", + "type": "UNCORR", + } + ret = {"definitions": sorted_definitions, "bins": bins} + yaml.safe_dump(ret, buffer) From 21cd93453c0be0432652cc40481925d0ec416e71 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: Fri, 18 Oct 2024 22:17:49 +0100 Subject: [PATCH 11/12] reindex systematic errors for inconsistent datasets in the same way as it is done for the central values --- validphys2/src/validphys/coredata.py | 1 + validphys2/src/validphys/filters.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py index 46a1cab7c6..a3efd9f4e8 100644 --- a/validphys2/src/validphys/coredata.py +++ b/validphys2/src/validphys/coredata.py @@ -446,6 +446,7 @@ def export_uncertainties(self, buffer): k: v for k, v in sorted(definitions.items(), key=lambda item: item[1]["treatment"]) } bins = [] + for idx, row in self.systematic_errors().iterrows(): tmp = {"stat": float(self.stat_errors[idx])} # Hope things come in the right order... 
diff --git a/validphys2/src/validphys/filters.py b/validphys2/src/validphys/filters.py index cb45bfb68a..61df5c7595 100644 --- a/validphys2/src/validphys/filters.py +++ b/validphys2/src/validphys/filters.py @@ -432,10 +432,12 @@ def _filter_inconsistent_closure_data( closure_data = make_level1_data(data, closure_data, filterseed, data_index, sep_mult) + # avoid circular import from validphys.closuretest.inconsistent_closuretest.inconsistent_ct import ( InconsistentCommonData, ) + # Convert the commondata to InconsistentCommonData closure_data = [ InconsistentCommonData( setname=cd.setname, @@ -449,6 +451,7 @@ def _filter_inconsistent_closure_data( for cd in closure_data ] + # Process the commondata closure_data = [cd.process_commondata(**inconsistent_data_settings) for cd in closure_data] log.info("Writing Level1 data") @@ -472,6 +475,26 @@ def _filter_inconsistent_closure_data( # And export it to file output_cd.export_data(data_path.open("w", encoding="utf-8")) + + if cd.setname in inconsistent_data_settings["inconsistent_datasets"]: + # convert output_cd to InconsistentCommonData (which has systematic_errors as a property and its own export_uncertainties method) + # this is done for the inconsistent datasets only as the systematics of the other datasets are not modified + + output_cd = InconsistentCommonData( + setname=output_cd.setname, + ndata=output_cd.ndata, + commondataproc=output_cd.commondataproc, + nkin=output_cd.nkin, + nsys=output_cd.nsys, + commondata_table=output_cd.commondata_table, + systype_table=output_cd.systype_table, + ) + + # put the inconsistent closure systematics into the raw systematics + new_sys = cd.systematic_errors.reindex(data_range, fill_value=0.0) + output_cd.systematic_errors = new_sys + + # export it to file output_cd.export_uncertainties(unc_path.open("w", encoding="utf-8")) return total_data_points, total_cut_data_points From 809dd6d2e2a613ac1b461e7bdeece112d1ccb9b4 Mon Sep 17 00:00:00 2001 From: Mark Nestor Costantini Date: 
Fri, 18 Oct 2024 23:02:19 +0100 Subject: [PATCH 12/12] adapted tests --- .../validphys/tests/test_inconsistent_ct.py | 318 ++++++++---------- 1 file changed, 135 insertions(+), 183 deletions(-) diff --git a/validphys2/src/validphys/tests/test_inconsistent_ct.py b/validphys2/src/validphys/tests/test_inconsistent_ct.py index 25e2433be0..42601ca316 100644 --- a/validphys2/src/validphys/tests/test_inconsistent_ct.py +++ b/validphys2/src/validphys/tests/test_inconsistent_ct.py @@ -1,202 +1,154 @@ """ -Module to test the InconsistentCommonData class. +Module for testing the InconsistentCommonData class in the inconsistent_closuretest module. +Testing is done by mocking the class's methods and properties. """ -from numpy.testing import assert_allclose +import unittest +from unittest.mock import MagicMock, patch +import pandas as pd +from io import StringIO -from validphys.tests.conftest import SINGLE_DATASET -from validphys.closuretest.inconsistent_closuretest.inconsistent_ct import InconsistentCommonData -from validphys.api import API +class TestInconsistentCommonData(unittest.TestCase): -cd = API.commondata(**{"dataset_input": {**SINGLE_DATASET}}).load() - -inconsys_cd = InconsistentCommonData( - setname=cd.setname, - ndata=cd.ndata, - commondataproc=cd.commondataproc, - nkin=cd.nkin, - nsys=cd.nsys, - commondata_table=cd.commondata_table, - systype_table=cd.systype_table, -) - - -def test_with_MULT_sys(): - """ - test if MULT commondata_table is - replaced correctly by - dataclasses.replace(self, commondata_table = new_table) - """ - - mult_sys_tab = 3 * cd.commondata_table["MULT"].to_numpy() - - inc_mult_sys_tab = inconsys_cd.with_MULT_sys(mult_sys_tab).commondata_table["MULT"].to_numpy() - - assert_allclose(mult_sys_tab, inc_mult_sys_tab) - - -def test_with_ADD_sys(): - """ - test if ADD commondata_table is - replaced correctly by - dataclasses.replace(self, commondata_table = new_table) - """ - - mult_sys_tab = 3 * cd.commondata_table["ADD"].to_numpy() - - 
inc_mult_sys_tab = inconsys_cd.with_ADD_sys(mult_sys_tab).commondata_table["ADD"].to_numpy() - - assert_allclose(mult_sys_tab, inc_mult_sys_tab) - - -def test_rescale_sys_CORR_MULT(): - """ - Check whether rescaling of - CORR MULT uncertainties works - as expected - """ - - rescaling_factor = 2.0 - treatment_err = "MULT" - new_icd = inconsys_cd.with_MULT_sys( - inconsys_cd.rescale_sys( - treatment_err=treatment_err, - CORR=True, - UNCORR=False, - SPECIAL=False, - sys_rescaling_factor=rescaling_factor, - ) + @patch( + 'validphys.closuretest.inconsistent_closuretest.inconsistent_ct.InconsistentCommonData', + autospec=True, ) - - # get indices of CORR sys - systype_corr = cd.systype_table[ - (cd.systype_table["treatment"] == treatment_err) - & (~cd.systype_table["name"].isin(["UNCORR", "THEORYUNCORR"])) - ] - - tab2 = rescaling_factor * cd.systematics_table.iloc[:, systype_corr.index - 1].to_numpy() - - tab1 = new_icd.systematics_table.iloc[:, systype_corr.index - 1] - - assert_allclose(tab1, tab2) - - -def test_rescale_sys_CORR_ADD(): - """ - Check whether rescaling of - CORR ADD uncertainties works - as expected - """ - - rescaling_factor = 2.0 - treatment_err = "ADD" - new_icd = inconsys_cd.with_ADD_sys( - inconsys_cd.rescale_sys( - treatment_err, - CORR=True, - UNCORR=False, - SPECIAL=False, - sys_rescaling_factor=rescaling_factor, + def setUp(self, MockInconsistentCommonData): + """ + Set up mock instance of InconsistentCommonData for all tests. 
+ """ + self.mock_instance = MockInconsistentCommonData.return_value + + # Mocking the DataFrames in the instance + self.mock_instance.systype_table = pd.DataFrame( + {"treatment": ["ADD", "MULT", "ADD"], "name": ["CORR", "UNCORR", "SPECIAL"]} ) - ) - # get indices of CORR sys - systype_corr = cd.systype_table[ - (cd.systype_table["treatment"] == treatment_err) - & (~cd.systype_table["name"].isin(["UNCORR", "THEORYUNCORR"])) - ] - - tab2 = rescaling_factor * cd.systematics_table.iloc[:, systype_corr.index - 1].to_numpy() + self.mock_instance.systematic_errors = pd.DataFrame( + {"sys1": [0.1, 0.2, 0.3], "sys2": [0.4, 0.5, 0.6]} + ) - tab1 = new_icd.systematics_table.iloc[:, systype_corr.index - 1] + def test_systematic_errors_getter(self): + """ + Test the getter for the systematic_errors property. + """ + # Set the _systematic_errors to None so the getter is triggered + self.mock_instance._systematic_errors = None + + # Mock the return value of the superclass's systematic_errors method + with patch( + 'validphys.coredata.CommonData.systematic_errors', + return_value=self.mock_instance.systematic_errors, + ): + result = self.mock_instance.systematic_errors + + # Assert that the result matches the mock + pd.testing.assert_frame_equal(result, self.mock_instance.systematic_errors) + + def test_systematic_errors_setter(self): + """ + Test the setter for the systematic_errors property. + """ + new_systematic_errors = pd.DataFrame({"sys1": [0.2, 0.3, 0.4], "sys2": [0.5, 0.6, 0.7]}) + + self.mock_instance.systematic_errors = new_systematic_errors + pd.testing.assert_frame_equal(self.mock_instance.systematic_errors, new_systematic_errors) + + def test_select_systype_table_indices(self): + """ + Test select_systype_table_indices method with valid input. 
+ """ + treatment_names = ["ADD"] + names_uncertainties = ["CORR", "SPECIAL"] + + # Mock return of select_systype_table_indices call + self.mock_instance.select_systype_table_indices.return_value = pd.Index([0, 2]) + + result = self.mock_instance.select_systype_table_indices( + treatment_names, names_uncertainties + ) - assert_allclose(tab1, tab2) + self.mock_instance.select_systype_table_indices.assert_called_once_with( + treatment_names, names_uncertainties + ) + pd.testing.assert_index_equal(result, pd.Index([0, 2])) + + def test_select_systype_table_indices_invalid_uncertainties(self): + """ + Test select_systype_table_indices with invalid uncertainties. + """ + treatment_names = ["ADD"] + names_uncertainties = ["INVALID"] + + # Mock the behavior of raising a ValueError + self.mock_instance.select_systype_table_indices.side_effect = ValueError( + "names_uncertainties should only contain either CORR, UNCORR, THEORYCORR, THEORYUNCORR or SPECIAL" + ) + with self.assertRaises(ValueError): + self.mock_instance.select_systype_table_indices(treatment_names, names_uncertainties) + + def test_rescale_systematics(self): + """ + Test rescale_systematics method. + """ + self.mock_instance.systematic_errors = self.mock_instance.systematic_errors.copy() + treatment_names = ["ADD"] + names_uncertainties = ["CORR"] + sys_rescaling_factor = 2.0 + + # Mock return of rescale_systematics + rescaled_table = self.mock_instance.systematic_errors.copy() + rescaled_table.iloc[:, 0] *= sys_rescaling_factor + self.mock_instance.rescale_systematics.return_value = rescaled_table + + result = self.mock_instance.rescale_systematics( + treatment_names, names_uncertainties, sys_rescaling_factor + ) -def test_process_commondata(): - """ - Check whether process_commondata - leaves the commondata instance - unchanged when told to do so. 
- """ + # Assert that rescale_systematics was called once and that the return value matches the mock + self.mock_instance.rescale_systematics.assert_called_once_with( + treatment_names, names_uncertainties, sys_rescaling_factor + ) + pd.testing.assert_frame_equal(result, rescaled_table) + + def test_process_commondata(self): + """ + Test process_commondata method when the dataset is inconsistent. + """ + inconsistent_datasets = ["test_dataset"] + treatment_names = ["ADD"] + names_uncertainties = ["CORR"] + sys_rescaling_factor = 2.0 + + # Mock the return of process_commondata + modified_commondata = MagicMock() + self.mock_instance.process_commondata.return_value = modified_commondata + + result = self.mock_instance.process_commondata( + treatment_names, names_uncertainties, sys_rescaling_factor, inconsistent_datasets + ) - new_icd = inconsys_cd.process_commondata( - ADD=False, - MULT=False, - CORR=False, - UNCORR=False, - SPECIAL=False, - inconsistent_datasets=[SINGLE_DATASET['dataset']], - sys_rescaling_factor=1, - ) - tab1 = new_icd.commondata_table.drop(['process'], axis=1).to_numpy() - tab2 = inconsys_cd.commondata_table.drop(['process'], axis=1).to_numpy() - - assert_allclose(tab1, tab2) - - -def test_process_commondata_CORR_MULT(): - """ - Check whether rescaling of - CORR MULT uncertainties works - as expected with process_commondata - method - """ - - treatment_err = "MULT" - rescaling_factor = 2.0 - new_icd = inconsys_cd.process_commondata( - ADD=False, - MULT=True, - CORR=True, - UNCORR=False, - SPECIAL=False, - inconsistent_datasets=[SINGLE_DATASET['dataset']], - sys_rescaling_factor=rescaling_factor, - ) + # Assert that the method was called with correct parameters + self.mock_instance.process_commondata.assert_called_once_with( + treatment_names, names_uncertainties, sys_rescaling_factor, inconsistent_datasets + ) + self.assertEqual(result, modified_commondata) - # get indices of CORR sys - systype_corr = cd.systype_table[ - 
(cd.systype_table["treatment"] == treatment_err) - & (~cd.systype_table["name"].isin(["UNCORR", "THEORYUNCORR"])) - ] - - tab2 = rescaling_factor * cd.systematics_table.iloc[:, systype_corr.index - 1].to_numpy() - - tab1 = new_icd.systematics_table.iloc[:, systype_corr.index - 1] - - assert_allclose(tab1, tab2) - - -def test_process_commondata_CORR_ADD(): - """ - Check whether rescaling of - CORR ADD uncertainties works - as expected with process_commondata - method - """ - - treatment_err = "ADD" - rescaling_factor = 2.0 - new_icd = inconsys_cd.process_commondata( - ADD=True, - MULT=False, - CORR=True, - UNCORR=False, - SPECIAL=False, - inconsistent_datasets=[SINGLE_DATASET['dataset']], - sys_rescaling_factor=rescaling_factor, - ) + def test_export_uncertainties(self): + """ + Test the export_uncertainties method. + """ + buffer = StringIO() - # get indices of CORR sys - systype_corr = cd.systype_table[ - (cd.systype_table["treatment"] == treatment_err) - & (~cd.systype_table["name"].isin(["UNCORR", "THEORYUNCORR"])) - ] + # Mock the export_uncertainties method + self.mock_instance.export_uncertainties.return_value = None - tab2 = rescaling_factor * cd.systematics_table.iloc[:, systype_corr.index - 1].to_numpy() + self.mock_instance.export_uncertainties(buffer) + self.mock_instance.export_uncertainties.assert_called_once_with(buffer) - tab1 = new_icd.systematics_table.iloc[:, systype_corr.index - 1] - assert_allclose(tab1, tab2) +if __name__ == "__main__": + unittest.main()