Skip to content

Commit

Permalink
updated filters and filters tests
Browse files — browse the repository at this point in the history
  • Loading branch information
scarlehoff committed Feb 29, 2024
1 parent 3eccc0b commit 3aefd4a
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 40 deletions.
34 changes: 21 additions & 13 deletions validphys2/src/validphys/commondataparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,11 +433,19 @@ def __hash__(self):
def check(self):
"""Various checks to apply manually to the observable before it is used anywhere
These are not part of the __post_init__ call since they can only happen after the metadata
has been read and the observable selected.
has been read, the observable selected and (likely) variants applied.
"""
# Check that the data_central is empty if and only if the dataset is a positivity/integrability set
if self.data_central is None and not self.is_lagrange_multiplier:
raise ValidationError(f"Missing `data_central` field for {self.name}")
# Check whether the data central or the uncertainties are empty for a non-positivity/integrability set
if not self.is_lagrange_multiplier:
if self.data_central is None:
raise ValidationError(f"Missing `data_central` field for {self.name}")

if not self.data_uncertainties:
ermsg = f"Missing `data_uncertainties` for {self.name}."
# be polite
if "legacy" in self.variants:
ermsg += " Maybe you intended to use `variant: legacy`?"
raise ValidationError(ermsg)

# Check that plotting.plot_x is being filled
if self.plotting.plot_x is None:
Expand Down Expand Up @@ -466,7 +474,7 @@ def apply_variant(self, variant_name):
try:
variant = self.variants[variant_name]
except KeyError as e:
raise ValueError(f"The requested variant does not exist {self.variant_name}") from e
raise ValueError(f"The requested variant does not exist {variant_name}") from e

variant_replacement = {}
if variant.data_uncertainties is not None:
Expand Down Expand Up @@ -797,7 +805,6 @@ def select_observable(self, obs_name_raw):

# Now burn the _parent key into the observable and apply checks
object.__setattr__(observable, "_parent", self)
observable.check()
return observable


Expand Down Expand Up @@ -827,10 +834,10 @@ def parse_new_metadata(metadata_file, observable_name, variant=None):
return metadata


def parse_commondata_new(metadata):
def load_commondata_new(metadata):
"""
TODO: update this docstring since now the parse_commondata_new takes the information from
TODO: update this docstring since now the load_commondata_new takes the information from
the metadata, and the name -> split is done outside
In the current iteration of the commondata, each of the commondata
Expand All @@ -855,6 +862,9 @@ def parse_commondata_new(metadata):
Note that this function reproduces `parse_commondata` below, which parses the
_old_ file format
"""
# Before loading, apply the checks
metadata.check()

# Now parse the data
data_df = metadata.load_data_central()
# the uncertainties
Expand Down Expand Up @@ -942,15 +952,13 @@ def load_commondata(spec):
setname = spec.name
systypefile = spec.sysfile

commondata = parse_commondata_old(commondatafile, systypefile, setname)
else:
commondata = parse_commondata_new(spec.metadata)
return load_commondata_old(commondatafile, systypefile, setname)

return commondata
return load_commondata_new(spec.metadata)


### Old commondata:
def parse_commondata_old(commondatafile, systypefile, setname):
def load_commondata_old(commondatafile, systypefile, setname):
"""Parse a commondata file and a systype file into a CommonData.
Parameters
Expand Down
8 changes: 4 additions & 4 deletions validphys2/src/validphys/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from reportengine.checks import check, make_check
from reportengine.compat import yaml
import validphys.cuts
from validphys.process_options import PROCESSES
from validphys.utils import freeze_args, generate_path_filtered_data

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -50,6 +51,8 @@ def _get_kinlabel_process_type(process_type):
to the process type
This requires some extra digestion for DIS
"""
if isinstance(process_type, str):
process_type = PROCESSES.get(process_type.upper(), process_type.upper())
if hasattr(process_type, "accepted_variables"):
return process_type.accepted_variables
process_type = str(process_type)
Expand Down Expand Up @@ -465,15 +468,12 @@ def __init__(self, initial_data: dict, *, defaults: dict, theory_parameters: dic
if self.dataset is None and self.process_type is None:
raise MissingRuleAttribute("Please define either a process type or dataset.")

# TODO:
# For the cuts to work in a generic way, it is important that the same kind of process share the same
# syntax for the variables (ie, all of them should use pt2 or pt_square)

if self.process_type is None:
from validphys.loader import Loader, LoaderError

if loader is None:
loader = Loader()

try:
cd = loader.check_commondata(self.dataset)
except LoaderError as e:
Expand Down
4 changes: 2 additions & 2 deletions validphys2/src/validphys/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from reportengine import filefinder
from reportengine.compat import yaml
from validphys import lhaindex
from validphys.commondataparser import parse_new_metadata, parse_commondata_old
from validphys.commondataparser import parse_new_metadata, load_commondata_old
from validphys.core import (
PDF,
CommonDataSpec,
Expand Down Expand Up @@ -205,7 +205,7 @@ def _use_fit_commondata_old_format_to_new_format(setname, file_path):
# Try loading the data from file_path, using the systypes from there
# although they are not used
systypes = next(file_path.parent.glob("systypes/*.dat"))
commondata = parse_commondata_old(file_path, systypes, setname)
commondata = load_commondata_old(file_path, systypes, setname)

# Export the data central
new_data_stream = tempfile.NamedTemporaryFile(
Expand Down
2 changes: 1 addition & 1 deletion validphys2/src/validphys/process_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,5 +254,5 @@ def _displusjet_xq2map(kin_dict):


@Parser
def ValidProcess(process_name) -> _Process:
def ValidProcess(process_name) -> _Process | str:
return PROCESSES.get(process_name.upper(), process_name.upper())
41 changes: 21 additions & 20 deletions validphys2/src/validphys/tests/test_filter_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,31 +10,31 @@
PerturbativeOrder,
BadPerturbativeOrder,
)
from validphys.tests.conftest import THEORYID, PDF
from validphys.tests.conftest import THEORYID_NEW as THEORYID, PDF

bad_rules = [
{'dataset': 'NMC'},
{'dataset': "NMC_NC_NOTFIXED_DW_EM-F2"},
{'rule': 'x < 0.1'},
{'dataset': 'NOT_EXISTING', 'rule': 'x < 0.1'},
{'dataset': 'NMC', 'rule': 'x < 0.1', 'PTO': 'bogus'},
{'dataset': 'NMC', 'rule': 'x < 0.1', 'PTO': {'bog': 'us'}},
{'dataset': 'NMC', 'rule': 'x < 0.1', 'local_variables': 'bogus'},
{'dataset': 'NMC', 'rule': 'bogus syntax'},
{'dataset': 'NMC', 'rule': 'unknown_variable > 10'},
{'dataset': 'NMC', 'local_variables': {'z': 'bogus syntax'}, 'rule': 'z > 10'},
{'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'rule': 'x < 0.1', 'PTO': 'bogus'},
{'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'rule': 'x < 0.1', 'PTO': {'bog': 'us'}},
{'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'rule': 'x < 0.1', 'local_variables': 'bogus'},
{'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'rule': 'bogus syntax'},
{'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'rule': 'unknown_variable > 10'},
{'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'local_variables': {'z': 'bogus syntax'}, 'rule': 'z > 10'},
{
'dataset': 'NMC',
'dataset': "NMC_NC_NOTFIXED_DW_EM-F2",
'local_variables': {'z': 'unknown_variable + 1'},
'rule': 'z > 10',
},
{'dataset': 'NMC', 'local_variables': {'z': 'v+1', 'v': '10'}, 'rule': 'z > 10'},
{'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'local_variables': {'z': 'v+1', 'v': '10'}, 'rule': 'z > 10'},
]

# Note: Don't change the order here. In this way it tests all cases.
good_rules = [
{'process_type': 'DIS_ALL', 'PTO': 'N3LO', 'rule': 'x < 1e-2'},
{'process_type': 'DIS_ALL', 'IC': 'False', 'rule': 'x < 1e-2'},
{'process_type': 'JET', 'rule': 'p_T2 < 10'},
{'process_type': 'JET', 'rule': 'pT < 3.16'},
]


Expand All @@ -54,7 +54,7 @@ def test_rule_caching():
for rule_list in (rule_list_1, rule_list_2):
cut_list.append(
API.cuts(
dataset_input={"dataset": "NMC"},
dataset_input={"dataset": "NMC_NC_NOTFIXED_DW_EM-F2", "variant": "legacy"},
use_cuts="internal",
theoryid=THEORYID,
filter_rules=rule_list,
Expand All @@ -81,18 +81,19 @@ def test_bad_rules():

def test_default_rules():
l = Loader()
dsnames = ['NMC', 'LHCBWZMU8TEV']
for dsname in dsnames:
ds = l.check_dataset(dsname, cuts='internal', theoryid=THEORYID)
dsnames = ['NMC_NC_NOTFIXED_DW_EM-F2', 'LHCB_Z0_8TEV_MUON_Y']
variants = ["legacy", None]
for dsname, v in zip(dsnames, variants):
ds = l.check_dataset(dsname, cuts='internal', theoryid=THEORYID, variant=v)
assert ds.cuts.load() is not None


def test_good_rules():
l = Loader()
rules = [mkrule(inp) for inp in good_rules]
dsnames = ['ATLAS1JET11', 'NMC']
dsnames = ['ATLAS_1JET_8TEV_R06_PTY', 'NMC_NC_NOTFIXED_DW_EM-F2']
for dsname in dsnames:
ds = l.check_dataset(dsname, cuts='internal', rules=rules, theoryid=THEORYID)
ds = l.check_dataset(dsname, cuts='internal', rules=rules, theoryid=THEORYID, variant="legacy")
assert ds.cuts.load() is not None


Expand All @@ -101,7 +102,7 @@ def test_added_rules():
"theoryid": THEORYID,
"pdf": PDF,
"use_cuts": "internal",
"dataset_inputs": [{"dataset": "ATLAS1JET11"}],
"dataset_inputs": [{"dataset": "ATLAS_1JET_8TEV_R06_PTY", "variant": "legacy"}],
"filter_rules": [],
"dataspecs": [
{
Expand All @@ -111,13 +112,13 @@ def test_added_rules():
{
"speclabel": "fewer data",
"added_filter_rules": [
{"dataset": "ATLAS1JET11", "rule": "p_T2 < 1000**2", "reson": "pt cut"}
{"dataset": "ATLAS_1JET_8TEV_R06_PTY", "rule": "pT < 1000", "reason": "pt cut"}
],
},
{
"speclabel": "empty data",
"added_filter_rules": [
{"dataset": "ATLAS1JET11", "rule": "eta < 0", "reason": "empty data"}
{"dataset": "ATLAS_1JET_8TEV_R06_PTY", "rule": "y < 0", "reason": "empty data"}
],
},
],
Expand Down

0 comments on commit 3aefd4a

Please sign in to comment.