Skip to content

Commit

Permalink
updated filters and filters tests
Browse files — browse the repository at this point in the history
  • Loading branch information
scarlehoff committed Feb 29, 2024
1 parent 3eccc0b commit 3aefd4a
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 40 deletions.
34 changes: 21 additions & 13 deletions validphys2/src/validphys/commondataparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,11 +433,19 @@ def __hash__(self):
def check(self):
"""Various checks to apply manually to the observable before it is used anywhere
These are not part of the __post_init__ call since they can only happen after the metadata
has been read and the observable selected.
has been read, the observable selected and (likely) variants applied.
"""
# Check that the data_central is empty if and only if the dataset is a positivity/integrability set
if self.data_central is None and not self.is_lagrange_multiplier:
raise ValidationError(f"Missing `data_central` field for {self.name}")
# Check whether the data central or the uncertainties are empty for a non-positivity/integrability set
if not self.is_lagrange_multiplier:
if self.data_central is None:
raise ValidationError(f"Missing `data_central` field for {self.name}")

if not self.data_uncertainties:
ermsg = f"Missing `data_uncertainties` for {self.name}."
# be polite
if "legacy" in self.variants:
ermsg += " Maybe you intended to use `variant: legacy`?"
raise ValidationError(ermsg)

# Check that plotting.plot_x is being filled
if self.plotting.plot_x is None:
Expand Down Expand Up @@ -466,7 +474,7 @@ def apply_variant(self, variant_name):
try:
variant = self.variants[variant_name]
except KeyError as e:
raise ValueError(f"The requested variant does not exist {self.variant_name}") from e
raise ValueError(f"The requested variant does not exist {variant_name}") from e

variant_replacement = {}
if variant.data_uncertainties is not None:
Expand Down Expand Up @@ -797,7 +805,6 @@ def select_observable(self, obs_name_raw):

# Now burn the _parent key into the observable and apply checks
object.__setattr__(observable, "_parent", self)
observable.check()
return observable


Expand Down Expand Up @@ -827,10 +834,10 @@ def parse_new_metadata(metadata_file, observable_name, variant=None):
return metadata


def parse_commondata_new(metadata):
def load_commondata_new(metadata):
"""
TODO: update this docstring since now the parse_commondata_new takes the information from
TODO: update this docstring since now the load_commondata_new takes the information from
the metadata, and the name -> split is done outside
In the current iteration of the commondata, each of the commondata
Expand All @@ -855,6 +862,9 @@ def parse_commondata_new(metadata):
Note that this function reproduces `parse_commondata` below, which parses the
_old_ file format
"""
# Before loading, apply the checks
metadata.check()

# Now parse the data
data_df = metadata.load_data_central()
# the uncertainties
Expand Down Expand Up @@ -942,15 +952,13 @@ def load_commondata(spec):
setname = spec.name
systypefile = spec.sysfile

commondata = parse_commondata_old(commondatafile, systypefile, setname)
else:
commondata = parse_commondata_new(spec.metadata)
return load_commondata_old(commondatafile, systypefile, setname)

return commondata
return load_commondata_new(spec.metadata)


### Old commondata:
def parse_commondata_old(commondatafile, systypefile, setname):
def load_commondata_old(commondatafile, systypefile, setname):
"""Parse a commondata file and a systype file into a CommonData.
Parameters
Expand Down
8 changes: 4 additions & 4 deletions validphys2/src/validphys/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from reportengine.checks import check, make_check
from reportengine.compat import yaml
import validphys.cuts
from validphys.process_options import PROCESSES
from validphys.utils import freeze_args, generate_path_filtered_data

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -50,6 +51,8 @@ def _get_kinlabel_process_type(process_type):
to the process type
This requires some extra digestion for DIS
"""
if isinstance(process_type, str):
process_type = PROCESSES.get(process_type.upper(), process_type.upper())
if hasattr(process_type, "accepted_variables"):
return process_type.accepted_variables
process_type = str(process_type)
Expand Down Expand Up @@ -465,15 +468,12 @@ def __init__(self, initial_data: dict, *, defaults: dict, theory_parameters: dic
if self.dataset is None and self.process_type is None:
raise MissingRuleAttribute("Please define either a process type or dataset.")

# TODO:
# For the cuts to work in a generic way, it is important that the same kind of process share the same
# syntax for the variables (ie, all of them should use pt2 or pt_square)

if self.process_type is None:
from validphys.loader import Loader, LoaderError

if loader is None:
loader = Loader()

try:
cd = loader.check_commondata(self.dataset)
except LoaderError as e:
Expand Down
4 changes: 2 additions & 2 deletions validphys2/src/validphys/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from reportengine import filefinder
from reportengine.compat import yaml
from validphys import lhaindex
from validphys.commondataparser import parse_new_metadata, parse_commondata_old
from validphys.commondataparser import parse_new_metadata, load_commondata_old
from validphys.core import (
PDF,
CommonDataSpec,
Expand Down Expand Up @@ -205,7 +205,7 @@ def _use_fit_commondata_old_format_to_new_format(setname, file_path):
# Try loading the data from file_path, using the systypes from there
# although they are not used
systypes = next(file_path.parent.glob("systypes/*.dat"))
commondata = parse_commondata_old(file_path, systypes, setname)
commondata = load_commondata_old(file_path, systypes, setname)

# Export the data central
new_data_stream = tempfile.NamedTemporaryFile(
Expand Down
2 changes: 1 addition & 1 deletion validphys2/src/validphys/process_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,5 +254,5 @@ def _displusjet_xq2map(kin_dict):


@Parser
def ValidProcess(process_name) -> _Process:
def ValidProcess(process_name) -> _Process | str:
return PROCESSES.get(process_name.upper(), process_name.upper())
41 changes: 21 additions & 20 deletions validphys2/src/validphys/tests/test_filter_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,31 +10,31 @@
PerturbativeOrder,
BadPerturbativeOrder,
)
from validphys.tests.conftest import THEORYID, PDF
from validphys.tests.conftest import THEORYID_NEW as THEORYID, PDF

bad_rules = [
{'dataset': 'NMC'},
{'dataset': "NMC_NC_NOTFIXED_DW_EM-F2"},
{'rule': 'x < 0.1'},
{'dataset': 'NOT_EXISTING', 'rule': 'x < 0.1'},
{'dataset': 'NMC', 'rule': 'x < 0.1', 'PTO': 'bogus'},
{'dataset': 'NMC', 'rule': 'x < 0.1', 'PTO': {'bog': 'us'}},
{'dataset': 'NMC', 'rule': 'x < 0.1', 'local_variables': 'bogus'},
{'dataset': 'NMC', 'rule': 'bogus syntax'},
{'dataset': 'NMC', 'rule': 'unknown_variable > 10'},
{'dataset': 'NMC', 'local_variables': {'z': 'bogus syntax'}, 'rule': 'z > 10'},
{'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'rule': 'x < 0.1', 'PTO': 'bogus'},
{'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'rule': 'x < 0.1', 'PTO': {'bog': 'us'}},
{'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'rule': 'x < 0.1', 'local_variables': 'bogus'},
{'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'rule': 'bogus syntax'},
{'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'rule': 'unknown_variable > 10'},
{'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'local_variables': {'z': 'bogus syntax'}, 'rule': 'z > 10'},
{
'dataset': 'NMC',
'dataset': "NMC_NC_NOTFIXED_DW_EM-F2",
'local_variables': {'z': 'unknown_variable + 1'},
'rule': 'z > 10',
},
{'dataset': 'NMC', 'local_variables': {'z': 'v+1', 'v': '10'}, 'rule': 'z > 10'},
{'dataset': "NMC_NC_NOTFIXED_DW_EM-F2", 'local_variables': {'z': 'v+1', 'v': '10'}, 'rule': 'z > 10'},
]

# Note: Don't change the order here. In this way it tests all cases.
good_rules = [
{'process_type': 'DIS_ALL', 'PTO': 'N3LO', 'rule': 'x < 1e-2'},
{'process_type': 'DIS_ALL', 'IC': 'False', 'rule': 'x < 1e-2'},
{'process_type': 'JET', 'rule': 'p_T2 < 10'},
{'process_type': 'JET', 'rule': 'pT < 3.16'},
]


Expand All @@ -54,7 +54,7 @@ def test_rule_caching():
for rule_list in (rule_list_1, rule_list_2):
cut_list.append(
API.cuts(
dataset_input={"dataset": "NMC"},
dataset_input={"dataset": "NMC_NC_NOTFIXED_DW_EM-F2", "variant": "legacy"},
use_cuts="internal",
theoryid=THEORYID,
filter_rules=rule_list,
Expand All @@ -81,18 +81,19 @@ def test_bad_rules():

def test_default_rules():
l = Loader()
dsnames = ['NMC', 'LHCBWZMU8TEV']
for dsname in dsnames:
ds = l.check_dataset(dsname, cuts='internal', theoryid=THEORYID)
dsnames = ['NMC_NC_NOTFIXED_DW_EM-F2', 'LHCB_Z0_8TEV_MUON_Y']
variants = ["legacy", None]
for dsname, v in zip(dsnames, variants):
ds = l.check_dataset(dsname, cuts='internal', theoryid=THEORYID, variant=v)
assert ds.cuts.load() is not None


def test_good_rules():
l = Loader()
rules = [mkrule(inp) for inp in good_rules]
dsnames = ['ATLAS1JET11', 'NMC']
dsnames = ['ATLAS_1JET_8TEV_R06_PTY', 'NMC_NC_NOTFIXED_DW_EM-F2']
for dsname in dsnames:
ds = l.check_dataset(dsname, cuts='internal', rules=rules, theoryid=THEORYID)
ds = l.check_dataset(dsname, cuts='internal', rules=rules, theoryid=THEORYID, variant="legacy")
assert ds.cuts.load() is not None


Expand All @@ -101,7 +102,7 @@ def test_added_rules():
"theoryid": THEORYID,
"pdf": PDF,
"use_cuts": "internal",
"dataset_inputs": [{"dataset": "ATLAS1JET11"}],
"dataset_inputs": [{"dataset": "ATLAS_1JET_8TEV_R06_PTY", "variant": "legacy"}],
"filter_rules": [],
"dataspecs": [
{
Expand All @@ -111,13 +112,13 @@ def test_added_rules():
{
"speclabel": "fewer data",
"added_filter_rules": [
{"dataset": "ATLAS1JET11", "rule": "p_T2 < 1000**2", "reson": "pt cut"}
{"dataset": "ATLAS_1JET_8TEV_R06_PTY", "rule": "pT < 1000", "reason": "pt cut"}
],
},
{
"speclabel": "empty data",
"added_filter_rules": [
{"dataset": "ATLAS1JET11", "rule": "eta < 0", "reason": "empty data"}
{"dataset": "ATLAS_1JET_8TEV_R06_PTY", "rule": "y < 0", "reason": "empty data"}
],
},
],
Expand Down

0 comments on commit 3aefd4a

Please sign in to comment.