Merge branch 'master' into tevatron_inc_DY
Showing 46 changed files with 4,071 additions and 258 deletions.
File renamed without changes.
nnpdf_data/nnpdf_data/commondata/ATLAS_SINGLETOP_13TEV/filter.py
269 changes: 269 additions & 0 deletions
@@ -0,0 +1,269 @@
import pathlib

import numpy as np
import pandas as pd
import yaml

from nnpdf_data.filter_utils.utils import prettify_float

yaml.add_representer(float, prettify_float)

NB_POINTS = 4
MT_VALUE = 172.5
SQRT_S = 13_000.0

from nnpdf_data.filter_utils.utils import cormat_to_covmat, covmat_to_artunc
from nnpdf_data.filter_utils.utils import symmetrize_errors as se


def load_yaml(table_id: int, version: int = 1) -> dict:
    """Load the HEP data table in yaml format.

    Parameters
    ----------
    table_id: int
        table ID number
    version: int
        version of the HEPData record, default=1

    Returns
    -------
    dict:
        dictionary containing the table contents
    """
    filename = f"HEPData-ins1486394-v{version}-Table_{table_id}"
    table = pathlib.Path(f"./rawdata/{filename}.yaml")

    return yaml.safe_load(table.read_text())


def get_kinematics(hepdata: dict, bin_index: list = [], indx: int = 0) -> list:
    """Extract the kinematic values from the HepData table.

    Parameters
    ----------
    hepdata: dict
        dictionary containing all data info
    bin_index: list
        list of non-empty bin indices
    indx: int
        column index from which to read, default=0

    Returns
    -------
    kinematics: list
        kinematic info
    """
    if len(hepdata["independent_variables"]) > 0:
        bins = hepdata["independent_variables"][indx]["values"]
        if len(bin_index) > 0:
            bins = [bins[i] for i in bin_index]
    else:
        bins = []

    kinematics = []
    if len(bins) > 1:  # differential case
        for i in bin_index:
            ymin, ymax = [float(value) for value in bins[i]["value"].split('-')]
            kin_value = {
                "y_t": {"min": ymin, "mid": (ymin + ymax) / 2, "max": ymax},
                "m_t2": {"min": None, "mid": MT_VALUE**2, "max": None},
                "sqrts": {"min": None, "mid": SQRT_S, "max": None},
            }
            kinematics.append(kin_value)
    else:  # inclusive case
        kin_value = {"m_t2": {"min": None, "mid": MT_VALUE**2, "max": None}}
        kinematics.append(kin_value)

    return kinematics


def get_data_values(hepdata: dict, bin_index: list, indx: int = 0) -> list:
    """Extract the central values from the HepData yaml file.

    Parameters
    ----------
    hepdata: dict
        dictionary containing all data info
    bin_index: list
        bin indices that must be parsed
    indx: int
        column index from which to read the central value, default=0

    Returns
    -------
    list:
        list of dictionaries whose contents are the central values
    """
    central = hepdata["dependent_variables"][indx]["values"]
    return np.array([central[i]["value"] for i in bin_index])


def get_errors(hepdata: dict, bin_index: list) -> dict:
    """Extract the uncertainties from HepData and compute the shift of the
    central value in case of asymmetric uncertainties.

    Parameters
    ----------
    hepdata: dict
        HepData yaml file loaded as a dictionary
    bin_index: list
        bin indices that must be parsed

    Returns
    -------
    dict:
        dictionary containing the errors (as a pandas DataFrame) and the
        shifts of the central values
    """
    # parse the systematics
    errors = []
    shifts_cv = []  # relevant for asymmetric uncertainties
    for i in bin_index:
        bin = hepdata["dependent_variables"][i]

        # loop over sources of uncertainty
        error_sources = []
        shift_cv = 0
        for source in bin["values"]:
            if "errors" not in source:
                error_sources.append(None)
            elif "symerror" in source["errors"][0]:
                error_sources.append(source["errors"][0]["symerror"])
            elif "asymerror" in source["errors"][0]:
                delta_min = source["errors"][0]["asymerror"]["minus"]
                delta_plus = source["errors"][0]["asymerror"]["plus"]
                se_delta, se_sigma = se(delta_plus, delta_min)

                # shift central value
                shift_cv += se_delta

                error_sources.append(se_sigma)
        shifts_cv.append(shift_cv)
        errors.append(error_sources)
    errors = np.array(errors)

    # get the description of the uncertainty from hepdata
    error_names = [
        error_name["value"] for error_name in hepdata["independent_variables"][0]["values"]
    ]
    errors = pd.DataFrame(
        errors, columns=error_names, index=[f"bin {i}" for i in range(errors.shape[0])]
    )

    return {"errors": errors, "shifts_cv": np.array(shifts_cv)}


def format_uncertainties(uncs: dict) -> list:
    """Format the uncertainties to be dumped into the yaml file.

    Parameters
    ----------
    uncs: dict
        dictionary containing the various sources of uncertainty

    Returns
    -------
    list:
        list of dictionaries whose elements are the various errors
    """
    combined_errors = []
    n_bins = uncs["systematics"].index.str.startswith("bin").sum()
    for i in range(n_bins):
        errors = {}
        if "statistics" in uncs:
            errors["stat"] = float(uncs["statistics"].iloc[i, 0])
        for j, unc in enumerate(uncs["systematics"].loc[f"bin {i}"].values):
            errors[f"sys_corr_{j + 1}"] = float(unc)

        combined_errors.append(errors)

    return combined_errors


def dump_commondata(kinematics: list, data: list, errors: dict, obs: str) -> None:
    """Generate and write the commondata files.

    Parameters
    ----------
    kinematics: list
        list containing the kinematic values
    data: list
        list containing the central values
    errors: dict
        dictionary containing the different errors
    obs: str
        name to append to the file names
    """
    if "statistics" in errors:
        error_definition = {
            "stat": {
                "description": "Uncorrelated statistical uncertainties",
                "treatment": "ADD",
                "type": "UNCORR",
            }
        }
    else:
        error_definition = {}

    n_sys = errors["systematics"].shape[1]
    for i in range(n_sys):
        error_definition[f"sys_corr_{i + 1}"] = {
            "description": errors["systematics"].columns[i],
            "treatment": errors["systematics"].loc["treatment"].iloc[i],
            "type": errors["systematics"].loc["type"].iloc[i],
        }

    errors_formatted = format_uncertainties(errors)
    with open(f"data_{obs}.yaml", "w") as file:
        yaml.dump({"data_central": data.tolist()}, file, sort_keys=False)

    with open(f"kinematics_{obs}.yaml", "w") as file:
        yaml.dump({"bins": kinematics}, file, sort_keys=False)

    with open(f"uncertainties_{obs}.yaml", "w") as file:
        yaml.dump(
            {"definitions": error_definition, "bins": errors_formatted}, file, sort_keys=False
        )


def main_filter() -> None:
    """Produce the commondata for the following three observables:

    1) T-Y-NORM
    2) TBAR-Y-NORM
    3) TCHANNEL-XSEC (ratio tq/tqbar)

    1) and 2) follow the same procedure.
    """
    # TCHANNEL-XSEC RATIO
    yaml_content_data = load_yaml(table_id=3, version=1)
    yaml_uncertainties = load_yaml(table_id=4, version=1)

    data_central = get_data_values(yaml_content_data, bin_index=[0], indx=2)
    kinematics = get_kinematics(yaml_content_data)
    uncertainties = get_errors(yaml_uncertainties, bin_index=[2])

    uncertainties_absolute = uncertainties["errors"] * data_central.reshape(1, -1) * 1e-2
    stat_abs = uncertainties_absolute.iloc[[0], [0]]
    sys_abs = uncertainties_absolute.iloc[[0], 1:-2]
    errors = {"statistics": stat_abs, "systematics": sys_abs}

    n_sys = sys_abs.shape[1]
    sys_types = {"treatment": ["MULT"] * n_sys, "type": ["UNCORR"] * (n_sys - 1) + ["ATLASLUMI15"]}
    sys_types_df = pd.DataFrame(sys_types, index=sys_abs.columns).T
    errors["systematics"] = pd.concat([sys_types_df, sys_abs])

    dump_commondata(kinematics, data_central, errors, obs="TCHANNEL-XSEC")

    return


if __name__ == "__main__":
    main_filter()
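The asymmetric-uncertainty handling above delegates to symmetrize_errors from nnpdf_data.filter_utils.utils, which returns a shift for the central value and a symmetrized width. As a rough guide, here is a minimal self-contained sketch assuming the d'Agostini-style prescription such helpers commonly implement; the packaged function is the authoritative version and may differ in detail.

    # Minimal sketch of asymmetric-error symmetrization, assuming a
    # d'Agostini-style prescription; the real helper is
    # nnpdf_data.filter_utils.utils.symmetrize_errors.
    import math


    def symmetrize_errors_sketch(delta_plus: float, delta_minus: float) -> tuple:
        """Return (shift, symmetrized sigma); delta_minus is typically negative."""
        semi_diff = (delta_plus + delta_minus) / 2  # half the asymmetry -> CV shift
        average = (delta_plus - delta_minus) / 2    # average half-width
        sigma = math.sqrt(average**2 + 2 * semi_diff**2)
        return semi_diff, sigma


    # e.g. a +3/-1 asymmetric pair:
    shift, sigma = symmetrize_errors_sketch(3.0, -1.0)
    print(shift, sigma)  # shift = 1.0 is added to the CV, sigma ~ 2.45 is kept

In get_errors these per-source shifts are accumulated into shifts_cv, one entry per bin. Running python filter.py from the dataset folder (with the HepData tables in rawdata/) regenerates data_TCHANNEL-XSEC.yaml, kinematics_TCHANNEL-XSEC.yaml, and uncertainties_TCHANNEL-XSEC.yaml.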
nnpdf_data/nnpdf_data/commondata/ATLAS_SINGLETOP_13TEV/kinematics_TCHANNEL-XSEC.yaml
12 changes: 2 additions & 10 deletions
@@ -1,13 +1,5 @@
 bins:
-- k1:
+- m_t2:
     min: null
-    mid: 0.0
-    max: null
-  k2:
-    min: null
-    mid: 30032.89
-    max: null
-  k3:
-    min: null
-    mid: 13000.0
+    mid: 29756.25
     max: null
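The new midpoint is just the squared top mass used in filter.py, while the value it replaces corresponded to the older m_t = 173.3 GeV convention; a quick arithmetic check:

    # Consistency check of the kinematics midpoints (pure arithmetic):
    MT_VALUE = 172.5                        # top mass used in filter.py, in GeV
    assert MT_VALUE**2 == 29756.25          # new mid value of m_t2
    assert round(173.3**2, 2) == 30032.89   # old mid matched m_t = 173.3 GeV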
nnpdf_data/nnpdf_data/commondata/ATLAS_SINGLETOP_13TEV/metadata.yaml
38 changes: 14 additions & 24 deletions
@@ -1,60 +1,50 @@
 setname: ATLAS_SINGLETOP_13TEV
-version: 1
-version_comment: Port of old commondata
+version: 2
+version_comment: Implementation of new data
 nnpdf_metadata:
   nnpdf31_process: SINGLETOP
   experiment: ATLAS
 arXiv:
   url: https://arxiv.org/abs/1609.03920
   journal: JHEP 04 (2017) 086
 iNSPIRE:
-  url: ''
+  url: https://inspirehep.net/literature/1486394
 hepdata:
-  url: ''
-  version: -1
+  url: https://www.hepdata.net/record/ins1486394
+  version: 1
 implemented_observables:
 - observable_name: TCHANNEL-XSEC
   observable:
     description: Heavy Quarks Total Cross Section
     label: ATLAS single top $R_{t}$ 13 TeV
     units: ''
   process_type: INC
-  tables: []
+  tables: [3, 4]
   npoints: []
   ndata: 1
   plotting:
-    kinematics_override: inc_sqrt_scale
+    kinematics_override: identity
     dataset_label: ATLAS single top $R_{t}$ 13 TeV
     plot_x: idat
   kinematic_coverage:
-  - k1
-  - k2
-  - k3
+  - m_t2
   kinematics:
     variables:
-      k1:
-        description: Variable k1
-        label: k1
-        units: ''
-      k2:
-        description: Variable k2
-        label: k2
-        units: ''
-      k3:
-        description: Variable k3
-        label: k3
-        units: ''
+      m_t2:
+        description: "top mass squared"
+        label: $M^2$
+        units: $GeV^2$
     file: kinematics_TCHANNEL-XSEC.yaml
   theory:
     conversion_factor: 1.0
     operation: RATIO
     FK_tables:
     - - ATLAS_SINGLETOP_TCH_R_13TEV-ATLAS_SINGLETOP_TCH_R_13TEV_T
     - - ATLAS_SINGLETOP_TCH_R_13TEV-ATLAS_SINGLETOP_TCH_R_13TEV_TB
-  data_uncertainties: []
+  data_uncertainties: [uncertainties_TCHANNEL-XSEC.yaml]
   variants:
     legacy:
       data_uncertainties:
       - uncertainties_legacy_TCHANNEL-XSEC.yaml
-  data_central: data_legacy_TCHANNEL-XSEC.yaml
+  data_central: data_TCHANNEL-XSEC.yaml
   ported_from: ATLAS_SINGLETOP_TCH_R_13TEV
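Since the theory operation is RATIO, the prediction for this point divides the convolution of the first FK table (top) by that of the second (antitop), giving R_t = sigma(tq)/sigma(tbar q). A schematic with hypothetical numbers only; the real combination is performed by the NNPDF theory machinery:

    import numpy as np

    # Schematic RATIO combination of two FK-table predictions; the arrays
    # stand in for hypothetical convolutions sigma(tq) and sigma(tbar q) in pb.
    def ratio_prediction(sigma_t: np.ndarray, sigma_tbar: np.ndarray) -> np.ndarray:
        return sigma_t / sigma_tbar

    print(ratio_prediction(np.array([136.0]), np.array([81.0])))  # R_t ~ 1.68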