Skip to content

Commit

Permalink
Merge branch 'master' into tevatron_inc_DY
Browse files Browse the repository at this point in the history
  • Loading branch information
scarlehoff authored Nov 25, 2024
2 parents c9808b9 + be0ed18 commit 05b77d3
Show file tree
Hide file tree
Showing 46 changed files with 4,071 additions and 258 deletions.
269 changes: 269 additions & 0 deletions nnpdf_data/nnpdf_data/commondata/ATLAS_SINGLETOP_13TEV/filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,269 @@
import pathlib

import numpy as np
import pandas as pd
import yaml

from nnpdf_data.filter_utils.utils import prettify_float

yaml.add_representer(float, prettify_float)

NB_POINTS = 4
MT_VALUE = 172.5
SQRT_S = 13_000.0

from nnpdf_data.filter_utils.utils import cormat_to_covmat, covmat_to_artunc
from nnpdf_data.filter_utils.utils import symmetrize_errors as se


def load_yaml(table_id: int, version: int = 1) -> dict:
"""Load the HEP data table in yaml format.
Parameters
----------
table_id: int
table ID number
Returns
-------
dict:
ditionary containing the table contents
"""
filename = f"HEPData-ins1486394-v{version}-Table_{table_id}"
table = pathlib.Path(f"./rawdata/{filename}.yaml")

return yaml.safe_load(table.read_text())


def get_kinematics(hepdata: dict, bin_index: list = [], indx: int = 0) -> list:
"""Read the version and list of tables from metadata.
Parameters
----------
hepdata: dict
dictionary containing all data info
bin_index: list
list of Non-empty bin index
indx: int
Column index from which to read, default=0
Returns
-------
kinematics: list
kinematic info
"""
if len(hepdata["independent_variables"]) > 0:
bins = hepdata["independent_variables"][indx]["values"]
if len(bin_index) > 0:
bins = [bins[i] for i in bin_index]
else:
bins = []

kinematics = []
if len(bins) > 1: # differential case
for i in bin_index:
ymin, ymax = [float(value) for value in bins[i]["value"].split('-')]
kin_value = {
"y_t": {"min": ymin, "mid": (ymin + ymax) / 2, "max": ymax},
"m_t2": {"min": None, "mid": MT_VALUE**2, "max": None},
"sqrts": {"min": None, "mid": SQRT_S, "max": None},
}
kinematics.append(kin_value)
else: # inclusive case
kin_value = {"m_t2": {"min": None, "mid": MT_VALUE**2, "max": None}}
kinematics.append(kin_value)

return kinematics


def get_data_values(hepdata: dict, bin_index: list, indx: int = 0) -> list:
"""Extract the central values from the HepData yaml file.
Parameters
----------
hepdata: dict
dictionary containing all data info
bin_index: list
Bin indices that must be parsed
indx: int
Column index from which to read the central value, default=0
Returns
-------
list:
list of dictionaries whose contents are the central values
"""
central = hepdata["dependent_variables"][indx]["values"]
return np.array([central[i]["value"] for i in bin_index])


def get_errors(hepdata: dict, bin_index: list) -> dict:
"""
Extract the uncertainties from hepdata and computes the shift of the central value in case of
asymmetric uncertainties
Parameters
----------
hepdata: dict
Hepdata yaml file loaded as dictionary
bin_index: list
Bin indices that must be parsed
Returns
-------
dict:
Dictionary containing the errors (as pandas DataFrame) and shifts of central values
"""
# parse the systematics
errors = []
shifts_cv = [] # relevant for asymmetric uncertainties
for i in bin_index:
bin = hepdata["dependent_variables"][i]

# loop over sources of uncertainty
error_sources = []
shift_cv = 0
for source in bin["values"]:
if "errors" not in source:
error_sources.append(None)
elif "symerror" in source["errors"][0]:
error_sources.append(source["errors"][0]["symerror"])
elif "asymerror" in source["errors"][0]:

delta_min = source["errors"][0]["asymerror"]["minus"]
delta_plus = source["errors"][0]["asymerror"]["plus"]
se_delta, se_sigma = se(delta_plus, delta_min)

# shift central value
shift_cv += se_delta

error_sources.append(se_sigma)
shifts_cv.append(shift_cv)
errors.append(error_sources)
errors = np.array(errors)

# get the description of the uncertainty from hepdata
error_names = [
error_name["value"] for error_name in hepdata["independent_variables"][0]["values"]
]
errors = pd.DataFrame(
errors, columns=error_names, index=[f"bin {i}" for i in range(errors.shape[0])]
)

return {"errors": errors, "shifts_cv": np.array(shifts_cv)}


def format_uncertainties(uncs: dict) -> list:
"""Format the uncertainties to be dumped into the yaml file.
Parameters
----------
uncs: dict
Dictionary containing the various source of uncertainties
Returns
-------
list:
list of dictionaries whose elements are the various errors
"""

combined_errors = []
n_bins = uncs["systematics"].index.str.startswith("bin").sum()
for i in range(n_bins):
errors = {}
if "statistics" in uncs:
errors["stat"] = float(uncs["statistics"].iloc[i, 0])
for j, unc in enumerate(uncs["systematics"].loc[f"bin {i}"].values):
errors[f"sys_corr_{j + 1}"] = float(unc)

combined_errors.append(errors)

return combined_errors


def dump_commondata(kinematics: list, data: list, errors: dict, obs: str) -> None:
"""Function that generates and writes the commondata files.
Parameters
----------
kinematics: list
list containing the kinematic values
data: list
list containing the central values
errors: dict
Dictionary containing the different errors
obs: str
Name to append to the file names
"""

if "statistics" in errors:
error_definition = {
"stat": {
"description": "Uncorrelated statistical uncertainties",
"treatment": "ADD",
"type": "UNCORR",
}
}
else:
error_definition = {}

n_sys = errors["systematics"].shape[1]
for i in range(n_sys):

error_definition[f"sys_corr_{i + 1}"] = {
"description": errors["systematics"].columns[i],
"treatment": errors["systematics"].loc["treatment"].iloc[i],
"type": errors["systematics"].loc["type"].iloc[i],
}

errors_formatted = format_uncertainties(errors)
with open(f"data_{obs}.yaml", "w") as file:
yaml.dump({"data_central": data.tolist()}, file, sort_keys=False)

with open(f"kinematics_{obs}.yaml", "w") as file:
yaml.dump({"bins": kinematics}, file, sort_keys=False)

with open(f"uncertainties_{obs}.yaml", "w") as file:
yaml.dump(
{"definitions": error_definition, "bins": errors_formatted}, file, sort_keys=False
)


def main_filter() -> None:
"""
This filter produces the commondata for the following three observables:
1) T-Y-NORM
2) TBAR-Y-NORM
3) TCHANNEL-XSEC (ratio tq/tqbar)
1) and 2) follow the same procedure.
"""

# TCHANNEL-XSEC RATIO
yaml_content_data = load_yaml(table_id=3, version=1)
yaml_uncertainties = load_yaml(table_id=4, version=1)

data_central = get_data_values(yaml_content_data, bin_index=[0], indx=2)
kinematics = get_kinematics(yaml_content_data)
uncertainties = get_errors(yaml_uncertainties, bin_index=[2])

uncertainties_absolute = uncertainties["errors"] * data_central.reshape(1, -1) * 1e-2
stat_abs = uncertainties_absolute.iloc[[0], [0]]
sys_abs = uncertainties_absolute.iloc[[0], 1:-2]
errors = {"statistics": stat_abs, "systematics": sys_abs}

n_sys = sys_abs.shape[1]
sys_types = {"treatment": ["MULT"] * n_sys, "type": ["UNCORR"] * (n_sys - 1) + ["ATLASLUMI15"]}
sys_types_df = pd.DataFrame(sys_types, index=sys_abs.columns).T
errors["systematics"] = pd.concat([sys_types_df, sys_abs])

dump_commondata(kinematics, data_central, errors, obs="TCHANNEL-XSEC")

return


if __name__ == "__main__":
main_filter()
Original file line number Diff line number Diff line change
@@ -1,13 +1,5 @@
bins:
- k1:
- m_t2:
min: null
mid: 0.0
max: null
k2:
min: null
mid: 30032.89
max: null
k3:
min: null
mid: 13000.0
mid: 29756.25
max: null
Original file line number Diff line number Diff line change
@@ -1,60 +1,50 @@
setname: ATLAS_SINGLETOP_13TEV
version: 1
version_comment: Port of old commondata
version: 2
version_comment: Implementation of new data
nnpdf_metadata:
nnpdf31_process: SINGLETOP
experiment: ATLAS
arXiv:
url: https://arxiv.org/abs/1609.03920
journal: JHEP 04 (2017) 086
iNSPIRE:
url: ''
url: https://inspirehep.net/literature/1486394
hepdata:
url: ''
version: -1
url: https://www.hepdata.net/record/ins1486394
version: 1
implemented_observables:
- observable_name: TCHANNEL-XSEC
observable:
description: Heavy Quarks Total Cross Section
label: ATLAS single top $R_{t}$ 13 TeV
units: ''
process_type: INC
tables: []
tables: [3, 4]
npoints: []
ndata: 1
plotting:
kinematics_override: inc_sqrt_scale
kinematics_override: identity
dataset_label: ATLAS single top $R_{t}$ 13 TeV
plot_x: idat
kinematic_coverage:
- k1
- k2
- k3
- m_t2
kinematics:
variables:
k1:
description: Variable k1
label: k1
units: ''
k2:
description: Variable k2
label: k2
units: ''
k3:
description: Variable k3
label: k3
units: ''
m_t2:
description: "top mass squared"
label: $M^2$
units: $GeV^2$
file: kinematics_TCHANNEL-XSEC.yaml
theory:
conversion_factor: 1.0
operation: RATIO
FK_tables:
- - ATLAS_SINGLETOP_TCH_R_13TEV-ATLAS_SINGLETOP_TCH_R_13TEV_T
- - ATLAS_SINGLETOP_TCH_R_13TEV-ATLAS_SINGLETOP_TCH_R_13TEV_TB
data_uncertainties: []
data_uncertainties: [uncertainties_TCHANNEL-XSEC.yaml]
variants:
legacy:
data_uncertainties:
- uncertainties_legacy_TCHANNEL-XSEC.yaml
data_central: data_legacy_TCHANNEL-XSEC.yaml
data_central: data_TCHANNEL-XSEC.yaml
ported_from: ATLAS_SINGLETOP_TCH_R_13TEV
Loading

0 comments on commit 05b77d3

Please sign in to comment.