Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support rebinning in histogram postprocessing #418

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions config_example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ Regions:
Variable: "jet_pt"
Filter: "lep_charge > 0"
Binning: [200, 300, 400, 500, 600]
Rebin:
LowerIndex: -1
UpperIndex: 8
Steps: -5

Samples:
- Name: "Data"
Expand Down
4 changes: 3 additions & 1 deletion example.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
cabinetry.configuration.print_overview(config)

# create template histograms
cabinetry.templates.build(config, method="uproot")
# cabinetry.templates.build(config, method="uproot")

# perform histogram post-processing
cabinetry.templates.postprocess(config)

raise SystemExit

# visualize systematic templates
cabinetry.visualize.templates(config)

Expand Down
25 changes: 25 additions & 0 deletions src/cabinetry/schemas/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@
},
"uniqueItems": true
},
"Rebin": {
"description": "rebinning to apply",
"$ref": "#/definitions/rebin_setting"
},
"Filter": {
"description": "selection criteria to apply",
"type": "string"
Expand Down Expand Up @@ -398,6 +402,27 @@
}
]
},
"rebin_setting": {
"title": "Rebin setting",
"$$target": "#/definitions/rebin_setting",
"description": "rebin settings for template histograms",
"type": "object",
"properties": {
"LowerIndex": {
"description": "zero-based lower index for histogram",
"type": "integer"
},
"UpperIndex": {
"description": "zero-based upper index for histogram",
"type": "integer"
},
"Steps": {
"description": "number of bins to merge",
"type": "integer"
}
},
"additionalProperties": false
},
"smoothing_setting": {
"title": "Smoothing setting",
"$$target": "#/definitions/smoothing_setting",
Expand Down
46 changes: 46 additions & 0 deletions src/cabinetry/templates/postprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pathlib
from typing import Any, Dict, Literal, Optional

import boost_histogram as bh
import numpy as np

from cabinetry import configuration
Expand All @@ -31,6 +32,30 @@ def _fix_stat_unc(histogram: histo.Histogram, name: str) -> None:
histogram.stdev = np.nan_to_num(histogram.stdev, nan=0.0)


def _rebinning_slice(region: Dict[str, Any]) -> Optional[slice]:
"""Returns the slice for rebinning a histogram or None otherwise.

Args:
region (Dict[str, Any]): containing all region information

Returns:
Optional[slice]: slice for rebinning or None
"""
rebinning_info = region.get("Rebin", {})
if all(
[r not in ["LowerIndex", "UpperIndex", "Steps"] for r in rebinning_info.keys()]
):
# no rebinning needed
return None

lower_idx = rebinning_info.get("LowerIndex", None)
upper_idx = rebinning_info.get("UpperIndex", None)
steps = rebinning_info.get("Steps", slice(None))
if steps < 1:
raise ValueError("steps for merging must be at least 1")
return slice(lower_idx, upper_idx, bh.rebin(steps))


def _apply_353qh_twice(
variation: histo.Histogram, nominal: histo.Histogram, name: str
) -> None:
Expand Down Expand Up @@ -91,6 +116,7 @@ def apply_postprocessing(
histogram: histo.Histogram,
name: str,
*,
rebinning_slice: Optional[slice] = None,
smoothing_algorithm: Optional[str] = None,
nominal_histogram: Optional[histo.Histogram] = None,
) -> histo.Histogram:
Expand All @@ -103,6 +129,8 @@ def apply_postprocessing(
Args:
histogram (cabinetry.histo.Histogram): the histogram to postprocess
name (str): histogram name for logging
rebinning_slice (Optional[slice]): rebinning to apply, defaults to None (no re-
binning applied)
smoothing_algorithm (Optional[str]): name of smoothing algorithm to apply,
defaults to None (no smoothing done)
nominal_histogram (Optional[cabinetry.histo.Histogram]): nominal histogram
Expand All @@ -113,14 +141,24 @@ def apply_postprocessing(
"""
# copy histogram to new object to leave it unchanged
modified_histogram = copy.deepcopy(histogram)

# apply rebinning
if rebinning_slice is not None:
modified_histogram = modified_histogram[
rebinning_slice
] # type: ignore[assignment]

_fix_stat_unc(modified_histogram, name)

# smoothing
if smoothing_algorithm is not None:
if smoothing_algorithm == "353QH, twice":
if nominal_histogram is None:
raise ValueError("cannot apply smoothing, nominal histogram missing")
_apply_353qh_twice(modified_histogram, nominal_histogram, name)
else:
log.warning(f"unknown smoothing algorithm {smoothing_algorithm}")

return modified_histogram


Expand Down Expand Up @@ -162,6 +200,10 @@ def process_template(
)
histogram_name = histo.name(region, sample, systematic, template=template)

# rebinning information from config
rebinning_slice = _rebinning_slice(region)

# smoothing algorithm from config
smoothing_algorithm = _smoothing_algorithm(region, sample, systematic)
if smoothing_algorithm is None:
nominal_histogram = None
Expand All @@ -173,14 +215,18 @@ def process_template(
histogram_folder, region, sample, {}, modified=False
)

log.info("edges before rebinning", histogram.bins) # to be removed
new_histogram = apply_postprocessing(
histogram,
histogram_name,
rebinning_slice=rebinning_slice,
smoothing_algorithm=smoothing_algorithm,
nominal_histogram=nominal_histogram,
)
histogram.validate(histogram_name)
new_histo_path = histogram_folder / (histogram_name + "_modified")
new_histogram.save(new_histo_path)

log.info("edges after rebinning", new_histogram.bins) # to be removed

return process_template