common.py
# coding: utf-8

"""
Selection modules for HH(bbWW) that are used for both SL and DL.
"""

from __future__ import annotations

from collections import defaultdict

from columnflow.types import Callable

import law

from columnflow.util import maybe_import, DotDict
from columnflow.columnar_util import EMPTY_FLOAT, get_ak_routes
from columnflow.production.util import attach_coffea_behavior
from columnflow.selection import Selector, SelectionResult, selector
from columnflow.selection.cms.met_filters import met_filters
from columnflow.selection.cms.json_filter import json_filter
from columnflow.selection.cms.jets import jet_veto_map
from columnflow.production.cms.mc_weight import mc_weight
from columnflow.production.categories import category_ids
from hbw.production.process_ids import hbw_process_ids
from columnflow.production.cms.seeds import deterministic_seeds
from hbw.selection.gen import hard_gen_particles
from hbw.production.weights import event_weights_to_normalize, large_weights_killer
from hbw.selection.stats import hbw_selection_step_stats, hbw_increment_stats
from hbw.selection.hists import hbw_selection_hists

np = maybe_import("numpy")
ak = maybe_import("awkward")

logger = law.logger.get_logger(__name__)


def masked_sorted_indices(mask: ak.Array, sort_var: ak.Array, ascending: bool = False) -> ak.Array:
    """
    Helper function that returns the indices of all objects passing *mask*,
    sorted by *sort_var* (descending by default).
    """
    indices = ak.argsort(sort_var, axis=-1, ascending=ascending)
    return indices[mask[indices]]
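

def _example_masked_sorted_indices_usage(events: ak.Array) -> ak.Array:
    """
    Illustrative sketch only (not part of the original selection logic): select jets above a
    hypothetical pt threshold and return them sorted by pt, hardest jet first. The "Jet.pt"
    field follows the NanoAOD naming used throughout this module.
    """
    jet_mask = events.Jet.pt > 25.0
    jet_indices = masked_sorted_indices(jet_mask, events.Jet.pt)
    return events.Jet[jet_indices]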


@selector(
    uses={"*"},
    exposed=True,
)
def check_columns(
    self: Selector,
    events: ak.Array,
    stats: defaultdict,
    # hists: dict,
    **kwargs,
) -> tuple[ak.Array, SelectionResult]:
    routes = get_ak_routes(events)  # noqa
    from hbw.util import debugger
    debugger()
    raise Exception("This Selector is only for checking nano content")


def get_met_filters(self: Selector):
    """Custom function that drops the BadPFMuonDzFilter MET filter for our Run 2 EOY signal samples."""
    met_filters = self.config_inst.x.met_filters
    if getattr(self, "dataset_inst", None) and self.dataset_inst.has_tag("is_eoy"):
        # remove the filter for EOY samples
        try:
            met_filters.remove("Flag.BadPFMuonDzFilter")
        except (KeyError, AttributeError):
            pass

    return met_filters


hbw_met_filters = met_filters.derive("hbw_met_filters", cls_dict=dict(get_met_filters=get_met_filters))


@selector(
    uses={
        jet_veto_map,
        hbw_met_filters, json_filter, "PV.npvsGood",
        hbw_process_ids, attach_coffea_behavior,
        mc_weight, large_weights_killer,
    },
    produces={
        hbw_met_filters, json_filter,
        hbw_process_ids, attach_coffea_behavior,
        mc_weight, large_weights_killer,
    },
    exposed=False,
)
def pre_selection(
    self: Selector,
    events: ak.Array,
    stats: defaultdict,
    **kwargs,
) -> tuple[ak.Array, SelectionResult]:
    """Selection steps that are run for both SL and DL before the channel-specific selection modules."""
    # temporary fix for optional types from Calibration (e.g. events.Jet.pt --> ?float32)
    # TODO: remove as soon as possible as it might lead to weird bugs when there are None entries in inputs
    events = ak.fill_none(events, EMPTY_FLOAT)

    # prepare the selection results that are updated at every step
    results = SelectionResult()

    # run deterministic seeds when no Calibrator has been requested
    if not self.task.calibrators:
        events = self[deterministic_seeds](events, **kwargs)

    # mc weight
    if self.dataset_inst.is_mc:
        events = self[mc_weight](events, **kwargs)
        events = self[large_weights_killer](events, **kwargs)

    if self.dataset_inst.is_mc:
        # get hard gen particles
        events, results = self[hard_gen_particles](events, results, **kwargs)

    # create process ids
    events = self[hbw_process_ids](events, **kwargs)

    # ensure coffea behavior
    events = self[attach_coffea_behavior](events, **kwargs)

    # apply some general quality criteria on events
    results.steps["good_vertex"] = events.PV.npvsGood >= 1
    events, met_results = self[hbw_met_filters](events, **kwargs)  # produces "met_filter" step
    results += met_results
    if self.dataset_inst.is_data:
        events, json_results = self[json_filter](events, **kwargs)  # produces "json" step
        results += json_results
    else:
        results.steps["json"] = ak.Array(np.ones(len(events), dtype=bool))

    # apply jet veto map
    events, jet_veto_results = self[jet_veto_map](events, **kwargs)
    results += jet_veto_results

    # combine quality criteria into a single step
    results.steps["cleanup"] = (
        # results.steps.jet_veto_map &
        results.steps.good_vertex &
        results.steps.met_filter &
        results.steps.json
    )

    return events, results


@pre_selection.init
def pre_selection_init(self: Selector) -> None:
    if self.task and not self.task.calibrators:
        self.uses.add(deterministic_seeds)
        self.produces.add(deterministic_seeds)

    if not getattr(self, "dataset_inst", None) or self.dataset_inst.is_data:
        return

    self.uses.update({hard_gen_particles})
    self.produces.update({hard_gen_particles})


@selector(
    uses={
        category_ids, hbw_increment_stats, hbw_selection_step_stats,
        hbw_selection_hists,
    },
    produces={
        category_ids, hbw_increment_stats, hbw_selection_step_stats,
        hbw_selection_hists,
    },
    exposed=False,
)
def post_selection(
    self: Selector,
    events: ak.Array,
    results: SelectionResult,
    stats: defaultdict,
    hists: dict,
    **kwargs,
) -> tuple[ak.Array, SelectionResult]:
    """Selection steps that are run for both SL and DL after the channel-specific selection modules."""
    # build categories
    events = self[category_ids](events, results=results, **kwargs)

    # produce event weights
    if self.dataset_inst.is_mc:
        events = self[event_weights_to_normalize](events, results=results, **kwargs)

    # increment stats
    events = self[hbw_selection_step_stats](events, results, stats, **kwargs)
    events = self[hbw_increment_stats](events, results, stats, **kwargs)
    events = self[hbw_selection_hists](events, results, hists, **kwargs)

    def log_fraction(stats_key: str, msg: str | None = None):
        if not stats.get(stats_key):
            return
        if not msg:
            msg = f"Fraction of {stats_key}"
        logger.info(f"{msg}: {(100 * stats[stats_key] / stats['num_events']):.2f}%")

    log_fraction("num_negative_weights", "Fraction of negative weights")
    log_fraction("num_pu_0", "Fraction of events with pu_weight == 0")
    log_fraction("num_pu_100", "Fraction of events with pu_weight >= 100")

    # temporary fix for optional types from Calibration (e.g. events.Jet.pt --> ?float32)
    # TODO: remove as soon as possible as it might lead to weird bugs when there are None entries in inputs
    events = ak.fill_none(events, EMPTY_FLOAT)

    logger.info(f"Selected {ak.sum(results.event)} out of {len(events)} events")
    return events, results


@post_selection.init
def post_selection_init(self: Selector) -> None:
    if not getattr(self, "dataset_inst", None) or self.dataset_inst.is_data:
        return

    self.uses.update({event_weights_to_normalize})
    self.produces.update({event_weights_to_normalize})
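

# Illustrative sketch (an assumption about usage, not code from this module): a channel-specific
# selector would typically call pre_selection first, add its own object and event selection
# steps, and finish with post_selection. The selector name below is a placeholder:
#
#     @selector(uses={pre_selection, post_selection}, produces={pre_selection, post_selection}, exposed=True)
#     def example_selection(self, events, stats, hists, **kwargs):
#         events, results = self[pre_selection](events, stats, **kwargs)
#         # ... channel-specific selection, updating `results` ...
#         events, results = self[post_selection](events, results, stats, hists, **kwargs)
#         return events, results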


configurable_attributes = {
    # lepton selection
    "mu_pt": float,
    "ele_pt": float,
    "mu2_pt": float,
    "ele2_pt": float,
    # trigger selection
    "trigger": dict,  # dict[str, list[str]]
    "trigger_config_func": Callable,
    # jet selection
    "jet_pt": float,
    "n_jet": int,
    # bjet selection
    "b_tagger": str,
    "btag_wp": str,
    "n_btag": int,
}
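

# Illustrative sketch (hypothetical function and trigger names, not part of this module): a
# Callable attribute such as "trigger_config_func" is invoked by configure_selector below.
# It can set config attributes directly and return None, in which case nothing is stored
# for the attribute itself:
#
#     def my_trigger_config_func(self):
#         self.config_inst.x.trigger = {"mu": ["IsoMu24"], "e": ["Ele30_WPTight_Gsf"]}
#         # no return value: configure_selector skips storing this attribute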


def configure_selector(self: Selector):
    """
    Helper to configure the selector with the configurable attributes.
    This function should be called in the main Selector's __init__ method.
    The configuration is stored in the main config_inst under the key "selector_config".
    The configurable attributes and their expected types are defined in the `configurable_attributes` dict.

    The configuration is performed as follows:

    - The configuration is performed once per config_inst, as soon as a config_inst is available. This is
      ensured by checking whether the config_inst already has the aux key "selector_config".
    - If the attribute is a Callable, it is called. This function can directly set config attributes or
      return the value to be set. When None is returned, the attribute is skipped.
    - If the attribute is None, it is skipped.
    - If the attribute is not of the expected type, a TypeError is raised.
    - If the attribute is already set in the config_inst with a different value, the overwrite is logged.
    - The configuration is stored in the config_inst under the key "selector_config".
    - The configuration is also stored in the config_inst under the key of the attribute name (to be removed).
    - The btag_sf, btag_column, and btag_wp_score are set based on the b_tagger attribute.
    """
    # ensure calling just once
    if not hasattr(self, "config_inst") or self.config_inst.has_aux("selector_config"):
        return
    self.config_inst.set_aux("selector_config", DotDict())

    for attr_name, attr_type in configurable_attributes.items():
        if not hasattr(self, attr_name):
            continue

        attr = getattr(self, attr_name)

        if isinstance(attr, Callable):
            logger.info(f"Calling config function '{attr_name}'")
            attr = attr()

        if attr is None:
            continue

        if not isinstance(attr, attr_type):
            raise TypeError(f"Attribute '{attr_name}' must be of type '{attr_type}' or None")

        if attr_name in self.config_inst.aux and attr != self.config_inst.get_aux(attr_name):
            logger.info(
                f"Selector {self.cls_name} is overwriting config attribute '{attr_name}' "
                f"(replaces '{self.config_inst.get_aux(attr_name)}' with '{attr}')",
            )

        # TODO: only use the "selector_config" and remove the direct config aux
        self.config_inst.set_aux(attr_name, attr)
        self.config_inst.x.selector_config[attr_name] = attr

    # define config for b-tagging SFs (this needs to be done by the main Selector, because it
    # needs to be set up before running the init of the btag Producer)
    # NOTE: we could try using the BTagSFConfig instead of this overly complicated setup
    if self.config_inst.x.b_tagger == "deepjet":
        self.config_inst.x.btag_sf = ("deepJet_shape", self.config_inst.x.btag_sf_jec_sources, "btagDeepFlavB")
        self.config_inst.x.btag_column = "btagDeepFlavB"
    elif self.config_inst.x.b_tagger == "particlenet":
        self.config_inst.x.btag_sf = ("particleNet_shape", self.config_inst.x.btag_sf_jec_sources, "btagPNetB")
        self.config_inst.x.btag_column = "btagPNetB"
    else:
        raise NotImplementedError(f"Cannot resolve btag sf config for b_tagger {self.config_inst.x.b_tagger}")

    # write the used btag wp score into the config
    self.config_inst.x.btag_wp_score = (
        self.config_inst.x.btag_working_points[self.config_inst.x.b_tagger][self.config_inst.x.btag_wp]
    )
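

# Illustrative sketch (hypothetical names and values, not part of this module): a main Selector
# can define the configurable attributes as class-level defaults and call configure_selector
# in its __init__, as suggested in the docstring above:
#
#     class ExampleSelection(Selector):
#         mu_pt = 25.0
#         ele_pt = 30.0
#         jet_pt = 25.0
#         n_jet = 2
#         b_tagger = "deepjet"
#         btag_wp = "medium"
#         n_btag = 1
#
#         def __init__(self, *args, **kwargs):
#             super().__init__(*args, **kwargs)
#             configure_selector(self)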