-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathgen_v.py
241 lines (198 loc) · 7.7 KB
/
gen_v.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
# coding: utf-8
"""
Producers for L1 prefiring weights.
"""
from __future__ import annotations
from columnflow.production import Producer, producer
from columnflow.util import maybe_import, InsertableDict, DotDict
from columnflow.columnar_util import set_ak_column
np = maybe_import("numpy")
ak = maybe_import("awkward")
@producer(
uses={
"GenPart.genPartIdxMother", "GenPart.pdgId", "GenPart.statusFlags",
"GenPart.pt", "GenPart.eta", "GenPart.phi", "GenPart.mass",
},
# requested GenVBoson columns, passed to the *uses* and *produces*
produced_v_columns={"pt"},
mc_only=True,
)
def gen_v_boson(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
"""
Produce gen-level Z or W bosons from `GenParticle` collection.
"""
# try to find parton-level electroweak boson
abs_id = abs(events.GenPart.pdgId)
v_boson = events.GenPart[((abs_id == 23) | (abs_id == 24))]
v_boson = v_boson[v_boson.hasFlags("isLastCopy")]
v_boson = v_boson[~ak.is_none(v_boson, axis=1)]
if ak.any(ak.num(v_boson) > 1):
raise Exception("found more than one parton-level Z/W boson")
v_boson = ak.firsts(v_boson)
# if not found, reconstruct pair of leptons from hard process
leptons = events.GenPart[((abs_id >= 11) & (abs_id <= 16))]
leptons = leptons[leptons.hasFlags("isHardProcess")]
if ak.any(ak.num(leptons) != 2):
raise Exception("number of hard process leptons != 2")
dilepton_pair = (leptons[:, 0] + leptons[:, 1])
# assign PDG ID based on lepton flavors: 23 (Z), +/- 24 (W+/W-)
dilepton_pdg_id = ak.zeros_like(dilepton_pair.pt)
dilepton_pdg_id = ak.where(
abs(leptons.pdgId[:, 0]) == abs(leptons.pdgId[:, 1]),
23,
dilepton_pdg_id,
)
dilepton_pdg_id = ak.where(
abs(leptons.pdgId[:, 0]) != abs(leptons.pdgId[:, 1]),
24 * ak.where(
leptons.pdgId[:, 0] % 2,
np.sign(leptons.pdgId[:, 1]),
np.sign(leptons.pdgId[:, 0]),
),
dilepton_pdg_id,
)
dilepton_pair["pdgId"] = dilepton_pdg_id
# check if the V boson type is consistent
unique_pdg_id = (set(abs(v_boson.pdgId)) | set(dilepton_pair.pdgId)) - {None}
if len(unique_pdg_id) != 1:
raise Exception(f"found multiple boson types: {unique_pdg_id}")
# save the column
for field in self.produced_v_columns:
value = ak.where(
ak.is_none(v_boson),
getattr(dilepton_pair, field),
getattr(v_boson, field),
)
events = set_ak_column(
events,
f"GenVBoson.{field}",
ak.fill_none(value, 0.0),
)
return events
@gen_v_boson.init
def gen_v_boson_init(self: Producer) -> bool:
for col in self.produced_v_columns:
self.uses.add(f"GenPart.{col}")
self.produces.add(f"GenVBoson.{col}")
@gen_v_boson.skip
def gen_v_boson_skip(self: Producer) -> bool:
"""
Custom skip function that checks whether the dataset is a MC simulation containing
V+jets events.
"""
# never skip when there is not dataset
if not getattr(self, "dataset_inst", None):
return False
return self.dataset_inst.is_data or not (
self.dataset_inst.has_tag("is_v_jets")
)
@producer(
uses={
"GenVBoson.pt",
},
produces={
"vjets_weight", "vjets_weight_up", "vjets_weight_down",
},
# only run on mc
mc_only=True,
# function to determine the correction file
get_vjets_reweighting_file=(lambda self, external_files: external_files.vjets_reweighting),
# function to determine the correction file
get_vjets_reweighting_config=(lambda self: self.config_inst.x.vjets_reweighting),
)
def vjets_weight(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
"""
Producer for V+jets K factor weights. Requires an external file in the config as under ``vjets_reweighting``:
.. code-block:: python
cfg.x.external_files = DotDict.wrap({
"vjets_reweighting": "data/json/vjets_reweighting.json.gz",
})
*get_vjets_reweighting_file* can be adapted in a subclass in case it is stored differently in the external
files.
The name of the corrections as a function of the generator-level *W* and *Z* transverse momentum, as well as the
associated uncertainty should be given as an auxiliary entry in the config:
.. code-block:: python
cfg.x.vjets_reweighting = DotDict.wrap({
"w": {
"value": "wjets_kfactor_value",
"error": "wjets_kfactor_error",
},
"z": {
"value": "zjets_kfactor_value",
"error": "zjets_kfactor_error",
},
})
"""
# fail if not run in ttbar simulation
if not self.dataset_inst.has_tag("is_v_jets"):
raise Exception(f"vjets_weight should only run for W+Jets and Z+Jets datasets, got {self.dataset_inst}")
def get_kfactor(obj_name, key, obj):
"""Obtain K factor."""
# keep track of missing inputs
input_is_none = ak.is_none(obj.pt)
pt = ak.fill_none(obj.pt, 0.0)
# get values from correctionlib evaluator
val = self.vjets_reweighting_evaluators[obj_name][key](pt)
# mask values where inputs were missing
val = ak.where(~input_is_none, val, 1.0)
return val
# get config key by boson type
if self.dataset_inst.has_tag("is_z_jets"):
boson = "z"
elif self.dataset_inst.has_tag("is_w_jets"):
boson = "w"
else:
assert False
# compute the prefiring probablities and related statistical uncertainties
kfactor = DotDict.wrap({})
for key in ("value", "error"):
kfactor[key] = get_kfactor(boson, key, events.GenVBoson)
weights = {
"nominal": kfactor.value,
"up": kfactor.value + kfactor.error,
"down": kfactor.value - kfactor.error,
}
# save the weights
events = set_ak_column(events, "vjets_weight", weights["nominal"], value_type=np.float32)
events = set_ak_column(events, "vjets_weight_up", weights["up"], value_type=np.float32)
events = set_ak_column(events, "vjets_weight_down", weights["down"], value_type=np.float32)
return events
@vjets_weight.skip
def vjets_weight_skip(self: Producer) -> bool:
"""
Skip if running on anything except ttbar MC simulation.
"""
# never skip when there is no dataset
if not getattr(self, "dataset_inst", None):
return False
return self.dataset_inst.is_data or not (
self.dataset_inst.has_tag("is_v_jets")
)
@vjets_weight.init
def vjets_weight_init(self: Producer) -> None:
shift_inst = getattr(self, "local_shift_inst", None)
if not shift_inst:
return
@vjets_weight.requires
def vjets_weight_requires(self: Producer, reqs: dict) -> None:
if "external_files" in reqs:
return
from columnflow.tasks.external import BundleExternalFiles
reqs["external_files"] = BundleExternalFiles.req(self.task)
@vjets_weight.setup
def vjets_weight_setup(self: Producer, reqs: dict, inputs: dict, reader_targets: InsertableDict) -> None:
bundle = reqs["external_files"]
# create the L1 prefiring weight evaluator
import correctionlib
correctionlib.highlevel.Correction.__call__ = correctionlib.highlevel.Correction.evaluate
correction_set = correctionlib.CorrectionSet.from_string(
self.get_vjets_reweighting_file(bundle.files).load(formatter="gzip").decode("utf-8"),
)
corrections = self.get_vjets_reweighting_config()
self.vjets_reweighting_evaluators = {
obj_name: {
key: correction_set[correction_name]
for key, correction_name in corrections_obj.items()
}
for obj_name, corrections_obj in corrections.items()
}