Skip to content

Commit

Permalink
Weights is daskified; various rebase cruft for jetmet tools
Browse files Browse the repository at this point in the history
  • Loading branch information
lgray committed Mar 15, 2023
1 parent 5e3ce4d commit ff251c7
Show file tree
Hide file tree
Showing 4 changed files with 276 additions and 44 deletions.
146 changes: 104 additions & 42 deletions coffea/analysis_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,16 @@ class Weights:
Parameters
----------
size : int
size : int | None
size of the weight arrays to be handled (i.e. the number of events / instances).
If None then we expect to operate in delayed mode.
storeIndividual : bool, optional
store not only the total weight + variations, but also each individual weight.
Default is false.
"""

def __init__(self, size, storeIndividual=False):
self._weight = numpy.ones(size)
self._weight = None if size is None else numpy.ones(size)
self._weights = {}
self._modifiers = {}
self._weightStats = coffea.processor.dict_accumulator()
Expand Down Expand Up @@ -86,17 +87,21 @@ def __add_delayed(self, name, weight, weightUp, weightDown, shift):
# and we default to one or is it an invalid weight and we should never use this
# event in the first place (0) ?
weight = dask_awkward.fill_none(weight, 1.0)
self._weight = self._weight * weight
if self._weight is None:
self._weight = weight
else:
self._weight = self._weight * weight
if self._storeIndividual:
self._weights[name] = weight
self.__add_variation(name, weight, weightUp, weightDown, shift)
self._weightStats[name] = WeightStatistics(
weight.sum(),
(weight**2).sum(),
weight.min(),
weight.max(),
weight.size,
)
if isinstance(self._weightStats, coffea.processor.dict_accumulator):
self._weightStats = {}
self._weightStats[name] = {
"sumw": dask_awkward.to_dask_array(weight).sum(),
"sumw2": dask_awkward.to_dask_array(weight**2).sum(),
"minw": dask_awkward.to_dask_array(weight).min(),
"maxw": dask_awkward.to_dask_array(weight).max(),
}

def add(self, name, weight, weightUp=None, weightDown=None, shift=False):
"""Add a new weight
Expand Down Expand Up @@ -127,15 +132,44 @@ def add(self, name, weight, weightUp=None, weightDown=None, shift=False):
"Avoid using 'Up' and 'Down' in weight names, instead pass appropriate shifts to add() call"
)
weight = coffea.util._ensure_flat(weight, allow_missing=True)
if isinstance(weight, numpy.ndarray):
if isinstance(weight, numpy.ndarray) and isinstance(
self._weight, numpy.ndarray
):
self.__add_eager(name, weight, weightUp, weightDown, shift)
elif isinstance(weight, dask_awkward.Array):
elif isinstance(weight, dask_awkward.Array) and isinstance(
self._weight, (dask_awkward.Array, type(None))
):
self.__add_delayed(name, weight, weightUp, weightDown, shift)
else:
raise ValueError(
f"Incompatible weights: self._weight={type(self.weight)}, weight={type(weight)}"
)

def __add_multivariation_eager(
self, name, weight, modifierNames, weightsUp, weightsDown, shift=False
):
"""Add a new weight with multiple variations in eager mode"""
if isinstance(weight, numpy.ma.MaskedArray):
# TODO what to do with option-type? is it representative of unknown weight
# and we default to one or is it an invalid weight and we should never use this
# event in the first place (0) ?
weight = weight.filled(1.0)
self._weight = self._weight * weight
if self._storeIndividual:
self._weights[name] = weight
self.__add_variation(name, weight, weightUp, weightDown, shift)
# Now loop on the variations
if len(modifierNames) > 0:
if len(modifierNames) != len(weightsUp) or len(modifierNames) != len(
weightsDown
):
raise ValueError(
"Provide the same number of modifier names related to the list of modified weights"
)
for modifier, weightUp, weightDown in zip(
modifierNames, weightsUp, weightsDown
):
systName = f"{name}_{modifier}"
self.__add_variation(systName, weight, weightUp, weightDown, shift)
self._weightStats[name] = WeightStatistics(
weight.sum(),
(weight**2).sum(),
Expand All @@ -144,6 +178,41 @@ def add(self, name, weight, weightUp=None, weightDown=None, shift=False):
weight.size,
)

def __add_multivariation_delayed(
self, name, weight, modifierNames, weightsUp, weightsDown, shift=False
):
"""Add a new weight with multiple variations in delayed mode"""
if isinstance(weight, awkward.types.OptionType):
# TODO what to do with option-type? is it representative of unknown weight
# and we default to one or is it an invalid weight and we should never use this
# event in the first place (0) ?
weight = dask_awkward.fill_none(weight, 1.0)
if self._weight is None:
self._weight = weight
else:
self._weight = self._weight * weight
if self._storeIndividual:
self._weights[name] = weight
# Now loop on the variations
if len(modifierNames) > 0:
if len(modifierNames) != len(weightsUp) or len(modifierNames) != len(
weightsDown
):
raise ValueError(
"Provide the same number of modifier names related to the list of modified weights"
)
for modifier, weightUp, weightDown in zip(
modifierNames, weightsUp, weightsDown
):
systName = f"{name}_{modifier}"
self.__add_variation(systName, weight, weightUp, weightDown, shift)
self._weightStats[name] = {
"sumw": dask_awkward.to_dask_array(weight).sum(),
"sumw2": dask_awkward.to_dask_array(weight**2).sum(),
"minw": dask_awkward.to_dask_array(weight).min(),
"maxw": dask_awkward.to_dask_array(weight).max(),
}

def add_multivariation(
self, name, weight, modifierNames, weightsUp, weightsDown, shift=False
):
Expand Down Expand Up @@ -177,34 +246,22 @@ def add_multivariation(
"Avoid using 'Up' and 'Down' in weight names, instead pass appropriate shifts to add() call"
)
weight = coffea.util._ensure_flat(weight, allow_missing=True)
if isinstance(weight, numpy.ma.MaskedArray):
# TODO what to do with option-type? is it representative of unknown weight
# and we default to one or is it an invalid weight and we should never use this
# event in the first place (0) ?
weight = weight.filled(1.0)
self._weight = self._weight * weight
if self._storeIndividual:
self._weights[name] = weight
# Now loop on the variations
if len(modifierNames) > 0:
if len(modifierNames) != len(weightsUp) or len(modifierNames) != len(
weightsDown
):
raise ValueError(
"Provide the same number of modifier names related to the list of modified weights"
)
for modifier, weightUp, weightDown in zip(
modifierNames, weightsUp, weightsDown
if isinstance(weight, numpy.ndarray) and isinstance(
self._weight, numpy.ndarray
):
systName = f"{name}_{modifier}"
self.__add_variation(systName, weight, weightUp, weightDown, shift)
self._weightStats[name] = WeightStatistics(
weight.sum(),
(weight**2).sum(),
weight.min(),
weight.max(),
weight.size,
)
self.__add_multivariation_eager(
name, weight, modifierNames, weightsUp, weightsDown, shift
)
elif isinstance(weight, dask_awkward.Array) and isinstance(
self._weight, (dask_awkward.Array, type(None))
):
self.__add_multivariation_delayed(
name, weight, modifierNames, weightsUp, weightsDown, shift
)
else:
raise ValueError(
f"Incompatible weights: self._weight={type(self.weight)}, weight={type(weight)}"
)

def __add_variation_eager(self, name, weight, weightUp, weightDown, shift):
"""Helper function to add an eagerly calculated weight variation."""
Expand Down Expand Up @@ -331,9 +388,14 @@ def partial_weight(self, include=[], exclude=[]):
if exclude:
names = names - set(exclude)

w = numpy.ones(self._weight.size)
w = None
if isinstance(self._weight, numpy.ndarray):
w = numpy.ones(self._weight.size)
elif isinstance(self._weight, dask_awkward.Array):
w = dask_awkward.ones_like(self._weight)

for name in names:
w *= self._weights[name]
w = w * self._weights[name]

return w

Expand Down
2 changes: 1 addition & 1 deletion coffea/lookup_tools/txt_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ def convert_l5flavor_jes_txt_file(juncFilePath):
for name, ifile in components:
wrapped_up = _build_standard_jme_lookup(
*_parse_jme_formatted_file(
name, interpolatedFunc=False, parmsFromColumns=False, jme_f=ifile
name, interpolatedFunc=False, paramsFromColumns=False, jme_f=ifile
)
)
retval.update(wrapped_up)
Expand Down
Loading

0 comments on commit ff251c7

Please sign in to comment.