Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: custom modifiers #1188

Closed
wants to merge 13 commits into from
241 changes: 241 additions & 0 deletions docs/examples/notebooks/custom_modifiers.ipynb

Large diffs are not rendered by default.

21 changes: 12 additions & 9 deletions src/pyhf/cli/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pyhf.workspace import Workspace
from pyhf import modifiers
from pyhf import utils
from pyhf import parameters

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -72,14 +73,16 @@ def inspect(workspace, output_file, measurement):
]
result['modifiers'] = dict(ws.modifiers)

parset_descr = {
parameters.paramsets.unconstrained: 'unconstrained',
parameters.paramsets.constrained_by_normal: 'constrained_by_normal',
parameters.paramsets.constrained_by_poisson: 'constrained_by_poisson',
}

model = ws.model()

result['parameters'] = sorted(
(
parname,
modifiers.registry[result['modifiers'][parname]]
.required_parset([], [])['paramset_type']
.__name__,
)
for parname in ws.parameters
(k, parset_descr[type(v['paramset'])]) for k, v in model.config.par_map.items()
)
result['systematics'] = [
(
Expand All @@ -97,7 +100,7 @@ def inspect(workspace, output_file, measurement):

maxlen_channels = max(map(len, ws.channels))
maxlen_samples = max(map(len, ws.samples))
maxlen_parameters = max(map(len, ws.parameters))
maxlen_parameters = max(map(len, [p for p, _ in result['parameters']]))
maxlen_measurements = max(map(lambda x: len(x[0]), result['measurements']))
maxlen = max(
[maxlen_channels, maxlen_samples, maxlen_parameters, maxlen_measurements]
Expand Down Expand Up @@ -174,7 +177,7 @@ def inspect(workspace, output_file, measurement):
'--modifier-type',
default=[],
multiple=True,
type=click.Choice(modifiers.uncombined.keys()),
type=click.Choice(modifiers.pyhfset.keys()),
)
@click.option('--measurement', default=[], multiple=True, metavar='<MEASUREMENT>...')
def prune(
Expand Down
6 changes: 4 additions & 2 deletions src/pyhf/infer/calculators.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,9 @@ def pvalue(self, value):
return tensorlib.astensor(
tensorlib.sum(
tensorlib.where(
self.samples >= value, tensorlib.astensor(1), tensorlib.astensor(0)
self.samples >= value,
tensorlib.astensor(1.0),
tensorlib.astensor(0.0),
)
)
/ tensorlib.shape(self.samples)[0]
Expand All @@ -569,7 +571,7 @@ def expected_value(self, nsigma):
>>> samples = normal.sample((100,))
>>> dist = pyhf.infer.calculators.EmpiricalDistribution(samples)
>>> dist.expected_value(nsigma=1)
6.15094381209505
6.150943812095049

>>> import pyhf
>>> import numpy.random as random
Expand Down
3 changes: 0 additions & 3 deletions src/pyhf/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.channels = []
self.samples = []
self.parameters = []
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unsure here, but why did this get moved up to _ModelConfig instead? It seems like the information is still the same.

self.modifiers = []
# keep track of the width of each channel (how many bins)
self.channel_nbins = {}
Expand All @@ -30,7 +29,6 @@ def __init__(self, *args, **kwargs):
for sample in channel['samples']:
self.samples.append(sample['name'])
for modifier_def in sample['modifiers']:
self.parameters.append(modifier_def['name'])
self.modifiers.append(
(
modifier_def['name'], # mod name
Expand All @@ -40,7 +38,6 @@ def __init__(self, *args, **kwargs):

self.channels = sorted(list(set(self.channels)))
self.samples = sorted(list(set(self.samples)))
self.parameters = sorted(list(set(self.parameters)))
self.modifiers = sorted(list(set(self.modifiers)))
self.channel_nbins = {
channel: self.channel_nbins[channel] for channel in self.channels
Expand Down
247 changes: 40 additions & 207 deletions src/pyhf/modifiers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,214 +1,47 @@
import logging

from pyhf import exceptions
from pyhf import get_backend

log = logging.getLogger(__name__)

registry = {}


def validate_modifier_structure(modifier):
    """
    Verify that a modifier exposes every method the registry relies on.

    Args:
        modifier: the candidate modifier object to inspect. Its ``__name__``
            is read when building the error message.

    Returns:
        bool: ``True`` when every required method is present.

    Raises:
        pyhf.exceptions.InvalidModifier: a required method is missing.
    """
    # Names are probed with hasattr only; signatures are not inspected.
    for method in ('required_parset',):
        if hasattr(modifier, method):
            continue
        raise exceptions.InvalidModifier(
            f'Expected {method:s} method on modifier {modifier.__name__:s}'
        )
    return True


def add_to_registry(
    cls, cls_name=None, constrained=False, pdf_type='normal', op_code='addition'
):
    """
    Validate a modifier class and add it to the module-level ``registry``.

    Every check runs before ``cls`` is modified, so a modifier that is
    rejected is left without ``is_constrained``, ``pdf_type`` or ``op_code``
    having been set on it by this call.

    Args:
        cls: the modifier class to register.
        cls_name (:obj:`str`): the name to register under. Falls back to ``cls.__name__`` when empty or ``None``.
        constrained (:obj:`bool`): stored on the class as ``cls.is_constrained``.
        pdf_type (:obj:`str`): attribute name looked up on the active tensorlib. Only checked and stored when ``constrained`` is true; otherwise ``cls.pdf_type`` is set to ``None``.
        op_code (:obj:`str`): either ``'addition'`` or ``'multiplication'``; stored as ``cls.op_code``.

    Raises:
        KeyError: the name to register is already present in the registry.
        pyhf.exceptions.InvalidModifier: the class lacks the required structure,
            or ``pdf_type`` / ``op_code`` is not valid.
    """
    # No ``global`` statement: the registry dict is mutated, never rebound.
    cls_name = cls_name or cls.__name__
    if cls_name in registry:
        raise KeyError(f'The modifier name "{cls_name:s}" is already taken.')

    # validate the structure
    validate_modifier_structure(cls)

    # The pdf is only relevant (and only checked) for constrained modifiers.
    if constrained:
        tensorlib, _ = get_backend()
        if not hasattr(tensorlib, pdf_type):
            raise exceptions.InvalidModifier(
                f'The specified pdf_type "{pdf_type:s}" is not valid for {cls_name:s}({cls.__name__:s}). See pyhf.tensor documentation for available pdfs.'
            )

    if op_code not in ('addition', 'multiplication'):
        raise exceptions.InvalidModifier(
            f'The specified op_code "{op_code:s}" is not valid for {cls_name:s}({cls.__name__:s}). See pyhf.modifier documentation for available operation codes.'
        )

    # All checks passed: only now annotate the class and register it.
    cls.is_constrained = constrained
    cls.pdf_type = pdf_type if constrained else None
    cls.op_code = op_code

    registry[cls_name] = cls


def modifier(*args, **kwargs):
    """
    Decorator that registers a modifier via ``add_to_registry``.

    It can be applied bare (``@modifier``) or called with keyword arguments
    only (``@modifier(name='foo', constrained=True)``). In both forms the
    decorated object itself is returned after registration.

    Args:
        name (:obj:`str`): the name of the modifier to use. Use the class name by default. (default: None)
        constrained (:obj:`bool`): whether the modifier is constrained or not. (default: False)
        pdf_type (:obj:`str`): the name of the pdf to use from tensorlib if constrained. (default: normal)
        op_code (:obj:`str`): the name of the operation the modifier performs on the data (e.g. addition, multiplication)

    Returns:
        modifier

    Raises:
        ValueError: too many keyword arguments, or too many arguments, or wrong arguments
        TypeError: provided name is not a string
        pyhf.exceptions.InvalidModifier: object does not have necessary modifier structure
    """
    #
    # Examples (kept as comments, not doctests):
    #
    # >>> @modifiers.modifier
    # ... class myCustomModifier(object):
    # ...     @classmethod
    # ...     def required_parset(cls, sample_data, modifier_data): pass
    #
    # >>> @modifiers.modifier(name='myCustomNamer')
    # ... class myCustomModifier(object):
    # ...     @classmethod
    # ...     def required_parset(cls, sample_data, modifier_data): pass
    #
    # >>> @modifiers.modifier(constrained=False)
    # ... class myUnconstrainedModifier(object):
    # ...     @classmethod
    # ...     def required_parset(cls, sample_data, modifier_data): pass
    # ...
    # >>> myUnconstrainedModifier.pdf_type
    # None
    #
    # >>> @modifiers.modifier(constrained=True, pdf_type='poisson')
    # ... class myConstrainedCustomPoissonModifier(object):
    # ...     @classmethod
    # ...     def required_parset(cls, sample_data, modifier_data): pass
    # ...
    # >>> myConstrainedCustomPoissonModifier.pdf_type
    # 'poisson'
    #
    # >>> @modifiers.modifier(constrained=True)
    # ... class myCustomModifier(object):
    # ...     @classmethod
    # ...     def required_parset(cls, sample_data, modifier_data): pass
    #
    # >>> @modifiers.modifier(op_code='multiplication')
    # ... class myMultiplierModifier(object):
    # ...     @classmethod
    # ...     def required_parset(cls, sample_data, modifier_data): pass
    # ...
    # >>> myMultiplierModifier.op_code
    # 'multiplication'

    # Pull out the recognised options; anything left over is an error.
    name = kwargs.pop('name', None)
    constrained = bool(kwargs.pop('constrained', False))
    pdf_type = str(kwargs.pop('pdf_type', 'normal'))
    op_code = str(kwargs.pop('op_code', 'addition'))
    # check for unparsed keyword arguments
    if kwargs:
        raise ValueError(f'Unparsed keyword arguments {kwargs.keys()}')
    # check to make sure the given name is a string, if passed in one
    if name is not None and not isinstance(name, str):
        raise TypeError(f'@modifier must be given a string. You gave it {type(name)}')

    def _register(target):
        # Shared by both decorator forms: register, then hand the object back.
        add_to_registry(
            target,
            cls_name=name,
            constrained=constrained,
            pdf_type=pdf_type,
            op_code=op_code,
        )
        return target

    if len(args) > 1:
        raise ValueError(
            f'@modifier must be called with only keyword arguments, @modifier(name=\'foo\'), or no arguments, @modifier; ({len(args):d} given)'
        )
    if not args:
        # called like @modifier(name='foo', constrained=False, pdf_type='normal', op_code='addition')
        return _register

    # called like @modifier
    (decorated,) = args
    if not callable(decorated):
        raise ValueError('You must decorate a callable python object')
    return _register(decorated)


from pyhf.modifiers.histosys import histosys, histosys_combined
from pyhf.modifiers.lumi import lumi, lumi_combined
from pyhf.modifiers.normfactor import normfactor, normfactor_combined
from pyhf.modifiers.normsys import normsys, normsys_combined
from pyhf.modifiers.shapefactor import shapefactor, shapefactor_combined
from pyhf.modifiers.shapesys import shapesys, shapesys_combined
from pyhf.modifiers.staterror import staterror, staterror_combined

uncombined = {
'histosys': histosys,
'lumi': lumi,
'normfactor': normfactor,
'normsys': normsys,
'shapefactor': shapefactor,
'shapesys': shapesys,
'staterror': staterror,
}

combined = {
'histosys': histosys_combined,
'lumi': lumi_combined,
'normfactor': normfactor_combined,
'normsys': normsys_combined,
'shapefactor': shapefactor_combined,
'shapesys': shapesys_combined,
'staterror': staterror_combined,
}
from .histosys import histosys_builder, histosys_combined
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are we dropping __all__ from this? Presumably, someone would want to build their own custom modifier based off an existing one and we're not quite allowing it to be imported so easily. But maybe that's the point?

from .lumi import lumi_builder, lumi_combined
from .normfactor import normfactor_builder, normfactor_combined
from .shapefactor import shapefactor_builder, shapefactor_combined
from .normsys import normsys_builder, normsys_combined
from .shapesys import shapesys_builder, shapesys_combined
from .staterror import staterror_builder, staterror_combined

__all__ = [
"combined",
"histosys",
"histosys_combined",
"lumi",
"lumi_combined",
"normfactor",
"normfactor_combined",
"normsys",
"normsys_combined",
"shapefactor",
"shapefactor_combined",
"shapesys",
"shapesys_combined",
"staterror",
"staterror_combined",
'histosys',
'histosys_builder',
'histosys_combined',
'lumi',
'lumi_builder',
'lumi_combined',
'normfactor',
'normfactor_builder',
'normfactor_combined',
'normsys',
'normsys_builder',
'normsys_combined',
'pyhfset',
'shapefactor',
'shapefactor_builder',
'shapefactor_combined',
'shapesys',
'shapesys_builder',
'shapesys_combined',
'staterror',
'staterror_builder',
'staterror_combined',
]


def __dir__():
    """Return ``__all__`` as the list of names reported for this namespace."""
    # NOTE(review): presumably the module-level ``__dir__`` hook (PEP 562), so
    # that ``dir()`` on the package shows only the public names -- confirm the
    # def sits at module level; indentation is not visible in this view.
    return __all__


pyhfset = {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a huge fan here. We're relying on bookkeeping through indices (scattered throughout the code) and on knowing that builder is [0] and combined is [1]. We could either use a namedtuple for each entry, so that modifiers['histosys'].builder and modifiers['histosys'].combined work instead, or provide pyhf.modifiers.builders['histosys'] and pyhf.modifiers.combined['histosys']. I think this was a better design.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The other thing is that this is exactly the registry model (without the decoration functionality), which is fine. I guess the question is whether you want the registry to be defined per model or per pyhf instantiation. This change allows a different modifier_set per model, which is probably a good thing (less "global" state, and more stateless).

'histosys': (histosys_builder, histosys_combined),
'lumi': (lumi_builder, lumi_combined),
'normfactor': (normfactor_builder, normfactor_combined),
'normsys': (normsys_builder, normsys_combined),
'shapefactor': (shapefactor_builder, shapefactor_combined),
'shapesys': (shapesys_builder, shapesys_combined),
'staterror': (staterror_builder, staterror_combined),
}
Loading