Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: respect fixed parameters in n_dof calculation #479

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions src/cabinetry/fit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,11 @@ def _run_minos(


def _goodness_of_fit(
model: pyhf.pdf.Model, data: List[float], best_twice_nll: float
model: pyhf.pdf.Model,
data: List[float],
best_twice_nll: float,
*,
fix_pars: Optional[List[bool]] = None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a , * between the mandatory and optional arguments to force the optional arguments to be keyword-only (see also other functions in this file for examples). This is done to allow re-ordering keyword arguments in the future without fear of breaking the setup for anyone, as otherwise people could be using fix_pars as a positional argument still and relying on the order.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

) -> float:
"""Calculates goodness-of-fit p-value with a saturated model.

Expand All @@ -381,6 +385,8 @@ def _goodness_of_fit(
data (List[float]): the observed data
best_twice_nll (float): best-fit -2 log(likelihood) of fit for which goodness-
of-fit should be calculated
fix_pars (Optional[List[bool]], optional): list of booleans specifying which
parameters are held constant, defaults to None (use ``pyhf`` suggestion)

Returns:
float: goodness-of-fit p-value
Expand Down Expand Up @@ -416,7 +422,7 @@ def _goodness_of_fit(
# of bins minus the number of unconstrained parameters
n_dof = sum(
model.config.channel_nbins.values()
) - model_utils.unconstrained_parameter_count(model)
) - model_utils.unconstrained_parameter_count(model, fix_pars=fix_pars)
log.debug(f"number of degrees of freedom: {n_dof}")

if n_dof <= 0:
Expand Down Expand Up @@ -502,7 +508,9 @@ def fit(

if goodness_of_fit:
# calculate goodness-of-fit with saturated model
p_val = _goodness_of_fit(model, data, fit_results.best_twice_nll)
p_val = _goodness_of_fit(
model, data, fit_results.best_twice_nll, fix_pars=fix_pars
)
fit_results = fit_results._replace(goodness_of_fit=p_val)

return fit_results
Expand Down
18 changes: 15 additions & 3 deletions src/cabinetry/model_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,24 +481,36 @@ def prediction(
)


def unconstrained_parameter_count(model: pyhf.pdf.Model) -> int:
"""Returns the number of unconstrained parameters in a model.
def unconstrained_parameter_count(
model: pyhf.pdf.Model,
*,
fix_pars: Optional[List[bool]] = None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same comment here about , *

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

) -> int:
"""Returns the number of unconstrained, non-constant parameters in a model.

The number is the sum of all independent parameters in a fit. A shapefactor that
affects multiple bins enters the count once for each independent bin. Parameters
that are set to constant are not included in the count.

Args:
model (pyhf.pdf.Model): model to count parameters for
fix_pars (Optional[List[bool]], optional): list of booleans specifying which
parameters are held constant, defaults to None (use ``pyhf`` suggestion)

Returns:
int: number of unconstrained parameters
"""
n_pars = 0
# custom fixed parameters overrule suggested fixed parameters
if fix_pars is None:
fix_pars = model.config.suggested_fixed()

for parname in model.config.par_order:
if not model.config.param_set(parname).constrained:
# only consider non-constant parameters
n_pars += model.config.param_set(parname).suggested_fixed.count(False)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am trying to remind myself if there was a particular reason I wrote this in this fashion because it felt slightly unusual to me upon revisiting. Looking at the implementation for model.config.suggested_fixed() in https://github.com/scikit-hep/pyhf/blob/1af5ed423cfbdda40dedabf93fed772c729adee3/src/pyhf/pdf.py#L437-L441, that seems to be using the exact same interface though so I think for the fix_pars=None case, the new approach is fully consistent with the old one. I will run with some example workspaces to sanity check this but tests look good too so I think this solution is exactly what we need!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good!

par_slice = model.config.par_slice(parname)
n_pars += fix_pars[par_slice].count(False)

return n_pars


Expand Down
7 changes: 4 additions & 3 deletions tests/fit/test_fit.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ def test__goodness_of_fit(
assert mock_pars.call_args[1] == {}
assert mock_count.call_count == 1
assert mock_count.call_args[0][0].spec == model.spec
assert mock_count.call_args[1] == {}
assert mock_count.call_args[1] == {"fix_pars": None}
assert "Delta NLL = 0.084185" in [rec.message for rec in caplog.records]
assert np.allclose(p_val, 0.91926079)
caplog.clear()
Expand All @@ -372,7 +372,7 @@ def test__goodness_of_fit(
assert mock_pars.call_count == 2 # no new call (no auxdata)
assert mock_count.call_count == 3
assert mock_count.call_args[0][0].spec == model.spec
assert mock_count.call_args[1] == {}
assert mock_count.call_args[1] == {"fix_pars": None}
assert (
"cannot calculate p-value: 0 degrees of freedom and Delta NLL = 0.000000"
in [rec.message for rec in caplog.records]
Expand Down Expand Up @@ -480,7 +480,8 @@ def test_fit(mock_fit, mock_print, mock_gof):

# goodness-of-fit test
fit_results_gof = fit.fit(model, data, goodness_of_fit=True)
assert mock_gof.call_args_list == [((model, data, 2.0), {})]
assert mock_gof.call_args[0] == (model, data, 2.0)
assert mock_gof.call_args[1] == {"fix_pars": None}
assert fit_results_gof.goodness_of_fit == 0.1


Expand Down
6 changes: 6 additions & 0 deletions tests/test_model_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,12 @@ def test_unconstrained_parameter_count(example_spec, example_spec_shapefactor):
model = pyhf.Workspace(example_spec_shapefactor).model()
assert model_utils.unconstrained_parameter_count(model) == 3

# fixed parameters can be provided to function
model = pyhf.Workspace(example_spec_shapefactor).model()
fix_pars = model.config.suggested_fixed()
fix_pars[0] = True
assert model_utils.unconstrained_parameter_count(model, fix_pars=fix_pars) == 2

# fixed parameters are skipped in counting
example_spec_shapefactor["measurements"][0]["config"]["parameters"].append(
{"name": "Signal strength", "fixed": True}
Expand Down
Loading