diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 268656f68b..11efbbbe2c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -65,7 +65,6 @@ jobs: tests/backends/test_base.py tests/backends/test_ndarray.py tests/step_methods/hmc/test_hmc.py - tests/test_func_utils.py tests/distributions/test_shape_utils.py tests/distributions/test_mixture.py tests/test_testing.py diff --git a/docs/source/api/misc.rst b/docs/source/api/misc.rst index 095d262179..659ce84d3c 100644 --- a/docs/source/api/misc.rst +++ b/docs/source/api/misc.rst @@ -8,5 +8,4 @@ Other utils compute_log_likelihood compute_log_prior - find_constrained_prior DictToArrayBijection diff --git a/pymc/__init__.py b/pymc/__init__.py index 684feac11f..85c7ace2db 100644 --- a/pymc/__init__.py +++ b/pymc/__init__.py @@ -53,7 +53,6 @@ def __set_compiler_flags(): from pymc.data import * from pymc.distributions import * from pymc.exceptions import * -from pymc.func_utils import find_constrained_prior from pymc.logprob import * from pymc.math import ( expand_packed_triangular, diff --git a/pymc/func_utils.py b/pymc/func_utils.py deleted file mode 100644 index e7acb3dff0..0000000000 --- a/pymc/func_utils.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright 2024 - present The PyMC Developers -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import warnings - -from collections.abc import Callable - -import numpy as np -import pytensor.tensor as pt - -from pytensor.gradient import NullTypeGradError -from scipy import optimize - -import pymc as pm - -__all__ = ["find_constrained_prior"] - - -def find_constrained_prior( - distribution: pm.Distribution, - lower: float, - upper: float, - init_guess: dict[str, float], - mass: float = 0.95, - fixed_params: dict[str, float] | None = None, - mass_below_lower: float | None = None, - **kwargs, -) -> dict[str, float]: - """ - Find optimal parameters to get `mass` % of probability of a distribution between `lower` and `upper`. - - Note: only works for one- and two-parameter distributions, as there - are exactly two constraints. Fix some combination of parameters - if you want to use it on >=3-parameter distributions. - - Parameters - ---------- - distribution : Distribution - PyMC distribution you want to set a prior on. - Needs to have a ``logcdf`` method implemented in PyMC. - lower : float - Lower bound to get `mass` % of probability of `pm_dist`. - upper : float - Upper bound to get `mass` % of probability of `pm_dist`. - init_guess : dict of {str : float} - Initial guess for ``scipy.optimize.least_squares`` to find the - optimal parameters of `pm_dist` fitting the interval constraint. - Must be a dictionary with the name of the PyMC distribution's - parameter as keys and the initial guess as values. - mass : float, default 0.95 - Share of the probability mass we want between ``lower`` and ``upper``. - Defaults to 95%. - fixed_params : str or float, optional, default None - Only used when `pm_dist` has at least three parameters. - Dictionary of fixed parameters, so that there are only 2 to optimize. - For instance, for a StudentT, you fix nu to a constant and get the optimized - mu and sigma. - mass_below_lower : float, optional, default None - The probability mass below the ``lower`` bound. If ``None``, - defaults to ``(1 - mass) / 2``, which implies that the probability - mass below the ``lower`` value will be equal to the probability - mass above the ``upper`` value. - - Returns - ------- - opt_params : dict - The optimized distribution parameters as a dictionary. - Dictionary keys are the parameter names and - dictionary values are the optimized parameter values. - - Notes - ----- - Optional keyword arguments can be passed to ``find_constrained_prior``. These will be - delivered to the underlying call to :external:py:func:`scipy.optimize.minimize`. - - Examples - -------- - .. code-block:: python - - # get parameters obeying constraints - opt_params = pm.find_constrained_prior( - pm.Gamma, lower=0.1, upper=0.4, mass=0.75, init_guess={"alpha": 1, "beta": 10} - ) - - # use these parameters to draw random samples - samples = pm.Gamma.dist(**opt_params, size=100).eval() - - # use these parameters in a model - with pm.Model(): - x = pm.Gamma("x", **opt_params) - - # specify fixed values before optimization - opt_params = pm.find_constrained_prior( - pm.StudentT, - lower=0, - upper=1, - init_guess={"mu": 5, "sigma": 2}, - fixed_params={"nu": 7}, - ) - - Under some circumstances, you might not want to have the same cumulative - probability below the ``lower`` threshold and above the ``upper`` threshold. - For example, you might want to constrain an Exponential distribution to - find the parameter that yields 90% of the mass below the ``upper`` bound, - and have zero mass below ``lower``. You can do that with the following call - to ``find_constrained_prior`` - - .. code-block:: python - - opt_params = pm.find_constrained_prior( - pm.Exponential, - lower=0, - upper=3.0, - mass=0.9, - init_guess={"lam": 1}, - mass_below_lower=0, - ) - """ - warnings.warn( - "find_constrained_prior is deprecated and will be removed in a future version. " - "Please use maxent function from PreliZ. " - "https://preliz.readthedocs.io/en/latest/api_reference.html#preliz.unidimensional.maxent", - FutureWarning, - stacklevel=2, - ) - - assert 0.01 <= mass <= 0.99, ( - "This function optimizes the mass of the given distribution +/- " - f"1%, so `mass` has to be between 0.01 and 0.99. You provided {mass}." - ) - if mass_below_lower is None: - mass_below_lower = (1 - mass) / 2 - - # exit when any parameter is not scalar: - if np.any(np.asarray(distribution.rv_op.ndims_params) != 0): - raise NotImplementedError( - "`pm.find_constrained_prior` does not work with non-scalar parameters yet.\n" - "Feel free to open a pull request on PyMC repo if you really need this feature." - ) - - dist_params = pt.vector("dist_params") - params_to_optim = { - arg_name: dist_params[i] for arg_name, i in zip(init_guess.keys(), range(len(init_guess))) - } - - if fixed_params is not None: - params_to_optim.update(fixed_params) - - dist = distribution.dist(**params_to_optim) - - try: - logcdf_lower = pm.logcdf(dist, pm.floatX(lower)) - logcdf_upper = pm.logcdf(dist, pm.floatX(upper)) - except AttributeError: - raise AttributeError( - f"You cannot use `find_constrained_prior` with {distribution} -- it doesn't have a logcdf " - "method yet.\nOpen an issue or, even better, a pull request on PyMC repo if you really " - "need it." - ) - - target = (pt.exp(logcdf_lower) - mass_below_lower) ** 2 - target_fn = pm.pytensorf.compile([dist_params], target, allow_input_downcast=True) - - constraint = pt.exp(logcdf_upper) - pt.exp(logcdf_lower) - constraint_fn = pm.pytensorf.compile([dist_params], constraint, allow_input_downcast=True) - - jac: str | Callable - constraint_jac: str | Callable - try: - pytensor_jac = pm.gradient(target, [dist_params]) - jac = pm.pytensorf.compile([dist_params], pytensor_jac, allow_input_downcast=True) - pytensor_constraint_jac = pm.gradient(constraint, [dist_params]) - constraint_jac = pm.pytensorf.compile( - [dist_params], pytensor_constraint_jac, allow_input_downcast=True - ) - # when PyMC cannot compute the gradient - except (NotImplementedError, NullTypeGradError): - jac = "2-point" - constraint_jac = "2-point" - cons = optimize.NonlinearConstraint(constraint_fn, lb=mass, ub=mass, jac=constraint_jac) - - opt = optimize.minimize( - target_fn, x0=list(init_guess.values()), jac=jac, constraints=cons, **kwargs - ) - if not opt.success: - raise ValueError( - f"Optimization of parameters failed.\nOptimization termination details:\n{opt}" - ) - - # save optimal parameters - opt_params = dict(zip(init_guess.keys(), opt.x)) - if fixed_params is not None: - opt_params.update(fixed_params) - return opt_params diff --git a/tests/test_func_utils.py b/tests/test_func_utils.py deleted file mode 100644 index e0815c86c7..0000000000 --- a/tests/test_func_utils.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright 2024 - present The PyMC Developers -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import pytest - -import pymc as pm - - -@pytest.mark.parametrize( - "distribution, lower, upper, init_guess, fixed_params, mass_below_lower", - [ - (pm.Gamma, 0.1, 0.4, {"alpha": 1, "beta": 10}, {}, None), - (pm.Normal, 155, 180, {"mu": 170, "sigma": 3}, {}, None), - (pm.StudentT, 0.1, 0.4, {"mu": 10, "sigma": 3}, {"nu": 7}, None), - (pm.StudentT, 0, 1, {"mu": 5, "sigma": 2, "nu": 7}, {}, None), - (pm.Exponential, 0, 1, {"lam": 1}, {}, 0), - (pm.HalfNormal, 0, 1, {"sigma": 1}, {}, 0), - (pm.Binomial, 0, 8, {"p": 0.5}, {"n": 10}, None), - (pm.Poisson, 1, 15, {"mu": 10}, {}, None), - (pm.Poisson, 19, 41, {"mu": 30}, {}, None), - ], -) -@pytest.mark.parametrize("mass", [0.5, 0.75, 0.95]) -def test_find_constrained_prior( - distribution, lower, upper, init_guess, fixed_params, mass, mass_below_lower -): - opt_params = pm.find_constrained_prior( - distribution, - lower=lower, - upper=upper, - mass=mass, - init_guess=init_guess, - fixed_params=fixed_params, - mass_below_lower=mass_below_lower, - ) - - opt_distribution = distribution.dist(**opt_params) - mass_in_interval = ( - pm.math.exp(pm.logcdf(opt_distribution, upper)) - - pm.math.exp(pm.logcdf(opt_distribution, lower)) - ).eval() - assert np.abs(mass_in_interval - mass) <= 1e-5 - - -@pytest.mark.parametrize( - "distribution, lower, upper, init_guess, fixed_params", - [ - (pm.Gamma, 0.1, 0.4, {"alpha": 1}, {"beta": 10}), - (pm.Exponential, 0.1, 1, {"lam": 1}, {}), - (pm.Binomial, 0, 2, {"p": 0.8}, {"n": 10}), - ], -) -def test_find_constrained_prior_error_too_large( - distribution, lower, upper, init_guess, fixed_params -): - with pytest.raises( - ValueError, match="Optimization of parameters failed.\nOptimization termination details:\n" - ): - pm.find_constrained_prior( - distribution, - lower=lower, - upper=upper, - mass=0.95, - init_guess=init_guess, - fixed_params=fixed_params, - ) - - -def test_find_constrained_prior_input_errors(): - # missing param - with pytest.raises(TypeError, match="required positional argument"): - pm.find_constrained_prior( - pm.StudentT, - lower=0.1, - upper=0.4, - mass=0.95, - init_guess={"mu": 170, "sigma": 3}, - ) - - # mass too high - with pytest.raises(AssertionError, match="has to be between 0.01 and 0.99"): - pm.find_constrained_prior( - pm.StudentT, - lower=0.1, - upper=0.4, - mass=0.995, - init_guess={"mu": 170, "sigma": 3}, - fixed_params={"nu": 7}, - ) - - # mass too low - with pytest.raises(AssertionError, match="has to be between 0.01 and 0.99"): - pm.find_constrained_prior( - pm.StudentT, - lower=0.1, - upper=0.4, - mass=0.005, - init_guess={"mu": 170, "sigma": 3}, - fixed_params={"nu": 7}, - ) - - # non-scalar params - with pytest.raises(NotImplementedError, match="does not work with non-scalar parameters yet"): - pm.find_constrained_prior( - pm.MvNormal, - lower=0, - upper=1, - mass=0.95, - init_guess={"mu": 5, "cov": np.asarray([[1, 0.2], [0.2, 1]])}, - )