Remove sd optional kwarg from distributions #5583

Merged: 23 commits, Mar 18, 2022

Commits
0a65c1f
Removed sd optional kwarg from continuous.py
purna135 Mar 11, 2022
29c0016
Removed sd optional kwarg from timeseries.py
purna135 Mar 11, 2022
8acda64
Removed sd optional kwarg from mixture.py
purna135 Mar 11, 2022
6b34983
removed pymc/sampling_jax.py and pymc/step_methods/sgmcmc.py from scr…
purna135 Mar 11, 2022
40c939c
Merge remote-tracking branch 'upstream/main' into remove_sd
purna135 Mar 13, 2022
33e5463
sd renamed to sigma
purna135 Mar 14, 2022
01b0ce0
renamed sd to sigma in benchmarks.py
purna135 Mar 15, 2022
72e4915
though rename of sd to sigma is not required in test_mixture.py, just…
purna135 Mar 15, 2022
c9a71dc
renamed sd to sigma in test_shape_handling.py::TestShapeDimsSize::tes…
purna135 Mar 15, 2022
7bef2d6
renamed sd to sigma in dimensionality.ipynb
purna135 Mar 15, 2022
23a0f0a
renamed sd to sigma in test_util.py
purna135 Mar 15, 2022
1e723bd
renamed sd to sigma in tests/models.py
purna135 Mar 15, 2022
72186c4
renamed sd to sigma in distributions.py
purna135 Mar 15, 2022
d827e70
renamed sd to sigma in data.py
purna135 Mar 15, 2022
e14e799
renamed sd to sigma in examples/posterior_predictive.ipynb
purna135 Mar 15, 2022
e761f50
renamed sd to sigma in developer_guide.rst
purna135 Mar 15, 2022
4c1a001
renamed sd to sigma in source/PyMC_and_Aesara.rst
purna135 Mar 15, 2022
853b0ff
renamed sd to sigma in examples/GLM_linear.ipynb
purna135 Mar 15, 2022
8e2be8d
renamed sd to sigma in pymc/model.py
purna135 Mar 15, 2022
8ba28b5
fixed examples/GLM_linear.ipynb
purna135 Mar 15, 2022
e45e65b
added a note "sd has been removed" in Release note
purna135 Mar 17, 2022
7d01461
✔ Updated deprecation note and added link to PR.
purna135 Mar 18, 2022
c5fcc70
resolved conflict in test_sampling.py
purna135 Mar 18, 2022
1 change: 1 addition & 0 deletions RELEASE-NOTES.md
@@ -96,6 +96,7 @@ All of the above apply to:
This includes API changes we did not warn about since at least `3.11.0` (2021-01).

- Setting initial values through `pm.Distribution(testval=...)` is now `pm.Distribution(initval=...)`.
+ - Alternative `sd` keyword argument has been removed from all distributions. `sigma` should be used instead (see [#5583](https://github.com/pymc-devs/pymc/pull/5583)).


### New features
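For downstream code, the change is a plain rename. A minimal migration sketch (the model below is illustrative, not taken from this PR):

```python
import numpy as np
import pymc as pm

data = np.random.randn(100)

with pm.Model():
    mu = pm.Normal("mu", mu=0.0, sigma=10.0)
    # Before: pm.Normal("obs", mu=mu, sd=1.0, observed=data)
    # After this PR the `sd` alias is gone; pass `sigma` instead.
    pm.Normal("obs", mu=mu, sigma=1.0, observed=data)
```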
20 changes: 10 additions & 10 deletions benchmarks/benchmarks/benchmarks.py
@@ -32,17 +32,17 @@ def glm_hierarchical_model(random_seed=123):

n_counties = len(data.county.unique())
with pm.Model() as model:
mu_a = pm.Normal("mu_a", mu=0.0, sd=100**2)
mu_a = pm.Normal("mu_a", mu=0.0, sigma=100**2)
sigma_a = pm.HalfCauchy("sigma_a", 5)
mu_b = pm.Normal("mu_b", mu=0.0, sd=100**2)
mu_b = pm.Normal("mu_b", mu=0.0, sigma=100**2)
sigma_b = pm.HalfCauchy("sigma_b", 5)
a = pm.Normal("a", mu=0, sd=1, shape=n_counties)
b = pm.Normal("b", mu=0, sd=1, shape=n_counties)
a = pm.Normal("a", mu=0, sigma=1, shape=n_counties)
b = pm.Normal("b", mu=0, sigma=1, shape=n_counties)
a = mu_a + sigma_a * a
b = mu_b + sigma_b * b
eps = pm.HalfCauchy("eps", 5)
radon_est = a[county_idx] + b[county_idx] * data.floor.values
pm.Normal("radon_like", mu=radon_est, sd=eps, observed=data.log_radon)
pm.Normal("radon_like", mu=radon_est, sigma=eps, observed=data.log_radon)
return model


@@ -58,7 +58,7 @@ def mixture_model(random_seed=1234):

with pm.Model() as model:
w = pm.Dirichlet("w", a=np.ones_like(w_true))
mu = pm.Normal("mu", mu=0.0, sd=10.0, shape=w_true.shape)
mu = pm.Normal("mu", mu=0.0, sigma=10.0, shape=w_true.shape)
enforce_order = pm.Potential(
"enforce_order",
at.switch(mu[0] - mu[1] <= 0, 0.0, -np.inf)
@@ -88,7 +88,7 @@ class OverheadSuite:
def setup(self, step):
self.n_steps = 10000
with pm.Model() as self.model:
pm.Normal("x", mu=0, sd=1)
pm.Normal("x", mu=0, sigma=1)

def time_overhead_sample(self, step):
with self.model:
@@ -133,8 +133,8 @@ def time_drug_evaluation(self):
sigma_low = 1
sigma_high = 10
with pm.Model():
group1_mean = pm.Normal("group1_mean", y_mean, sd=y_std)
group2_mean = pm.Normal("group2_mean", y_mean, sd=y_std)
group1_mean = pm.Normal("group1_mean", y_mean, sigma=y_std)
group2_mean = pm.Normal("group2_mean", y_mean, sigma=y_std)
group1_std = pm.Uniform("group1_std", lower=sigma_low, upper=sigma_high)
group2_std = pm.Uniform("group2_std", lower=sigma_low, upper=sigma_high)
lambda_1 = group1_std**-2
@@ -301,7 +301,7 @@ def freefall(y, t, p):
# If we know one of the parameter values, we can simply pass the value.
ode_solution = ode_model(y0=[0], theta=[gamma, 9.8])
# The ode_solution has a shape of (n_times, n_states)
Y = pm.Normal("Y", mu=ode_solution, sd=sigma, observed=y)
Y = pm.Normal("Y", mu=ode_solution, sigma=sigma, observed=y)

t0 = time.time()
idata = pm.sample(500, tune=1000, chains=2, cores=2, random_seed=0)
14 changes: 7 additions & 7 deletions docs/source/PyMC_and_Aesara.rst
@@ -188,8 +188,8 @@ example::

with pm.Model() as model:
mu = pm.Normal('mu', 0, 1)
- sd = pm.HalfNormal('sd', 1)
- y = pm.Normal('y', mu=mu, sigma=sd, observed=data)
+ sigma = pm.HalfNormal('sigma', 1)
+ y = pm.Normal('y', mu=mu, sigma=sigma, observed=data)

is roughly equivalent to this::

@@ -203,10 +203,10 @@ is roughly equivalent to this::
model.add_free_variable(sd_log__)
model.add_logp_term(corrected_logp_half_normal(sd_log__))

- sd = at.exp(sd_log__)
- model.add_deterministic_variable(sd)
+ sigma = at.exp(sd_log__)
+ model.add_deterministic_variable(sigma)

- model.add_logp_term(pm.Normal.dist(mu, sd).logp(data))
+ model.add_logp_term(pm.Normal.dist(mu, sigma).logp(data))

The return values of the variable constructors are subclasses
of Aesara variables, so when we define a variable we can use any
@@ -217,5 +217,5 @@ Aesara operation on them::
# beta is a at.dvector
beta = pm.Normal('beta', 0, 1, shape=len(design_matrix))
predict = at.dot(design_matrix, beta)
- sd = pm.HalfCauchy('sd', beta=2.5)
- pm.Normal('y', mu=predict, sigma=sd, observed=data)
+ sigma = pm.HalfCauchy('sigma', beta=2.5)
+ pm.Normal('y', mu=predict, sigma=sigma, observed=data)
4 changes: 2 additions & 2 deletions docs/source/contributing/developer_guide.rst
@@ -888,8 +888,8 @@ others. The challenge and some summary of the solution could be found in Luciano

with pm.Model() as m:
mu = pm.Normal('mu', 0., 1., shape=(5, 1))
- sd = pm.HalfNormal('sd', 5., shape=(1, 10))
- pm.Normal('x', mu=mu, sigma=sd, observed=np.random.randn(2, 5, 10))
+ sigma = pm.HalfNormal('sigma', 5., shape=(1, 10))
+ pm.Normal('x', mu=mu, sigma=sigma, observed=np.random.randn(2, 5, 10))
trace = pm.sample_prior_predictive(100)

trace['x'].shape # ==> should be (100, 2, 5, 10)
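The shape arithmetic in the snippet above is ordinary NumPy-style broadcasting; a quick standalone sanity check (illustrative, outside PyMC):

```python
import numpy as np

mu = np.zeros((5, 1))      # matches pm.Normal('mu', ..., shape=(5, 1))
sigma = np.ones((1, 10))   # matches pm.HalfNormal('sigma', ..., shape=(1, 10))
obs = np.random.randn(2, 5, 10)

# (5, 1) and (1, 10) broadcast to (5, 10); the observed data contributes the
# leading batch dimension, and 100 prior-predictive draws prepend one more.
assert np.broadcast_shapes(mu.shape, sigma.shape, obs.shape) == (2, 5, 10)
```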
189 changes: 4 additions & 185 deletions docs/source/learn/examples/dimensionality.ipynb

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions docs/source/learn/examples/posterior_predictive.ipynb
@@ -151,9 +151,9 @@
" b = pm.Normal(\"b\", 0.0, 10.0)\n",
"\n",
" mu = a + b * predictor_scaled\n",
" sd = pm.Exponential(\"sd\", 1.0)\n",
" sigma = pm.Exponential(\"sigma\", 1.0)\n",
"\n",
" pm.Normal(\"obs\", mu=mu, sigma=sd, observed=outcome_scaled)\n",
" pm.Normal(\"obs\", mu=mu, sigma=sigma, observed=outcome_scaled)\n",
" idata = pm.sample_prior_predictive(samples=50)"
]
},
@@ -212,9 +212,9 @@
" b = pm.Normal(\"b\", 0.0, 1.0)\n",
"\n",
" mu = a + b * predictor_scaled\n",
" sd = pm.Exponential(\"sd\", 1.0)\n",
" sigma = pm.Exponential(\"sigma\", 1.0)\n",
"\n",
" pm.Normal(\"obs\", mu=mu, sigma=sd, observed=outcome_scaled)\n",
" pm.Normal(\"obs\", mu=mu, sigma=sigma, observed=outcome_scaled)\n",
" idata = pm.sample_prior_predictive(samples=50)"
]
},
@@ -328,7 +328,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Everything ran smoothly, but it's often difficult to understand what the parameters' values mean when analyzing a trace plot or table summary -- even more so here, as the parameters live in the standardized space. A useful thing to understand your models is... you guessed it: posterior predictive checks! We'll use PyMC's dedicated function to sample data from the posterior. This function will randomly draw 4000 samples of parameters from the trace. Then, for each sample, it will draw 100 random numbers from a normal distribution specified by the values of `mu` and `sd` in that sample:"
"Everything ran smoothly, but it's often difficult to understand what the parameters' values mean when analyzing a trace plot or table summary -- even more so here, as the parameters live in the standardized space. A useful thing to understand your models is... you guessed it: posterior predictive checks! We'll use PyMC's dedicated function to sample data from the posterior. This function will randomly draw 4000 samples of parameters from the trace. Then, for each sample, it will draw 100 random numbers from a normal distribution specified by the values of `mu` and `sigma` in that sample:"
]
},
{
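The markdown cell above describes the standard posterior predictive workflow; a self-contained sketch of what it amounts to (synthetic data standing in for the notebook's, draw counts matching its 4 x 1000 = 4000 figure):

```python
import numpy as np
import pymc as pm

rng = np.random.default_rng(0)
predictor_scaled = rng.normal(size=100)
outcome_scaled = rng.normal(size=100)

with pm.Model() as model:
    a = pm.Normal("a", 0.0, 1.0)
    b = pm.Normal("b", 0.0, 1.0)
    mu = a + b * predictor_scaled
    sigma = pm.Exponential("sigma", 1.0)
    pm.Normal("obs", mu=mu, sigma=sigma, observed=outcome_scaled)
    idata = pm.sample(1000, chains=4)  # 4000 posterior draws in total
    # Each posterior draw of (a, b, sigma) simulates one replicated dataset
    # of 100 points from Normal(mu, sigma).
    ppc = pm.sample_posterior_predictive(idata)
```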
4 changes: 2 additions & 2 deletions pymc/data.py
@@ -205,8 +205,8 @@ class Minibatch(TensorVariable):

>>> with pm.Model() as model:
... mu = pm.Flat('mu')
- ... sd = pm.HalfNormal('sd')
- ... lik = pm.Normal('lik', mu, sd, observed=x, total_size=(100, 100))
+ ... sigma = pm.HalfNormal('sigma')
+ ... lik = pm.Normal('lik', mu, sigma, observed=x, total_size=(100, 100))


Then you can perform regular Variational Inference out of the box
64 changes: 13 additions & 51 deletions pymc/distributions/continuous.py
@@ -546,13 +546,10 @@ class Normal(Continuous):
rv_op = normal

@classmethod
- def dist(cls, mu=0, sigma=None, tau=None, sd=None, no_assert=False, **kwargs):
-     if sd is not None:
-         sigma = sd
+ def dist(cls, mu=0, sigma=None, tau=None, no_assert=False, **kwargs):
tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
sigma = at.as_tensor_variable(sigma)

- # sd = sigma
# tau = at.as_tensor_variable(tau)
# mean = median = mode = mu = at.as_tensor_variable(floatX(mu))
# variance = 1.0 / self.tau
@@ -710,13 +707,11 @@ def dist(
mu: Optional[DIST_PARAMETER_TYPES] = None,
sigma: Optional[DIST_PARAMETER_TYPES] = None,
tau: Optional[DIST_PARAMETER_TYPES] = None,
- sd: Optional[DIST_PARAMETER_TYPES] = None,
lower: Optional[DIST_PARAMETER_TYPES] = None,
upper: Optional[DIST_PARAMETER_TYPES] = None,
*args,
**kwargs,
) -> RandomVariable:
- sigma = sd if sd is not None else sigma
tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
sigma = at.as_tensor_variable(sigma)
tau = at.as_tensor_variable(tau)
@@ -866,10 +861,7 @@ class HalfNormal(PositiveContinuous):
rv_op = halfnormal

@classmethod
- def dist(cls, sigma=None, tau=None, sd=None, *args, **kwargs):
-     if sd is not None:
-         sigma = sd
-
+ def dist(cls, sigma=None, tau=None, *args, **kwargs):
tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)

assert_negative_support(tau, "tau", "HalfNormal")
@@ -1226,10 +1218,7 @@ class Beta(UnitContinuous):
rv_op = aesara.tensor.random.beta

@classmethod
- def dist(cls, alpha=None, beta=None, mu=None, sigma=None, sd=None, *args, **kwargs):
-     if sd is not None:
-         sigma = sd
-
+ def dist(cls, alpha=None, beta=None, mu=None, sigma=None, *args, **kwargs):
alpha, beta = cls.get_alpha_beta(alpha, beta, mu, sigma)
alpha = at.as_tensor_variable(floatX(alpha))
beta = at.as_tensor_variable(floatX(beta))
@@ -1785,10 +1774,7 @@ class LogNormal(PositiveContinuous):
rv_op = lognormal

@classmethod
- def dist(cls, mu=0, sigma=None, tau=None, sd=None, *args, **kwargs):
-     if sd is not None:
-         sigma = sd
-
+ def dist(cls, mu=0, sigma=None, tau=None, *args, **kwargs):
tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)

mu = at.as_tensor_variable(floatX(mu))
@@ -1914,9 +1900,7 @@ class StudentT(Continuous):
rv_op = studentt

@classmethod
- def dist(cls, nu, mu=0, lam=None, sigma=None, sd=None, *args, **kwargs):
-     if sd is not None:
-         sigma = sd
+ def dist(cls, nu, mu=0, lam=None, sigma=None, *args, **kwargs):
nu = at.as_tensor_variable(floatX(nu))
lam, sigma = get_tau_sigma(tau=lam, sigma=sigma)
sigma = at.as_tensor_variable(sigma)
@@ -2306,10 +2290,7 @@ class Gamma(PositiveContinuous):
rv_op = gamma

@classmethod
- def dist(cls, alpha=None, beta=None, mu=None, sigma=None, sd=None, no_assert=False, **kwargs):
-     if sd is not None:
-         sigma = sd
-
+ def dist(cls, alpha=None, beta=None, mu=None, sigma=None, no_assert=False, **kwargs):
alpha, beta = cls.get_alpha_beta(alpha, beta, mu, sigma)
alpha = at.as_tensor_variable(floatX(alpha))
beta = at.as_tensor_variable(floatX(beta))
@@ -2426,10 +2407,7 @@ class InverseGamma(PositiveContinuous):
rv_op = invgamma

@classmethod
- def dist(cls, alpha=None, beta=None, mu=None, sigma=None, sd=None, *args, **kwargs):
-     if sd is not None:
-         sigma = sd
-
+ def dist(cls, alpha=None, beta=None, mu=None, sigma=None, *args, **kwargs):
alpha, beta = cls._get_alpha_beta(alpha, beta, mu, sigma)
alpha = at.as_tensor_variable(floatX(alpha))
beta = at.as_tensor_variable(floatX(beta))
@@ -2750,11 +2728,7 @@ class HalfStudentT(PositiveContinuous):
rv_op = halfstudentt

@classmethod
- def dist(cls, nu=1, sigma=None, lam=None, sd=None, *args, **kwargs):
-
-     if sd is not None:
-         sigma = sd
-
+ def dist(cls, nu=1, sigma=None, lam=None, *args, **kwargs):
nu = at.as_tensor_variable(floatX(nu))
lam, sigma = get_tau_sigma(lam, sigma)
sigma = at.as_tensor_variable(sigma)
@@ -2886,11 +2860,7 @@ class ExGaussian(Continuous):
rv_op = exgaussian

@classmethod
- def dist(cls, mu=0.0, sigma=None, nu=None, sd=None, *args, **kwargs):
-
-     if sd is not None:
-         sigma = sd
-
+ def dist(cls, mu=0.0, sigma=None, nu=None, *args, **kwargs):
mu = at.as_tensor_variable(floatX(mu))
sigma = at.as_tensor_variable(floatX(sigma))
nu = at.as_tensor_variable(floatX(nu))
@@ -3118,10 +3088,7 @@ class SkewNormal(Continuous):
rv_op = skewnormal

@classmethod
- def dist(cls, alpha=1, mu=0.0, sigma=None, tau=None, sd=None, *args, **kwargs):
-     if sd is not None:
-         sigma = sd
-
+ def dist(cls, alpha=1, mu=0.0, sigma=None, tau=None, *args, **kwargs):
tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
alpha = at.as_tensor_variable(floatX(alpha))
mu = at.as_tensor_variable(floatX(mu))
@@ -3445,10 +3412,7 @@ class Rice(PositiveContinuous):
rv_op = rice

@classmethod
- def dist(cls, nu=None, sigma=None, b=None, sd=None, *args, **kwargs):
-     if sd is not None:
-         sigma = sd
-
+ def dist(cls, nu=None, sigma=None, b=None, *args, **kwargs):
nu, b, sigma = cls.get_nu_b(nu, b, sigma)
b = at.as_tensor_variable(floatX(b))
sigma = at.as_tensor_variable(floatX(sigma))
@@ -3657,12 +3621,10 @@ class LogitNormal(UnitContinuous):
rv_op = logit_normal

@classmethod
- def dist(cls, mu=0, sigma=None, tau=None, sd=None, **kwargs):
-     if sd is not None:
-         sigma = sd
+ def dist(cls, mu=0, sigma=None, tau=None, **kwargs):
mu = at.as_tensor_variable(floatX(mu))
tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
- sigma = sd = at.as_tensor_variable(sigma)
+ sigma = at.as_tensor_variable(sigma)
tau = at.as_tensor_variable(tau)
assert_negative_support(sigma, "sigma", "LogitNormal")
assert_negative_support(tau, "tau", "LogitNormal")
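All of these `dist` signatures keep the `tau`/`sigma` pair, which `get_tau_sigma` reconciles via the precision relation tau = sigma**-2. A minimal sketch of that invariant (assuming the helper's behavior and import path at the time of this PR; not a verbatim excerpt):

```python
from pymc.distributions.continuous import get_tau_sigma

# At most one of tau/sigma is given; the other is derived as
# tau = sigma**-2 (equivalently sigma = tau**-0.5).
tau, sigma = get_tau_sigma(sigma=2.0)
assert tau == 0.25 and sigma == 2.0

tau, sigma = get_tau_sigma(tau=4.0)
assert sigma == 0.5
```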
8 changes: 2 additions & 6 deletions pymc/distributions/mixture.py
@@ -477,17 +477,13 @@ class NormalMixture:
y = pm.NormalMixture("y", w=weights, mu=μ, sigma=σ, observed=data)
"""

- def __new__(cls, name, w, mu, sigma=None, tau=None, sd=None, comp_shape=(), **kwargs):
-     if sd is not None:
-         sigma = sd
+ def __new__(cls, name, w, mu, sigma=None, tau=None, comp_shape=(), **kwargs):
_, sigma = get_tau_sigma(tau=tau, sigma=sigma)

return Mixture(name, w, Normal.dist(mu, sigma=sigma, size=comp_shape), **kwargs)

@classmethod
- def dist(cls, w, mu, sigma=None, tau=None, sd=None, comp_shape=(), **kwargs):
-     if sd is not None:
-         sigma = sd
+ def dist(cls, w, mu, sigma=None, tau=None, comp_shape=(), **kwargs):
_, sigma = get_tau_sigma(tau=tau, sigma=sigma)

return Mixture.dist(w, Normal.dist(mu, sigma=sigma, size=comp_shape), **kwargs)
19 changes: 6 additions & 13 deletions pymc/distributions/timeseries.py
@@ -108,15 +108,10 @@ class AR(distribution.Continuous):
distribution for initial values (Defaults to Flat())
"""

- def __init__(
-     self, rho, sigma=None, tau=None, constant=False, init=None, sd=None, *args, **kwargs
- ):
+ def __init__(self, rho, sigma=None, tau=None, constant=False, init=None, *args, **kwargs):
super().__init__(*args, **kwargs)
- if sd is not None:
-     sigma = sd
-
tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
- self.sigma = self.sd = at.as_tensor_variable(sigma)
+ self.sigma = at.as_tensor_variable(sigma)
self.tau = at.as_tensor_variable(tau)

self.mean = at.as_tensor_variable(0.0)
@@ -201,17 +196,15 @@ class GaussianRandomWalk(distribution.Continuous):
distribution for initial value (Defaults to Flat())
"""

- def __init__(self, tau=None, init=None, sigma=None, mu=0.0, sd=None, *args, **kwargs):
+ def __init__(self, tau=None, init=None, sigma=None, mu=0.0, *args, **kwargs):
kwargs.setdefault("shape", 1)
super().__init__(*args, **kwargs)
if sum(self.shape) == 0:
raise TypeError("GaussianRandomWalk must be supplied a non-zero shape argument!")
- if sd is not None:
-     sigma = sd
tau, sigma = get_tau_sigma(tau=tau, sigma=sigma)
self.tau = at.as_tensor_variable(tau)
sigma = at.as_tensor_variable(sigma)
- self.sigma = self.sd = sigma
+ self.sigma = sigma
self.mu = at.as_tensor_variable(mu)
self.init = init or Flat.dist()
self.mean = at.as_tensor_variable(0.0)
@@ -400,8 +393,8 @@ def logp(self, x):
xt = x[:-1]
f, g = self.sde_fn(x[:-1], *self.sde_pars)
mu = xt + self.dt * f
- sd = at.sqrt(self.dt) * g
- return at.sum(Normal.dist(mu=mu, sigma=sd).logp(x[1:]))
+ sigma = at.sqrt(self.dt) * g
+ return at.sum(Normal.dist(mu=mu, sigma=sigma).logp(x[1:]))

def _distr_parameters_for_repr(self):
return ["dt"]
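The `logp` hunk above is the Euler-Maruyama discretization of the SDE dx = f(x) dt + g(x) dW: each step is a draw from Normal(x_t + f(x_t) * dt, sqrt(dt) * g(x_t)). A toy forward simulation of the same update (pure NumPy; the drift and diffusion functions are illustrative, not from this PR):

```python
import numpy as np

rng = np.random.default_rng(0)
dt, n = 0.01, 1000
f = lambda x: -x    # drift, illustrative
g = lambda x: 0.5   # diffusion, illustrative

x = np.empty(n)
x[0] = 1.0
for t in range(n - 1):
    # Same mu and sigma as in EulerMaruyama.logp above.
    x[t + 1] = rng.normal(loc=x[t] + f(x[t]) * dt, scale=np.sqrt(dt) * g(x[t]))
```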
2 changes: 1 addition & 1 deletion pymc/model.py
@@ -482,7 +482,7 @@ def __init__(self, mean=0, sigma=1, name=''):
Normal('v2', mu=mean, sigma=sd)

# something more complex is allowed, too
- half_cauchy = HalfCauchy('sd', beta=10, initval=1.)
+ half_cauchy = HalfCauchy('sigma', beta=10, initval=1.)
Normal('v3', mu=mean, sigma=half_cauchy)

# Deterministic variables can be used in usual way