diff --git a/pymc3/__init__.py b/pymc3/__init__.py index 358527e4c17..8eacf2704f0 100644 --- a/pymc3/__init__.py +++ b/pymc3/__init__.py @@ -46,7 +46,8 @@ def __set_compiler_flags(): from pymc3.distributions import * from pymc3.distributions import transforms from pymc3.exceptions import * -from pymc3.glm import * + +# from pymc3.glm import * from pymc3.math import ( expand_packed_triangular, invlogit, diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index 51958f541b2..b30721f4ff2 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -11,8 +11,125 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from functools import singledispatch +from typing import Optional -from pymc3.distributions import shape_utils, timeseries, transforms +import theano.tensor as tt + +from theano.tensor.random.op import Observed +from theano.tensor.var import TensorVariable + + +def get_rv_var_and_value( + rv_var: TensorVariable, + rv_value: Optional[TensorVariable] = None, +) -> TensorVariable: + + if rv_value is None: + if rv_var.owner and isinstance(rv_var.owner.op, Observed): + rv_var, rv_value = rv_var.owner.inputs + elif hasattr(rv_var.tag, "value_var"): + rv_value = rv_var.tag.value_var + else: + raise ValueError("value is unspecified") + + return rv_var, rv_value + + +def logpt( + rv_var: TensorVariable, + rv_value: Optional[TensorVariable] = None, + jacobian: bool = True, + scaling: Optional[TensorVariable] = None, + **kwargs +) -> TensorVariable: + """Get a graph of the log-likelihood for a random variable at a point.""" + + rv_var, rv_value = get_rv_var_and_value(rv_var, rv_value) + rv_node = rv_var.owner + + if not rv_node: + raise TypeError("rv_var must be the output of a RandomVariable Op") + + rng, size, dtype, *dist_params = rv_node.inputs + + if jacobian: + logp_var = _logp(rv_node.op, rv_value, *dist_params, **kwargs) + else: + logp_var = _logp_nojac(rv_node.op, rv_value, *dist_params, **kwargs) + + if scaling: + logp_var *= scaling + + if rv_var.name is not None: + logp_var.name = "__logp_%s" % rv_var.name + + return logp_var + + +@singledispatch +def _logp(op, value, *dist_params, **kwargs): + return tt.zeros_like(value) + + +def logcdf(rv_var, rv_value, **kwargs): + + rv_var, rv_value = get_rv_var_and_value(rv_var, rv_value) + rv_node = rv_var.owner + + if not rv_node: + raise TypeError() + + rng, size, dtype, *dist_params = rv_node.inputs + + return _logcdf(rv_node.op, rv_value, *dist_params, **kwargs) + + +@singledispatch +def _logcdf(op, value, *args, **kwargs): + raise NotImplementedError() + + +def logp_nojac(rv_var, rv_value=None, **kwargs): + + rv_var, rv_value = get_rv_var_and_value(rv_var, rv_value) + rv_node = rv_var.owner + + if not rv_node: + raise TypeError() + + rng, size, dtype, *dist_params = rv_node.inputs + + return _logp_nojac(rv_node.op, rv_value, **kwargs) + + +@singledispatch +def _logp_nojac(op, value, *args, **kwargs): + """Return the logp, but do not include a jacobian term for transforms. + + If we use different parametrizations for the same distribution, we + need to add the determinant of the jacobian of the transformation + to make sure the densities still describe the same distribution. + However, MAP estimates are not invariant with respect to the + parameterization, we need to exclude the jacobian terms in this case. 
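The new `logpt`/`_logp`/`_logcdf` functions above replace the old `Distribution.logp` methods with a `functools.singledispatch` registry keyed on the type of the `RandomVariable` `Op`. The snippet below is a minimal, self-contained sketch of that dispatch pattern (not part of the patch); `FakeNormalRV` and `FakeUniformRV` are invented stand-ins for the Theano op classes.

from functools import singledispatch
import numpy as np

class FakeNormalRV:      # stand-in for theano.tensor.random.basic.NormalRV
    pass

class FakeUniformRV:     # stand-in for theano.tensor.random.basic.UniformRV
    pass

@singledispatch
def _logp(op, value, *dist_params):
    # Default: ops without a registered density contribute zero, as in the patch.
    return np.zeros_like(value)

@_logp.register(FakeNormalRV)
def _(op, value, mu, sigma):
    return -0.5 * ((value - mu) / sigma) ** 2 - np.log(sigma) - 0.5 * np.log(2 * np.pi)

@_logp.register(FakeUniformRV)
def _(op, value, lower, upper):
    inside = (value >= lower) & (value <= upper)
    return np.where(inside, -np.log(upper - lower), -np.inf)

# Dispatch happens on the op instance, mirroring `_logp(rv_node.op, rv_value, *dist_params)`.
print(_logp(FakeNormalRV(), np.array(0.5), 0.0, 1.0))
print(_logp(FakeUniformRV(), np.array(0.5), 0.0, 2.0))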
+ + This function should be overwritten in base classes for transformed + distributions. + """ + return logpt(op, value, *args, **kwargs) + + +def logp_sum(var, value, *args, **kwargs): + """Return the sum of the logp values for the given observations. + + Subclasses can use this to improve the speed of logp evaluations + if only the sum of the logp values is needed. + """ + return tt.sum(logpt(var, value, *args, **kwargs)) + + +# from pymc3.distributions import timeseries +from pymc3.distributions import shape_utils, transforms from pymc3.distributions.bart import BART from pymc3.distributions.bound import Bound from pymc3.distributions.continuous import ( @@ -74,7 +191,6 @@ Discrete, Distribution, NoDistribution, - TensorType, draw_values, generate_samples, ) @@ -94,15 +210,15 @@ ) from pymc3.distributions.posterior_predictive import fast_sample_posterior_predictive from pymc3.distributions.simulator import Simulator -from pymc3.distributions.timeseries import ( - AR, - AR1, - GARCH11, - GaussianRandomWalk, - MvGaussianRandomWalk, - MvStudentTRandomWalk, -) +# from pymc3.distributions.timeseries import ( +# AR, +# AR1, +# GARCH11, +# GaussianRandomWalk, +# MvGaussianRandomWalk, +# MvStudentTRandomWalk, +# ) __all__ = [ "Uniform", "Flat", @@ -149,7 +265,6 @@ "Continuous", "Discrete", "NoDistribution", - "TensorType", "MvNormal", "MatrixNormal", "KroneckerNormal", @@ -161,13 +276,13 @@ "WishartBartlett", "LKJCholeskyCov", "LKJCorr", - "AR1", - "AR", + # "AR1", + # "AR", "AsymmetricLaplace", - "GaussianRandomWalk", - "MvGaussianRandomWalk", - "MvStudentTRandomWalk", - "GARCH11", + # "GaussianRandomWalk", + # "MvGaussianRandomWalk", + # "MvStudentTRandomWalk", + # "GARCH11", "SkewNormal", "Mixture", "NormalMixture", diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 5c990bde3b5..0d34bb599f0 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -25,8 +25,9 @@ from scipy import stats from scipy.interpolate import InterpolatedUnivariateSpline from scipy.special import expit +from theano.tensor.random.basic import NormalRV, UniformRV, normal, uniform -from pymc3.distributions import transforms +from pymc3.distributions import _logcdf, _logp, transforms from pymc3.distributions.dist_math import ( SplineWrapper, alltrue_elemwise, @@ -86,21 +87,21 @@ class PositiveContinuous(Continuous): """Base class for positive continuous distributions""" - def __init__(self, transform=transforms.log, *args, **kwargs): - super().__init__(transform=transform, *args, **kwargs) + default_transform = transforms.log class UnitContinuous(Continuous): """Base class for continuous distributions on [0,1]""" - def __init__(self, transform=transforms.logodds, *args, **kwargs): - super().__init__(transform=transform, *args, **kwargs) + default_transform = transforms.logodds class BoundedContinuous(Continuous): """Base class for bounded continuous distributions""" - def __init__(self, transform="auto", lower=None, upper=None, *args, **kwargs): + default_transform = "auto" + + def create_transform(transform="auto", lower=None, upper=None): lower = tt.as_tensor_variable(lower) if lower is not None else None upper = tt.as_tensor_variable(upper) if upper is not None else None @@ -115,7 +116,7 @@ def __init__(self, transform="auto", lower=None, upper=None, *args, **kwargs): else: transform = transforms.interval(lower, upper) - super().__init__(transform=transform, *args, **kwargs) + return transform def assert_negative_support(var, label, distname, 
value=-1e-6): @@ -223,81 +224,64 @@ class Uniform(BoundedContinuous): Upper limit. """ - def __init__(self, lower=0, upper=1, *args, **kwargs): - self.lower = lower = tt.as_tensor_variable(floatX(lower)) - self.upper = upper = tt.as_tensor_variable(floatX(upper)) - self.mean = (upper + lower) / 2.0 - self.median = self.mean + @classmethod + def dist(cls, lower=0, upper=1, *args, **kwargs): + lower = lower = tt.as_tensor_variable(floatX(lower)) + upper = upper = tt.as_tensor_variable(floatX(upper)) + # mean = (upper + lower) / 2.0 + # median = self.mean - super().__init__(lower=lower, upper=upper, *args, **kwargs) + transform = kwargs.get("transform", cls.default_transform) + transform = cls.create_transform(transform, lower, upper) - def random(self, point=None, size=None): - """ - Draw random values from Uniform distribution. + rv_var = uniform(lower, upper, **kwargs) + rv_var.tag.transform = transform - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). + return rv_var - Returns - ------- - array - """ - lower, upper = draw_values([self.lower, self.upper], point=point, size=size) - return generate_samples( - stats.uniform.rvs, loc=lower, scale=upper - lower, dist_shape=self.shape, size=size - ) - - def logp(self, value): - """ - Calculate log-probability of Uniform distribution at specified value. +@_logp.register(UniformRV) +def uniform_logp(op, value, lower, upper): + """ + Calculate log-probability of Uniform distribution at specified value. - Parameters - ---------- - value: numeric - Value for which log-probability is calculated. + Parameters + ---------- + value: numeric + Value for which log-probability is calculated. - Returns - ------- - TensorVariable - """ - lower = self.lower - upper = self.upper - return bound(-tt.log(upper - lower), value >= lower, value <= upper) + Returns + ------- + TensorVariable + """ + return bound(-tt.log(upper - lower), value >= lower, value <= upper) - def logcdf(self, value): - """ - Compute the log of the cumulative distribution function for Uniform distribution - at the specified value. - Parameters - ---------- - value: numeric or np.ndarray or theano.tensor - Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. +@_logcdf.register(UniformRV) +def uniform_logcdf(op, value, lower, upper): + """ + Compute the log of the cumulative distribution function for Uniform distribution + at the specified value. - Returns - ------- - TensorVariable - """ - lower = self.lower - upper = self.upper + Parameters + ---------- + value: numeric or np.ndarray or theano.tensor + Value(s) for which log CDF is calculated. If the log CDF for multiple + values are desired the values must be provided in a numpy array or theano tensor. 
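As a quick sanity check of the density that the registered `uniform_logp` encodes (constant `-log(upper - lower)` inside the support, `-inf` outside), the following standalone NumPy/SciPy snippet evaluates the same expression and compares it against `scipy.stats.uniform.logpdf`; it is illustrative only and does not touch the patched code.

import numpy as np
from scipy import stats

lower, upper = -1.0, 3.0
value = np.array([-2.0, -1.0, 0.5, 3.0, 4.0])

# Same piecewise expression as uniform_logp, written with NumPy.
logp = np.where((value >= lower) & (value <= upper), -np.log(upper - lower), -np.inf)

expected = stats.uniform.logpdf(value, loc=lower, scale=upper - lower)
print(np.allclose(logp, expected))  # expect True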
- return tt.switch( - tt.lt(value, lower) | tt.lt(upper, lower), - -np.inf, - tt.switch( - tt.lt(value, upper), - tt.log(value - lower) - tt.log(upper - lower), - 0, - ), - ) + Returns + ------- + TensorVariable + """ + return tt.switch( + tt.lt(value, lower) | tt.lt(upper, lower), + -np.inf, + tt.switch( + tt.lt(value, upper), + tt.log(value - lower) - tt.log(upper - lower), + 0, + ), + ) class Flat(Continuous): @@ -478,87 +462,67 @@ class Normal(Continuous): x = pm.Normal('x', mu=0, tau=1/23) """ - def __init__(self, mu=0, sigma=None, tau=None, sd=None, **kwargs): + @classmethod + def dist(cls, mu=0, sigma=None, tau=None, sd=None, **kwargs): if sd is not None: sigma = sd tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) - self.sigma = self.sd = tt.as_tensor_variable(sigma) - self.tau = tt.as_tensor_variable(tau) + sigma = tt.as_tensor_variable(sigma) - self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(floatX(mu)) - self.variance = 1.0 / self.tau + # sd = sigma + # tau = tt.as_tensor_variable(tau) + # mean = median = mode = mu = tt.as_tensor_variable(floatX(mu)) + # variance = 1.0 / self.tau assert_negative_support(sigma, "sigma", "Normal") - assert_negative_support(tau, "tau", "Normal") + # assert_negative_support(tau, "tau", "Normal") - super().__init__(**kwargs) + rv_var = normal(mu, sigma, **kwargs) + rv_var.tag.transform = kwargs.get("transform", cls.default_transform) - def random(self, point=None, size=None): - """ - Draw random values from Normal distribution. + return rv_var - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). - Returns - ------- - array - """ - mu, tau, _ = draw_values([self.mu, self.tau, self.sigma], point=point, size=size) - return generate_samples( - stats.norm.rvs, loc=mu, scale=tau ** -0.5, dist_shape=self.shape, size=size - ) - - def logp(self, value): - """ - Calculate log-probability of Normal distribution at specified value. +@_logp.register(NormalRV) +def normal_logp(op, value, mu, sigma): + """ + Calculate log-probability of Normal distribution at specified value. - Parameters - ---------- - value: numeric - Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + Parameters + ---------- + value: numeric + Value(s) for which log-probability is calculated. If the log probabilities for multiple + values are desired the values must be provided in a numpy array or theano tensor - Returns - ------- - TensorVariable - """ - sigma = self.sigma - tau = self.tau - mu = self.mu + Returns + ------- + TensorVariable + """ + tau, sigma = get_tau_sigma(tau=None, sigma=sigma) - return bound((-tau * (value - mu) ** 2 + tt.log(tau / np.pi / 2.0)) / 2.0, sigma > 0) + return bound((-tau * (value - mu) ** 2 + tt.log(tau / np.pi / 2.0)) / 2.0, sigma > 0) - def _distr_parameters_for_repr(self): - return ["mu", "sigma"] - def logcdf(self, value): - """ - Compute the log of the cumulative distribution function for Normal distribution - at the specified value. +@_logcdf.register(NormalRV) +def normal_logcdf(op, value, mu, sigma): + """ + Compute the log of the cumulative distribution function for Normal distribution + at the specified value. 
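The precision-based expression used by `normal_logp` is algebraically the usual Normal log-density, since `tau = 1 / sigma**2`. A small standalone check (illustrative, not part of the patch):

import numpy as np
from scipy import stats

mu, sigma = 1.5, 0.7
tau = 1.0 / sigma ** 2
value = np.linspace(-2.0, 4.0, 7)

# (-tau * (value - mu)**2 + log(tau / (2*pi))) / 2, as in normal_logp
logp = (-tau * (value - mu) ** 2 + np.log(tau / np.pi / 2.0)) / 2.0

print(np.allclose(logp, stats.norm.logpdf(value, loc=mu, scale=sigma)))  # expect True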
- Parameters - ---------- - value: numeric or np.ndarray or theano.tensor - Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + Parameters + ---------- + value: numeric or np.ndarray or theano.tensor + Value(s) for which log CDF is calculated. If the log CDF for multiple + values are desired the values must be provided in a numpy array or theano tensor. - Returns - ------- - TensorVariable - """ - mu = self.mu - sigma = self.sigma - return bound( - normal_lcdf(mu, sigma, value), - 0 < sigma, - ) + Returns + ------- + TensorVariable + """ + return bound( + normal_lcdf(mu, sigma, value), + 0 < sigma, + ) class TruncatedNormal(BoundedContinuous): diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 8178ae0d228..c72efc255da 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -57,7 +57,6 @@ "Continuous", "Discrete", "NoDistribution", - "TensorType", "draw_values", "generate_samples", ] @@ -76,9 +75,9 @@ class _Unpickling: class Distribution: """Statistical distribution""" + default_transform = None + def __new__(cls, name, *args, **kwargs): - if name is _Unpickling: - return object.__new__(cls) # for pickle try: model = Model.get_context() except TypeError: @@ -93,13 +92,13 @@ def __new__(cls, name, *args, **kwargs): raise TypeError(f"Name needs to be a string but got: {name}") data = kwargs.pop("observed", None) - cls.data = data + if isinstance(data, ObservedRV) or isinstance(data, FreeRV): raise TypeError("observed needs to be data but got: {}".format(type(data))) + total_size = kwargs.pop("total_size", None) dims = kwargs.pop("dims", None) - has_shape = "shape" in kwargs shape = kwargs.pop("shape", None) if dims is not None: if shape is not None: @@ -114,33 +113,14 @@ def __new__(cls, name, *args, **kwargs): f"Distribution initialized with invalid shape {shape}. This is not allowed." ) - # Some distributions do not accept shape=None - if has_shape or shape is not None: - dist = cls.dist(*args, **kwargs, shape=shape) - else: - dist = cls.dist(*args, **kwargs) - return model.Var(name, dist, data, total_size, dims=dims) + rv_out = cls.dist(*args, **kwargs) + rv_out.name = name - def __getnewargs__(self): - return (_Unpickling,) + return model.register_rv_var(rv_out, name, cls, data, total_size, dims=dims) @classmethod def dist(cls, *args, **kwargs): - dist = object.__new__(cls) - dist.__init__(*args, **kwargs) - return dist - - def __init__( - self, shape, dtype, testval=None, defaults=(), transform=None, broadcastable=None, dims=None - ): - self.shape = np.atleast_1d(shape) - if False in (np.floor(self.shape) == self.shape): - raise TypeError("Expected int elements in shape") - self.dtype = dtype - self.type = TensorType(self.dtype, self.shape, broadcastable) - self.testval = testval - self.defaults = defaults - self.transform = transform + raise NotImplementedError() def default(self): return np.asarray(self.get_test_val(self.testval, self.defaults), self.dtype) @@ -244,37 +224,9 @@ def _repr_latex_(self, *, formatting="latex_with_params", **kwargs): """Magic method name for IPython to use for LaTeX formatting.""" return self._str_repr(formatting=formatting, **kwargs) - def logp_nojac(self, *args, **kwargs): - """Return the logp, but do not include a jacobian term for transforms. 
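The refactored `Distribution.__new__` splits construction into two steps: `cls.dist(...)` builds a bare random-variable graph, and `model.register_rv_var(...)` attaches it (together with its value variable and transform) to the model found in the current context. The toy classes below mimic that control flow in plain Python; `ToyModel`, `ToyNormal`, and the dict "graph" are invented stand-ins, not the PyMC3 API.

class ToyModel:
    _context = []

    def __init__(self):
        self.free_RVs = []

    def __enter__(self):
        ToyModel._context.append(self)
        return self

    def __exit__(self, *exc):
        ToyModel._context.pop()

    @classmethod
    def get_context(cls):
        return cls._context[-1]

    def register_rv_var(self, rv_var, name):
        rv_var["name"] = name
        self.free_RVs.append(rv_var)
        return rv_var


class ToyNormal:
    # Mirrors Distribution.__new__: build with .dist(), then register on the model.
    def __new__(cls, name, *args, **kwargs):
        model = ToyModel.get_context()
        rv_var = cls.dist(*args, **kwargs)
        return model.register_rv_var(rv_var, name)

    @classmethod
    def dist(cls, mu=0.0, sigma=1.0):
        # In the real code this returns a Theano RandomVariable graph.
        return {"op": "normal", "mu": mu, "sigma": sigma}


with ToyModel() as m:
    x = ToyNormal("x", mu=0.0, sigma=2.0)

print(m.free_RVs)  # [{'op': 'normal', 'mu': 0.0, 'sigma': 2.0, 'name': 'x'}]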
- - If we use different parametrizations for the same distribution, we - need to add the determinant of the jacobian of the transformation - to make sure the densities still describe the same distribution. - However, MAP estimates are not invariant with respect to the - parametrization, we need to exclude the jacobian terms in this case. - - This function should be overwritten in base classes for transformed - distributions. - """ - return self.logp(*args, **kwargs) - - def logp_sum(self, *args, **kwargs): - """Return the sum of the logp values for the given observations. - - Subclasses can use this to improve the speed of logp evaluations - if only the sum of the logp values is needed. - """ - return tt.sum(self.logp(*args, **kwargs)) - __latex__ = _repr_latex_ -def TensorType(dtype, shape, broadcastable=None): - if broadcastable is None: - broadcastable = np.atleast_1d(shape) == 1 - return tt.TensorType(str(dtype), broadcastable) - - class NoDistribution(Distribution): def __init__( self, diff --git a/pymc3/glm/families.py b/pymc3/glm/families.py index 23ca136cf85..31be47909e7 100644 --- a/pymc3/glm/families.py +++ b/pymc3/glm/families.py @@ -71,7 +71,7 @@ def _get_priors(self, model=None, name=""): if isinstance(val, (numbers.Number, np.ndarray, np.generic)): priors[key] = val else: - priors[key] = model.Var(f"{name}{key}", val) + priors[key] = model.register_rv_var(f"{name}{key}", val) return priors diff --git a/pymc3/glm/linear.py b/pymc3/glm/linear.py index 81c916c1185..5e5358f6f49 100644 --- a/pymc3/glm/linear.py +++ b/pymc3/glm/linear.py @@ -81,13 +81,15 @@ def __init__( if name in vars: v = Deterministic(name, vars[name]) else: - v = self.Var(name=name, dist=priors.get(name, self.default_intercept_prior)) + v = self.register_rv_var( + name=name, dist=priors.get(name, self.default_intercept_prior) + ) coeffs.append(v) else: if name in vars: v = Deterministic(name, vars[name]) else: - v = self.Var( + v = self.register_rv_var( name=name, dist=priors.get( name, priors.get("Regressor", self.default_regressor_prior) diff --git a/pymc3/model.py b/pymc3/model.py index 393c4d2f6a2..ed03fa3ccb7 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -30,11 +30,13 @@ from pandas import Series from theano.compile import SharedVariable from theano.graph.basic import Apply +from theano.tensor.random.op import observed from theano.tensor.var import TensorVariable import pymc3 as pm from pymc3.blocking import ArrayOrdering, DictToArrayBijection +from pymc3.distributions import logpt from pymc3.exceptions import ImputationWarning from pymc3.math import flatten_list from pymc3.memoize import WithMemoization, memoize @@ -810,7 +812,7 @@ class Model(Factor, WithMemoization, metaclass=ContextMeta): A dictionary of theano config values that should be set temporarily in the model context. See the documentation of theano for a complete list. Set config key - ``compute_test_value`` to `raise` if it is None. + ``compute_test_value`` to `ignore` if it is None. check_bounds: bool Ensure that input parameters to distributions are in a valid range. 
If your model is built in a way where you know your @@ -898,7 +900,7 @@ def __new__(cls, *args, **kwargs): instance._parent = cls.get_context(error_if_none=False) theano_config = kwargs.get("theano_config", None) if theano_config is None or "compute_test_value" not in theano_config: - theano_config = {"compute_test_value": "raise"} + theano_config = {"compute_test_value": "ignore"} instance._theano_config = theano_config return instance @@ -958,7 +960,7 @@ def dict_to_array(self): @property def ndim(self): - return sum(var.dsize for var in self.free_RVs) + return sum(var.ndim for var in self.free_RVs) @property def logp_array(self): @@ -966,8 +968,10 @@ def logp_array(self): @property def dlogp_array(self): - vars = inputvars(self.cont_vars) - return self.bijection.mapf(self.fastdlogp(vars)) + logpt = self.logpt + vars = inputvars(logpt) + dlogp = self.fastfn(gradient(self.logpt, vars)) + return self.bijection.mapf(dlogp) def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): """Compile a theano function that computes logp and gradient. @@ -984,16 +988,24 @@ def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): if grad_vars is None: grad_vars = list(typefilter(self.free_RVs, continuous_types)) else: - for var in grad_vars: + for i, var in enumerate(grad_vars): if var.dtype not in continuous_types: raise ValueError("Can only compute the gradient of continuous types: %s" % var) + # We allow one to pass the random variable terms as arguments + if hasattr(var.tag, "value_var"): + grad_vars[i] = var.tag.value_var if tempered: with self: free_RVs_logp = tt.sum( - [tt.sum(var.logpt) for var in self.free_RVs + self.potentials] + [ + tt.sum(logpt(var, var.tag.value_var, scaling=var.tag.scaling)) + for var in self.free_RVs + self.potentials + ] + ) + observed_RVs_logp = tt.sum( + [tt.sum(logpt(obs, scaling=obs.tag.scaling)) for obs in self.observed_RVs] ) - observed_RVs_logp = tt.sum([tt.sum(var.logpt) for var in self.observed_RVs]) costs = [free_RVs_logp, observed_RVs_logp] else: @@ -1006,13 +1018,17 @@ def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): def logpt(self): """Theano scalar of log-probability of the model""" with self: - factors = [var.logpt for var in self.basic_RVs] + self.potentials - logp = tt.sum([tt.sum(factor) for factor in factors]) + factors = [ + logpt(var, var.tag.value_var, scaling=var.tag.scaling) for var in self.free_RVs + ] + factors += [logpt(obs, scaling=obs.tag.scaling) for obs in self.observed_RVs] + factors += self.potentials + logp_var = tt.sum([tt.sum(factor) for factor in factors]) if self.name: - logp.name = "__logp_%s" % self.name + logp_var.name = "__logp_%s" % self.name else: - logp.name = "__logp" - return logp + logp_var.name = "__logp" + return logp_var @property def logp_nojact(self): @@ -1022,35 +1038,42 @@ def logp_nojact(self): will be the same as logpt as there is no need for Jacobian correction. 
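`Model.logpt` now assembles the joint log-probability by calling `logpt(var, var.tag.value_var, ...)` for every free variable and `logpt(obs, ...)` for every observed one, then summing the terms. The standalone NumPy/SciPy sketch below illustrates that "sum of factor logps over a point" idea; the factor functions and the point dictionary are invented for illustration, not the compiled Theano graph.

import numpy as np
from scipy import stats

data = np.array([0.1, -0.3, 0.5])

# One log-density term per model variable, evaluated at that variable's value.
factor_logps = {
    "mu": lambda point: stats.norm.logpdf(point["mu"], loc=0.0, scale=10.0),          # free RV
    "y_obs": lambda point: stats.norm.logpdf(data, loc=point["mu"], scale=1.0).sum(),  # observed RV
}

def model_logp(point):
    # Analogue of Model.logpt: the joint logp is the sum over all factor terms.
    return sum(f(point) for f in factor_logps.values())

print(model_logp({"mu": 0.0}))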
""" with self: - factors = [var.logp_nojact for var in self.basic_RVs] + self.potentials - logp = tt.sum([tt.sum(factor) for factor in factors]) + factors = [ + logpt(var, var.tag.value_var, jacobian=False, scaling=var.tag.scaling) + for var in self.free_RVs + ] + factors += [ + logpt(obs, jacobian=False, scaling=obs.tag.scaling) for obs in self.observed_RVs + ] + factors += self.potentials + logp_var = tt.sum([tt.sum(factor) for factor in factors]) if self.name: - logp.name = "__logp_nojac_%s" % self.name + logp_var.name = "__logp_nojac_%s" % self.name else: - logp.name = "__logp_nojac" - return logp + logp_var.name = "__logp_nojac" + return logp_var @property def varlogpt(self): """Theano scalar of log-probability of the unobserved random variables (excluding deterministic).""" with self: - factors = [var.logpt for var in self.free_RVs] + factors = [logpt(var, var.tag.value_var) for var in self.free_RVs] return tt.sum(factors) @property def datalogpt(self): with self: - factors = [var.logpt for var in self.observed_RVs] + factors = [logpt(obs) for obs in self.observed_RVs] factors += [tt.sum(factor) for factor in self.potentials] return tt.sum(factors) @property def vars(self): - """List of unobserved random variables used as inputs to the model - (which excludes deterministics). + """List of unobserved random variables used as inputs to the model's + log-likelihood (which excludes deterministics). """ - return self.free_RVs + return [v.tag.value_var for v in self.free_RVs] @property def basic_RVs(self): @@ -1062,12 +1085,17 @@ def basic_RVs(self): @property def unobserved_RVs(self): """List of all random variable, including deterministic ones.""" - return self.vars + self.deterministics + return self.free_RVs + self.deterministics + + @property + def independent_vars(self): + """List of all variables that are non-stochastic inputs to the model.""" + return inputvars(self.unobserved_RVs) @property def test_point(self): """Test point used to check that the model doesn't generate errors""" - return Point(((var, var.tag.test_value) for var in self.vars), model=self) + return Point(((var.tag.value_var, var.tag.test_value) for var in self.free_RVs), model=self) @property def disc_vars(self): @@ -1109,9 +1137,8 @@ def add_coords(self, coords): else: self.coords[name] = coords[name] - def Var(self, name, dist, data=None, total_size=None, dims=None): - """Create and add (un)observed random variable to the model with an - appropriate prior distribution. + def register_rv_var(self, rv_var, name, dist, data=None, total_size=None, dims=None): + """Register an (un)observed random variable with the model. Parameters ---------- @@ -1131,63 +1158,95 @@ def Var(self, name, dist, data=None, total_size=None, dims=None): """ name = self.name_for(name) + self.add_random_variable(rv_var, dims) + if data is None: - if getattr(dist, "transform", None) is None: - with self: - var = FreeRV(name=name, distribution=dist, total_size=total_size, model=self) - self.free_RVs.append(var) + # Create a `TensorVariable` that will be used as the random + # variable's "value" in log-likelihood graphs. + # + # In general, we'll call this type of variable the "value" variable. + # + # In all other cases, the role of the value variable is taken by + # observed data. That's why value variables are only referenced in + # this branch of the conditional. 
+ value_var = rv_var.type() + value_var.name = rv_var.name + rv_var.tag.value_var = value_var + + self.free_RVs.append(rv_var) + + transform = getattr(dist, "transform", None) + + if transform is None: + value_var.tag.transform = None + rv_var.tag.scaling = _get_scaling(total_size, rv_var.shape, rv_var.ndim) else: - with self: - var = TransformedRV( - name=name, - distribution=dist, - transform=dist.transform, - total_size=total_size, - model=self, - ) + value_var.tag.transform = transform + + # These transformations are only relevant to log-likelihood + # calculations, so everything is being done on the "value" + # variable. + value_var_trans = transform.apply(value_var) + value_var_trans.tag.transformed = value_var_trans + + transformed_name = get_transformed_name(name, transform) + + rv_var.tag.scaling = _get_scaling( + total_size, value_var_trans.shape, value_var_trans.ndim + ) + pm._log.debug( "Applied {transform}-transform to {name}" " and added transformed {orig_name} to model.".format( - transform=dist.transform.name, + transform=transform.name, name=name, - orig_name=get_transformed_name(name, dist.transform), + orig_name=transformed_name, ) ) - self.deterministics.append(var) - self.add_random_variable(var, dims) - return var + self.deterministics.append(rv_var) + + return rv_var elif isinstance(data, dict): - with self: - var = MultiObservedRV( - name=name, - data=data, - distribution=dist, - total_size=total_size, - model=self, - ) - self.observed_RVs.append(var) - if var.missing_values: - self.free_RVs += var.missing_values - self.missing_values += var.missing_values - for v in var.missing_values: - self.named_vars[v.name] = v + + # TODO: How exactly does this dictionary map to `rv_var`? + + # obs_rvs = {name: make_obs_var(rv_var, d, name, self) for name, d in data.items()} + # rv_var.tag.data = obs_rvs + # + # missing_values = [ + # datum.missing_values for datum in data.values() if datum.missing_values is not None + # ] + # rv_var.tag.missing_values = missing_values + # + # scaling = _get_scaling(total_size, rv_var.shape, rv_var.ndim) + # rv_var.tag.scaling = scaling + # + # self.observed_RVs.append(rv_var) + # + # if missing_values: + # self.free_RVs += rv_var.tag.missing_values + # self.missing_values += rv_var.tag.missing_values + # for v in rv_var.tag.missing_values: + # self.named_vars[v.name] = v + # return obs_rvs + + raise NotImplementedError() else: - with self: - var = ObservedRV( - name=name, - data=data, - distribution=dist, - total_size=total_size, - model=self, - ) - self.observed_RVs.append(var) - if var.missing_values: - self.free_RVs.append(var.missing_values) - self.missing_values.append(var.missing_values) - self.named_vars[var.missing_values.name] = var.missing_values + data = pandas_to_array(data) + rv_var.tag.data = data + + rv_obs = make_obs_var(rv_var, data, name, self) + + self.observed_RVs.append(rv_obs) + + if rv_obs.tag.missing_values: + self.free_RVs.append(rv_obs.tag.missing_values) + self.missing_values.append(rv_obs.tag.missing_values) + self.named_vars[rv_obs.tag.missing_values.name] = rv_obs.tag.missing_values + + rv_obs.tag.scaling = _get_scaling(total_size, rv_obs.shape, rv_obs.ndim) - self.add_random_variable(var, dims) - return var + return rv_obs def add_random_variable(self, var, dims=None): """Add a random variable to the named variables of the model.""" @@ -1343,7 +1402,7 @@ def flatten(self, vars=None, order=None, inputvar=None): flat_view """ if vars is None: - vars = self.free_RVs + vars = self.vars if order is None: order = 
ArrayOrdering(vars) if inputvar is None: @@ -1380,7 +1439,10 @@ def check_test_point(self, test_point=None, round_vals=2): test_point = self.test_point return Series( - {RV.name: np.round(RV.logp(test_point), round_vals) for RV in self.basic_RVs}, + { + rv.name: np.round(self.fn(logpt(rv))(test_point), round_vals) + for rv in self.basic_RVs + }, name="Log-probability of test_point", ) @@ -1726,8 +1788,8 @@ def pandas_to_array(data): return pm.floatX(ret) -def as_tensor(data, name, model, distribution): - dtype = distribution.dtype +def make_obs_var(rv_var, data, name, model): + dtype = rv_var.dtype data = pandas_to_array(data).astype(dtype) if hasattr(data, "mask"): @@ -1737,29 +1799,25 @@ def as_tensor(data, name, model, distribution): " sampling distribution.".format(name=name) ) warnings.warn(impute_message, ImputationWarning) - from pymc3.distributions import NoDistribution - - testval = np.broadcast_to(distribution.default(), data.shape)[data.mask] - fakedist = NoDistribution.dist( - shape=data.mask.sum(), - dtype=dtype, - testval=testval, - parent_dist=distribution, - ) - missing_values = FreeRV(name=name + "_missing", distribution=fakedist, model=model) + + missing_values = rv_var[data.mask] constant = tt.as_tensor_variable(data.filled()) - dataTensor = tt.set_subtensor(constant[data.mask.nonzero()], missing_values) - dataTensor.missing_values = missing_values - return dataTensor + data = tt.set_subtensor(constant[data.mask.nonzero()], missing_values) + rv_obs = observed(rv_var, data) + rv_obs.tag.missing_values = missing_values elif sps.issparse(data): data = sparse.basic.as_sparse(data, name=name) - data.missing_values = None - return data + rv_obs = observed(rv_var, data) + rv_obs.tag.missing_values = None else: data = tt.as_tensor_variable(data, name=name) - data.missing_values = None - return data + rv_obs = observed(rv_var, data) + rv_obs.tag.missing_values = None + + rv_obs.name = name + + return rv_obs class ObservedRV(Factor, PyMC3Variable): @@ -1806,7 +1864,7 @@ def __init__( super().__init__(type, owner, index, name) if distribution is not None: - data = as_tensor(data, name, model, distribution) + data = make_obs_var(data, name, model, distribution) self.missing_values = data.missing_values self.logp_elemwiset = distribution.logp(data) @@ -1847,13 +1905,6 @@ def __init__(self, name, data, distribution, total_size=None, model=None): needed for upscaling logp """ self.name = name - self.data = { - name: as_tensor(data, name, model, distribution) for name, data in data.items() - } - - self.missing_values = [ - datum.missing_values for datum in self.data.values() if datum.missing_values is not None - ] self.logp_elemwiset = distribution.logp(**self.data) # The logp might need scaling in minibatches. # This is done in `Factor`. 
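`make_obs_var` splits masked observations into fixed data plus free "missing value" entries and stitches them back together with `tt.set_subtensor` before wrapping the result in an `observed` node. A plain NumPy analogue of that stitching (illustrative only, not part of the patch):

import numpy as np
import numpy.ma as ma

data = ma.masked_invalid(np.array([1.0, np.nan, 3.0, np.nan]))

# Missing entries become free parameters; observed entries stay fixed.
missing_values = np.array([-9.0, -9.0])    # stand-in for the imputed RV draws
constant = data.filled(0.0)

full = constant.copy()
full[data.mask.nonzero()] = missing_values  # analogue of tt.set_subtensor
print(full)                                 # [ 1. -9.  3. -9.]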
@@ -1862,7 +1913,6 @@ def __init__(self, name, data, distribution, total_size=None, model=None): self.total_size = total_size self.model = model self.distribution = distribution - self.scaling = _get_scaling(total_size, self.logp_elemwiset.shape, self.logp_elemwiset.ndim) # Make hashable by id for draw_values def __hash__(self): @@ -1993,7 +2043,7 @@ def __init__( transformed_name = get_transformed_name(name, transform) - self.transformed = model.Var( + self.transformed = model.register_rv_var( transformed_name, transform.apply(distribution), total_size=total_size ) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index bc77113772f..78a33ebabc1 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -28,6 +28,7 @@ import arviz import numpy as np import packaging +import theano import theano.gradient as tg import xarray @@ -57,6 +58,7 @@ ) from pymc3.step_methods.arraystep import BlockedStep, PopulationArrayStepShared from pymc3.step_methods.hmc import quadpotential +from pymc3.theanof import inputvars from pymc3.util import ( chains_and_samples, check_start_vals, @@ -1896,6 +1898,7 @@ def sample_prior_predictive( samples=500, model: Optional[Model] = None, var_names: Optional[Iterable[str]] = None, + indep_var_values: Optional[Dict[str, np.ndarray]] = None, random_seed=None, ) -> Dict[str, np.ndarray]: """Generate samples from the prior predictive distribution. @@ -1908,6 +1911,8 @@ def sample_prior_predictive( var_names : Iterable[str] A list of names of variables for which to compute the posterior predictive samples. Defaults to both observed and unobserved RVs. + indep_var_values: dict + A dictionary specifying values for all the independent variables. random_seed : int Seed for the random number generator. @@ -1937,11 +1942,21 @@ def sample_prior_predictive( if random_seed is not None: np.random.seed(random_seed) + names = get_default_varnames(vars_, include_transformed=False) - # draw_values fails with auto-transformed variables. transform them later! 
- values = draw_values([model[name] for name in names], size=samples) - data = {k: v for k, v in zip(names, values)} + vars_to_sample = [model[name] for name in names] + inputs = [i for i in inputvars(vars_to_sample) if i not in indep_var_values] + sampler_fn = theano.function( + inputs, + vars_to_sample, + givens=indep_var_values, + allow_input_downcast=True, + accept_inplace=True, + ) + values = zip(*[sampler_fn() for i in range(samples)]) + + data = {k: np.stack(v) for k, v in zip(names, values)} if data is None: raise AssertionError("No variables sampled: attempting to sample %s" % names) diff --git a/pymc3/tests/test_model.py b/pymc3/tests/test_model.py index c8be76c2d72..01e75c59d21 100644 --- a/pymc3/tests/test_model.py +++ b/pymc3/tests/test_model.py @@ -35,7 +35,7 @@ def __init__(self, name="", model=None): super().__init__(name, model) assert pm.modelcontext(None) is self # 1) init variables with Var method - self.Var("v1", pm.Normal.dist()) + self.register_rv_var("v1", pm.Normal.dist()) self.v2 = pm.Normal("v2", mu=0, sigma=1) # 2) Potentials and Deterministic variables with method too # be sure that names will not overlap with other same models @@ -46,7 +46,7 @@ def __init__(self, name="", model=None): class DocstringModel(pm.Model): def __init__(self, mean=0, sigma=1, name="", model=None): super().__init__(name, model) - self.Var("v1", Normal.dist(mu=mean, sigma=sigma)) + self.register_rv_var("v1", Normal.dist(mu=mean, sigma=sigma)) Normal("v2", mu=mean, sigma=sigma) Normal("v3", mu=mean, sigma=HalfCauchy("sd", beta=10, testval=1.0)) Deterministic("v3_sq", self.v3 ** 2) @@ -59,12 +59,12 @@ def test_setattr_properly_works(self): pm.Normal("v1") assert len(model.vars) == 1 with pm.Model("sub") as submodel: - submodel.Var("v1", pm.Normal.dist()) + submodel.register_rv_var("v1", pm.Normal.dist()) assert hasattr(submodel, "v1") assert len(submodel.vars) == 1 assert len(model.vars) == 2 with submodel: - submodel.Var("v2", pm.Normal.dist()) + submodel.register_rv_var("v2", pm.Normal.dist()) assert hasattr(submodel, "v2") assert len(submodel.vars) == 2 assert len(model.vars) == 3 @@ -82,7 +82,7 @@ def test_context_passes_vars_to_parent_model(self): assert usermodel2._parent == model # you can enter in a context with submodel with usermodel2: - usermodel2.Var("v3", pm.Normal.dist()) + usermodel2.register_rv_var("v3", pm.Normal.dist()) pm.Normal("v4") # this variable is created in parent model too assert "another_v2" in model.named_vars diff --git a/pymc3/tests/test_model_helpers.py b/pymc3/tests/test_model_helpers.py index 7b049bac707..ca507f2a238 100644 --- a/pymc3/tests/test_model_helpers.py +++ b/pymc3/tests/test_model_helpers.py @@ -127,7 +127,7 @@ def test_as_tensor(self): fake_distribution.testval = None # Alias the function to be tested - func = pm.model.as_tensor + func = pm.model.make_obs_var # Check function behavior using the various inputs dense_output = func(dense_input, input_name, fake_model, fake_distribution)
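Under the refactor, `sample_prior_predictive` compiles one Theano function over the variables of interest and calls it repeatedly, stacking the draws per variable name. The standalone NumPy sketch below mirrors that "call a sampler function `samples` times, transpose, and stack" shape; `sample_once` is an invented stand-in for the compiled `sampler_fn`.

import numpy as np

rng = np.random.default_rng(123)
names = ["mu", "y"]

def sample_once():
    # Stand-in for one call of the compiled sampler_fn: draw every variable forward.
    mu = rng.normal(0.0, 10.0)
    y = rng.normal(mu, 1.0, size=3)
    return [mu, y]

samples = 500
values = zip(*[sample_once() for _ in range(samples)])
data = {k: np.stack(v) for k, v in zip(names, values)}

print(data["mu"].shape, data["y"].shape)  # (500,) (500, 3)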