From e1a86fe7e5e19be32e60b4d69f7605b4e3a6f0a5 Mon Sep 17 00:00:00 2001 From: Eelke Spaak Date: Tue, 11 Aug 2020 14:06:19 +0200 Subject: [PATCH 1/7] adding unit tests for new __str__ functionality --- pymc3/tests/test_distributions.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index fbfb74ab195..212fa1d6062 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -1771,7 +1771,7 @@ def test_bound(): BoundPoissonPositionalArgs = Bound(Poisson, upper=6)("x", 2.0) -class TestLatex: +class TestStrAndLatexRepr: def setup_class(self): # True parameter values alpha, sigma = 1, 1 @@ -1800,7 +1800,7 @@ def setup_class(self): # Likelihood (sampling distribution) of observations Y_obs = Normal("Y_obs", mu=mu, sigma=sigma, observed=Y) self.distributions = [alpha, sigma, mu, b, Z, Y_obs] - self.expected = ( + self.expected_latex = ( r"$\text{alpha} \sim \text{Normal}(\mathit{mu}=0.0,~\mathit{sigma}=10.0)$", r"$\text{sigma} \sim \text{HalfNormal}(\mathit{sigma}=1.0)$", r"$\text{mu} \sim \text{Deterministic}(\text{alpha},~\text{Constant},~\text{beta})$", @@ -1808,22 +1808,38 @@ def setup_class(self): r"$\text{Z} \sim \text{MvNormal}(\mathit{mu}=array,~\mathit{chol_cov}=array)$", r"$\text{Y_obs} \sim \text{Normal}(\mathit{mu}=\text{mu},~\mathit{sigma}=f(\text{sigma}))$", ) + self.expected_str = ( + r"alpha ~ Normal(mu=0.0, sigma=10.0)", + r"sigma ~ HalfNormal(sigma=1.0)", + r"mu ~ Deterministic(alpha, Constant, beta)", + r"beta ~ Normal(mu=0.0, sigma=10.0)", + r"Z ~ MvNormal(mu=array, chol_cov=array)", + r"Y_obs ~ Normal(mu=mu, sigma=f(sigma))", + ) def test__repr_latex_(self): - for distribution, tex in zip(self.distributions, self.expected): + for distribution, tex in zip(self.distributions, self.expected_latex): assert distribution._repr_latex_() == tex model_tex = self.model._repr_latex_() - for tex in self.expected: # make sure each variable is in the model + for tex in self.expected_latex: # make sure each variable is in the model for segment in tex.strip("$").split(r"\sim"): assert segment in model_tex def test___latex__(self): - for distribution, tex in zip(self.distributions, self.expected): + for distribution, tex in zip(self.distributions, self.expected_latex): assert distribution._repr_latex_() == distribution.__latex__() assert self.model._repr_latex_() == self.model.__latex__() + def test___str__(self): + for distribution, str_repr in zip(self.distributions, self.expected_str): + assert distribution.__str__() == str_repr + + model_str = self.model.__str__() + for str_repr in self.expected_str: + assert str_repr in model_str + def test_discrete_trafo(): with pytest.raises(ValueError) as err: From d73c27e016a5189978e6e57b30e4b70583d4e090 Mon Sep 17 00:00:00 2001 From: Eelke Spaak Date: Fri, 4 Sep 2020 16:12:08 +0200 Subject: [PATCH 2/7] use get_var_name instead of str to get variable name --- pymc3/backends/base.py | 9 +++++---- pymc3/backends/sqlite.py | 3 ++- pymc3/blocking.py | 4 +++- pymc3/distributions/distribution.py | 4 ++-- pymc3/distributions/posterior_predictive.py | 8 ++++---- pymc3/model.py | 5 +++-- pymc3/model_graph.py | 4 ++-- pymc3/step_methods/arraystep.py | 3 ++- pymc3/tests/sampler_fixtures.py | 3 ++- pymc3/tuning/scaling.py | 3 ++- pymc3/tuning/starting.py | 5 +++-- pymc3/util.py | 16 +++++++++++++++- 12 files changed, 45 insertions(+), 22 deletions(-) diff --git a/pymc3/backends/base.py b/pymc3/backends/base.py index 57b0984b2a6..a08abacb83d 100644 --- a/pymc3/backends/base.py +++ b/pymc3/backends/base.py @@ -28,6 +28,7 @@ from ..model import modelcontext, Model from .report import SamplerReport, merge_reports +from ..util import get_var_name logger = logging.getLogger('pymc3') @@ -109,7 +110,7 @@ def _set_sampler_vars(self, sampler_vars): self.sampler_vars = sampler_vars # pylint: disable=unused-argument - def setup(self, draws, chain, sampler_vars=None) -> None: + def setup(self, draws, chain, sampler_vars=None) -> None: """Perform chain-specific setup. Parameters @@ -335,7 +336,7 @@ def __getitem__(self, idx): var = idx burn, thin = 0, 1 - var = str(var) + var = get_var_name(var) if var in self.varnames: if var in self.stat_names: warnings.warn("Attribute access on a trace object is ambigous. " @@ -355,7 +356,7 @@ def __getattr__(self, name): if name in self._attrs: raise AttributeError - name = str(name) + name = get_var_name(name) if name in self.varnames: if name in self.stat_names: warnings.warn("Attribute access on a trace object is ambigous. " @@ -482,7 +483,7 @@ def get_values(self, varname, burn=0, thin=1, combine=True, chains=None, """ if chains is None: chains = self.chains - varname = str(varname) + varname = get_var_name(varname) try: results = [self._straces[chain].get_values(varname, burn, thin) for chain in chains] diff --git a/pymc3/backends/sqlite.py b/pymc3/backends/sqlite.py index 3dc93b3e34d..ad9bd3d0a13 100644 --- a/pymc3/backends/sqlite.py +++ b/pymc3/backends/sqlite.py @@ -36,6 +36,7 @@ from ..backends import base, ndarray from . import tracetab as ttab +from ..util import get_var_name TEMPLATES = { 'table': ('CREATE TABLE IF NOT EXISTS [{table}] ' @@ -244,7 +245,7 @@ def get_values(self, varname, burn=0, thin=1): if thin < 1: raise ValueError('Only positive thin values are supported ' 'in SQLite backend.') - varname = str(varname) + varname = get_var_name(varname) statement_args = {'chain': self.chain} if burn == 0 and thin == 1: diff --git a/pymc3/blocking.py b/pymc3/blocking.py index 67dc658e2cc..c915e42fd2b 100644 --- a/pymc3/blocking.py +++ b/pymc3/blocking.py @@ -21,6 +21,8 @@ import numpy as np import collections +from ..util import get_var_name + __all__ = ['ArrayOrdering', 'DictToArrayBijection', 'DictToVarBijection'] VarMap = collections.namedtuple('VarMap', 'var, slc, shp, dtyp') @@ -237,7 +239,7 @@ class DictToVarBijection: """ def __init__(self, var, idx, dpoint): - self.var = str(var) + self.var = get_var_name(var) self.idx = idx self.dpt = dpoint diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 5d58c4ccf04..02507f988e4 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -23,7 +23,7 @@ import numpy as np import theano.tensor as tt from theano import function -from ..util import get_repr_for_variable +from ..util import get_repr_for_variable, get_var_name import theano from ..memoize import memoize from ..model import ( @@ -728,7 +728,7 @@ def draw_values(params, point=None, size=None): missing_inputs = set([j for j, p in symbolic_params]) while to_eval or missing_inputs: if to_eval == missing_inputs: - raise ValueError('Cannot resolve inputs for {}'.format([str(params[j]) for j in to_eval])) + raise ValueError('Cannot resolve inputs for {}'.format([get_var_name(params[j]) for j in to_eval])) to_eval = set(missing_inputs) missing_inputs = set() for param_idx in to_eval: diff --git a/pymc3/distributions/posterior_predictive.py b/pymc3/distributions/posterior_predictive.py index 5c7bab07ef9..ff54f94da2d 100644 --- a/pymc3/distributions/posterior_predictive.py +++ b/pymc3/distributions/posterior_predictive.py @@ -42,7 +42,7 @@ ) from ..exceptions import IncorrectArgumentsError from ..vartypes import theano_constant -from ..util import dataset_to_point_dict, chains_and_samples +from ..util import dataset_to_point_dict, chains_and_samples, get_var_name # Failing tests: # test_mixture_random_shape::test_mixture_random_shape @@ -460,7 +460,7 @@ def draw_values(self) -> List[np.ndarray]: if to_eval == missing_inputs: raise ValueError( "Cannot resolve inputs for {}".format( - [str(trace.varnames[j]) for j in to_eval] + [get_var_name(trace.varnames[j]) for j in to_eval] ) ) to_eval = set(missing_inputs) @@ -493,7 +493,7 @@ def draw_values(self) -> List[np.ndarray]: return [self.evaluated[j] for j in params] def init(self) -> None: - """This method carries out the initialization phase of sampling + """This method carries out the initialization phase of sampling from the posterior predictive distribution. Notably it initializes the ``_DrawValuesContext`` bookkeeping object and evaluates the "fast drawable" parts of the model.""" @@ -567,7 +567,7 @@ def draw_value(self, param, trace: Optional[_TraceDict] = None, givens=None): The value or distribution. Constants or shared variables will be converted to an array and returned. Theano variables are evaluated. If `param` is a pymc3 random variable, draw - values from it and return that (as ``np.ndarray``), unless a + values from it and return that (as ``np.ndarray``), unless a value is specified in the ``trace``. trace: pm.MultiTrace, optional A dictionary from pymc3 variable names to samples of their values diff --git a/pymc3/model.py b/pymc3/model.py index cae3e12ca53..f619b4d3083 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -36,7 +36,7 @@ from .theanof import gradient, hessian, inputvars, generator from .vartypes import typefilter, discrete_types, continuous_types, isgenerator from .blocking import DictToArrayBijection, ArrayOrdering -from .util import get_transformed_name +from .util import get_transformed_name, get_var_name from .exceptions import ImputationWarning __all__ = [ @@ -1480,7 +1480,8 @@ def Point(*args, **kwargs): except Exception as e: raise TypeError("can't turn {} and {} into a dict. {}".format(args, kwargs, e)) return dict( - (str(k), np.array(v)) for k, v in d.items() if str(k) in map(str, model.vars) + (get_var_name(k), np.array(v)) for k, v in d.items() + if get_var_name(k) in map(get_var_name, model.vars) ) diff --git a/pymc3/model_graph.py b/pymc3/model_graph.py index dd9f6caaab4..4f67b624dad 100644 --- a/pymc3/model_graph.py +++ b/pymc3/model_graph.py @@ -21,7 +21,7 @@ from theano.compile import SharedVariable from theano.tensor import Tensor -from .util import get_default_varnames +from .util import get_default_varnames, get_var_name from .model import ObservedRV import pymc3 as pm @@ -83,7 +83,7 @@ def _filter_parents(self, var, parents) -> Set[VarName]: if self.transform_map[p] != var.name: keep.add(self.transform_map[p]) else: - raise AssertionError('Do not know what to do with {}'.format(str(p))) + raise AssertionError('Do not know what to do with {}'.format(get_var_name(p))) return keep def get_parents(self, var: Tensor) -> Set[VarName]: diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index fde0a3f6707..eae1ce703c9 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -16,6 +16,7 @@ from ..model import modelcontext from ..theanof import inputvars from ..blocking import ArrayOrdering, DictToArrayBijection +from ..util import get_var_name import numpy as np from numpy.random import uniform from enum import IntEnum, unique @@ -175,7 +176,7 @@ def __init__(self, vars, shared, blocked=True): """ self.vars = vars self.ordering = ArrayOrdering(vars) - self.shared = {str(var): shared for var, shared in shared.items()} + self.shared = {get_var_name(var): shared for var, shared in shared.items()} self.blocked = blocked self.bij = None diff --git a/pymc3/tests/sampler_fixtures.py b/pymc3/tests/sampler_fixtures.py index 94b8741fa8a..967a3bd014b 100644 --- a/pymc3/tests/sampler_fixtures.py +++ b/pymc3/tests/sampler_fixtures.py @@ -13,6 +13,7 @@ # limitations under the License. import pymc3 as pm +from pymc3.util import get_var_name import numpy as np import numpy.testing as npt from scipy import stats @@ -145,7 +146,7 @@ def setup_class(cls): cls.trace = pm.sample(cls.n_samples, tune=cls.tune, step=cls.step, cores=cls.chains) cls.samples = {} for var in cls.model.unobserved_RVs: - cls.samples[str(var)] = cls.trace.get_values(var, burn=cls.burn) + cls.samples[get_var_name(var)] = cls.trace.get_values(var, burn=cls.burn) def test_neff(self): if hasattr(self, 'min_n_eff'): diff --git a/pymc3/tuning/scaling.py b/pymc3/tuning/scaling.py index 37f7079199a..cce7a09d2d5 100644 --- a/pymc3/tuning/scaling.py +++ b/pymc3/tuning/scaling.py @@ -17,6 +17,7 @@ from ..model import modelcontext, Point from ..theanof import hessian_diag, inputvars from ..blocking import DictToArrayBijection, ArrayOrdering +from ..util import get_var_name __all__ = ['find_hessian', 'trace_cov', 'guess_scaling'] @@ -135,7 +136,7 @@ def trace_cov(trace, vars=None, model=None): vars = trace.varnames def flat_t(var): - x = trace[str(var)] + x = trace[get_var_name(var)] return x.reshape((x.shape[0], np.prod(x.shape[1:], dtype=int))) return np.cov(np.concatenate(list(map(flat_t, vars)), 1).T) diff --git a/pymc3/tuning/starting.py b/pymc3/tuning/starting.py index ae81a80c2a9..f9078cdc163 100644 --- a/pymc3/tuning/starting.py +++ b/pymc3/tuning/starting.py @@ -28,7 +28,7 @@ from ..theanof import inputvars import theano.gradient as tg from ..blocking import DictToArrayBijection, ArrayOrdering -from ..util import update_start_vals, get_default_varnames +from ..util import update_start_vals, get_default_varnames, get_var_name import warnings from inspect import getargspec @@ -196,7 +196,8 @@ def nan_to_high(x): def allinmodel(vars, model): notin = [v for v in vars if v not in model.vars] if notin: - raise ValueError("Some variables not in the model: " + str(notin)) + notin = list(map(get_var_name, notin)) + raise ValueError("Some variables not in the model: " + notin) class CostFuncWrapper: diff --git a/pymc3/util.py b/pymc3/util.py index 46e35d595cf..2b9bfa440be 100644 --- a/pymc3/util.py +++ b/pymc3/util.py @@ -20,6 +20,8 @@ import arviz from numpy import asscalar, ndarray +from theano.tensor import TensorVariable + LATEX_ESCAPE_RE = re.compile(r"(%|_|\$|#|&)", re.MULTILINE) @@ -121,7 +123,7 @@ def get_default_varnames(var_iterator, include_transformed): if include_transformed: return list(var_iterator) else: - return [var for var in var_iterator if not is_transformed_name(str(var))] + return [var for var in var_iterator if not is_transformed_name(get_var_name(var))] def get_repr_for_variable(variable, formatting="plain"): @@ -153,6 +155,18 @@ def get_repr_for_variable(variable, formatting="plain"): return name +def get_var_name(var): + """ Get an appropriate, plain variable name for a variable. Necessary + because we override theano.tensor.TensorVariable.__str__ to give informative + string representations to our pymc3.PyMC3Variables, yet we want to use the + plain name as e.g. keys in dicts. + """ + if isinstance(var, TensorVariable): + return super(TensorVariable, var).__str__() + else: + return str(var) + + def update_start_vals(a, b, model): r"""Update a with b, without overwriting existing keys. Values specified for transformed variables on the original scale are also transformed and inserted. From 254b9fb39ae6e7c1f5743edecd1ebbfb511afa2e Mon Sep 17 00:00:00 2001 From: Eelke Spaak Date: Fri, 4 Sep 2020 16:21:04 +0200 Subject: [PATCH 3/7] adding semantically meaningful __str__ --- pymc3/distributions/distribution.py | 3 +++ pymc3/model.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 02507f988e4..e39c109b6bb 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -174,6 +174,9 @@ def _str_repr(self, name=None, dist=None, formatting='plain'): return "{var_name} ~ {distr_name}({params})".format(var_name=name, distr_name=dist._distr_name_for_repr(), params=param_string) + def __str__(self, **kwargs): + return self._str_repr(formatting="plain", **kwargs) + def _repr_latex_(self, **kwargs): """Magic method name for IPython to use for LaTeX formatting.""" return self._str_repr(formatting="latex", **kwargs) diff --git a/pymc3/model.py b/pymc3/model.py index f619b4d3083..bfab8a38f4c 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -80,6 +80,9 @@ def _str_repr(self, name=None, dist=None, formatting="plain"): def _repr_latex_(self, **kwargs): return self._str_repr(formatting="latex", **kwargs) + def __str__(self, **kwargs): + return self._str_repr(formatting="plain", **kwargs) + __latex__ = _repr_latex_ @@ -1368,6 +1371,9 @@ def _str_repr(self, formatting="plain", **kwargs): for n, d in zip(names, distrs)] return "\n".join(rv_reprs) + def __str__(self, **kwargs): + return self._str_repr(formatting="plain", **kwargs) + def _repr_latex_(self, **kwargs): return self._str_repr(formatting="latex", **kwargs) @@ -1873,6 +1879,7 @@ def Deterministic(name, var, model=None, dims=None): model.add_random_variable(var, dims) var._repr_latex_ = functools.partial(_repr_deterministic_rv, var, formatting='latex') var.__latex__ = var._repr_latex_ + var.__str__ = functools.partial(_repr_deterministic_rv, var, formatting='plain') return var From 0f34003d746bbf151aef2453e68f2883e83f2ffd Mon Sep 17 00:00:00 2001 From: Eelke Spaak Date: Fri, 4 Sep 2020 16:22:12 +0200 Subject: [PATCH 4/7] correcting import syntax error --- pymc3/blocking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/blocking.py b/pymc3/blocking.py index c915e42fd2b..03bb23bce17 100644 --- a/pymc3/blocking.py +++ b/pymc3/blocking.py @@ -21,7 +21,7 @@ import numpy as np import collections -from ..util import get_var_name +from .util import get_var_name __all__ = ['ArrayOrdering', 'DictToArrayBijection', 'DictToVarBijection'] From 66faf244b90a62f601e88c11c2eea22a7dbff593 Mon Sep 17 00:00:00 2001 From: Eelke Spaak Date: Fri, 4 Sep 2020 16:40:25 +0200 Subject: [PATCH 5/7] patch type of Deterministic to ensure proper handling of str() --- pymc3/model.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pymc3/model.py b/pymc3/model.py index bfab8a38f4c..68f6b7efd13 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -1879,7 +1879,14 @@ def Deterministic(name, var, model=None, dims=None): model.add_random_variable(var, dims) var._repr_latex_ = functools.partial(_repr_deterministic_rv, var, formatting='latex') var.__latex__ = var._repr_latex_ - var.__str__ = functools.partial(_repr_deterministic_rv, var, formatting='plain') + + # simply assigning var.__str__ is not enough, since str() will default to the class- + # defined __str__ anyway; see https://stackoverflow.com/a/5918210/1692028 + old_type = type(var) + new_type = type(old_type.__name__ + '_pymc3_Deterministic', (old_type,), + {'__str__': functools.partial(_repr_deterministic_rv, var, formatting='plain')}) + var.__class__ = new_type + return var From 99bfbf7fbd1cd08f946bc56d0d432767af35e8c5 Mon Sep 17 00:00:00 2001 From: Eelke Spaak Date: Sat, 5 Sep 2020 10:49:33 +0200 Subject: [PATCH 6/7] adding test for tuning.starting.allinmodel --- pymc3/tests/test_starting.py | 22 ++++++++++++++++++++++ pymc3/tuning/starting.py | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/pymc3/tests/test_starting.py b/pymc3/tests/test_starting.py index bbe47c6c55f..741f4ed73ce 100644 --- a/pymc3/tests/test_starting.py +++ b/pymc3/tests/test_starting.py @@ -19,6 +19,8 @@ from .models import simple_model, non_normal, simple_arbitrary_det from .helpers import select_by_precision +from pytest import raises + def test_accuracy_normal(): _, model, (mu, _) = simple_model() @@ -83,3 +85,23 @@ def test_find_MAP(): close_to(map_est2['mu'], 0, tol) close_to(map_est2['sigma'], 1, tol) + + +def test_allinmodel(): + model1 = Model() + model2 = Model() + with model1: + x1 = Normal('x1', mu=0, sigma=1) + y1 = Normal('y1', mu=0, sigma=1) + with model2: + x2 = Normal('x2', mu=0, sigma=1) + y2 = Normal('y2', mu=0, sigma=1) + + starting.allinmodel([x1, y1], model1) + starting.allinmodel([x1], model1) + with raises(ValueError): + starting.allinmodel([x2, y2], model1) + with raises(ValueError): + starting.allinmodel([x2, y1], model1) + with raises(ValueError): + starting.allinmodel([x2], model1) \ No newline at end of file diff --git a/pymc3/tuning/starting.py b/pymc3/tuning/starting.py index f9078cdc163..db49ef52aef 100644 --- a/pymc3/tuning/starting.py +++ b/pymc3/tuning/starting.py @@ -197,7 +197,7 @@ def allinmodel(vars, model): notin = [v for v in vars if v not in model.vars] if notin: notin = list(map(get_var_name, notin)) - raise ValueError("Some variables not in the model: " + notin) + raise ValueError("Some variables not in the model: " + str(notin)) class CostFuncWrapper: From 238b278fd855d3b8e92b2e465ed563ae3e521b70 Mon Sep 17 00:00:00 2001 From: Eelke Spaak Date: Mon, 7 Sep 2020 09:49:32 +0200 Subject: [PATCH 7/7] more precise exception checks in unit test Co-authored-by: Marco Gorelli --- pymc3/tests/test_starting.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pymc3/tests/test_starting.py b/pymc3/tests/test_starting.py index 741f4ed73ce..6f3e7d61c2a 100644 --- a/pymc3/tests/test_starting.py +++ b/pymc3/tests/test_starting.py @@ -99,9 +99,9 @@ def test_allinmodel(): starting.allinmodel([x1, y1], model1) starting.allinmodel([x1], model1) - with raises(ValueError): + with raises(ValueError, match=r"Some variables not in the model: \['x2', 'y2'\]"): starting.allinmodel([x2, y2], model1) - with raises(ValueError): + with raises(ValueError, match=r"Some variables not in the model: \['x2'\]"): starting.allinmodel([x2, y1], model1) - with raises(ValueError): - starting.allinmodel([x2], model1) \ No newline at end of file + with raises(ValueError, match=r"Some variables not in the model: \['x2'\]"): + starting.allinmodel([x2], model1)