From 46294ff9346e74a6c110cf682aad94e8a2d84a3a Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Thu, 11 Feb 2021 21:25:43 -0600 Subject: [PATCH] Rename Theano to Aesara --- .github/ISSUE_TEMPLATE.md | 2 +- .github/workflows/arviz_compat.yml | 2 +- .github/workflows/jaxtests.yml | 2 +- .github/workflows/pytest.yml | 2 +- .github/workflows/windows.yml | 2 +- README.rst | 8 +- benchmarks/benchmarks/benchmarks.py | 10 +- .../Advanced_usage_of_Theano_in_PyMC3.rst | 54 +- docs/source/Gaussian_Processes.rst | 4 +- docs/source/Probability_Distributions.rst | 2 +- docs/source/PyMC3_and_Theano.rst | 106 ++-- docs/source/about.rst | 8 +- docs/source/api/math.rst | 4 +- docs/source/conf.py | 2 +- docs/source/developer_guide.rst | 130 ++-- docs/source/index.rst | 6 +- pymc3/__init__.py | 10 +- pymc3/{theanof.py => aesaraf.py} | 125 ++-- pymc3/backends/base.py | 4 +- pymc3/blocking.py | 4 +- pymc3/data.py | 46 +- pymc3/distributions/bound.py | 8 +- pymc3/distributions/continuous.py | 596 +++++++++--------- pymc3/distributions/discrete.py | 293 ++++----- pymc3/distributions/dist_math.py | 220 +++---- pymc3/distributions/distribution.py | 123 ++-- pymc3/distributions/mixture.py | 66 +- pymc3/distributions/multivariate.py | 288 ++++----- pymc3/distributions/posterior_predictive.py | 49 +- pymc3/distributions/special.py | 25 +- pymc3/distributions/timeseries.py | 64 +- pymc3/distributions/transforms.py | 102 +-- pymc3/glm/families.py | 10 +- pymc3/glm/linear.py | 10 +- pymc3/glm/utils.py | 22 +- pymc3/gp/cov.py | 167 ++--- pymc3/gp/gp.py | 88 +-- pymc3/gp/mean.py | 12 +- pymc3/gp/util.py | 16 +- pymc3/math.py | 96 +-- pymc3/model.py | 252 ++++---- pymc3/model_graph.py | 24 +- pymc3/ode/ode.py | 29 +- pymc3/ode/utils.py | 22 +- pymc3/parallel_sampling.py | 6 +- pymc3/sampling.py | 2 +- pymc3/sampling_jax.py | 14 +- pymc3/smc/smc.py | 22 +- pymc3/step_methods/arraystep.py | 16 +- pymc3/step_methods/elliptical_slice.py | 8 +- pymc3/step_methods/gibbs.py | 4 +- pymc3/step_methods/hmc/base_hmc.py | 10 +- pymc3/step_methods/hmc/hmc.py | 2 +- pymc3/step_methods/hmc/nuts.py | 4 +- pymc3/step_methods/hmc/quadpotential.py | 26 +- pymc3/step_methods/metropolis.py | 12 +- pymc3/step_methods/mlda.py | 23 +- pymc3/step_methods/pgbart.py | 10 +- pymc3/step_methods/sgmcmc.py | 28 +- pymc3/step_methods/slicer.py | 2 +- pymc3/tests/backend_fixtures.py | 12 +- pymc3/tests/conftest.py | 14 +- pymc3/tests/helpers.py | 18 +- pymc3/tests/models.py | 32 +- pymc3/tests/sampler_fixtures.py | 8 +- .../{test_theanof.py => test_aesaraf.py} | 32 +- pymc3/tests/test_data_container.py | 4 +- pymc3/tests/test_dist_math.py | 102 +-- pymc3/tests/test_distributions.py | 83 +-- pymc3/tests/test_distributions_random.py | 4 +- pymc3/tests/test_distributions_timeseries.py | 2 +- pymc3/tests/test_examples.py | 14 +- pymc3/tests/test_gp.py | 240 +++---- pymc3/tests/test_hmc.py | 2 +- pymc3/tests/test_math.py | 30 +- pymc3/tests/test_minibatches.py | 70 +- pymc3/tests/test_mixture.py | 22 +- pymc3/tests/test_model.py | 48 +- pymc3/tests/test_model_graph.py | 2 +- pymc3/tests/test_model_helpers.py | 47 +- pymc3/tests/test_models_utils.py | 10 +- pymc3/tests/test_ode.py | 14 +- pymc3/tests/test_parallel_sampling.py | 15 +- pymc3/tests/test_posdef_sym.py | 10 +- pymc3/tests/test_posteriors.py | 4 +- pymc3/tests/test_quadpotential.py | 2 +- pymc3/tests/test_random.py | 30 +- pymc3/tests/test_sampling.py | 40 +- pymc3/tests/test_shape_handling.py | 4 +- pymc3/tests/test_shared.py | 6 +- pymc3/tests/test_smc.py | 12 +- 
pymc3/tests/test_special_functions.py | 12 +- pymc3/tests/test_step.py | 86 +-- pymc3/tests/test_transforms.py | 78 +-- pymc3/tests/test_types.py | 18 +- pymc3/tests/test_updates.py | 10 +- pymc3/tests/test_variational_inference.py | 96 +-- pymc3/tuning/scaling.py | 2 +- pymc3/tuning/starting.py | 4 +- pymc3/util.py | 4 +- pymc3/variational/approximations.py | 68 +- pymc3/variational/flows.py | 74 +-- pymc3/variational/inference.py | 6 +- pymc3/variational/operators.py | 8 +- pymc3/variational/opvi.py | 166 ++--- pymc3/variational/stein.py | 16 +- pymc3/variational/test_functions.py | 26 +- pymc3/variational/updates.py | 135 ++-- pymc3/vartypes.py | 7 - requirements.txt | 4 +- scripts/test.sh | 2 +- setup.py | 2 +- 112 files changed, 2562 insertions(+), 2499 deletions(-) rename pymc3/{theanof.py => aesaraf.py} (79%) rename pymc3/tests/{test_theanof.py => test_aesaraf.py} (90%) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index c9dfdbc6bf6..0988bfa4e95 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -18,7 +18,7 @@ If you have questions about a specific use case, or you are not sure whether thi ## Versions and main components * PyMC3 Version: -* Theano Version: +* Aesara Version: * Python Version: * Operating system: * How did you install PyMC3: (conda/pip) diff --git a/.github/workflows/arviz_compat.yml b/.github/workflows/arviz_compat.yml index 2bbf0762054..55405d0624e 100644 --- a/.github/workflows/arviz_compat.yml +++ b/.github/workflows/arviz_compat.yml @@ -19,7 +19,7 @@ jobs: runs-on: ${{ matrix.os }} env: TEST_SUBSET: ${{ matrix.test-subset }} - THEANO_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=native' + AESARA_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=native' defaults: run: shell: bash -l {0} diff --git a/.github/workflows/jaxtests.yml b/.github/workflows/jaxtests.yml index c5b3f23963d..2e2f16b33ad 100644 --- a/.github/workflows/jaxtests.yml +++ b/.github/workflows/jaxtests.yml @@ -17,7 +17,7 @@ jobs: runs-on: ${{ matrix.os }} env: TEST_SUBSET: ${{ matrix.test-subset }} - THEANO_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=native' + AESARA_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=native' defaults: run: shell: bash -l {0} diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index e492c7e705c..7c36909811e 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -62,7 +62,7 @@ jobs: runs-on: ${{ matrix.os }} env: TEST_SUBSET: ${{ matrix.test-subset }} - THEANO_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=native' + AESARA_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=native' defaults: run: shell: bash -l {0} diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 8a81e97b217..b5f34623a32 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -17,7 +17,7 @@ jobs: runs-on: ${{ matrix.os }} env: TEST_SUBSET: ${{ matrix.test-subset }} - THEANO_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=core2' + AESARA_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=core2' defaults: run: shell: bash -l {0} diff --git a/README.rst b/README.rst index cc2c5fba08c..9f4e3b36138 100644 --- a/README.rst +++ b/README.rst @@ -15,13 +15,13 @@ Check out the `getting started guide `__ forum. 
-The future of PyMC3 & Theano +The future of PyMC3 & Aesara ============================ -There have been many questions and uncertainty around the future of PyMC3 since Theano +There have been many questions and uncertainty around the future of PyMC3 since Aesara stopped getting developed by the original authors, and we started experiments with PyMC4. -We are happy to announce that PyMC3 on Theano (which we are `developing further `__) +We are happy to announce that PyMC3 on Aesara (which we are `developing further `__) with a new JAX backend is the future. PyMC4 will not be developed further. See the `full announcement `__ @@ -39,7 +39,7 @@ Features - **Variational inference**: `ADVI `__ for fast approximate posterior estimation as well as mini-batch ADVI for large data sets. -- Relies on `Theano-PyMC `__ which provides: +- Relies on `Aesara `__ which provides: * Computation optimization and dynamic C or JAX compilation * Numpy broadcasting and advanced indexing * Linear algebra operators diff --git a/benchmarks/benchmarks/benchmarks.py b/benchmarks/benchmarks/benchmarks.py index 489befbefc0..eb0e3b008d2 100644 --- a/benchmarks/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks/benchmarks.py @@ -14,11 +14,11 @@ import time import timeit +import aesara +import aesara.tensor as aet import arviz as az import numpy as np import pandas as pd -import theano -import theano.tensor as tt import pymc3 as pm @@ -27,7 +27,7 @@ def glm_hierarchical_model(random_seed=123): """Sample glm hierarchical model to use in benchmarks""" np.random.seed(random_seed) data = pd.read_csv(pm.get_data("radon.csv")) - data["log_radon"] = data["log_radon"].astype(theano.config.floatX) + data["log_radon"] = data["log_radon"].astype(aesara.config.floatX) county_idx = data.county_code.values n_counties = len(data.county.unique()) @@ -61,8 +61,8 @@ def mixture_model(random_seed=1234): mu = pm.Normal("mu", mu=0.0, sd=10.0, shape=w_true.shape) enforce_order = pm.Potential( "enforce_order", - tt.switch(mu[0] - mu[1] <= 0, 0.0, -np.inf) - + tt.switch(mu[1] - mu[2] <= 0, 0.0, -np.inf), + aet.switch(mu[0] - mu[1] <= 0, 0.0, -np.inf) + + aet.switch(mu[1] - mu[2] <= 0, 0.0, -np.inf), ) tau = pm.Gamma("tau", alpha=1.0, beta=1.0, shape=w_true.shape) pm.NormalMixture("x_obs", w=w, mu=mu, tau=tau, observed=x) diff --git a/docs/source/Advanced_usage_of_Theano_in_PyMC3.rst b/docs/source/Advanced_usage_of_Theano_in_PyMC3.rst index ba0df7cad46..2815c99bf40 100644 --- a/docs/source/Advanced_usage_of_Theano_in_PyMC3.rst +++ b/docs/source/Advanced_usage_of_Theano_in_PyMC3.rst @@ -4,20 +4,20 @@ _referenced in docs/source/notebooks/table_of_contents_tutorials.js ================================= -Advanced usage of Theano in PyMC3 +Advanced usage of Aesara in PyMC3 ================================= Using shared variables ====================== -Shared variables allow us to use values in theano functions that are +Shared variables allow us to use values in aesara functions that are not considered an input to the function, but can still be changed later. They are very similar to global variables in may ways:: - a = tt.scalar('a') + a = aet.scalar('a') # Create a new shared variable with initial value of 0.1 - b = theano.shared(0.1) - func = theano.function([a], a * b) + b = aesara.shared(0.1) + func = aesara.function([a], a * b) assert func(2.) == 0.2 b.set_value(10.) 
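The shared-variable behaviour described in the hunk above can be checked end to end with a minimal, self-contained sketch (illustrative only; it assumes a working Aesara installation and mirrors the names used in the documentation excerpt)::

    import aesara
    import aesara.tensor as aet

    a = aet.scalar("a")
    # A shared variable is not an input of the compiled function,
    # but its current value is used whenever the function is called.
    b = aesara.shared(0.1)
    func = aesara.function([a], a * b)

    assert func(2.0) == 0.2   # uses the initial value of b
    b.set_value(10.0)         # update the shared value in place
    assert func(2.0) == 20.0  # the same compiled function sees the new value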
@@ -34,7 +34,7 @@ be time consuming if the number of datasets is large):: true_mu = [np.random.randn() for _ in range(10)] observed_data = [mu + np.random.randn(20) for mu in true_mu] - data = theano.shared(observed_data[0]) + data = aesara.shared(observed_data[0]) with pm.Model() as model: mu = pm.Normal('mu', 0, 10) pm.Normal('y', mu=mu, sigma=1, observed=data) @@ -55,7 +55,7 @@ variable for our observations:: x = np.random.randn(100) y = x > 0 - x_shared = theano.shared(x) + x_shared = aesara.shared(x) with pm.Model() as model: coeff = pm.Normal('x', mu=0, sigma=1) @@ -74,10 +74,10 @@ not possible to change the shape of a shared variable if that would also change the shape of one of the variables. -Writing custom Theano Ops +Writing custom Aesara Ops ========================= -While Theano includes a wide range of operations, there are cases where +While Aesara includes a wide range of operations, there are cases where it makes sense to write your own. But before doing this it is a good idea to think hard if it is actually necessary. Especially if you want to use algorithms that need gradient information — this includes NUTS and @@ -87,22 +87,22 @@ debugging skills for the gradients. Good reasons for defining a custom Op might be the following: -- You require an operation that is not available in Theano and can't - be build up out of existing Theano operations. This could for example +- You require an operation that is not available in Aesara and can't + be build up out of existing Aesara operations. This could for example include models where you need to solve differential equations or integrals, or find a root or minimum of a function that depends on your parameters. - You want to connect your PyMC3 model to some existing external code. - After carefully considering different parametrizations and a lot of profiling your model is still too slow, but you know of a faster - way to compute the gradient than what theano is doing. This faster + way to compute the gradient than what aesara is doing. This faster way might be anything from clever maths to using more hardware. There is nothing stopping anyone from using a cluster via MPI in a custom node, if a part of the gradient computation is slow enough and sufficiently parallelizable to make the cost worth it. We would definitely like to hear about any such examples. -Theano has extensive `documentation, `_ +Aesara has extensive `documentation, `_ about how to write new Ops. @@ -158,7 +158,7 @@ We can now use `scipy.optimize.newton` to find the root:: def mu_from_theta(theta): return optimize.newton(func, 1, fprime=jac, args=(theta,)) -We could wrap `mu_from_theta` with `theano.compile.ops.as_op` and use gradient-free +We could wrap `mu_from_theta` with `aesara.compile.ops.as_op` and use gradient-free methods like Metropolis, but to get NUTS and ADVI working, we also need to define the derivative of `mu_from_theta`. 
We can find this derivative using the implicit function theorem, or equivalently we @@ -181,16 +181,16 @@ We get \frac{d}{d\theta}\mu(\theta) = - \frac{\mu(\theta)^2}{1 + \theta\mu(\theta) + e^{-\theta\mu(\theta)}} -Now, we use this to define a theano op, that also computes the gradient:: +Now, we use this to define a aesara op, that also computes the gradient:: - import theano - import theano.tensor as tt - import theano.tests.unittest_tools - from theano.graph.op import Op + import aesara + import aesara.tensor as aet + import aesara.tests.unittest_tools + from aesara.graph.op import Op class MuFromTheta(Op): - itypes = [tt.dscalar] - otypes = [tt.dscalar] + itypes = [aet.dscalar] + otypes = [aet.dscalar] def perform(self, node, inputs, outputs): theta, = inputs @@ -201,23 +201,23 @@ Now, we use this to define a theano op, that also computes the gradient:: theta, = inputs mu = self(theta) thetamu = theta * mu - return [- g[0] * mu ** 2 / (1 + thetamu + tt.exp(-thetamu))] + return [- g[0] * mu ** 2 / (1 + thetamu + aet.exp(-thetamu))] If you value your sanity, always check that the gradient is ok:: - theano.tests.unittest_tools.verify_grad(MuFromTheta(), [np.array(0.2)]) - theano.tests.unittest_tools.verify_grad(MuFromTheta(), [np.array(1e-5)]) - theano.tests.unittest_tools.verify_grad(MuFromTheta(), [np.array(1e5)]) + aesara.gradient.verify_grad(MuFromTheta(), [np.array(0.2)]) + aesara.gradient.verify_grad(MuFromTheta(), [np.array(1e-5)]) + aesara.gradient.verify_grad(MuFromTheta(), [np.array(1e5)]) We can now define our model using this new op:: import pymc3 as pm - tt_mu_from_theta = MuFromTheta() + aet_mu_from_theta = MuFromTheta() with pm.Model() as model: theta = pm.HalfNormal('theta', sigma=1) - mu = pm.Deterministic('mu', tt_mu_from_theta(theta)) + mu = pm.Deterministic('mu', aet_mu_from_theta(theta)) pm.Normal('y', mu=mu, sigma=0.1, observed=[0.2, 0.21, 0.3]) trace = pm.sample() diff --git a/docs/source/Gaussian_Processes.rst b/docs/source/Gaussian_Processes.rst index 3f4583a80c0..40c987acd7f 100644 --- a/docs/source/Gaussian_Processes.rst +++ b/docs/source/Gaussian_Processes.rst @@ -113,7 +113,7 @@ which allows users to combine covariance functions into new ones, for example: After the covariance function is defined, it is now a function that is evaluated by calling :code:`cov_func(x, x)` (or :code:`mean_func(x)`). Since -PyMC3 is built on top of Theano, it is relatively easy to define and experiment +PyMC3 is built on top of Aesara, it is relatively easy to define and experiment with non-standard covariance and mean functons. For more information check out the tutorial on covariance functions. @@ -158,7 +158,7 @@ other type of random variable. The first argument is the name of the random variable representing the function we are placing the prior over. The second argument is the inputs to the function that the prior is over, :code:`X`. The inputs are usually known and present in the data, but they can -also be PyMC3 random variables. If the inputs are a Theano tensor or a +also be PyMC3 random variables. If the inputs are a Aesara tensor or a PyMC3 random variable, the :code:`shape` needs to be given. Usually at this point, inference is performed on the model. 
The diff --git a/docs/source/Probability_Distributions.rst b/docs/source/Probability_Distributions.rst index 8c49af6eaa5..f15c43ecb91 100644 --- a/docs/source/Probability_Distributions.rst +++ b/docs/source/Probability_Distributions.rst @@ -27,7 +27,7 @@ A variable requires at least a ``name`` argument, and zero or more model paramet p = pm.Beta('p', 1, 1, shape=(3, 3)) -Probability distributions are all subclasses of ``Distribution``, which in turn has two major subclasses: ``Discrete`` and ``Continuous``. In terms of data types, a ``Continuous`` random variable is given whichever floating point type is defined by ``theano.config.floatX``, while ``Discrete`` variables are given ``int16`` types when ``theano.config.floatX`` is ``float32``, and ``int64`` otherwise. +Probability distributions are all subclasses of ``Distribution``, which in turn has two major subclasses: ``Discrete`` and ``Continuous``. In terms of data types, a ``Continuous`` random variable is given whichever floating point type is defined by ``aesara.config.floatX``, while ``Discrete`` variables are given ``int16`` types when ``aesara.config.floatX`` is ``float32``, and ``int64`` otherwise. All distributions in ``pm.distributions`` will have two important methods: ``random()`` and ``logp()`` with the following signatures: diff --git a/docs/source/PyMC3_and_Theano.rst b/docs/source/PyMC3_and_Theano.rst index d2c521ad3f5..c3f0794ee5a 100644 --- a/docs/source/PyMC3_and_Theano.rst +++ b/docs/source/PyMC3_and_Theano.rst @@ -4,24 +4,24 @@ _href from docs/source/index.rst ================ -PyMC3 and Theano +PyMC3 and Aesara ================ -What is Theano +What is Aesara ============== -Theano is a package that allows us to define functions involving array +Aesara is a package that allows us to define functions involving array operations and linear algebra. When we define a PyMC3 model, we implicitly -build up a Theano function from the space of our parameters to +build up a Aesara function from the space of our parameters to their posterior probability density up to a constant factor. We then use symbolic manipulations of this function to also get access to its gradient. -Note that the original developers have stopped maintaining Theano, so -PyMC3 uses `Theano-PyMC `_, -a fork of Theano maintained by the PyMC3 developers. +Note that the original developers have stopped maintaining Aesara, so +PyMC3 uses `Aesara `_, +a fork of Aesara maintained by the PyMC3 developers. -For a thorough introduction to Theano see the -`theano docs `_, +For a thorough introduction to Aesara see the +`aesara docs `_, but for the most part you don't need detailed knowledge about it as long as you are not trying to define new distributions or other extensions of PyMC3. But let's look at a simple example to get a rough @@ -37,14 +37,14 @@ arbitrarily chosen) function First, we need to define symbolic variables for our inputs (this is similar to eg SymPy's `Symbol`):: - import theano - import theano.tensor as tt + import aesara + import aesara.tensor as aet # We don't specify the dtype of our input variables, so it # defaults to using float64 without any special config. - a = tt.scalar('a') - x = tt.vector('x') - # `tt.ivector` creates a symbolic vector of integers. - y = tt.ivector('y') + a = aet.scalar('a') + x = aet.vector('x') + # `aet.ivector` creates a symbolic vector of integers. + y = aet.ivector('y') Next, we use those variables to build up a symbolic representation of the output of our function. 
Note that no computation is actually @@ -52,24 +52,24 @@ being done at this point. We only record what operations we need to do to compute the output:: inner = a * x**3 + y**2 - out = tt.exp(inner).sum() + out = aet.exp(inner).sum() .. note:: - In this example we use `tt.exp` to create a symbolic representation + In this example we use `aet.exp` to create a symbolic representation of the exponential of `inner`. Somewhat surprisingly, it would also have worked if we used `np.exp`. This is because numpy gives objects it operates on a chance to define the results of - operations themselves. Theano variables do this for a large number - of operations. We usually still prefer the theano + operations themselves. Aesara variables do this for a large number + of operations. We usually still prefer the aesara functions instead of the numpy versions, as that makes it clear that we are working with symbolic input instead of plain arrays. -Now we can tell Theano to build a function that does this computation. -With a typical configuration, Theano generates C code, compiles it, +Now we can tell Aesara to build a function that does this computation. +With a typical configuration, Aesara generates C code, compiles it, and creates a python function which wraps the C function:: - func = theano.function([a, x, y], [out]) + func = aesara.function([a, x, y], [out]) We can call this function with actual arrays as many times as we want:: @@ -79,22 +79,22 @@ We can call this function with actual arrays as many times as we want:: out = func(a_val, x_vals, y_vals) -For the most part the symbolic Theano variables can be operated on -like NumPy arrays. Most NumPy functions are available in `theano.tensor` -(which is typically imported as `tt`). A lot of linear algebra operations -can be found in `tt.nlinalg` and `tt.slinalg` (the NumPy and SciPy +For the most part the symbolic Aesara variables can be operated on +like NumPy arrays. Most NumPy functions are available in `aesara.tensor` +(which is typically imported as `aet`). A lot of linear algebra operations +can be found in `aet.nlinalg` and `aet.slinalg` (the NumPy and SciPy operations respectively). Some support for sparse matrices is available -in `theano.sparse`. For a detailed overview of available operations, -see `the theano api docs `_. +in `aesara.sparse`. For a detailed overview of available operations, +see `the aesara api docs `_. -A notable exception where theano variables do *not* behave like +A notable exception where aesara variables do *not* behave like NumPy arrays are operations involving conditional execution. Code like this won't work as expected:: - a = tt.vector('a') + a = aet.vector('a') if (a > 0).all(): - b = tt.sqrt(a) + b = aet.sqrt(a) else: b = -a @@ -104,17 +104,17 @@ and according to the rules for this conversion, things that aren't empty containers or zero are converted to `True`. So the code is equivalent to this:: - a = tt.vector('a') - b = tt.sqrt(a) + a = aet.vector('a') + b = aet.sqrt(a) -To get the desired behaviour, we can use `tt.switch`:: +To get the desired behaviour, we can use `aet.switch`:: - a = tt.vector('a') - b = tt.switch((a > 0).all(), tt.sqrt(a), -a) + a = aet.vector('a') + b = aet.switch((a > 0).all(), aet.sqrt(a), -a) Indexing also works similarly to NumPy:: - a = tt.vector('a') + a = aet.vector('a') # Access the 10th element. This will fail when a function build # from this expression is executed with an array that is too short. 
b = a[10] @@ -122,21 +122,21 @@ Indexing also works similarly to NumPy:: # Extract a subvector b = a[[1, 2, 10]] -Changing elements of an array is possible using `tt.set_subtensor`:: +Changing elements of an array is possible using `aet.set_subtensor`:: - a = tt.vector('a') - b = tt.set_subtensor(a[:10], 1) + a = aet.vector('a') + b = aet.set_subtensor(a[:10], 1) - # is roughly equivalent to this (although theano avoids + # is roughly equivalent to this (although aesara avoids # the copy if `a` isn't used anymore) a = np.random.randn(10) b = a.copy() b[:10] = 1 -How PyMC3 uses Theano +How PyMC3 uses Aesara ===================== -Now that we have a basic understanding of Theano we can look at what +Now that we have a basic understanding of Aesara we can look at what happens if we define a PyMC3 model. Let's look at a simple example:: true_mu = 0.1 @@ -163,7 +163,7 @@ where with the normal likelihood :math:`N(x|μ,σ^2)` To build that function we need to keep track of two things: The parameter space (the *free variables*) and the logp function. For each free variable -we generate a Theano variable. And for each variable (observed or otherwise) +we generate a Aesara variable. And for each variable (observed or otherwise) we add a term to the global logp. In the background something similar to this is happening:: @@ -171,7 +171,7 @@ this is happening:: # in exactly this way! model = pm.Model() - mu = tt.scalar('mu') + mu = aet.scalar('mu') model.add_free_variable(mu) model.add_logp_term(pm.Normal.dist(0, 1).logp(mu)) @@ -181,7 +181,7 @@ So calling `pm.Normal()` modifies the model: It changes the logp function of the model. If the `observed` keyword isn't set it also creates a new free variable. In contrast, `pm.Normal.dist()` doesn't care about the model, it just creates an object that represents the normal distribution. Calling -`logp` on this object creates a theano variable for the logp probability +`logp` on this object creates a aesara variable for the logp probability or log probability density of the distribution, but again without changing the model in any way. @@ -199,27 +199,27 @@ is roughly equivalent to this:: # For illustration only, not real code! model = pm.Model() - mu = tt.scalar('mu') + mu = aet.scalar('mu') model.add_free_variable(mu) model.add_logp_term(pm.Normal.dist(0, 1).logp(mu)) - sd_log__ = tt.scalar('sd_log__') + sd_log__ = aet.scalar('sd_log__') model.add_free_variable(sd_log__) model.add_logp_term(corrected_logp_half_normal(sd_log__)) - sd = tt.exp(sd_log__) + sd = aet.exp(sd_log__) model.add_deterministic_variable(sd) model.add_logp_term(pm.Normal.dist(mu, sd).logp(data)) The return values of the variable constructors are subclasses -of theano variables, so when we define a variable we can use any -theano operation on them:: +of aesara variables, so when we define a variable we can use any +aesara operation on them:: design_matrix = np.array([[...]]) with pm.Model() as model: - # beta is a tt.dvector + # beta is a aet.dvector beta = pm.Normal('beta', 0, 1, shape=len(design_matrix)) - predict = tt.dot(design_matrix, beta) + predict = aet.dot(design_matrix, beta) sd = pm.HalfCauchy('sd', beta=2.5) pm.Normal('y', mu=predict, sigma=sd, observed=data) diff --git a/docs/source/about.rst b/docs/source/about.rst index 20f111caa5a..56cf3355555 100644 --- a/docs/source/about.rst +++ b/docs/source/about.rst @@ -27,7 +27,7 @@ PyMC3 strives to make Bayesian modeling as simple and painless as possible, all * Includes a large suite of well-documented statistical distributions. 
-* Uses Theano as the computational backend, allowing for fast expression evaluation, automatic gradient calculation, and GPU computing. +* Uses Aesara as the computational backend, allowing for fast expression evaluation, automatic gradient calculation, and GPU computing. * Built-in support for Gaussian process modeling. @@ -45,7 +45,7 @@ PyMC3 strives to make Bayesian modeling as simple and painless as possible, all What's new in version 3 ======================= -The third major version of PyMC has benefitted from being re-written from scratch. Substantial improvements in the user interface and performance have resulted from this. While PyMC2 relied on Fortran extensions (via f2py) for most of the computational heavy-lifting, PyMC3 leverages Theano, a library from the Montréal Institute for Learning Algorithms (MILA), for array-based expression evaluation, to perform its computation. What this provides, above all else, is fast automatic differentiation, which is at the heart of the gradient-based sampling and optimization methods currently providing inference for probabilistic programming. +The third major version of PyMC has benefitted from being re-written from scratch. Substantial improvements in the user interface and performance have resulted from this. While PyMC2 relied on Fortran extensions (via f2py) for most of the computational heavy-lifting, PyMC3 leverages Aesara, a library from the Montréal Institute for Learning Algorithms (MILA), for array-based expression evaluation, to perform its computation. What this provides, above all else, is fast automatic differentiation, which is at the heart of the gradient-based sampling and optimization methods currently providing inference for probabilistic programming. Major changes from previous versions: @@ -65,7 +65,7 @@ Major changes from previous versions: * Much more! -While the addition of Theano adds a level of complexity to the development of PyMC, fundamentally altering how the underlying computation is performed, we have worked hard to maintain the elegant simplicity of the original PyMC model specification syntax. +While the addition of Aesara adds a level of complexity to the development of PyMC, fundamentally altering how the underlying computation is performed, we have worked hard to maintain the elegant simplicity of the original PyMC model specification syntax. History @@ -90,7 +90,7 @@ plotting, csv table output, improved imputation syntax, and posterior predictive check plots. PyMC 2.3 was released on October 31, 2013. It included Python 3 compatibility, improved summary plots, and some important bug fixes. -In 2011, John Salvatier began thinking about implementing gradient-based MCMC samplers, and developed the ``mcex`` package to experiment with his ideas. The following year, John was invited by the team to re-engineer PyMC to accomodate Hamiltonian Monte Carlo sampling. This led to the adoption of Theano as the computational back end, and marked the beginning of PyMC3's development. The first alpha version of PyMC3 was released in June 2015. Over the following 2 years, the core development team grew to 12 members, and the first release, PyMC3 3.0, was launched in January 2017. +In 2011, John Salvatier began thinking about implementing gradient-based MCMC samplers, and developed the ``mcex`` package to experiment with his ideas. The following year, John was invited by the team to re-engineer PyMC to accomodate Hamiltonian Monte Carlo sampling. 
This led to the adoption of Aesara as the computational back end, and marked the beginning of PyMC3's development. The first alpha version of PyMC3 was released in June 2015. Over the following 2 years, the core development team grew to 12 members, and the first release, PyMC3 3.0, was launched in January 2017. .. _support: diff --git a/docs/source/api/math.rst b/docs/source/api/math.rst index c548d132163..8842a77c334 100644 --- a/docs/source/api/math.rst +++ b/docs/source/api/math.rst @@ -3,8 +3,8 @@ Math ==== This submodule contains various mathematical functions. Most of them are imported directly -from theano.tensor (see there for more details). Doing any kind of math with PyMC3 random -variables, or defining custom likelihoods or priors requires you to use these theano +from aesara.tensor (see there for more details). Doing any kind of math with PyMC3 random +variables, or defining custom likelihoods or priors requires you to use these aesara expressions rather than NumPy or Python code. .. currentmodule:: pymc3.math diff --git a/docs/source/conf.py b/docs/source/conf.py index 9b23a323af7..0b7d3a1cc2c 100755 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -158,7 +158,7 @@ ("About PyMC3", "about"), ], # "fixed_sidebar": "false", - # "description": "Probabilistic Programming in Python: Bayesian Modeling and Probabilistic Machine Learning with Theano" + # "description": "Probabilistic Programming in Python: Bayesian Modeling and Probabilistic Machine Learning with Aesara" } # Add any paths that contain custom themes here, relative to this directory. diff --git a/docs/source/developer_guide.rst b/docs/source/developer_guide.rst index 64463cd5b41..4f7b6e45248 100644 --- a/docs/source/developer_guide.rst +++ b/docs/source/developer_guide.rst @@ -9,7 +9,7 @@ PyMC3 Developer Guide `PyMC3 `__ is a Python package for Bayesian statistical modeling built on top of -`Theano `__. This +`Aesara `__. This document aims to explain the design and implementation of probabilistic programming in PyMC3, with comparisons to other PPL like TensorFlow Probability (TFP) and Pyro in mind. A user-facing API @@ -110,7 +110,7 @@ elementary. As long as you have a well-behaved density function, we can use it in the model to build the model log-likelihood function. Random number generation is great to have, but sometimes there might not be efficient random number generator for some densities. Since a function -is all you need, you can wrap almost any Theano function into a +is all you need, you can wrap almost any Aesara function into a distribution using ``pm.DensityDist`` https://docs.pymc.io/Probability\_Distributions.html#custom-distributions @@ -147,7 +147,7 @@ density function `__ .. math:: X:=f(x) = \frac{1}{\sigma \sqrt{2 \pi}} \exp^{- 0.5 (\frac{x - \mu}{\sigma})^2}\vert_{\mu = 0, \sigma=1} = \frac{1}{\sqrt{2 \pi}} \exp^{- 0.5 x^2} -Within a model context, RVs are essentially Theano tensors (more on that +Within a model context, RVs are essentially Aesara tensors (more on that below). This is different than TFP and pyro, where you need to be more explicit about the conversion. For example: @@ -156,7 +156,7 @@ explicit about the conversion. For example: .. code:: python with pm.Model() as model: - z = pm.Normal('z', mu=0., sigma=5.) # ==> pymc3.model.FreeRV, or theano.tensor with logp + z = pm.Normal('z', mu=0., sigma=5.) # ==> pymc3.model.FreeRV, or aesara.tensor with logp x = pm.Normal('x', mu=z, sigma=1., observed=5.) 
# ==> pymc3.model.ObservedRV, also has logp properties x.logp({'z': 2.5}) # ==> -4.0439386 model.logp({'z': 2.5}) # ==> -6.6973152 @@ -194,7 +194,7 @@ Random method and logp method, very different behind the curtain ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In short, the random method is scipy/numpy-based, and the logp method is -Theano-based. The ``logp`` method is straightforward - it is a Theano +Aesara-based. The ``logp`` method is straightforward - it is a Aesara function within each distribution. It has the following signature: .. code:: python @@ -202,20 +202,20 @@ function within each distribution. It has the following signature: def logp(self, value): # GET PARAMETERS param1, param2, ... = self.params1, self.params2, ... - # EVALUATE LOG-LIKELIHOOD FUNCTION, all inputs are (or array that could be convert to) theano tensor + # EVALUATE LOG-LIKELIHOOD FUNCTION, all inputs are (or array that could be convert to) aesara tensor total_log_prob = f(param1, param2, ..., value) return total_log_prob -In the ``logp`` method, parameters and values are either Theano tensors, +In the ``logp`` method, parameters and values are either Aesara tensors, or could be converted to tensors. It is rather convenient as the evaluation of logp is represented as a tensor (``RV.logpt``), and when we linked different ``logp`` together (e.g., summing all ``RVs.logpt`` -to get the model totall logp) the dependence is taken care of by Theano +to get the model totall logp) the dependence is taken care of by Aesara when the graph is built and compiled. Again, since the compiled function depends on the nodes that already in the graph, whenever you want to generate a new function that takes new input tensors you either need to regenerate the graph with the appropriate dependencies, or replace the node by editing the existing graph. -In PyMC3 we use the second approach by using ``theano.clone()`` when it is needed. +In PyMC3 we use the second approach by using ``aesara.clone_replace()`` when it is needed. As explained above, distribution in a ``pm.Model()`` context automatically turn into a tensor with distribution property (pymc3 @@ -225,7 +225,7 @@ itself `__ -(representated as a tensor also) property to a Theano tensor (thus +(representated as a tensor also) property to a Aesara tensor (thus making it a random variable). For a ``TransformedRV``, it transforms the distribution into a ``TransformedDistribution``, and then ``model.Var`` is called again to added the RV associated with the @@ -494,10 +494,10 @@ the model logp), and also deterministic transformation (as bookkeeping): named\_vars, free\_RVs, observed\_RVs, deterministics, potentials, missing\_values. The model context then computes some simple model properties, builds a bijection mapping that transforms between -dictionary and numpy/Theano ndarray, thus allowing the ``logp``/``dlogp`` functions +dictionary and numpy/Aesara ndarray, thus allowing the ``logp``/``dlogp`` functions to have two equivalent versions: one takes a ``dict`` as input and the other takes an ``ndarray`` as input. More importantly, a ``pm.Model()`` contains methods -to compile Theano functions that take Random Variables (that are also +to compile Aesara functions that take Random Variables (that are also initialised within the same model) as input, for example: .. 
code:: python @@ -559,20 +559,20 @@ sum them together to get the model logp: @property def logpt(self): - """Theano scalar of log-probability of the model""" + """Aesara scalar of log-probability of the model""" with self: factors = [var.logpt for var in self.basic_RVs] + self.potentials - logp = tt.sum([tt.sum(factor) for factor in factors]) + logp = aet.sum([aet.sum(factor) for factor in factors]) ... return logp -which returns a Theano tensor that its value depends on the free -parameters in the model (i.e., its parent nodes from the Theano +which returns a Aesara tensor that its value depends on the free +parameters in the model (i.e., its parent nodes from the Aesara graph).You can evaluate or compile into a python callable (that you can pass numpy as input args). Note that the logp tensor depends on its -input in the Theano graph, thus you cannot pass new tensor to generate a +input in the Aesara graph, thus you cannot pass new tensor to generate a logp function. For similar reason, in PyMC3 we do graph copying a lot -using theano.clone to replace the inputs to a tensor. +using aesara.clone_replace to replace the inputs to a tensor. .. code:: python @@ -587,7 +587,7 @@ using theano.clone to replace the inputs to a tensor. .. code:: python - type(m.logpt) # ==> theano.tensor.var.TensorVariable + type(m.logpt) # ==> aesara.tensor.var.TensorVariable .. code:: python @@ -620,14 +620,14 @@ logp/dlogp function: return ValueGradFunction(self.logpt, grad_vars, extra_vars, **kwargs) ``ValueGradFunction`` is a callable class which isolates part of the -Theano graph to compile additional Theano functions. PyMC3 relies on -``theano.clone`` to copy the ``model.logpt`` and replace its input. It +Aesara graph to compile additional Aesara functions. PyMC3 relies on +``aesara.clone_replace`` to copy the ``model.logpt`` and replace its input. It does not edit or rewrite the graph directly. .. code:: python class ValueGradFunction: - """Create a theano function that computes a value and its gradient. + """Create a aesara function that computes a value and its gradient. ... """ def __init__(self, logpt, grad_vars, extra_vars=[], dtype=None, @@ -646,31 +646,31 @@ does not edit or rewrite the graph directly. # Extra vars are a subset of free_RVs that are not input to the compiled function. # But nonetheless logpt depends on these RVs. - # This is set up as a dict of theano.shared tensors, but givens (a list of - # tuple(free_RVs, theano.shared)) is the actual list that goes into the theano function + # This is set up as a dict of aesara.shared tensors, but givens (a list of + # tuple(free_RVs, aesara.shared)) is the actual list that goes into the aesara function givens = [] self._extra_vars_shared = {} for var in extra_vars: - shared = theano.shared(var.tag.test_value, var.name + '_shared__') + shared = aesara.shared(var.tag.test_value, var.name + '_shared__') self._extra_vars_shared[var.name] = shared givens.append((var, shared)) # See the implementation below. 
Basically, it clones the logpt and replaces its - # input with a *single* 1d theano tensor + # input with a *single* 1d aesara tensor self._vars_joined, self._logpt_joined = self._build_joined( self._logpt, grad_vars, self._ordering.vmap) - grad = tt.grad(self._logpt_joined, self._vars_joined) + grad = aet.grad(self._logpt_joined, self._vars_joined) grad.name = '__grad' inputs = [self._vars_joined] - self._theano_function = theano.function( + self._aesara_function = aesara.function( inputs, [self._logpt_joined, grad], givens=givens, **kwargs) def _build_joined(self, logpt, args, vmap): - args_joined = tt.vector('__args_joined') + args_joined = aet.vector('__args_joined') args_joined.tag.test_value = np.zeros(self.size, dtype=self.dtype) joined_slices = {} @@ -680,12 +680,12 @@ does not edit or rewrite the graph directly. joined_slices[vmap.var] = sliced replace = {var: joined_slices[var.name] for var in args} - return args_joined, theano.clone(logpt, replace=replace) + return args_joined, aesara.clone_replace(logpt, replace=replace) def __call__(self, array, grad_out=None, extra_vars=None): ... - logp, dlogp = self._theano_function(array) + logp, dlogp = self._aesara_function(array) return logp, dlogp @@ -773,12 +773,12 @@ gradient easily. Here is a taste of how it works in action: So why is this necessary? One can imagine that we just compile one logp function, and do bookkeeping ourselves. For example, we can build the -logp function in Theano directly: +logp function in Aesara directly: .. code:: python - import theano - func = theano.function(m.free_RVs, m.logpt) + import aesara + func = aesara.function(m.free_RVs, m.logpt) func(*inputlist) @@ -790,8 +790,8 @@ logp function in Theano directly: .. code:: python - logpt_grad = theano.grad(m.logpt, m.free_RVs) - func_d = theano.function(m.free_RVs, logpt_grad) + logpt_grad = aesara.grad(m.logpt, m.free_RVs) + func_d = aesara.function(m.free_RVs, logpt_grad) func_d(*inputlist) @@ -808,12 +808,12 @@ Similarly, build a conditional logp: .. code:: python - shared = theano.shared(inputlist[1]) - func2 = theano.function([m.free_RVs[0]], m.logpt, givens=[(m.free_RVs[1], shared)]) + shared = aesara.shared(inputlist[1]) + func2 = aesara.function([m.free_RVs[0]], m.logpt, givens=[(m.free_RVs[1], shared)]) print(func2(inputlist[0])) - logpt_grad2 = theano.grad(m.logpt, m.free_RVs[0]) - func_d2 = theano.function([m.free_RVs[0]], logpt_grad2, givens=[(m.free_RVs[1], shared)]) + logpt_grad2 = aesara.grad(m.logpt, m.free_RVs[0]) + func_d2 = aesara.function([m.free_RVs[0]], logpt_grad2, givens=[(m.free_RVs[1], shared)]) print(func_d2(inputlist[0])) @@ -830,7 +830,7 @@ everything into a single function: .. code:: python - func_logp_and_grad = theano.function(m.free_RVs, [m.logpt, logpt_grad]) # ==> ERROR + func_logp_and_grad = aesara.function(m.free_RVs, [m.logpt, logpt_grad]) # ==> ERROR We want to have a function that return the evaluation and its gradient @@ -838,23 +838,23 @@ re each input: ``value, grad = f(x)``, but the naive implementation does not work. We can of course wrap 2 functions - one for logp one for dlogp - and output a list. But that would mean we need to call 2 functions. In addition, when we write code using python logic to do bookkeeping when -we build our conditional logp. Using ``theano.clone``, we always have -the input to the Theano function being a 1d vector (instead of a list of +we build our conditional logp. 
Using ``aesara.clone_replace``, we always have +the input to the Aesara function being a 1d vector (instead of a list of RV that each can have very different shape), thus it is very easy to do matrix operation like rotation etc. Notes ~~~~~ -| The current setup is quite powerful, as the Theano compiled function +| The current setup is quite powerful, as the Aesara compiled function is fairly fast to compile and to call. Also, when we are repeatedly calling a conditional logp function, external RV only need to reset once. However, there are still significant overheads when we are - passing values between Theano graph and numpy. That is the reason we + passing values between Aesara graph and numpy. That is the reason we often see no advantage in using GPU, because the data is copying between GPU and CPU at each function call - and for a small model, the result is a slower inference under GPU than CPU. -| Also, ``theano.clone`` is too convenient (pymc internal joke is that +| Also, ``aesara.clone_replace`` is too convenient (pymc internal joke is that it is like a drug - very addictive). If all the operation happens in the graph (including the conditioning and setting value), I see no need to isolate part of the graph (via graph copying or graph @@ -927,10 +927,10 @@ Dynamic HMC ^^^^^^^^^^^ We love NUTS, or to be more precise Dynamic HMC with complex stopping -rules. This part is actually all done outside of Theano, for NUTS, it +rules. This part is actually all done outside of Aesara, for NUTS, it includes: the leapfrog, dual averaging, tunning of mass matrix and step size, the tree building, sampler related statistics like divergence and -energy checking. We actually have a Theano version of HMC, but it has never +energy checking. We actually have a Aesara version of HMC, but it has never been used, and has been removed from the main repository. It can still be found in the `git history `__, @@ -940,7 +940,7 @@ Variational Inference (VI) ~~~~~~~~~~~~~~~~~~~~~~~~~~ The design of the VI module takes a different approach than -MCMC - it has a functional design, and everything is done within Theano +MCMC - it has a functional design, and everything is done within Aesara (i.e., Optimization and building the variational objective). The base class of variational inference is `pymc3.variational.Inference `__, @@ -1006,7 +1006,7 @@ skip this for now and only consider ``SingleGroupApproximation`` like `variational/opvi `__, strip away the normalizing term, ``datalogp`` and ``varlogp`` are expectation of the variational free\_RVs and data logp - we clone the -datalogp and varlogp from the model, replace its input with Theano +datalogp and varlogp from the model, replace its input with Aesara tensor that `samples from the variational posterior `__. For ADVI, these samples are from `a @@ -1021,7 +1021,7 @@ straightforward to evaluate `__ - `Laplace approximation in pymc3.ipynb `__ - Connecting it to other library within a model - - `Using “black box” likelihood function by creating a custom Theano Op `__ + - `Using “black box” likelihood function by creating a custom Aesara Op `__ - Using emcee - Using other library for inference - Connecting to Julia for solving ODE (with gradient for solution that can be used in NUTS) @@ -1115,14 +1115,14 @@ Random methods in numpy There is a lot of complex logic for sampling from random variables, and because it is all in Python, we can't transform a sampling graph -further. Unfortunately, Theano does not have code to sample from various +further. 
Unfortunately, Aesara does not have code to sample from various distributions and we didn't want to write that our own. Samplers are in Python ~~~~~~~~~~~~~~~~~~~~~~ While having the samplers be written in Python allows for a lot of -flexibility and intuitive for experiment (writing e.g. NUTS in Theano is +flexibility and intuitive for experiment (writing e.g. NUTS in Aesara is also very difficult), it comes at a performance penalty and makes sampling on the GPU very inefficient because memory needs to be copied for every logp evaluation. diff --git a/docs/source/index.rst b/docs/source/index.rst index 053e1962f14..dd4effa4a47 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -97,10 +97,10 @@ - +
-PyMC3 and Theano
-Theano is the deep-learning library PyMC3 uses to construct probability distributions and then access the gradient in order to implement cutting edge inference algorithms. More advanced models may be built by understanding this layer.
+PyMC3 and Aesara
+Aesara is the library PyMC3 uses to construct probability distributions and then access the gradient in order to implement cutting edge inference algorithms. More advanced models may be built by understanding this layer.
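As a minimal illustration of the "access the gradient" step mentioned in this blurb (a hedged sketch, not taken from the patch; it only assumes that Aesara is importable)::

    import aesara
    import aesara.tensor as aet
    from aesara.gradient import grad

    x = aet.dscalar("x")
    y = x ** 2                       # a simple symbolic expression
    dy_dx = grad(y, x)               # symbolic derivative: 2 * x
    f = aesara.function([x], dy_dx)
    assert f(3.0) == 6.0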
diff --git a/pymc3/__init__.py b/pymc3/__init__.py index 1e51deeb647..8f33feef09d 100644 --- a/pymc3/__init__.py +++ b/pymc3/__init__.py @@ -29,16 +29,17 @@ def __set_compiler_flags(): - # Workarounds for Theano compiler problems on various platforms - import theano + # Workarounds for Aesara compiler problems on various platforms + import aesara - current = theano.config.gcc__cxxflags - theano.config.gcc__cxxflags = f"{current} -Wno-c++11-narrowing" + current = aesara.config.gcc__cxxflags + aesara.config.gcc__cxxflags = f"{current} -Wno-c++11-narrowing" __set_compiler_flags() from pymc3 import gp, ode, sampling +from pymc3.aesaraf import * from pymc3.backends import load_trace, save_trace from pymc3.backends.tracetab import * from pymc3.blocking import * @@ -63,7 +64,6 @@ def __set_compiler_flags(): from pymc3.smc import * from pymc3.step_methods import * from pymc3.tests import test -from pymc3.theanof import * from pymc3.tuning import * from pymc3.variational import * from pymc3.vartypes import * diff --git a/pymc3/theanof.py b/pymc3/aesaraf.py similarity index 79% rename from pymc3/theanof.py rename to pymc3/aesaraf.py index c40311da6e8..87b370e55fd 100644 --- a/pymc3/theanof.py +++ b/pymc3/aesaraf.py @@ -12,14 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import aesara import numpy as np -import theano -from theano import scalar -from theano import tensor as tt -from theano.graph.basic import Apply, graph_inputs -from theano.graph.op import Op -from theano.sandbox.rng_mrg import MRG_RandomStream as RandomStream +from aesara import scalar +from aesara import tensor as aet +from aesara.gradient import grad +from aesara.graph.basic import Apply, graph_inputs +from aesara.graph.op import Op +from aesara.sandbox.rng_mrg import MRG_RandomStream as RandomStream +from aesara.tensor.elemwise import Elemwise +from aesara.tensor.var import TensorVariable from pymc3.blocking import ArrayOrdering from pymc3.data import GeneratorAdapter @@ -39,34 +42,34 @@ "join_nonshared_inputs", "make_shared_replacements", "generator", - "set_tt_rng", - "tt_rng", + "set_aet_rng", + "aet_rng", "take_along_axis", ] def inputvars(a): """ - Get the inputs into a theano variables + Get the inputs into a aesara variables Parameters ---------- - a: theano variable + a: aesara variable Returns ------- r: list of tensor variables that are inputs """ - return [v for v in graph_inputs(makeiter(a)) if isinstance(v, tt.TensorVariable)] + return [v for v in graph_inputs(makeiter(a)) if isinstance(v, TensorVariable)] def cont_inputs(f): """ - Get the continuous inputs into a theano variables + Get the continuous inputs into a aesara variables Parameters ---------- - a: theano variable + a: aesara variable Returns ------- @@ -77,13 +80,13 @@ def cont_inputs(f): def floatX(X): """ - Convert a theano tensor or numpy array to theano.config.floatX type. + Convert a aesara tensor or numpy array to aesara.config.floatX type. """ try: - return X.astype(theano.config.floatX) + return X.astype(aesara.config.floatX) except AttributeError: # Scalar passed - return np.asarray(X, dtype=theano.config.floatX) + return np.asarray(X, dtype=aesara.config.floatX) _conversion_map = {"float64": "int32", "float32": "int16", "float16": "int8", "float8": "int8"} @@ -91,9 +94,9 @@ def floatX(X): def intX(X): """ - Convert a theano tensor or numpy array to theano.tensor.int32 type. + Convert a aesara tensor or numpy array to aesara.tensor.int32 type. 
""" - intX = _conversion_map[theano.config.floatX] + intX = _conversion_map[aesara.config.floatX] try: return X.astype(intX) except AttributeError: @@ -111,16 +114,16 @@ def smartfloatX(x): """ -Theano derivative functions +Aesara derivative functions """ def gradient1(f, v): """flat gradient of f wrt v""" - return tt.flatten(tt.grad(f, v, disconnected_inputs="warn")) + return aet.flatten(grad(f, v, disconnected_inputs="warn")) -empty_gradient = tt.zeros(0, dtype="float32") +empty_gradient = aet.zeros(0, dtype="float32") def gradient(f, vars=None): @@ -128,20 +131,20 @@ def gradient(f, vars=None): vars = cont_inputs(f) if vars: - return tt.concatenate([gradient1(f, v) for v in vars], axis=0) + return aet.concatenate([gradient1(f, v) for v in vars], axis=0) else: return empty_gradient def jacobian1(f, v): """jacobian of f wrt v""" - f = tt.flatten(f) - idx = tt.arange(f.shape[0], dtype="int32") + f = aet.flatten(f) + idx = aet.arange(f.shape[0], dtype="int32") def grad_i(i): return gradient1(f[i], v) - return theano.map(grad_i, idx)[0] + return aesara.map(grad_i, idx)[0] def jacobian(f, vars=None): @@ -149,43 +152,43 @@ def jacobian(f, vars=None): vars = cont_inputs(f) if vars: - return tt.concatenate([jacobian1(f, v) for v in vars], axis=1) + return aet.concatenate([jacobian1(f, v) for v in vars], axis=1) else: return empty_gradient def jacobian_diag(f, x): - idx = tt.arange(f.shape[0], dtype="int32") + idx = aet.arange(f.shape[0], dtype="int32") def grad_ii(i): - return theano.grad(f[i], x)[i] + return grad(f[i], x)[i] - return theano.scan(grad_ii, sequences=[idx], n_steps=f.shape[0], name="jacobian_diag")[0] + return aesara.scan(grad_ii, sequences=[idx], n_steps=f.shape[0], name="jacobian_diag")[0] -@theano.config.change_flags(compute_test_value="ignore") +@aesara.config.change_flags(compute_test_value="ignore") def hessian(f, vars=None): return -jacobian(gradient(f, vars), vars) -@theano.config.change_flags(compute_test_value="ignore") +@aesara.config.change_flags(compute_test_value="ignore") def hessian_diag1(f, v): g = gradient1(f, v) - idx = tt.arange(g.shape[0], dtype="int32") + idx = aet.arange(g.shape[0], dtype="int32") def hess_ii(i): return gradient1(g[i], v)[i] - return theano.map(hess_ii, idx)[0] + return aesara.map(hess_ii, idx)[0] -@theano.config.change_flags(compute_test_value="ignore") +@aesara.config.change_flags(compute_test_value="ignore") def hessian_diag(f, vars=None): if vars is None: vars = cont_inputs(f) if vars: - return -tt.concatenate([hessian_diag1(f, v) for v in vars], axis=0) + return -aet.concatenate([hessian_diag1(f, v) for v in vars], axis=0) else: return empty_gradient @@ -235,16 +238,16 @@ def make_shared_replacements(vars, model): Dict of variable -> new shared variable """ othervars = set(model.vars) - set(vars) - return {var: theano.shared(var.tag.test_value, var.name + "_shared") for var in othervars} + return {var: aesara.shared(var.tag.test_value, var.name + "_shared") for var in othervars} def join_nonshared_inputs(xs, vars, shared, make_shared=False): """ - Takes a list of theano Variables and joins their non shared inputs into a single input. + Takes a list of aesara Variables and joins their non shared inputs into a single input. 
Parameters ---------- - xs: list of theano tensors + xs: list of aesara tensors vars: list of variables to join Returns @@ -256,13 +259,13 @@ def join_nonshared_inputs(xs, vars, shared, make_shared=False): if not vars: raise ValueError("Empty list of variables.") - joined = tt.concatenate([var.ravel() for var in vars]) + joined = aet.concatenate([var.ravel() for var in vars]) if not make_shared: tensor_type = joined.type inarray = tensor_type("inarray") else: - inarray = theano.shared(joined.tag.test_value, "inarray") + inarray = aesara.shared(joined.tag.test_value, "inarray") ordering = ArrayOrdering(vars) inarray.tag.test_value = joined.tag.test_value @@ -275,7 +278,7 @@ def join_nonshared_inputs(xs, vars, shared, make_shared=False): replace.update(shared) - xs_special = [theano.clone(x, replace, strict=False) for x in xs] + xs_special = [aesara.clone_replace(x, replace, strict=False) for x in xs] return xs_special, inarray @@ -303,16 +306,16 @@ def __call__(self, input): input: TensorVariable """ (oldinput,) = inputvars(self.tensor) - return theano.clone(self.tensor, {oldinput: input}, strict=False) + return aesara.clone_replace(self.tensor, {oldinput: input}, strict=False) scalar_identity = IdentityOp(scalar.upgrade_to_float, name="scalar_identity") -identity = tt.Elemwise(scalar_identity, name="identity") +identity = Elemwise(scalar_identity, name="identity") class GeneratorOp(Op): """ - Generator Op is designed for storing python generators inside theano graph. + Generator Op is designed for storing python generators inside aesara graph. __call__ creates TensorVariable It has 2 new methods @@ -351,7 +354,7 @@ def perform(self, node, inputs, output_storage, params=None): def do_constant_folding(self, fgraph, node): return False - __call__ = theano.config.change_flags(compute_test_value="off")(Op.__call__) + __call__ = aesara.config.change_flags(compute_test_value="off")(Op.__call__) def set_gen(self, gen): if not isinstance(gen, GeneratorAdapter): @@ -394,10 +397,10 @@ def generator(gen, default=None): return GeneratorOp(gen, default)() -_tt_rng = RandomStream() +_aet_rng = RandomStream() -def tt_rng(random_seed=None): +def aet_rng(random_seed=None): """ Get the package-level random number generator or new with specified seed. @@ -405,36 +408,36 @@ def tt_rng(random_seed=None): ---------- random_seed: int If not None - returns *new* theano random generator without replacing package global one + returns *new* aesara random generator without replacing package global one Returns ------- - `theano.tensor.random.utils.RandomStream` instance - `theano.tensor.random.utils.RandomStream` - instance passed to the most recent call of `set_tt_rng` + `aesara.tensor.random.utils.RandomStream` instance + `aesara.tensor.random.utils.RandomStream` + instance passed to the most recent call of `set_aet_rng` """ if random_seed is None: - return _tt_rng + return _aet_rng else: ret = RandomStream(random_seed) return ret -def set_tt_rng(new_rng): +def set_aet_rng(new_rng): """ Set the package-level random number generator. Parameters ---------- - new_rng: `theano.tensor.random.utils.RandomStream` instance + new_rng: `aesara.tensor.random.utils.RandomStream` instance The random number generator to use. 
""" # pylint: disable=global-statement - global _tt_rng + global _aet_rng # pylint: enable=global-statement if isinstance(new_rng, int): new_rng = RandomStream(new_rng) - _tt_rng = new_rng + _aet_rng = new_rng def floatX_array(x): @@ -443,7 +446,7 @@ def floatX_array(x): def ix_(*args): """ - Theano np.ix_ analog + Aesara np.ix_ analog See numpy.lib.index_tricks.ix_ for reference """ @@ -452,7 +455,7 @@ def ix_(*args): for k, new in enumerate(args): if new is None: out.append(slice(None)) - new = tt.as_tensor(new) + new = aet.as_tensor(new) if new.ndim != 1: raise ValueError("Cross index must be 1 dimensional") new = new.reshape((1,) * k + (new.size,) + (1,) * (nd - k - 1)) @@ -482,7 +485,7 @@ def _make_along_axis_idx(arr_shape, indices, axis): fancy_index.append(indices) else: ind_shape = shape_ones[:dim] + (-1,) + shape_ones[dim + 1 :] - fancy_index.append(tt.arange(n).reshape(ind_shape)) + fancy_index.append(aet.arange(n).reshape(ind_shape)) return tuple(fancy_index) @@ -497,8 +500,8 @@ def take_along_axis(arr, indices, axis=0): Functions returning an index along an axis, like argsort and argpartition, produce suitable indices for this function. """ - arr = tt.as_tensor_variable(arr) - indices = tt.as_tensor_variable(indices) + arr = aet.as_tensor_variable(arr) + indices = aet.as_tensor_variable(indices) # normalize inputs if axis is None: arr = arr.flatten() diff --git a/pymc3/backends/base.py b/pymc3/backends/base.py index 8b52c3e09c1..37631b656c7 100644 --- a/pymc3/backends/base.py +++ b/pymc3/backends/base.py @@ -23,8 +23,8 @@ from abc import ABC from typing import List +import aesara.tensor as aet import numpy as np -import theano.tensor as tt from pymc3.backends.report import SamplerReport, merge_reports from pymc3.model import modelcontext @@ -434,7 +434,7 @@ def add_values(self, vals, overwrite=False) -> None: for idx, chain in enumerate(chains.values()): if new_var: - dummy = tt.as_tensor_variable([], k) + dummy = aet.as_tensor_variable([], k) chain.vars.append(dummy) chain.samples[k] = v[idx] diff --git a/pymc3/blocking.py b/pymc3/blocking.py index 36696273500..4c07b4b47c2 100644 --- a/pymc3/blocking.py +++ b/pymc3/blocking.py @@ -125,13 +125,13 @@ def mapf(self, f): class ListArrayOrdering: """ - An ordering for a list to an array space. Takes also non theano.tensors. + An ordering for a list to an array space. Takes also non aesara.tensors. Modified from pymc3 blocking. 
Parameters ---------- list_arrays: list - :class:`numpy.ndarray` or :class:`theano.tensor.Tensor` + :class:`numpy.ndarray` or :class:`aesara.tensor.Tensor` intype: str defining the input type 'tensor' or 'numpy' """ diff --git a/pymc3/data.py b/pymc3/data.py index 4cdb793aa33..89760c14486 100644 --- a/pymc3/data.py +++ b/pymc3/data.py @@ -21,12 +21,14 @@ from copy import copy from typing import Any, Dict, List +import aesara +import aesara.tensor as aet import numpy as np import pandas as pd -import theano -import theano.tensor as tt -from theano.graph.basic import Apply +from aesara.graph.basic import Apply +from aesara.tensor.type import TensorType +from aesara.tensor.var import TensorVariable import pymc3 as pm @@ -61,7 +63,7 @@ def get_data(filename): return io.BytesIO(content) -class GenTensorVariable(tt.TensorVariable): +class GenTensorVariable(TensorVariable): def __init__(self, op, type, name=None): super().__init__(type=type, name=name) self.op = op @@ -96,7 +98,7 @@ def __init__(self, generator): # make pickling potentially possible self._yielded_test_value = False self.gen = generator - self.tensortype = tt.TensorType(self.test_value.dtype, ((False,) * self.test_value.ndim)) + self.tensortype = TensorType(self.test_value.dtype, ((False,) * self.test_value.ndim)) # python3 generator def __next__(self): @@ -119,7 +121,7 @@ def __hash__(self): return hash(id(self)) -class Minibatch(tt.TensorVariable): +class Minibatch(TensorVariable): """Multidimensional minibatch that is pure TensorVariable Parameters @@ -143,7 +145,7 @@ class Minibatch(tt.TensorVariable): you can use it to change source of minibatches programmatically in_memory_size: ``int`` or ``List[int|slice|Ellipsis]`` - data size for storing in ``theano.shared`` + data size for storing in ``aesara.shared`` Attributes ---------- @@ -231,11 +233,11 @@ class Minibatch(tt.TensorVariable): To be more concrete about how we create a minibatch, here is a demo: 1. create a shared variable - >>> shared = theano.shared(data) + >>> shared = aesara.shared(data) 2. 
take a random slice of size 10: - >>> ridx = pm.tt_rng().uniform(size=(10,), low=0, high=data.shape[0]-1e-10).astype('int64') + >>> ridx = pm.aet_rng().uniform(size=(10,), low=0, high=data.shape[0]-1e-10).astype('int64') 3) take the resulting slice: @@ -255,7 +257,7 @@ class Minibatch(tt.TensorVariable): Then you should create a `dict` with replacements: >>> replacements = {x: testdata} - >>> rnode = theano.clone(node, replacements) + >>> rnode = aesara.clone_replace(node, replacements) >>> assert (testdata ** 2 == rnode.eval()).all() *FIXME: In the following, what is the **reason** to replace the Minibatch variable with @@ -266,7 +268,7 @@ class Minibatch(tt.TensorVariable): For example >>> replacements = {x.minibatch: x.shared} - >>> rnode = theano.clone(node, replacements) + >>> rnode = aesara.clone_replace(node, replacements) For more complex slices some more code is needed that can seem not so clear @@ -296,7 +298,7 @@ class Minibatch(tt.TensorVariable): RNG = collections.defaultdict(list) # type: Dict[str, List[Any]] - @theano.config.change_flags(compute_test_value="raise") + @aesara.config.change_flags(compute_test_value="raise") def __init__( self, data, @@ -313,23 +315,23 @@ def __init__( else: data = np.asarray(data, dtype) in_memory_slc = self.make_static_slices(in_memory_size) - self.shared = theano.shared(data[in_memory_slc]) + self.shared = aesara.shared(data[in_memory_slc]) self.update_shared_f = update_shared_f self.random_slc = self.make_random_slices(self.shared.shape, batch_size, random_seed) minibatch = self.shared[self.random_slc] if broadcastable is None: broadcastable = (False,) * minibatch.ndim - minibatch = tt.patternbroadcast(minibatch, broadcastable) + minibatch = aet.patternbroadcast(minibatch, broadcastable) self.minibatch = minibatch super().__init__(self.minibatch.type, None, None, name=name) - Apply(theano.compile.view_op, inputs=[self.minibatch], outputs=[self]) + Apply(aesara.compile.view_op, inputs=[self.minibatch], outputs=[self]) self.tag.test_value = copy(self.minibatch.tag.test_value) def rslice(self, total, size, seed): if size is None: return slice(None) elif isinstance(size, int): - rng = pm.tt_rng(seed) + rng = pm.aet_rng(seed) Minibatch.RNG[id(self)].append(rng) return rng.uniform(size=(size,), low=0.0, high=pm.floatX(total) - 1e-16).astype("int64") else: @@ -401,7 +403,7 @@ def check(t): ) if len(end) > 0: shp_mid = shape[sep : -len(end)] - mid = [tt.arange(s) for s in shp_mid] + mid = [aet.arange(s) for s in shp_mid] else: mid = [] else: @@ -419,17 +421,17 @@ def check(t): shp_end = np.asarray([]) shp_begin = shape[: len(begin)] slc_begin = [ - self.rslice(shp_begin[i], t[0], t[1]) if t is not None else tt.arange(shp_begin[i]) + self.rslice(shp_begin[i], t[0], t[1]) if t is not None else aet.arange(shp_begin[i]) for i, t in enumerate(begin) ] slc_end = [ - self.rslice(shp_end[i], t[0], t[1]) if t is not None else tt.arange(shp_end[i]) + self.rslice(shp_end[i], t[0], t[1]) if t is not None else aet.arange(shp_end[i]) for i, t in enumerate(end) ] slc = slc_begin + mid + slc_end else: raise TypeError("Unrecognized size type, %r" % batch_size) - return pm.theanof.ix_(*slc) + return pm.aesaraf.ix_(*slc) def update_shared(self): if self.update_shared_f is None: @@ -460,7 +462,7 @@ def align_minibatches(batches=None): class Data: - """Data container class that wraps the theano ``SharedVariable`` class + """Data container class that wraps the aesara ``SharedVariable`` class and lets the model be aware of its inputs and outputs. 
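A brief, hedged usage sketch; all names below are illustrative:

>>> import numpy as np
>>> import pymc3 as pm
>>> observed = np.array([1.0, 2.0, 3.0])
>>> with pm.Model():
...     x = pm.Data("x", observed)  # stored in an aesara shared variable
...     y = pm.Normal("y", mu=x, sigma=1.0, observed=observed)

The wrapped value can later be replaced with ``pm.set_data`` without rebuilding
the model.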
Parameters @@ -524,7 +526,7 @@ def __new__(self, name, value, *, dims=None, export_index_as_coords=False): # `pm.model.pandas_to_array` takes care of parameter `value` and # transforms it to something digestible for pymc3 - shared_object = theano.shared(pm.model.pandas_to_array(value), name) + shared_object = aesara.shared(pm.model.pandas_to_array(value), name) if isinstance(dims, str): dims = (dims,) diff --git a/pymc3/distributions/bound.py b/pymc3/distributions/bound.py index 074a575ebad..6443414734d 100644 --- a/pymc3/distributions/bound.py +++ b/pymc3/distributions/bound.py @@ -14,9 +14,10 @@ from numbers import Real +import aesara.tensor as aet import numpy as np -import theano.tensor as tt +from pymc3.aesaraf import floatX from pymc3.distributions import transforms from pymc3.distributions.dist_math import bound from pymc3.distributions.distribution import ( @@ -26,7 +27,6 @@ draw_values, generate_samples, ) -from pymc3.theanof import floatX __all__ = ["Bound"] @@ -207,9 +207,9 @@ class _ContinuousBounded(_Bounded, Continuous): def __init__(self, distribution, lower, upper, transform="infer", *args, **kwargs): if lower is not None: - lower = tt.as_tensor_variable(floatX(lower)) + lower = aet.as_tensor_variable(floatX(lower)) if upper is not None: - upper = tt.as_tensor_variable(floatX(upper)) + upper = aet.as_tensor_variable(floatX(upper)) if transform == "infer": if lower is None and upper is None: diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 234ed935f2b..4d5310ecfe3 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -19,13 +19,14 @@ """ import warnings +import aesara.tensor as aet import numpy as np -import theano.tensor as tt from scipy import stats from scipy.interpolate import InterpolatedUnivariateSpline from scipy.special import expit +from pymc3.aesaraf import floatX from pymc3.distributions import transforms from pymc3.distributions.dist_math import ( SplineWrapper, @@ -44,7 +45,6 @@ from pymc3.distributions.distribution import Continuous, draw_values, generate_samples from pymc3.distributions.special import log_i0 from pymc3.math import invlogit, log1mexp, log1pexp, logdiffexp, logit -from pymc3.theanof import floatX __all__ = [ "Uniform", @@ -101,8 +101,8 @@ class BoundedContinuous(Continuous): def __init__(self, transform="auto", lower=None, upper=None, *args, **kwargs): - lower = tt.as_tensor_variable(lower) if lower is not None else None - upper = tt.as_tensor_variable(upper) if upper is not None else None + lower = aet.as_tensor_variable(lower) if lower is not None else None + upper = aet.as_tensor_variable(upper) if upper is not None else None if transform == "auto": if lower is None and upper is None: @@ -223,8 +223,8 @@ class Uniform(BoundedContinuous): """ def __init__(self, lower=0, upper=1, *args, **kwargs): - self.lower = lower = tt.as_tensor_variable(floatX(lower)) - self.upper = upper = tt.as_tensor_variable(floatX(upper)) + self.lower = lower = aet.as_tensor_variable(floatX(lower)) + self.upper = upper = aet.as_tensor_variable(floatX(upper)) self.mean = (upper + lower) / 2.0 self.median = self.mean @@ -268,7 +268,7 @@ def logp(self, value): """ lower = self.lower upper = self.upper - return bound(-tt.log(upper - lower), value >= lower, value <= upper) + return bound(-aet.log(upper - lower), value >= lower, value <= upper) def logcdf(self, value): """ @@ -277,9 +277,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: 
numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. Returns ------- @@ -288,12 +288,12 @@ def logcdf(self, value): lower = self.lower upper = self.upper - return tt.switch( - tt.lt(value, lower) | tt.lt(upper, lower), + return aet.switch( + aet.lt(value, lower) | aet.lt(upper, lower), -np.inf, - tt.switch( - tt.lt(value, upper), - tt.log(value - lower) - tt.log(upper - lower), + aet.switch( + aet.lt(value, upper), + aet.log(value - lower) - aet.log(upper - lower), 0, ), ) @@ -331,13 +331,13 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- TensorVariable """ - return tt.zeros_like(value) + return aet.zeros_like(value) def logcdf(self, value): """ @@ -346,16 +346,16 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. Returns ------- TensorVariable """ - return tt.switch( - tt.eq(value, -np.inf), -np.inf, tt.switch(tt.eq(value, np.inf), 0, tt.log(0.5)) + return aet.switch( + aet.eq(value, -np.inf), -np.inf, aet.switch(aet.eq(value, np.inf), 0, aet.log(0.5)) ) @@ -388,13 +388,13 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- TensorVariable """ - return bound(tt.zeros_like(value), value > 0) + return bound(aet.zeros_like(value), value > 0) def logcdf(self, value): """ @@ -403,15 +403,17 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. 
Returns ------- TensorVariable """ - return tt.switch(tt.lt(value, np.inf), -np.inf, tt.switch(tt.eq(value, np.inf), 0, -np.inf)) + return aet.switch( + aet.lt(value, np.inf), -np.inf, aet.switch(aet.eq(value, np.inf), 0, -np.inf) + ) class Normal(Continuous): @@ -481,10 +483,10 @@ def __init__(self, mu=0, sigma=None, tau=None, sd=None, **kwargs): if sd is not None: sigma = sd tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) - self.sigma = self.sd = tt.as_tensor_variable(sigma) - self.tau = tt.as_tensor_variable(tau) + self.sigma = self.sd = aet.as_tensor_variable(sigma) + self.tau = aet.as_tensor_variable(tau) - self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(floatX(mu)) + self.mean = self.median = self.mode = self.mu = mu = aet.as_tensor_variable(floatX(mu)) self.variance = 1.0 / self.tau assert_negative_support(sigma, "sigma", "Normal") @@ -522,7 +524,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -532,7 +534,7 @@ def logp(self, value): tau = self.tau mu = self.mu - return bound((-tau * (value - mu) ** 2 + tt.log(tau / np.pi / 2.0)) / 2.0, sigma > 0) + return bound((-tau * (value - mu) ** 2 + aet.log(tau / np.pi / 2.0)) / 2.0, sigma > 0) def _distr_parameters_for_repr(self): return ["mu", "sigma"] @@ -544,9 +546,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. Returns ------- @@ -647,21 +649,21 @@ def __init__( if sd is not None: sigma = sd tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) - self.sigma = self.sd = tt.as_tensor_variable(sigma) - self.tau = tt.as_tensor_variable(tau) - self.lower_check = tt.as_tensor_variable(floatX(lower)) if lower is not None else lower - self.upper_check = tt.as_tensor_variable(floatX(upper)) if upper is not None else upper + self.sigma = self.sd = aet.as_tensor_variable(sigma) + self.tau = aet.as_tensor_variable(tau) + self.lower_check = aet.as_tensor_variable(floatX(lower)) if lower is not None else lower + self.upper_check = aet.as_tensor_variable(floatX(upper)) if upper is not None else upper self.lower = ( - tt.as_tensor_variable(floatX(lower)) + aet.as_tensor_variable(floatX(lower)) if lower is not None - else tt.as_tensor_variable(-np.inf) + else aet.as_tensor_variable(-np.inf) ) self.upper = ( - tt.as_tensor_variable(floatX(upper)) + aet.as_tensor_variable(floatX(upper)) if upper is not None - else tt.as_tensor_variable(np.inf) + else aet.as_tensor_variable(np.inf) ) - self.mu = tt.as_tensor_variable(floatX(mu)) + self.mu = aet.as_tensor_variable(floatX(mu)) if self.lower_check is None and self.upper_check is None: self._defaultval = mu @@ -732,7 +734,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. 
If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -763,7 +765,7 @@ def _normalization(self): lsf_a = normal_lccdf(mu, sigma, self.lower) lsf_b = normal_lccdf(mu, sigma, self.upper) - return tt.switch(self.lower > 0, logdiffexp(lsf_a, lsf_b), logdiffexp(lcdf_b, lcdf_a)) + return aet.switch(self.lower > 0, logdiffexp(lsf_a, lsf_b), logdiffexp(lcdf_b, lcdf_a)) if self.lower_check is not None: return normal_lccdf(mu, sigma, self.lower) @@ -843,10 +845,10 @@ def __init__(self, sigma=None, tau=None, sd=None, *args, **kwargs): super().__init__(*args, **kwargs) tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) - self.sigma = self.sd = sigma = tt.as_tensor_variable(sigma) - self.tau = tau = tt.as_tensor_variable(tau) + self.sigma = self.sd = sigma = aet.as_tensor_variable(sigma) + self.tau = tau = aet.as_tensor_variable(tau) - self.mean = tt.sqrt(2 / (np.pi * self.tau)) + self.mean = aet.sqrt(2 / (np.pi * self.tau)) self.variance = (1.0 - 2 / np.pi) / self.tau assert_negative_support(tau, "tau", "HalfNormal") @@ -882,7 +884,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -891,7 +893,7 @@ def logp(self, value): tau = self.tau sigma = self.sigma return bound( - -0.5 * tau * value ** 2 + 0.5 * tt.log(tau * 2.0 / np.pi), + -0.5 * tau * value ** 2 + 0.5 * aet.log(tau * 2.0 / np.pi), value >= 0, tau > 0, sigma > 0, @@ -907,9 +909,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. Returns ------- @@ -918,7 +920,7 @@ def logcdf(self, value): sigma = self.sigma z = zvalue(value, mu=0, sigma=sigma) return bound( - tt.log1p(-tt.erfc(z / tt.sqrt(2.0))), + aet.log1p(-aet.erfc(z / aet.sqrt(2.0))), 0 <= value, 0 < sigma, ) @@ -1005,14 +1007,14 @@ class Wald(PositiveContinuous): def __init__(self, mu=None, lam=None, phi=None, alpha=0.0, *args, **kwargs): super().__init__(*args, **kwargs) mu, lam, phi = self.get_mu_lam_phi(mu, lam, phi) - self.alpha = alpha = tt.as_tensor_variable(floatX(alpha)) - self.mu = mu = tt.as_tensor_variable(floatX(mu)) - self.lam = lam = tt.as_tensor_variable(floatX(lam)) - self.phi = phi = tt.as_tensor_variable(floatX(phi)) + self.alpha = alpha = aet.as_tensor_variable(floatX(alpha)) + self.mu = mu = aet.as_tensor_variable(floatX(mu)) + self.lam = lam = aet.as_tensor_variable(floatX(lam)) + self.phi = phi = aet.as_tensor_variable(floatX(phi)) self.mean = self.mu + self.alpha self.mode = ( - self.mu * (tt.sqrt(1.0 + (1.5 * self.mu / self.lam) ** 2) - 1.5 * self.mu / self.lam) + self.mu * (aet.sqrt(1.0 + (1.5 * self.mu / self.lam) ** 2) - 1.5 * self.mu / self.lam) + self.alpha ) self.variance = (self.mu ** 3) / self.lam @@ -1080,7 +1082,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. 
If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -1113,9 +1115,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. Returns ------- @@ -1129,29 +1131,29 @@ def logcdf(self, value): value -= alpha q = value / mu l = lam * mu - r = tt.sqrt(value * lam) + r = aet.sqrt(value * lam) a = normal_lcdf(0, 1, (q - 1.0) / r) b = 2.0 / l + normal_lcdf(0, 1, -(q + 1.0) / r) left_limit = ( - tt.lt(value, 0) - | (tt.eq(value, 0) & tt.gt(mu, 0) & tt.lt(lam, np.inf)) - | (tt.lt(value, mu) & tt.eq(lam, 0)) + aet.lt(value, 0) + | (aet.eq(value, 0) & aet.gt(mu, 0) & aet.lt(lam, np.inf)) + | (aet.lt(value, mu) & aet.eq(lam, 0)) ) right_limit = ( - tt.eq(value, np.inf) - | (tt.eq(lam, 0) & tt.gt(value, mu)) - | (tt.gt(value, 0) & tt.eq(lam, np.inf)) + aet.eq(value, np.inf) + | (aet.eq(lam, 0) & aet.gt(value, mu)) + | (aet.gt(value, 0) & aet.eq(lam, np.inf)) ) - degenerate_dist = (tt.lt(mu, np.inf) & tt.eq(mu, value) & tt.eq(lam, 0)) | ( - tt.eq(value, 0) & tt.eq(lam, np.inf) + degenerate_dist = (aet.lt(mu, np.inf) & aet.eq(mu, value) & aet.eq(lam, 0)) | ( + aet.eq(value, 0) & aet.eq(lam, np.inf) ) return bound( - tt.switch( + aet.switch( ~(right_limit | degenerate_dist), - a + tt.log1p(tt.exp(b - a)), + a + aet.log1p(aet.exp(b - a)), 0, ), ~left_limit, @@ -1229,8 +1231,8 @@ def __init__(self, alpha=None, beta=None, mu=None, sigma=None, sd=None, *args, * if sd is not None: sigma = sd alpha, beta = self.get_alpha_beta(alpha, beta, mu, sigma) - self.alpha = alpha = tt.as_tensor_variable(floatX(alpha)) - self.beta = beta = tt.as_tensor_variable(floatX(beta)) + self.alpha = alpha = aet.as_tensor_variable(floatX(alpha)) + self.beta = beta = aet.as_tensor_variable(floatX(beta)) self.mean = self.alpha / (self.alpha + self.beta) self.variance = ( @@ -1283,7 +1285,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. 
If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -1292,11 +1294,11 @@ def logp(self, value): alpha = self.alpha beta = self.beta - logval = tt.log(value) - log1pval = tt.log1p(-value) + logval = aet.log(value) + log1pval = aet.log1p(-value) logp = ( - tt.switch(tt.eq(alpha, 1), 0, (alpha - 1) * logval) - + tt.switch(tt.eq(beta, 1), 0, (beta - 1) * log1pval) + aet.switch(aet.eq(alpha, 1), 0, (alpha - 1) * logval) + + aet.switch(aet.eq(beta, 1), 0, (beta - 1) * log1pval) - betaln(alpha, beta) ) @@ -1326,9 +1328,9 @@ def logcdf(self, value): b = self.beta return bound( - tt.switch( - tt.lt(value, 1), - tt.log(incomplete_beta(a, b, value)), + aet.switch( + aet.lt(value, 1), + aet.log(incomplete_beta(a, b, value)), 0, ), 0 <= value, @@ -1385,15 +1387,15 @@ class Kumaraswamy(UnitContinuous): def __init__(self, a, b, *args, **kwargs): super().__init__(*args, **kwargs) - self.a = a = tt.as_tensor_variable(floatX(a)) - self.b = b = tt.as_tensor_variable(floatX(b)) + self.a = a = aet.as_tensor_variable(floatX(a)) + self.b = b = aet.as_tensor_variable(floatX(b)) - ln_mean = tt.log(b) + tt.gammaln(1 + 1 / a) + tt.gammaln(b) - tt.gammaln(1 + 1 / a + b) - self.mean = tt.exp(ln_mean) + ln_mean = aet.log(b) + aet.gammaln(1 + 1 / a) + aet.gammaln(b) - aet.gammaln(1 + 1 / a + b) + self.mean = aet.exp(ln_mean) ln_2nd_raw_moment = ( - tt.log(b) + tt.gammaln(1 + 2 / a) + tt.gammaln(b) - tt.gammaln(1 + 2 / a + b) + aet.log(b) + aet.gammaln(1 + 2 / a) + aet.gammaln(b) - aet.gammaln(1 + 2 / a + b) ) - self.variance = tt.exp(ln_2nd_raw_moment) - self.mean ** 2 + self.variance = aet.exp(ln_2nd_raw_moment) - self.mean ** 2 assert_negative_support(a, "a", "Kumaraswamy") assert_negative_support(b, "b", "Kumaraswamy") @@ -1430,7 +1432,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -1439,7 +1441,9 @@ def logp(self, value): a = self.a b = self.b - logp = tt.log(a) + tt.log(b) + (a - 1) * tt.log(value) + (b - 1) * tt.log(1 - value ** a) + logp = ( + aet.log(a) + aet.log(b) + (a - 1) * aet.log(value) + (b - 1) * aet.log(1 - value ** a) + ) return bound(logp, value >= 0, value <= 1, a > 0, b > 0) @@ -1483,10 +1487,10 @@ class Exponential(PositiveContinuous): def __init__(self, lam, *args, **kwargs): super().__init__(*args, **kwargs) - self.lam = lam = tt.as_tensor_variable(floatX(lam)) + self.lam = lam = aet.as_tensor_variable(floatX(lam)) self.mean = 1.0 / self.lam - self.median = self.mean * tt.log(2) - self.mode = tt.zeros_like(self.lam) + self.median = self.mean * aet.log(2) + self.mode = aet.zeros_like(self.lam) self.variance = self.lam ** -2 @@ -1522,14 +1526,14 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. 
If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- TensorVariable """ lam = self.lam - return bound(tt.log(lam) - lam * value, value >= 0, lam > 0) + return bound(aet.log(lam) - lam * value, value >= 0, lam > 0) def logcdf(self, value): r""" @@ -1538,15 +1542,15 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. Returns ------- TensorVariable """ - value = floatX(tt.as_tensor(value)) + value = floatX(aet.as_tensor(value)) lam = self.lam a = lam * value return bound( @@ -1600,8 +1604,8 @@ class Laplace(Continuous): def __init__(self, mu, b, *args, **kwargs): super().__init__(*args, **kwargs) - self.b = b = tt.as_tensor_variable(floatX(b)) - self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(floatX(mu)) + self.b = b = aet.as_tensor_variable(floatX(b)) + self.mean = self.median = self.mode = self.mu = mu = aet.as_tensor_variable(floatX(mu)) self.variance = 2 * self.b ** 2 @@ -1635,7 +1639,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -1644,7 +1648,7 @@ def logp(self, value): mu = self.mu b = self.b - return -tt.log(2 * b) - abs(value - mu) / b + return -aet.log(2 * b) - abs(value - mu) / b def logcdf(self, value): """ @@ -1653,9 +1657,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. Returns ------- @@ -1665,13 +1669,13 @@ def logcdf(self, value): b = self.b y = (value - a) / b return bound( - tt.switch( - tt.le(value, a), - tt.log(0.5) + y, - tt.switch( - tt.gt(y, 1), - tt.log1p(-0.5 * tt.exp(-y)), - tt.log(1 - 0.5 * tt.exp(-y)), + aet.switch( + aet.le(value, a), + aet.log(0.5) + y, + aet.switch( + aet.gt(y, 1), + aet.log1p(-0.5 * aet.exp(-y)), + aet.log(1 - 0.5 * aet.exp(-y)), ), ), 0 < b, @@ -1715,9 +1719,9 @@ class AsymmetricLaplace(Continuous): """ def __init__(self, b, kappa, mu=0, *args, **kwargs): - self.b = tt.as_tensor_variable(floatX(b)) - self.kappa = tt.as_tensor_variable(floatX(kappa)) - self.mu = mu = tt.as_tensor_variable(floatX(mu)) + self.b = aet.as_tensor_variable(floatX(b)) + self.kappa = aet.as_tensor_variable(floatX(kappa)) + self.mu = mu = aet.as_tensor_variable(floatX(mu)) self.mean = self.mu - (self.kappa - 1 / self.kappa) / b self.variance = (1 + self.kappa ** 4) / (self.kappa ** 2 * self.b ** 2) @@ -1763,7 +1767,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. 
If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -1771,8 +1775,8 @@ def logp(self, value): """ value = value - self.mu return bound( - tt.log(self.b / (self.kappa + (self.kappa ** -1))) - + (-value * self.b * tt.sgn(value) * (self.kappa ** tt.sgn(value))), + aet.log(self.b / (self.kappa + (self.kappa ** -1))) + + (-value * self.b * aet.sgn(value) * (self.kappa ** aet.sgn(value))), 0 < self.b, 0 < self.kappa, ) @@ -1847,14 +1851,14 @@ def __init__(self, mu=0, sigma=None, tau=None, sd=None, *args, **kwargs): tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) - self.mu = mu = tt.as_tensor_variable(floatX(mu)) - self.tau = tau = tt.as_tensor_variable(tau) - self.sigma = self.sd = sigma = tt.as_tensor_variable(sigma) + self.mu = mu = aet.as_tensor_variable(floatX(mu)) + self.tau = tau = aet.as_tensor_variable(tau) + self.sigma = self.sd = sigma = aet.as_tensor_variable(sigma) - self.mean = tt.exp(self.mu + 1.0 / (2 * self.tau)) - self.median = tt.exp(self.mu) - self.mode = tt.exp(self.mu - 1.0 / self.tau) - self.variance = (tt.exp(1.0 / self.tau) - 1) * tt.exp(2 * self.mu + 1.0 / self.tau) + self.mean = aet.exp(self.mu + 1.0 / (2 * self.tau)) + self.median = aet.exp(self.mu) + self.mode = aet.exp(self.mu - 1.0 / self.tau) + self.variance = (aet.exp(1.0 / self.tau) - 1) * aet.exp(2 * self.mu + 1.0 / self.tau) assert_negative_support(tau, "tau", "Lognormal") assert_negative_support(sigma, "sigma", "Lognormal") @@ -1891,7 +1895,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -1900,9 +1904,9 @@ def logp(self, value): mu = self.mu tau = self.tau return bound( - -0.5 * tau * (tt.log(value) - mu) ** 2 - + 0.5 * tt.log(tau / (2.0 * np.pi)) - - tt.log(value), + -0.5 * tau * (aet.log(value) - mu) ** 2 + + 0.5 * aet.log(tau / (2.0 * np.pi)) + - aet.log(value), tau > 0, ) @@ -1916,9 +1920,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. 
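For instance, as a hedged, illustrative check (parameter and input values are
arbitrary):

>>> import pymc3 as pm
>>> pm.Lognormal.dist(mu=0.0, sigma=1.0).logcdf(1.0).eval()  # log(0.5), about -0.693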
Returns ------- @@ -1929,7 +1933,7 @@ def logcdf(self, value): tau = self.tau return bound( - normal_lcdf(mu, sigma, tt.log(value)), + normal_lcdf(mu, sigma, aet.log(value)), 0 < value, 0 < tau, ) @@ -2002,13 +2006,13 @@ def __init__(self, nu, mu=0, lam=None, sigma=None, sd=None, *args, **kwargs): super().__init__(*args, **kwargs) if sd is not None: sigma = sd - self.nu = nu = tt.as_tensor_variable(floatX(nu)) + self.nu = nu = aet.as_tensor_variable(floatX(nu)) lam, sigma = get_tau_sigma(tau=lam, sigma=sigma) - self.lam = lam = tt.as_tensor_variable(lam) - self.sigma = self.sd = sigma = tt.as_tensor_variable(sigma) - self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(mu) + self.lam = lam = aet.as_tensor_variable(lam) + self.sigma = self.sd = sigma = aet.as_tensor_variable(sigma) + self.mean = self.median = self.mode = self.mu = mu = aet.as_tensor_variable(mu) - self.variance = tt.switch((nu > 2) * 1, (1 / self.lam) * (nu / (nu - 2)), np.inf) + self.variance = aet.switch((nu > 2) * 1, (1 / self.lam) * (nu / (nu - 2)), np.inf) assert_negative_support(lam, "lam (sigma)", "StudentT") assert_negative_support(nu, "nu", "StudentT") @@ -2043,7 +2047,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -2056,9 +2060,9 @@ def logp(self, value): return bound( gammaln((nu + 1.0) / 2.0) - + 0.5 * tt.log(lam / (nu * np.pi)) + + 0.5 * aet.log(lam / (nu * np.pi)) - gammaln(nu / 2.0) - - (nu + 1.0) / 2.0 * tt.log1p(lam * (value - mu) ** 2 / nu), + - (nu + 1.0) / 2.0 * aet.log1p(lam * (value - mu) ** 2 / nu), lam > 0, nu > 0, sigma > 0, @@ -2092,11 +2096,11 @@ def logcdf(self, value): sigma = self.sigma lam = self.lam t = (value - mu) / sigma - sqrt_t2_nu = tt.sqrt(t ** 2 + nu) + sqrt_t2_nu = aet.sqrt(t ** 2 + nu) x = (t + sqrt_t2_nu) / (2.0 * sqrt_t2_nu) return bound( - tt.log(incomplete_beta(nu / 2.0, nu / 2.0, x)), + aet.log(incomplete_beta(nu / 2.0, nu / 2.0, x)), 0 < nu, 0 < sigma, 0 < lam, @@ -2149,13 +2153,13 @@ class Pareto(Continuous): """ def __init__(self, alpha, m, transform="lowerbound", *args, **kwargs): - self.alpha = alpha = tt.as_tensor_variable(floatX(alpha)) - self.m = m = tt.as_tensor_variable(floatX(m)) + self.alpha = alpha = aet.as_tensor_variable(floatX(alpha)) + self.m = m = aet.as_tensor_variable(floatX(m)) - self.mean = tt.switch(tt.gt(alpha, 1), alpha * m / (alpha - 1.0), np.inf) + self.mean = aet.switch(aet.gt(alpha, 1), alpha * m / (alpha - 1.0), np.inf) self.median = m * 2.0 ** (1.0 / alpha) - self.variance = tt.switch( - tt.gt(alpha, 2), (alpha * m ** 2) / ((alpha - 2.0) * (alpha - 1.0) ** 2), np.inf + self.variance = aet.switch( + aet.gt(alpha, 2), (alpha * m ** 2) / ((alpha - 2.0) * (alpha - 1.0) ** 2), np.inf ) assert_negative_support(alpha, "alpha", "Pareto") @@ -2197,7 +2201,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. 
If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -2206,7 +2210,7 @@ def logp(self, value): alpha = self.alpha m = self.m return bound( - tt.log(alpha) + logpow(m, alpha) - logpow(value, alpha + 1), + aet.log(alpha) + logpow(m, alpha) - logpow(value, alpha + 1), value >= m, alpha > 0, m > 0, @@ -2222,9 +2226,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. Returns ------- @@ -2234,10 +2238,10 @@ def logcdf(self, value): alpha = self.alpha arg = (m / value) ** alpha return bound( - tt.switch( - tt.le(arg, 1e-5), - tt.log1p(-arg), - tt.log(1 - arg), + aet.switch( + aet.le(arg, 1e-5), + aet.log1p(-arg), + aet.log(1 - arg), ), m <= value, 0 < alpha, @@ -2292,8 +2296,8 @@ class Cauchy(Continuous): def __init__(self, alpha, beta, *args, **kwargs): super().__init__(*args, **kwargs) - self.median = self.mode = self.alpha = tt.as_tensor_variable(floatX(alpha)) - self.beta = tt.as_tensor_variable(floatX(beta)) + self.median = self.mode = self.alpha = aet.as_tensor_variable(floatX(alpha)) + self.beta = aet.as_tensor_variable(floatX(beta)) assert_negative_support(beta, "beta", "Cauchy") @@ -2329,7 +2333,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -2338,7 +2342,7 @@ def logp(self, value): alpha = self.alpha beta = self.beta return bound( - -tt.log(np.pi) - tt.log(beta) - tt.log1p(((value - alpha) / beta) ** 2), beta > 0 + -aet.log(np.pi) - aet.log(beta) - aet.log1p(((value - alpha) / beta) ** 2), beta > 0 ) def logcdf(self, value): @@ -2348,9 +2352,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. Returns ------- @@ -2359,7 +2363,7 @@ def logcdf(self, value): alpha = self.alpha beta = self.beta return bound( - tt.log(0.5 + tt.arctan((value - alpha) / beta) / np.pi), + aet.log(0.5 + aet.arctan((value - alpha) / beta) / np.pi), 0 < beta, ) @@ -2404,8 +2408,8 @@ class HalfCauchy(PositiveContinuous): def __init__(self, beta, *args, **kwargs): super().__init__(*args, **kwargs) - self.mode = tt.as_tensor_variable(0) - self.median = self.beta = tt.as_tensor_variable(floatX(beta)) + self.mode = aet.as_tensor_variable(0) + self.median = self.beta = aet.as_tensor_variable(floatX(beta)) assert_negative_support(beta, "beta", "HalfCauchy") @@ -2441,7 +2445,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. 
If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -2449,7 +2453,7 @@ def logp(self, value): """ beta = self.beta return bound( - tt.log(2) - tt.log(np.pi) - tt.log(beta) - tt.log1p((value / beta) ** 2), + aet.log(2) - aet.log(np.pi) - aet.log(beta) - aet.log1p((value / beta) ** 2), value >= 0, beta > 0, ) @@ -2461,9 +2465,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. Returns ------- @@ -2471,7 +2475,7 @@ def logcdf(self, value): """ beta = self.beta return bound( - tt.log(2 * tt.arctan(value / beta) / np.pi), + aet.log(2 * aet.arctan(value / beta) / np.pi), 0 <= value, 0 < beta, ) @@ -2541,10 +2545,10 @@ def __init__(self, alpha=None, beta=None, mu=None, sigma=None, sd=None, *args, * sigma = sd alpha, beta = self.get_alpha_beta(alpha, beta, mu, sigma) - self.alpha = alpha = tt.as_tensor_variable(floatX(alpha)) - self.beta = beta = tt.as_tensor_variable(floatX(beta)) + self.alpha = alpha = aet.as_tensor_variable(floatX(alpha)) + self.beta = beta = aet.as_tensor_variable(floatX(beta)) self.mean = alpha / beta - self.mode = tt.maximum((alpha - 1) / beta, 0) + self.mode = aet.maximum((alpha - 1) / beta, 0) self.variance = alpha / beta ** 2 assert_negative_support(alpha, "alpha", "Gamma") @@ -2595,7 +2599,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -2617,9 +2621,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. 
Returns ------- @@ -2628,12 +2632,12 @@ def logcdf(self, value): alpha = self.alpha beta = self.beta # Avoid C-assertion when the gammainc function is called with invalid values (#4340) - safe_alpha = tt.switch(tt.lt(alpha, 0), 0, alpha) - safe_beta = tt.switch(tt.lt(beta, 0), 0, beta) - safe_value = tt.switch(tt.lt(value, 0), 0, value) + safe_alpha = aet.switch(aet.lt(alpha, 0), 0, alpha) + safe_beta = aet.switch(aet.lt(beta, 0), 0, beta) + safe_value = aet.switch(aet.lt(value, 0), 0, value) return bound( - tt.log(tt.gammainc(safe_alpha, safe_beta * safe_value)), + aet.log(aet.gammainc(safe_alpha, safe_beta * safe_value)), 0 <= value, 0 < alpha, 0 < beta, @@ -2698,13 +2702,13 @@ def __init__(self, alpha=None, beta=None, mu=None, sigma=None, sd=None, *args, * sigma = sd alpha, beta = InverseGamma._get_alpha_beta(alpha, beta, mu, sigma) - self.alpha = alpha = tt.as_tensor_variable(floatX(alpha)) - self.beta = beta = tt.as_tensor_variable(floatX(beta)) + self.alpha = alpha = aet.as_tensor_variable(floatX(alpha)) + self.beta = beta = aet.as_tensor_variable(floatX(beta)) self.mean = self._calculate_mean() self.mode = beta / (alpha + 1.0) - self.variance = tt.switch( - tt.gt(alpha, 2), (beta ** 2) / ((alpha - 2) * (alpha - 1.0) ** 2), np.inf + self.variance = aet.switch( + aet.gt(alpha, 2), (beta ** 2) / ((alpha - 2) * (alpha - 1.0) ** 2), np.inf ) assert_negative_support(alpha, "alpha", "InverseGamma") assert_negative_support(beta, "beta", "InverseGamma") @@ -2766,7 +2770,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -2791,9 +2795,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. 
Returns ------- @@ -2802,12 +2806,12 @@ def logcdf(self, value): alpha = self.alpha beta = self.beta # Avoid C-assertion when the gammaincc function is called with invalid values (#4340) - safe_alpha = tt.switch(tt.lt(alpha, 0), 0, alpha) - safe_beta = tt.switch(tt.lt(beta, 0), 0, beta) - safe_value = tt.switch(tt.lt(value, 0), 0, value) + safe_alpha = aet.switch(aet.lt(alpha, 0), 0, alpha) + safe_beta = aet.switch(aet.lt(beta, 0), 0, beta) + safe_value = aet.switch(aet.lt(value, 0), 0, value) return bound( - tt.log(tt.gammaincc(safe_alpha, safe_beta / safe_value)), + aet.log(aet.gammaincc(safe_alpha, safe_beta / safe_value)), 0 <= value, 0 < alpha, 0 < beta, @@ -2853,7 +2857,7 @@ class ChiSquared(Gamma): """ def __init__(self, nu, *args, **kwargs): - self.nu = nu = tt.as_tensor_variable(floatX(nu)) + self.nu = nu = aet.as_tensor_variable(floatX(nu)) super().__init__(alpha=nu / 2.0, beta=0.5, *args, **kwargs) @@ -2903,12 +2907,12 @@ class Weibull(PositiveContinuous): def __init__(self, alpha, beta, *args, **kwargs): super().__init__(*args, **kwargs) - self.alpha = alpha = tt.as_tensor_variable(floatX(alpha)) - self.beta = beta = tt.as_tensor_variable(floatX(beta)) - self.mean = beta * tt.exp(gammaln(1 + 1.0 / alpha)) - self.median = beta * tt.exp(gammaln(tt.log(2))) ** (1.0 / alpha) - self.variance = beta ** 2 * tt.exp(gammaln(1 + 2.0 / alpha)) - self.mean ** 2 - self.mode = tt.switch( + self.alpha = alpha = aet.as_tensor_variable(floatX(alpha)) + self.beta = beta = aet.as_tensor_variable(floatX(beta)) + self.mean = beta * aet.exp(gammaln(1 + 1.0 / alpha)) + self.median = beta * aet.exp(gammaln(aet.log(2))) ** (1.0 / alpha) + self.variance = beta ** 2 * aet.exp(gammaln(1 + 2.0 / alpha)) - self.mean ** 2 + self.mode = aet.switch( alpha >= 1, beta * ((alpha - 1) / alpha) ** (1 / alpha), 0 ) # Reference: https://en.wikipedia.org/wiki/Weibull_distribution @@ -2947,7 +2951,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -2956,9 +2960,9 @@ def logp(self, value): alpha = self.alpha beta = self.beta return bound( - tt.log(alpha) - - tt.log(beta) - + (alpha - 1) * tt.log(value / beta) + aet.log(alpha) + - aet.log(beta) + + (alpha - 1) * aet.log(value / beta) - (value / beta) ** alpha, value >= 0, alpha > 0, @@ -2972,9 +2976,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. 
Returns ------- @@ -3053,12 +3057,12 @@ def __init__(self, nu=1, sigma=None, lam=None, sd=None, *args, **kwargs): if sd is not None: sigma = sd - self.mode = tt.as_tensor_variable(0) + self.mode = aet.as_tensor_variable(0) lam, sigma = get_tau_sigma(lam, sigma) - self.median = tt.as_tensor_variable(sigma) - self.sigma = self.sd = tt.as_tensor_variable(sigma) - self.lam = tt.as_tensor_variable(lam) - self.nu = nu = tt.as_tensor_variable(floatX(nu)) + self.median = aet.as_tensor_variable(sigma) + self.sigma = self.sd = aet.as_tensor_variable(sigma) + self.lam = aet.as_tensor_variable(lam) + self.nu = nu = aet.as_tensor_variable(floatX(nu)) assert_negative_support(sigma, "sigma", "HalfStudentT") assert_negative_support(lam, "lam", "HalfStudentT") @@ -3094,7 +3098,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -3105,11 +3109,11 @@ def logp(self, value): lam = self.lam return bound( - tt.log(2) + aet.log(2) + gammaln((nu + 1.0) / 2.0) - gammaln(nu / 2.0) - - 0.5 * tt.log(nu * np.pi * sigma ** 2) - - (nu + 1.0) / 2.0 * tt.log1p(value ** 2 / (nu * sigma ** 2)), + - 0.5 * aet.log(nu * np.pi * sigma ** 2) + - (nu + 1.0) / 2.0 * aet.log1p(value ** 2 / (nu * sigma ** 2)), sigma > 0, lam > 0, nu > 0, @@ -3191,9 +3195,9 @@ def __init__(self, mu=0.0, sigma=None, nu=None, sd=None, *args, **kwargs): if sd is not None: sigma = sd - self.mu = mu = tt.as_tensor_variable(floatX(mu)) - self.sigma = self.sd = sigma = tt.as_tensor_variable(floatX(sigma)) - self.nu = nu = tt.as_tensor_variable(floatX(nu)) + self.mu = mu = aet.as_tensor_variable(floatX(mu)) + self.sigma = self.sd = sigma = aet.as_tensor_variable(floatX(sigma)) + self.nu = nu = aet.as_tensor_variable(floatX(nu)) self.mean = mu + nu self.variance = (sigma ** 2) + (nu ** 2) @@ -3234,7 +3238,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -3246,10 +3250,10 @@ def logp(self, value): # Alogithm is adapted from dexGAUS.R from gamlss return bound( - tt.switch( - tt.gt(nu, 0.05 * sigma), + aet.switch( + aet.gt(nu, 0.05 * sigma), ( - -tt.log(nu) + -aet.log(nu) + (mu - value) / nu + 0.5 * (sigma / nu) ** 2 + normal_lcdf(mu + (sigma ** 2) / nu, sigma, value) @@ -3273,9 +3277,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. 
Returns ------- @@ -3287,8 +3291,8 @@ def logcdf(self, value): # Alogithm is adapted from pexGAUS.R from gamlss return bound( - tt.switch( - tt.gt(nu, 0.05 * sigma), + aet.switch( + aet.gt(nu, 0.05 * sigma), logdiffexp( normal_lcdf(mu, sigma, value), ( @@ -3355,8 +3359,8 @@ def __init__(self, mu=0.0, kappa=None, transform="circular", *args, **kwargs): if transform == "circular": transform = transforms.Circular() super().__init__(transform=transform, *args, **kwargs) - self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(floatX(mu)) - self.kappa = kappa = tt.as_tensor_variable(floatX(kappa)) + self.mean = self.median = self.mode = self.mu = mu = aet.as_tensor_variable(floatX(mu)) + self.kappa = kappa = aet.as_tensor_variable(floatX(kappa)) assert_negative_support(kappa, "kappa", "VonMises") @@ -3390,7 +3394,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -3399,7 +3403,7 @@ def logp(self, value): mu = self.mu kappa = self.kappa return bound( - kappa * tt.cos(mu - value) - (tt.log(2 * np.pi) + log_i0(kappa)), + kappa * aet.cos(mu - value) - (aet.log(2 * np.pi) + log_i0(kappa)), kappa > 0, value >= -np.pi, value <= np.pi, @@ -3474,11 +3478,11 @@ def __init__(self, mu=0.0, sigma=None, tau=None, alpha=1, sd=None, *args, **kwar sigma = sd tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) - self.mu = mu = tt.as_tensor_variable(floatX(mu)) - self.tau = tt.as_tensor_variable(tau) - self.sigma = self.sd = tt.as_tensor_variable(sigma) + self.mu = mu = aet.as_tensor_variable(floatX(mu)) + self.tau = aet.as_tensor_variable(tau) + self.sigma = self.sd = aet.as_tensor_variable(sigma) - self.alpha = alpha = tt.as_tensor_variable(floatX(alpha)) + self.alpha = alpha = aet.as_tensor_variable(floatX(alpha)) self.mean = mu + self.sigma * (2 / np.pi) ** 0.5 * alpha / (1 + alpha ** 2) ** 0.5 self.variance = self.sigma ** 2 * (1 - (2 * alpha ** 2) / ((1 + alpha ** 2) * np.pi)) @@ -3518,7 +3522,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. 
If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -3529,8 +3533,8 @@ def logp(self, value): mu = self.mu alpha = self.alpha return bound( - tt.log(1 + tt.erf(((value - mu) * tt.sqrt(tau) * alpha) / tt.sqrt(2))) - + (-tau * (value - mu) ** 2 + tt.log(tau / np.pi / 2.0)) / 2.0, + aet.log(1 + aet.erf(((value - mu) * aet.sqrt(tau) * alpha) / aet.sqrt(2))) + + (-tau * (value - mu) ** 2 + aet.log(tau / np.pi / 2.0)) / 2.0, tau > 0, sigma > 0, ) @@ -3594,9 +3598,9 @@ class Triangular(BoundedContinuous): """ def __init__(self, lower=0, upper=1, c=0.5, *args, **kwargs): - self.median = self.mean = self.c = c = tt.as_tensor_variable(floatX(c)) - self.lower = lower = tt.as_tensor_variable(floatX(lower)) - self.upper = upper = tt.as_tensor_variable(floatX(upper)) + self.median = self.mean = self.c = c = aet.as_tensor_variable(floatX(c)) + self.lower = lower = aet.as_tensor_variable(floatX(lower)) + self.upper = upper = aet.as_tensor_variable(floatX(upper)) super().__init__(lower=lower, upper=upper, *args, **kwargs) @@ -3639,7 +3643,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -3649,10 +3653,10 @@ def logp(self, value): lower = self.lower upper = self.upper return bound( - tt.switch( - tt.lt(value, c), - tt.log(2 * (value - lower) / ((upper - lower) * (c - lower))), - tt.log(2 * (upper - value) / ((upper - lower) * (upper - c))), + aet.switch( + aet.lt(value, c), + aet.log(2 * (value - lower) / ((upper - lower) * (c - lower))), + aet.log(2 * (upper - value) / ((upper - lower) * (upper - c))), ), lower <= value, value <= upper, @@ -3665,9 +3669,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. 
Returns ------- @@ -3677,15 +3681,15 @@ def logcdf(self, value): lower = self.lower upper = self.upper return bound( - tt.switch( - tt.le(value, lower), + aet.switch( + aet.le(value, lower), -np.inf, - tt.switch( - tt.le(value, c), - tt.log(((value - lower) ** 2) / ((upper - lower) * (c - lower))), - tt.switch( - tt.lt(value, upper), - tt.log1p(-((upper - value) ** 2) / ((upper - lower) * (upper - c))), + aet.switch( + aet.le(value, c), + aet.log(((value - lower) ** 2) / ((upper - lower) * (c - lower))), + aet.switch( + aet.lt(value, upper), + aet.log1p(-((upper - value) ** 2) / ((upper - lower) * (upper - c))), 0, ), ), @@ -3743,13 +3747,13 @@ class Gumbel(Continuous): """ def __init__(self, mu=0, beta=1.0, **kwargs): - self.mu = tt.as_tensor_variable(floatX(mu)) - self.beta = tt.as_tensor_variable(floatX(beta)) + self.mu = aet.as_tensor_variable(floatX(mu)) + self.beta = aet.as_tensor_variable(floatX(beta)) assert_negative_support(beta, "beta", "Gumbel") self.mean = self.mu + self.beta * np.euler_gamma - self.median = self.mu - self.beta * tt.log(tt.log(2)) + self.median = self.mu - self.beta * aet.log(aet.log(2)) self.mode = self.mu self.variance = (np.pi ** 2 / 6.0) * self.beta ** 2 @@ -3785,7 +3789,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -3795,7 +3799,7 @@ def logp(self, value): beta = self.beta scaled = (value - mu) / beta return bound( - -scaled - tt.exp(-scaled) - tt.log(self.beta), + -scaled - aet.exp(-scaled) - aet.log(self.beta), 0 < beta, ) @@ -3806,9 +3810,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. 
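As noted above, several values can be scored at once; a hedged, illustrative
example with arbitrary parameters:

>>> import numpy as np
>>> import pymc3 as pm
>>> pm.Gumbel.dist(mu=0.0, beta=1.0).logcdf(np.array([0.0, 1.0])).eval()

which evaluates ``-exp(-(value - mu) / beta)`` elementwise, roughly
``[-1.0, -0.368]``.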
Returns ------- @@ -3818,7 +3822,7 @@ def logcdf(self, value): mu = self.mu return bound( - -tt.exp(-(value - mu) / beta), + -aet.exp(-(value - mu) / beta), 0 < beta, ) @@ -3888,18 +3892,18 @@ def __init__(self, nu=None, sigma=None, b=None, sd=None, *args, **kwargs): sigma = sd nu, b, sigma = self.get_nu_b(nu, b, sigma) - self.nu = nu = tt.as_tensor_variable(floatX(nu)) - self.sigma = self.sd = sigma = tt.as_tensor_variable(floatX(sigma)) - self.b = b = tt.as_tensor_variable(floatX(b)) + self.nu = nu = aet.as_tensor_variable(floatX(nu)) + self.sigma = self.sd = sigma = aet.as_tensor_variable(floatX(sigma)) + self.b = b = aet.as_tensor_variable(floatX(b)) nu_sigma_ratio = -(nu ** 2) / (2 * sigma ** 2) self.mean = ( sigma * np.sqrt(np.pi / 2) - * tt.exp(nu_sigma_ratio / 2) + * aet.exp(nu_sigma_ratio / 2) * ( - (1 - nu_sigma_ratio) * tt.i0(-nu_sigma_ratio / 2) - - nu_sigma_ratio * tt.i1(-nu_sigma_ratio / 2) + (1 - nu_sigma_ratio) * aet.i0(-nu_sigma_ratio / 2) + - nu_sigma_ratio * aet.i1(-nu_sigma_ratio / 2) ) ) self.variance = ( @@ -3907,10 +3911,10 @@ def __init__(self, nu=None, sigma=None, b=None, sd=None, *args, **kwargs): + nu ** 2 - (np.pi * sigma ** 2 / 2) * ( - tt.exp(nu_sigma_ratio / 2) + aet.exp(nu_sigma_ratio / 2) * ( - (1 - nu_sigma_ratio) * tt.i0(-nu_sigma_ratio / 2) - - nu_sigma_ratio * tt.i1(-nu_sigma_ratio / 2) + (1 - nu_sigma_ratio) * aet.i0(-nu_sigma_ratio / 2) + - nu_sigma_ratio * aet.i1(-nu_sigma_ratio / 2) ) ) ** 2 @@ -3963,7 +3967,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -3974,7 +3978,7 @@ def logp(self, value): b = self.b x = value / sigma return bound( - tt.log(x * tt.exp((-(x - b) * (x - b)) / 2) * i0e(x * b) / sigma), + aet.log(x * aet.exp((-(x - b) * (x - b)) / 2) * i0e(x * b) / sigma), sigma >= 0, nu >= 0, value > 0, @@ -4030,8 +4034,8 @@ class Logistic(Continuous): def __init__(self, mu=0.0, s=1.0, *args, **kwargs): super().__init__(*args, **kwargs) - self.mu = tt.as_tensor_variable(floatX(mu)) - self.s = tt.as_tensor_variable(floatX(s)) + self.mu = aet.as_tensor_variable(floatX(mu)) + self.s = aet.as_tensor_variable(floatX(s)) self.mean = self.mode = mu self.variance = s ** 2 * np.pi ** 2 / 3.0 @@ -4067,7 +4071,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -4077,7 +4081,7 @@ def logp(self, value): s = self.s return bound( - -(value - mu) / s - tt.log(s) - 2 * tt.log1p(tt.exp(-(value - mu) / s)), + -(value - mu) / s - aet.log(s) - 2 * aet.log1p(aet.exp(-(value - mu) / s)), s > 0, ) @@ -4088,9 +4092,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. 
Returns ------- @@ -4151,10 +4155,10 @@ class LogitNormal(UnitContinuous): def __init__(self, mu=0, sigma=None, tau=None, sd=None, **kwargs): if sd is not None: sigma = sd - self.mu = mu = tt.as_tensor_variable(floatX(mu)) + self.mu = mu = aet.as_tensor_variable(floatX(mu)) tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) - self.sigma = self.sd = tt.as_tensor_variable(sigma) - self.tau = tau = tt.as_tensor_variable(tau) + self.sigma = self.sd = aet.as_tensor_variable(sigma) + self.tau = tau = aet.as_tensor_variable(tau) self.median = invlogit(mu) assert_negative_support(sigma, "sigma", "LogitNormal") @@ -4192,7 +4196,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -4202,8 +4206,8 @@ def logp(self, value): tau = self.tau return bound( -0.5 * tau * (logit(value) - mu) ** 2 - + 0.5 * tt.log(tau / (2.0 * np.pi)) - - tt.log(value * (1 - value)), + + 0.5 * aet.log(tau / (2.0 * np.pi)) + - aet.log(value * (1 - value)), value > 0, value < 1, tau > 0, @@ -4242,15 +4246,15 @@ class Interpolated(BoundedContinuous): """ def __init__(self, x_points, pdf_points, *args, **kwargs): - self.lower = lower = tt.as_tensor_variable(x_points[0]) - self.upper = upper = tt.as_tensor_variable(x_points[-1]) + self.lower = lower = aet.as_tensor_variable(x_points[0]) + self.upper = upper = aet.as_tensor_variable(x_points[-1]) super().__init__(lower=lower, upper=upper, *args, **kwargs) interp = InterpolatedUnivariateSpline(x_points, pdf_points, k=1, ext="zeros") Z = interp.integral(x_points[0], x_points[-1]) - self.Z = tt.as_tensor_variable(Z) + self.Z = aet.as_tensor_variable(Z) self.interp_op = SplineWrapper(interp) self.x_points = x_points self.pdf_points = pdf_points / Z @@ -4301,13 +4305,13 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- TensorVariable """ - return tt.log(self.interp_op(value) / self.Z) + return aet.log(self.interp_op(value) / self.Z) def _distr_parameters_for_repr(self): return [] @@ -4361,13 +4365,13 @@ class Moyal(Continuous): """ def __init__(self, mu=0, sigma=1.0, *args, **kwargs): - self.mu = tt.as_tensor_variable(floatX(mu)) - self.sigma = tt.as_tensor_variable(floatX(sigma)) + self.mu = aet.as_tensor_variable(floatX(mu)) + self.sigma = aet.as_tensor_variable(floatX(sigma)) assert_negative_support(sigma, "sigma", "Moyal") - self.mean = self.mu + self.sigma * (np.euler_gamma + tt.log(2)) - self.median = self.mu - self.sigma * tt.log(2 * tt.erfcinv(1 / 2) ** 2) + self.mean = self.mu + self.sigma * (np.euler_gamma + aet.log(2)) + self.median = self.mu - self.sigma * aet.log(2 * aet.erfcinv(1 / 2) ** 2) self.mode = self.mu self.variance = (np.pi ** 2 / 2.0) * self.sigma ** 2 @@ -4403,7 +4407,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. 
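The Interpolated distribution touched above builds a degree-one spline, normalizes it by its integral Z, and evaluates log(pdf(x) / Z). A minimal SciPy-only sketch of that idea (illustrative; the grid and density below are invented):

import numpy as np
from scipy.interpolate import InterpolatedUnivariateSpline

x_points = np.linspace(-2.0, 2.0, 50)
pdf_points = np.exp(-x_points ** 2)                   # arbitrary unnormalized density

interp = InterpolatedUnivariateSpline(x_points, pdf_points, k=1, ext="zeros")
Z = interp.integral(x_points[0], x_points[-1])        # normalizing constant

value = 0.3
logp = np.log(interp(value) / Z)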
If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -4413,7 +4417,11 @@ def logp(self, value): sigma = self.sigma scaled = (value - mu) / sigma return bound( - (-(1 / 2) * (scaled + tt.exp(-scaled)) - tt.log(sigma) - (1 / 2) * tt.log(2 * np.pi)), + ( + -(1 / 2) * (scaled + aet.exp(-scaled)) + - aet.log(sigma) + - (1 / 2) * aet.log(2 * np.pi) + ), 0 < sigma, ) @@ -4424,9 +4432,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. Returns ------- @@ -4437,6 +4445,6 @@ def logcdf(self, value): scaled = (value - mu) / sigma return bound( - tt.log(tt.erfc(tt.exp(-scaled / 2) * (2 ** -0.5))), + aet.log(aet.erfc(aet.exp(-scaled / 2) * (2 ** -0.5))), 0 < sigma, ) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 0bac6fd6b23..06cd504f403 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -14,11 +14,12 @@ import warnings +import aesara.tensor as aet import numpy as np -import theano.tensor as tt from scipy import stats +from pymc3.aesaraf import floatX, intX, take_along_axis from pymc3.distributions.dist_math import ( betaln, binomln, @@ -34,7 +35,6 @@ from pymc3.distributions.distribution import Discrete, draw_values, generate_samples from pymc3.distributions.shape_utils import broadcast_distribution_samples from pymc3.math import log1mexp, log1pexp, logaddexp, logit, logsumexp, sigmoid, tround -from pymc3.theanof import floatX, intX, take_along_axis __all__ = [ "Binomial", @@ -100,9 +100,9 @@ class Binomial(Discrete): def __init__(self, n, p, *args, **kwargs): super().__init__(*args, **kwargs) - self.n = n = tt.as_tensor_variable(intX(n)) - self.p = p = tt.as_tensor_variable(floatX(p)) - self.mode = tt.cast(tround(n * p), self.dtype) + self.n = n = aet.as_tensor_variable(intX(n)) + self.p = p = aet.as_tensor_variable(floatX(p)) + self.mode = aet.cast(tround(n * p), self.dtype) def random(self, point=None, size=None): r""" @@ -132,7 +132,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. 
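The Moyal log-CDF rewritten above evaluates log(erfc(exp(-z / 2) / sqrt(2))) for the standardized value z. A quick check against scipy.stats.moyal (illustrative only; mu and sigma are arbitrary, and scipy.stats.moyal requires a reasonably recent SciPy):

import numpy as np
from scipy import special, stats

mu, sigma = 0.5, 2.0                                  # arbitrary example parameters
x = np.linspace(-3.0, 8.0, 7)
z = (x - mu) / sigma

logcdf_closed_form = np.log(special.erfc(np.exp(-z / 2.0) / np.sqrt(2.0)))
logcdf_scipy = stats.moyal.logcdf(x, loc=mu, scale=sigma)

assert np.allclose(logcdf_closed_form, logcdf_scipy)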
If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -171,12 +171,12 @@ def logcdf(self, value): n = self.n p = self.p - value = tt.floor(value) + value = aet.floor(value) return bound( - tt.switch( - tt.lt(value, n), - tt.log(incomplete_beta(n - value, value + 1, 1 - p)), + aet.switch( + aet.lt(value, n), + aet.log(incomplete_beta(n - value, value + 1, 1 - p)), 0, ), 0 <= value, @@ -243,10 +243,10 @@ def BetaBinom(a, b, n, x): def __init__(self, alpha, beta, n, *args, **kwargs): super().__init__(*args, **kwargs) - self.alpha = alpha = tt.as_tensor_variable(floatX(alpha)) - self.beta = beta = tt.as_tensor_variable(floatX(beta)) - self.n = n = tt.as_tensor_variable(intX(n)) - self.mode = tt.cast(tround(alpha / (alpha + beta)), "int8") + self.alpha = alpha = aet.as_tensor_variable(floatX(alpha)) + self.beta = beta = aet.as_tensor_variable(floatX(beta)) + self.n = n = aet.as_tensor_variable(intX(n)) + self.mode = aet.cast(tround(alpha / (alpha + beta)), "int8") def _random(self, alpha, beta, n, size=None): size = size or () @@ -300,7 +300,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -340,12 +340,12 @@ def logcdf(self, value): alpha = self.alpha beta = self.beta n = self.n - safe_lower = tt.switch(tt.lt(value, 0), value, 0) + safe_lower = aet.switch(aet.lt(value, 0), value, 0) return bound( - tt.switch( - tt.lt(value, n), - logsumexp(self.logp(tt.arange(safe_lower, value + 1)), keepdims=False), + aet.switch( + aet.lt(value, n), + logsumexp(self.logp(aet.arange(safe_lower, value + 1)), keepdims=False), 0, ), 0 <= value, @@ -401,14 +401,14 @@ def __init__(self, p=None, logit_p=None, *args, **kwargs): raise ValueError("Specify one of p and logit_p") if p is not None: self._is_logit = False - self.p = p = tt.as_tensor_variable(floatX(p)) + self.p = p = aet.as_tensor_variable(floatX(p)) self._logit_p = logit(p) else: self._is_logit = True - self.p = tt.nnet.sigmoid(floatX(logit_p)) - self._logit_p = tt.as_tensor_variable(logit_p) + self.p = aet.nnet.sigmoid(floatX(logit_p)) + self._logit_p = aet.as_tensor_variable(logit_p) - self.mode = tt.cast(tround(self.p), "int8") + self.mode = aet.cast(tround(self.p), "int8") def random(self, point=None, size=None): r""" @@ -438,19 +438,23 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. 
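The Binomial log-CDF above (for value < n) goes through incomplete_beta, relying on the identity P(X <= k) = I_{1-p}(n - k, k + 1). SciPy's regularized incomplete beta confirms the identity numerically (illustrative only; n and p are arbitrary):

import numpy as np
from scipy import special, stats

n, p = 10, 0.35                                       # arbitrary example parameters
k = np.arange(0, n)                                   # the value < n branch of the switch

cdf_identity = special.betainc(n - k, k + 1, 1 - p)
cdf_scipy = stats.binom.cdf(k, n, p)

assert np.allclose(cdf_identity, cdf_scipy)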
If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- TensorVariable """ if self._is_logit: - lp = tt.switch(value, self._logit_p, -self._logit_p) + lp = aet.switch(value, self._logit_p, -self._logit_p) return -log1pexp(-lp) else: p = self.p return bound( - tt.switch(value, tt.log(p), tt.log(1 - p)), value >= 0, value <= 1, p >= 0, p <= 1 + aet.switch(value, aet.log(p), aet.log(1 - p)), + value >= 0, + value <= 1, + p >= 0, + p <= 1, ) def logcdf(self, value): @@ -460,9 +464,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. Returns ------- @@ -471,9 +475,9 @@ def logcdf(self, value): p = self.p return bound( - tt.switch( - tt.lt(value, 1), - tt.log1p(-p), + aet.switch( + aet.lt(value, 1), + aet.log1p(-p), 0, ), 0 <= value, @@ -527,8 +531,8 @@ def DiscreteWeibull(q, b, x): def __init__(self, q, beta, *args, **kwargs): super().__init__(*args, defaults=("median",), **kwargs) - self.q = tt.as_tensor_variable(floatX(q)) - self.beta = tt.as_tensor_variable(floatX(beta)) + self.q = aet.as_tensor_variable(floatX(q)) + self.beta = aet.as_tensor_variable(floatX(beta)) self.median = self._ppf(0.5) @@ -540,7 +544,7 @@ def _ppf(self, p): q = self.q beta = self.beta - return (tt.ceil(tt.power(tt.log(1 - p) / tt.log(q), 1.0 / beta)) - 1).astype("int64") + return (aet.ceil(aet.power(aet.log(1 - p) / aet.log(q), 1.0 / beta)) - 1).astype("int64") def _random(self, q, beta, size=None): p = np.random.uniform(size=size) @@ -576,7 +580,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -585,7 +589,9 @@ def logp(self, value): q = self.q beta = self.beta return bound( - tt.log(tt.power(q, tt.power(value, beta)) - tt.power(q, tt.power(value + 1, beta))), + aet.log( + aet.power(q, aet.power(value, beta)) - aet.power(q, aet.power(value + 1, beta)) + ), 0 <= value, 0 < q, q < 1, @@ -599,9 +605,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. 
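The logit-parametrized Bernoulli branch above computes -log1pexp(-lp), with lp equal to logit_p for successes and -logit_p for failures, i.e. log sigmoid(eta) and log(1 - sigmoid(eta)). A NumPy check (illustrative; the logits are arbitrary and small enough that the naive expressions do not overflow):

import numpy as np

eta = np.array([-3.0, -0.5, 0.0, 2.0])                # arbitrary example logits
p = 1.0 / (1.0 + np.exp(-eta))

logp_one = -np.log1p(np.exp(-eta))                    # value = 1: log sigmoid(eta)
logp_zero = -np.log1p(np.exp(eta))                    # value = 0: log(1 - sigmoid(eta))

assert np.allclose(logp_one, np.log(p))
assert np.allclose(logp_zero, np.log(1.0 - p))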
Returns ------- @@ -611,7 +617,7 @@ def logcdf(self, value): beta = self.beta return bound( - tt.log1p(-tt.power(q, tt.power(value + 1, beta))), + aet.log1p(-aet.power(q, aet.power(value + 1, beta))), 0 <= value, 0 < q, q < 1, @@ -665,8 +671,8 @@ class Poisson(Discrete): def __init__(self, mu, *args, **kwargs): super().__init__(*args, **kwargs) - self.mu = mu = tt.as_tensor_variable(floatX(mu)) - self.mode = intX(tt.floor(mu)) + self.mu = mu = aet.as_tensor_variable(floatX(mu)) + self.mode = intX(aet.floor(mu)) def random(self, point=None, size=None): r""" @@ -696,7 +702,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -705,7 +711,7 @@ def logp(self, value): mu = self.mu log_prob = bound(logpow(mu, value) - factln(value) - mu, mu >= 0, value >= 0) # Return zero when mu and value are both zero - return tt.switch(tt.eq(mu, 0) * tt.eq(value, 0), 0, log_prob) + return aet.switch(aet.eq(mu, 0) * aet.eq(value, 0), 0, log_prob) def logcdf(self, value): """ @@ -714,22 +720,22 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. Returns ------- TensorVariable """ mu = self.mu - value = tt.floor(value) + value = aet.floor(value) # Avoid C-assertion when the gammaincc function is called with invalid values (#4340) - safe_mu = tt.switch(tt.lt(mu, 0), 0, mu) - safe_value = tt.switch(tt.lt(value, 0), 0, value) + safe_mu = aet.switch(aet.lt(mu, 0), 0, mu) + safe_value = aet.switch(aet.lt(value, 0), 0, value) return bound( - tt.log(tt.gammaincc(safe_value + 1, safe_mu)), + aet.log(aet.gammaincc(safe_value + 1, safe_mu)), 0 <= value, 0 <= mu, ) @@ -800,16 +806,16 @@ def NegBinom(a, m, x): def __init__(self, mu=None, alpha=None, p=None, n=None, *args, **kwargs): super().__init__(*args, **kwargs) mu, alpha = self.get_mu_alpha(mu, alpha, p, n) - self.mu = mu = tt.as_tensor_variable(floatX(mu)) - self.alpha = alpha = tt.as_tensor_variable(floatX(alpha)) - self.mode = intX(tt.floor(mu)) + self.mu = mu = aet.as_tensor_variable(floatX(mu)) + self.alpha = alpha = aet.as_tensor_variable(floatX(alpha)) + self.mode = intX(aet.floor(mu)) def get_mu_alpha(self, mu=None, alpha=None, p=None, n=None): self._param_type = ["mu", "alpha"] if alpha is None: if n is not None: self._param_type[1] = "n" - self.n = tt.as_tensor_variable(intX(n)) + self.n = aet.as_tensor_variable(intX(n)) alpha = n else: raise ValueError("Incompatible parametrization. Must specify either alpha or n.") @@ -819,7 +825,7 @@ def get_mu_alpha(self, mu=None, alpha=None, p=None, n=None): if mu is None: if p is not None: self._param_type[0] = "p" - self.p = tt.as_tensor_variable(floatX(p)) + self.p = aet.as_tensor_variable(floatX(p)) mu = alpha * (1 - p) / p else: raise ValueError("Incompatible parametrization. Must specify either mu or p.") @@ -870,7 +876,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. 
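The Poisson log-CDF above uses the regularized upper incomplete gamma function, via the identity P(X <= k) = Q(k + 1, mu). SciPy reproduces it (illustrative only; mu is an arbitrary rate):

import numpy as np
from scipy import special, stats

mu = 3.2                                              # arbitrary example rate
k = np.arange(0, 12)

cdf_identity = special.gammaincc(k + 1, mu)
cdf_scipy = stats.poisson.cdf(k, mu)

assert np.allclose(cdf_identity, cdf_scipy)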
If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -888,7 +894,7 @@ def logp(self, value): ) # Return Poisson when alpha gets very large. - return tt.switch(tt.gt(alpha, 1e10), Poisson.dist(self.mu).logp(value), negbinom) + return aet.switch(aet.gt(alpha, 1e10), Poisson.dist(self.mu).logp(value), negbinom) def logcdf(self, value): """ @@ -915,7 +921,7 @@ def logcdf(self, value): p = alpha / (self.mu + alpha) return bound( - tt.log(incomplete_beta(alpha, tt.floor(value) + 1, p)), + aet.log(incomplete_beta(alpha, aet.floor(value) + 1, p)), 0 <= value, 0 < alpha, 0 <= p, @@ -965,7 +971,7 @@ class Geometric(Discrete): def __init__(self, p, *args, **kwargs): super().__init__(*args, **kwargs) - self.p = p = tt.as_tensor_variable(floatX(p)) + self.p = p = aet.as_tensor_variable(floatX(p)) self.mode = 1 def random(self, point=None, size=None): @@ -996,14 +1002,14 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- TensorVariable """ p = self.p - return bound(tt.log(p) + logpow(1 - p, value - 1), 0 <= p, p <= 1, value >= 1) + return bound(aet.log(p) + logpow(1 - p, value - 1), 0 <= p, p <= 1, value >= 1) def logcdf(self, value): """ @@ -1012,9 +1018,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. Returns ------- @@ -1023,7 +1029,7 @@ def logcdf(self, value): p = self.p return bound( - log1mexp(-tt.log1p(-p) * value), + log1mexp(-aet.log1p(-p) * value), 0 <= value, 0 <= p, p <= 1, @@ -1081,7 +1087,7 @@ def __init__(self, N, k, n, *args, **kwargs): self.N = intX(N) self.k = intX(k) self.n = intX(n) - self.mode = intX(tt.floor((n + 1) * (k + 1) / (N + 2))) + self.mode = intX(aet.floor((n + 1) * (k + 1) / (N + 2))) def random(self, point=None, size=None): r""" @@ -1120,7 +1126,7 @@ def logp(self, value): ---------- value : numeric Value(s) for which log-probability is calculated. 
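The Geometric log-CDF a few hunks above evaluates log(1 - (1 - p)**k) through log1mexp. With the support starting at k = 1, this matches scipy.stats.geom (illustrative only; p is arbitrary):

import numpy as np
from scipy import stats

p = 0.25                                              # arbitrary example success probability
k = np.arange(1, 10)

cdf_closed_form = 1.0 - (1.0 - p) ** k
cdf_scipy = stats.geom.cdf(k, p)

assert np.allclose(cdf_closed_form, cdf_scipy)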
If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -1140,8 +1146,8 @@ def logp(self, value): - betaln(tot + 1, 1) ) # value in [max(0, n - N + k), min(k, n)] - lower = tt.switch(tt.gt(n - N + k, 0), n - N + k, 0) - upper = tt.switch(tt.lt(k, n), k, n) + lower = aet.switch(aet.gt(n - N + k, 0), n - N + k, 0) + upper = aet.switch(aet.lt(k, n), k, n) return bound(result, lower <= value, value <= upper) def logcdf(self, value): @@ -1168,12 +1174,12 @@ def logcdf(self, value): N = self.N n = self.n k = self.k - safe_lower = tt.switch(tt.lt(value, 0), value, 0) + safe_lower = aet.switch(aet.lt(value, 0), value, 0) return bound( - tt.switch( - tt.lt(value, n), - logsumexp(self.logp(tt.arange(safe_lower, value + 1)), keepdims=False), + aet.switch( + aet.lt(value, n), + logsumexp(self.logp(aet.arange(safe_lower, value + 1)), keepdims=False), 0, ), 0 <= value, @@ -1226,9 +1232,9 @@ class DiscreteUniform(Discrete): def __init__(self, lower, upper, *args, **kwargs): super().__init__(*args, **kwargs) - self.lower = intX(tt.floor(lower)) - self.upper = intX(tt.floor(upper)) - self.mode = tt.maximum(intX(tt.floor((upper + lower) / 2.0)), self.lower) + self.lower = intX(aet.floor(lower)) + self.upper = intX(aet.floor(upper)) + self.mode = aet.maximum(intX(aet.floor((upper + lower) / 2.0)), self.lower) def _random(self, lower, upper, size=None): # This way seems to be the only to deal with lower and upper @@ -1264,7 +1270,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -1272,7 +1278,7 @@ def logp(self, value): """ upper = self.upper lower = self.lower - return bound(-tt.log(upper - lower + 1), lower <= value, value <= upper) + return bound(-aet.log(upper - lower + 1), lower <= value, value <= upper) def logcdf(self, value): """ @@ -1281,9 +1287,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. 
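The HyperGeometric log-CDF above (like the BetaBinomial one) accumulates the pmf up to `value` in log space with logsumexp rather than summing probabilities directly. The same pattern in SciPy (illustrative only; the population, success, and draw counts are arbitrary):

import numpy as np
from scipy import stats
from scipy.special import logsumexp

N, k, n = 50, 10, 5                                   # population, successes, draws (arbitrary)
value = 3

logcdf = logsumexp(stats.hypergeom.logpmf(np.arange(0, value + 1), N, k, n))
assert np.isclose(logcdf, stats.hypergeom.logcdf(value, N, k, n))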
Returns ------- @@ -1293,9 +1299,10 @@ def logcdf(self, value): lower = self.lower return bound( - tt.switch( - tt.lt(value, upper), - tt.log(tt.minimum(tt.floor(value), upper) - lower + 1) - tt.log(upper - lower + 1), + aet.switch( + aet.lt(value, upper), + aet.log(aet.minimum(aet.floor(value), upper) - lower + 1) + - aet.log(upper - lower + 1), 0, ), lower <= value, @@ -1341,17 +1348,17 @@ class Categorical(Discrete): def __init__(self, p, *args, **kwargs): super().__init__(*args, **kwargs) try: - self.k = tt.shape(p)[-1].tag.test_value + self.k = aet.shape(p)[-1].tag.test_value except AttributeError: - self.k = tt.shape(p)[-1] - p = tt.as_tensor_variable(floatX(p)) + self.k = aet.shape(p)[-1] + p = aet.as_tensor_variable(floatX(p)) # From #2082, it may be dangerous to automatically rescale p at this # point without checking for positiveness self.p = p - self.mode = tt.argmax(p, axis=-1) + self.mode = aet.argmax(p, axis=-1) if self.mode.ndim == 1: - self.mode = tt.squeeze(self.mode) + self.mode = aet.squeeze(self.mode) def random(self, point=None, size=None): r""" @@ -1389,7 +1396,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -1399,27 +1406,27 @@ def logp(self, value): k = self.k # Clip values before using them for indexing - value_clip = tt.clip(value, 0, k - 1) + value_clip = aet.clip(value, 0, k - 1) - p = p_ / tt.sum(p_, axis=-1, keepdims=True) + p = p_ / aet.sum(p_, axis=-1, keepdims=True) if p.ndim > 1: if p.ndim > value_clip.ndim: - value_clip = tt.shape_padleft(value_clip, p_.ndim - value_clip.ndim) + value_clip = aet.shape_padleft(value_clip, p_.ndim - value_clip.ndim) elif p.ndim < value_clip.ndim: - p = tt.shape_padleft(p, value_clip.ndim - p_.ndim) + p = aet.shape_padleft(p, value_clip.ndim - p_.ndim) pattern = (p.ndim - 1,) + tuple(range(p.ndim - 1)) - a = tt.log( + a = aet.log( take_along_axis( p.dimshuffle(pattern), value_clip, ) ) else: - a = tt.log(p[value_clip]) + a = aet.log(p[value_clip]) return bound( - a, value >= 0, value <= (k - 1), tt.all(p_ >= 0, axis=-1), tt.all(p <= 1, axis=-1) + a, value >= 0, value <= (k - 1), aet.all(p_ >= 0, axis=-1), aet.all(p <= 1, axis=-1) ) @@ -1439,7 +1446,7 @@ def __init__(self, c, *args, **kwargs): DeprecationWarning, ) super().__init__(*args, **kwargs) - self.mean = self.median = self.mode = self.c = c = tt.as_tensor_variable(c) + self.mean = self.median = self.mode = self.c = c = aet.as_tensor_variable(c) def random(self, point=None, size=None): r""" @@ -1474,14 +1481,14 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. 
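Categorical.logp above normalizes p along its last axis and then looks up log(p) at the observed category with a take_along_axis-style indexing (the diff's dimshuffle handling is more general than this). A plain NumPy sketch of the idea (illustrative; the probabilities and observations are invented):

import numpy as np

p = np.array([[0.2, 0.5, 0.3],
              [0.1, 0.1, 0.8]])                       # a batch of two categorical rows
value = np.array([1, 2])                              # observed category per row

p_norm = p / p.sum(axis=-1, keepdims=True)
logp = np.log(np.take_along_axis(p_norm, value[:, None], axis=-1)).squeeze(-1)

assert np.allclose(logp, [np.log(0.5), np.log(0.8)])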
If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- TensorVariable """ c = self.c - return bound(0, tt.eq(value, c)) + return bound(0, aet.eq(value, c)) ConstantDist = Constant @@ -1539,8 +1546,8 @@ class ZeroInflatedPoisson(Discrete): def __init__(self, psi, theta, *args, **kwargs): super().__init__(*args, **kwargs) - self.theta = theta = tt.as_tensor_variable(floatX(theta)) - self.psi = tt.as_tensor_variable(floatX(psi)) + self.theta = theta = aet.as_tensor_variable(floatX(theta)) + self.psi = aet.as_tensor_variable(floatX(psi)) self.pois = Poisson.dist(theta) self.mode = self.pois.mode @@ -1574,7 +1581,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -1583,10 +1590,10 @@ def logp(self, value): psi = self.psi theta = self.theta - logp_val = tt.switch( - tt.gt(value, 0), - tt.log(psi) + self.pois.logp(value), - logaddexp(tt.log1p(-psi), tt.log(psi) - theta), + logp_val = aet.switch( + aet.gt(value, 0), + aet.log(psi) + self.pois.logp(value), + logaddexp(aet.log1p(-psi), aet.log(psi) - theta), ) return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, 0 <= theta) @@ -1598,9 +1605,9 @@ def logcdf(self, value): Parameters ---------- - value: numeric or np.ndarray or theano.tensor + value: numeric or np.ndarray or aesara.tensor Value(s) for which log CDF is calculated. If the log CDF for multiple - values are desired the values must be provided in a numpy array or theano tensor. + values are desired the values must be provided in a numpy array or aesara tensor. Returns ------- @@ -1609,7 +1616,7 @@ def logcdf(self, value): psi = self.psi return bound( - logaddexp(tt.log1p(-psi), tt.log(psi) + self.pois.logcdf(value)), + logaddexp(aet.log1p(-psi), aet.log(psi) + self.pois.logcdf(value)), 0 <= value, 0 <= psi, psi <= 1, @@ -1669,9 +1676,9 @@ class ZeroInflatedBinomial(Discrete): def __init__(self, psi, n, p, *args, **kwargs): super().__init__(*args, **kwargs) - self.n = n = tt.as_tensor_variable(intX(n)) - self.p = p = tt.as_tensor_variable(floatX(p)) - self.psi = psi = tt.as_tensor_variable(floatX(psi)) + self.n = n = aet.as_tensor_variable(intX(n)) + self.p = p = aet.as_tensor_variable(floatX(p)) + self.psi = psi = aet.as_tensor_variable(floatX(psi)) self.bin = Binomial.dist(n, p) self.mode = self.bin.mode @@ -1705,7 +1712,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. 
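ZeroInflatedPoisson.logp above is a two-component mixture: log(psi) plus the Poisson term for positive counts, and logaddexp(log1p(-psi), log(psi) - theta) at zero. A NumPy/SciPy check that this equals the log of the mixture pmf (illustrative only; psi and theta are arbitrary):

import numpy as np
from scipy import stats

psi, theta = 0.7, 2.5                                 # arbitrary example parameters
k = np.arange(0, 8)

pmf_mixture = (1 - psi) * (k == 0) + psi * stats.poisson.pmf(k, theta)
logp_branches = np.where(
    k > 0,
    np.log(psi) + stats.poisson.logpmf(k, theta),
    np.logaddexp(np.log1p(-psi), np.log(psi) - theta),
)

assert np.allclose(logp_branches, np.log(pmf_mixture))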
If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -1715,10 +1722,10 @@ def logp(self, value): p = self.p n = self.n - logp_val = tt.switch( - tt.gt(value, 0), - tt.log(psi) + self.bin.logp(value), - logaddexp(tt.log1p(-psi), tt.log(psi) + n * tt.log1p(-p)), + logp_val = aet.switch( + aet.gt(value, 0), + aet.log(psi) + self.bin.logp(value), + logaddexp(aet.log1p(-psi), aet.log(psi) + n * aet.log1p(-p)), ) return bound(logp_val, 0 <= value, value <= n, 0 <= psi, psi <= 1, 0 <= p, p <= 1) @@ -1746,7 +1753,7 @@ def logcdf(self, value): psi = self.psi return bound( - logaddexp(tt.log1p(-psi), tt.log(psi) + self.bin.logcdf(value)), + logaddexp(aet.log1p(-psi), aet.log(psi) + self.bin.logcdf(value)), 0 <= value, 0 <= psi, psi <= 1, @@ -1823,9 +1830,9 @@ def ZeroInfNegBinom(a, m, psi, x): def __init__(self, psi, mu, alpha, *args, **kwargs): super().__init__(*args, **kwargs) - self.mu = mu = tt.as_tensor_variable(floatX(mu)) - self.alpha = alpha = tt.as_tensor_variable(floatX(alpha)) - self.psi = psi = tt.as_tensor_variable(floatX(psi)) + self.mu = mu = aet.as_tensor_variable(floatX(mu)) + self.alpha = alpha = aet.as_tensor_variable(floatX(alpha)) + self.psi = psi = aet.as_tensor_variable(floatX(psi)) self.nb = NegativeBinomial.dist(mu, alpha) self.mode = self.nb.mode @@ -1872,7 +1879,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -1882,12 +1889,12 @@ def logp(self, value): mu = self.mu psi = self.psi - logp_other = tt.log(psi) + self.nb.logp(value) + logp_other = aet.log(psi) + self.nb.logp(value) logp_0 = logaddexp( - tt.log1p(-psi), tt.log(psi) + alpha * (tt.log(alpha) - tt.log(alpha + mu)) + aet.log1p(-psi), aet.log(psi) + alpha * (aet.log(alpha) - aet.log(alpha + mu)) ) - logp_val = tt.switch(tt.gt(value, 0), logp_other, logp_0) + logp_val = aet.switch(aet.gt(value, 0), logp_other, logp_0) return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, mu > 0, alpha > 0) @@ -1913,7 +1920,7 @@ def logcdf(self, value): psi = self.psi return bound( - logaddexp(tt.log1p(-psi), tt.log(psi) + self.nb.logcdf(value)), + logaddexp(aet.log1p(-psi), aet.log(psi) + self.nb.logcdf(value)), 0 <= value, 0 <= psi, psi <= 1, @@ -1987,15 +1994,15 @@ class OrderedLogistic(Categorical): """ def __init__(self, eta, cutpoints, *args, **kwargs): - self.eta = tt.as_tensor_variable(floatX(eta)) - self.cutpoints = tt.as_tensor_variable(cutpoints) + self.eta = aet.as_tensor_variable(floatX(eta)) + self.cutpoints = aet.as_tensor_variable(cutpoints) - pa = sigmoid(self.cutpoints - tt.shape_padright(self.eta)) - p_cum = tt.concatenate( + pa = sigmoid(self.cutpoints - aet.shape_padright(self.eta)) + p_cum = aet.concatenate( [ - tt.zeros_like(tt.shape_padright(pa[..., 0])), + aet.zeros_like(aet.shape_padright(pa[..., 0])), pa, - tt.ones_like(tt.shape_padright(pa[..., 0])), + aet.ones_like(aet.shape_padright(pa[..., 0])), ], axis=-1, ) @@ -2076,23 +2083,23 @@ class OrderedProbit(Categorical): def __init__(self, eta, cutpoints, *args, **kwargs): - self.eta = tt.as_tensor_variable(floatX(eta)) - self.cutpoints = tt.as_tensor_variable(cutpoints) + self.eta = aet.as_tensor_variable(floatX(eta)) 
+ self.cutpoints = aet.as_tensor_variable(cutpoints) - probits = tt.shape_padright(self.eta) - self.cutpoints - _log_p = tt.concatenate( + probits = aet.shape_padright(self.eta) - self.cutpoints + _log_p = aet.concatenate( [ - tt.shape_padright(normal_lccdf(0, 1, probits[..., 0])), + aet.shape_padright(normal_lccdf(0, 1, probits[..., 0])), log_diff_normal_cdf(0, 1, probits[..., :-1], probits[..., 1:]), - tt.shape_padright(normal_lcdf(0, 1, probits[..., -1])), + aet.shape_padright(normal_lcdf(0, 1, probits[..., -1])), ], axis=-1, ) - _log_p = tt.as_tensor_variable(floatX(_log_p)) + _log_p = aet.as_tensor_variable(floatX(_log_p)) self._log_p = _log_p - self.mode = tt.argmax(_log_p, axis=-1) - p = tt.exp(_log_p) + self.mode = aet.argmax(_log_p, axis=-1) + p = aet.exp(_log_p) super().__init__(p=p, *args, **kwargs) @@ -2104,7 +2111,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -2114,13 +2121,13 @@ def logp(self, value): k = self.k # Clip values before using them for indexing - value_clip = tt.clip(value, 0, k - 1) + value_clip = aet.clip(value, 0, k - 1) if logp.ndim > 1: if logp.ndim > value_clip.ndim: - value_clip = tt.shape_padleft(value_clip, logp.ndim - value_clip.ndim) + value_clip = aet.shape_padleft(value_clip, logp.ndim - value_clip.ndim) elif logp.ndim < value_clip.ndim: - logp = tt.shape_padleft(logp, value_clip.ndim - logp.ndim) + logp = aet.shape_padleft(logp, value_clip.ndim - logp.ndim) pattern = (logp.ndim - 1,) + tuple(range(logp.ndim - 1)) a = take_along_axis( logp.dimshuffle(pattern), diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py index 70877722271..e154e016f34 100644 --- a/pymc3/distributions/dist_math.py +++ b/pymc3/distributions/dist_math.py @@ -19,24 +19,25 @@ """ import platform +import aesara +import aesara.tensor as aet import numpy as np import scipy.linalg import scipy.stats -import theano -import theano.tensor as tt -from theano import scan -from theano.compile.builders import OpFromGraph -from theano.graph.basic import Apply -from theano.graph.op import Op -from theano.scalar import UnaryScalarOp, upgrade_to_float_no_complex -from theano.scan import until -from theano.tensor.slinalg import Cholesky +from aesara import scan +from aesara.compile.builders import OpFromGraph +from aesara.graph.basic import Apply +from aesara.graph.op import Op +from aesara.scalar import UnaryScalarOp, upgrade_to_float_no_complex +from aesara.scan import until +from aesara.tensor.elemwise import Elemwise +from aesara.tensor.slinalg import Cholesky, Solve +from pymc3.aesaraf import floatX from pymc3.distributions.shape_utils import to_tuple from pymc3.distributions.special import gammaln from pymc3.model import modelcontext -from pymc3.theanof import floatX f = floatX c = -0.5 * np.log(2.0 * np.pi) @@ -86,7 +87,7 @@ def bound(logp, *conditions, **kwargs): else: alltrue = alltrue_scalar - return tt.switch(alltrue(conditions), logp, -np.inf) + return aet.switch(alltrue(conditions), logp, -np.inf) def alltrue_elemwise(vals): @@ -97,7 +98,7 @@ def alltrue_elemwise(vals): def alltrue_scalar(vals): - return tt.all([tt.all(1 * val) for val in vals]) + return aet.all([aet.all(1 * val) for val in vals]) def logpow(x, m): @@ -105,7 +106,7 @@ def logpow(x, m): Calculates log(x**m) since 
m*log(x) will fail when m, x = 0. """ # return m * log(x) - return tt.switch(tt.eq(x, 0), tt.switch(tt.eq(m, 0), 0.0, -np.inf), m * tt.log(x)) + return aet.switch(aet.eq(x, 0), aet.switch(aet.eq(m, 0), 0.0, -np.inf), m * aet.log(x)) def factln(n): @@ -124,25 +125,25 @@ def std_cdf(x): """ Calculates the standard normal cumulative distribution function. """ - return 0.5 + 0.5 * tt.erf(x / tt.sqrt(2.0)) + return 0.5 + 0.5 * aet.erf(x / aet.sqrt(2.0)) def normal_lcdf(mu, sigma, x): """Compute the log of the cumulative density function of the normal.""" z = (x - mu) / sigma - return tt.switch( - tt.lt(z, -1.0), - tt.log(tt.erfcx(-z / tt.sqrt(2.0)) / 2.0) - tt.sqr(z) / 2.0, - tt.log1p(-tt.erfc(z / tt.sqrt(2.0)) / 2.0), + return aet.switch( + aet.lt(z, -1.0), + aet.log(aet.erfcx(-z / aet.sqrt(2.0)) / 2.0) - aet.sqr(z) / 2.0, + aet.log1p(-aet.erfc(z / aet.sqrt(2.0)) / 2.0), ) def normal_lccdf(mu, sigma, x): z = (x - mu) / sigma - return tt.switch( - tt.gt(z, 1.0), - tt.log(tt.erfcx(z / tt.sqrt(2.0)) / 2.0) - tt.sqr(z) / 2.0, - tt.log1p(-tt.erfc(-z / tt.sqrt(2.0)) / 2.0), + return aet.switch( + aet.gt(z, 1.0), + aet.log(aet.erfcx(z / aet.sqrt(2.0)) / 2.0) - aet.sqr(z) / 2.0, + aet.log1p(-aet.erfc(-z / aet.sqrt(2.0)) / 2.0), ) @@ -167,37 +168,38 @@ def log_diff_normal_cdf(mu, sigma, x, y): log (\\Phi(x) - \\Phi(y)) """ - x = (x - mu) / sigma / tt.sqrt(2.0) - y = (y - mu) / sigma / tt.sqrt(2.0) + x = (x - mu) / sigma / aet.sqrt(2.0) + y = (y - mu) / sigma / aet.sqrt(2.0) # To stabilize the computation, consider these three regions: # 1) x > y > 0 => Use erf(x) = 1 - e^{-x^2} erfcx(x) and erf(y) =1 - e^{-y^2} erfcx(y) # 2) 0 > x > y => Use erf(x) = e^{-x^2} erfcx(-x) and erf(y) = e^{-y^2} erfcx(-y) # 3) x > 0 > y => Naive formula log( (erf(x) - erf(y)) / 2 ) works fine. - return tt.log(0.5) + tt.switch( - tt.gt(y, 0), - -tt.square(y) + tt.log(tt.erfcx(y) - tt.exp(tt.square(y) - tt.square(x)) * tt.erfcx(x)), - tt.switch( - tt.lt(x, 0), # 0 > x > y - -tt.square(x) - + tt.log(tt.erfcx(-x) - tt.exp(tt.square(x) - tt.square(y)) * tt.erfcx(-y)), - tt.log(tt.erf(x) - tt.erf(y)), # x >0 > y + return aet.log(0.5) + aet.switch( + aet.gt(y, 0), + -aet.square(y) + + aet.log(aet.erfcx(y) - aet.exp(aet.square(y) - aet.square(x)) * aet.erfcx(x)), + aet.switch( + aet.lt(x, 0), # 0 > x > y + -aet.square(x) + + aet.log(aet.erfcx(-x) - aet.exp(aet.square(x) - aet.square(y)) * aet.erfcx(-y)), + aet.log(aet.erf(x) - aet.erf(y)), # x >0 > y ), ) def sigma2rho(sigma): """ - `sigma -> rho` theano converter + `sigma -> rho` aesara converter :math:`mu + sigma*e = mu + log(1+exp(rho))*e`""" - return tt.log(tt.exp(tt.abs_(sigma)) - 1.0) + return aet.log(aet.exp(aet.abs_(sigma)) - 1.0) def rho2sigma(rho): """ - `rho -> sigma` theano converter + `rho -> sigma` aesara converter :math:`mu + sigma*e = mu + log(1+exp(rho))*e`""" - return tt.nnet.softplus(rho) + return aet.nnet.softplus(rho) rho2sd = rho2sigma @@ -240,13 +242,13 @@ def log_normal(x, mean, **kwargs): if sigma is not None: std = sigma elif w is not None: - std = tt.exp(w) + std = aet.exp(w) elif rho is not None: std = rho2sigma(rho) else: std = tau ** (-1) std += f(eps) - return f(c) - tt.log(tt.abs_(std)) - (x - mean) ** 2 / (2.0 * std ** 2) + return f(c) - aet.log(aet.abs_(std)) - (x - mean) ** 2 / (2.0 * std ** 2) def MvNormalLogp(): @@ -256,34 +258,34 @@ def MvNormalLogp(): Parameters ---------- - cov: tt.matrix + cov: aet.matrix The covariance matrix. - delta: tt.matrix + delta: aet.matrix Array of deviations from the mean. 
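normal_lcdf above switches to the scaled complementary error function erfcx in the left tail so the log-CDF stays finite far below the mean. The same two-branch expression in NumPy/SciPy reproduces scipy.stats.norm.logcdf (illustrative only; the evaluation points are arbitrary):

import numpy as np
from scipy import special, stats

mu, sigma = 0.0, 1.0
x = np.array([-30.0, -8.0, -1.5, 0.0, 2.0])
z = (x - mu) / sigma

logcdf_stable = np.where(
    z < -1.0,
    np.log(special.erfcx(-z / np.sqrt(2.0)) / 2.0) - z ** 2 / 2.0,
    np.log1p(-special.erfc(z / np.sqrt(2.0)) / 2.0),
)

assert np.allclose(logcdf_stable, stats.norm.logcdf(x, loc=mu, scale=sigma))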
""" - cov = tt.matrix("cov") + cov = aet.matrix("cov") cov.tag.test_value = floatX(np.eye(3)) - delta = tt.matrix("delta") + delta = aet.matrix("delta") delta.tag.test_value = floatX(np.zeros((2, 3))) - solve_lower = tt.slinalg.Solve(A_structure="lower_triangular") - solve_upper = tt.slinalg.Solve(A_structure="upper_triangular") + solve_lower = Solve(A_structure="lower_triangular") + solve_upper = Solve(A_structure="upper_triangular") cholesky = Cholesky(lower=True, on_error="nan") n, k = delta.shape n, k = f(n), f(k) chol_cov = cholesky(cov) - diag = tt.nlinalg.diag(chol_cov) - ok = tt.all(diag > 0) + diag = aet.nlinalg.diag(chol_cov) + ok = aet.all(diag > 0) - chol_cov = tt.switch(ok, chol_cov, tt.fill(chol_cov, 1)) + chol_cov = aet.switch(ok, chol_cov, aet.fill(chol_cov, 1)) delta_trans = solve_lower(chol_cov, delta.T).T - result = n * k * tt.log(f(2) * np.pi) - result += f(2) * n * tt.sum(tt.log(diag)) + result = n * k * aet.log(f(2) * np.pi) + result += f(2) * n * aet.sum(aet.log(diag)) result += (delta_trans ** f(2)).sum() result = f(-0.5) * result - logp = tt.switch(ok, result, -np.inf) + logp = aet.switch(ok, result, -np.inf) def dlogp(inputs, gradients): (g_logp,) = gradients @@ -293,21 +295,21 @@ def dlogp(inputs, gradients): n, k = delta.shape chol_cov = cholesky(cov) - diag = tt.nlinalg.diag(chol_cov) - ok = tt.all(diag > 0) + diag = aet.nlinalg.diag(chol_cov) + ok = aet.all(diag > 0) - chol_cov = tt.switch(ok, chol_cov, tt.fill(chol_cov, 1)) + chol_cov = aet.switch(ok, chol_cov, aet.fill(chol_cov, 1)) delta_trans = solve_lower(chol_cov, delta.T).T - inner = n * tt.eye(k) - tt.dot(delta_trans.T, delta_trans) + inner = n * aet.eye(k) - aet.dot(delta_trans.T, delta_trans) g_cov = solve_upper(chol_cov.T, inner) g_cov = solve_upper(chol_cov.T, g_cov.T) tau_delta = solve_upper(chol_cov.T, delta_trans.T) g_delta = tau_delta.T - g_cov = tt.switch(ok, g_cov, -np.nan) - g_delta = tt.switch(ok, g_delta, -np.nan) + g_cov = aet.switch(ok, g_cov, -np.nan) + g_delta = aet.switch(ok, g_delta, -np.nan) return [-0.5 * g_cov * g_logp, -g_delta * g_logp] @@ -316,7 +318,7 @@ def dlogp(inputs, gradients): class SplineWrapper(Op): """ - Creates a theano operation from scipy.interpolate.UnivariateSpline + Creates a aesara operation from scipy.interpolate.UnivariateSpline """ __props__ = ("spline",) @@ -325,7 +327,7 @@ def __init__(self, spline): self.spline = spline def make_node(self, x): - x = tt.as_tensor_variable(x) + x = aet.as_tensor_variable(x) return Apply(self, [x], [x.type()]) @property @@ -363,7 +365,7 @@ def impl(self, x): i1e_scalar = I1e(upgrade_to_float_no_complex, name="i1e") -i1e = tt.Elemwise(i1e_scalar, name="Elemwise{i1e,no_inplace}") +i1e = Elemwise(i1e_scalar, name="Elemwise{i1e,no_inplace}") class I0e(UnaryScalarOp): @@ -379,11 +381,11 @@ def impl(self, x): def grad(self, inp, grads): (x,) = inp (gz,) = grads - return (gz * (i1e_scalar(x) - theano.scalar.sgn(x) * i0e_scalar(x)),) + return (gz * (i1e_scalar(x) - aesara.scalar.sgn(x) * i0e_scalar(x)),) i0e_scalar = I0e(upgrade_to_float_no_complex, name="i0e") -i0e = tt.Elemwise(i0e_scalar, name="Elemwise{i0e,no_inplace}") +i0e = Elemwise(i0e_scalar, name="Elemwise{i0e,no_inplace}") def random_choice(*args, **kwargs): @@ -437,13 +439,13 @@ def incomplete_beta_cfe(a, b, x, small): based on Cephes library by Steve Moshier (incbet.c). small: Choose element-wise which continued fraction expansion to use. 
""" - BIG = tt.constant(4.503599627370496e15, dtype="float64") - BIGINV = tt.constant(2.22044604925031308085e-16, dtype="float64") - THRESH = tt.constant(3.0 * np.MachAr().eps, dtype="float64") + BIG = aet.constant(4.503599627370496e15, dtype="float64") + BIGINV = aet.constant(2.22044604925031308085e-16, dtype="float64") + THRESH = aet.constant(3.0 * np.MachAr().eps, dtype="float64") - zero = tt.constant(0.0, dtype="float64") - one = tt.constant(1.0, dtype="float64") - two = tt.constant(2.0, dtype="float64") + zero = aet.constant(0.0, dtype="float64") + one = aet.constant(1.0, dtype="float64") + two = aet.constant(2.0, dtype="float64") r = one k1 = a @@ -452,11 +454,11 @@ def incomplete_beta_cfe(a, b, x, small): k5 = one k8 = a + two - k2 = tt.switch(small, a + b, b - one) - k6 = tt.switch(small, b - one, a + b) - k7 = tt.switch(small, k4, a + one) - k26update = tt.switch(small, one, -one) - x = tt.switch(small, x, x / (one - x)) + k2 = aet.switch(small, a + b, b - one) + k6 = aet.switch(small, b - one, a + b) + k7 = aet.switch(small, k4, a + one) + k26update = aet.switch(small, one, -one) + x = aet.switch(small, x, x / (one - x)) pkm2 = zero qkm2 = one @@ -482,7 +484,7 @@ def _step(i, pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r): qkm1 = qk old_r = r - r = tt.switch(tt.eq(qk, zero), r, pk / qk) + r = aet.switch(aet.eq(qk, zero), r, pk / qk) k1 += one k2 += k26update @@ -493,30 +495,32 @@ def _step(i, pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r): k7 += two k8 += two - big_cond = tt.gt(tt.abs_(qk) + tt.abs_(pk), BIG) - biginv_cond = tt.or_(tt.lt(tt.abs_(qk), BIGINV), tt.lt(tt.abs_(pk), BIGINV)) + big_cond = aet.gt(aet.abs_(qk) + aet.abs_(pk), BIG) + biginv_cond = aet.or_(aet.lt(aet.abs_(qk), BIGINV), aet.lt(aet.abs_(pk), BIGINV)) - pkm2 = tt.switch(big_cond, pkm2 * BIGINV, pkm2) - pkm1 = tt.switch(big_cond, pkm1 * BIGINV, pkm1) - qkm2 = tt.switch(big_cond, qkm2 * BIGINV, qkm2) - qkm1 = tt.switch(big_cond, qkm1 * BIGINV, qkm1) + pkm2 = aet.switch(big_cond, pkm2 * BIGINV, pkm2) + pkm1 = aet.switch(big_cond, pkm1 * BIGINV, pkm1) + qkm2 = aet.switch(big_cond, qkm2 * BIGINV, qkm2) + qkm1 = aet.switch(big_cond, qkm1 * BIGINV, qkm1) - pkm2 = tt.switch(biginv_cond, pkm2 * BIG, pkm2) - pkm1 = tt.switch(biginv_cond, pkm1 * BIG, pkm1) - qkm2 = tt.switch(biginv_cond, qkm2 * BIG, qkm2) - qkm1 = tt.switch(biginv_cond, qkm1 * BIG, qkm1) + pkm2 = aet.switch(biginv_cond, pkm2 * BIG, pkm2) + pkm1 = aet.switch(biginv_cond, pkm1 * BIG, pkm1) + qkm2 = aet.switch(biginv_cond, qkm2 * BIG, qkm2) + qkm1 = aet.switch(biginv_cond, qkm1 * BIG, qkm1) return ( (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), - until(tt.abs_(old_r - r) < (THRESH * tt.abs_(r))), + until(aet.abs_(old_r - r) < (THRESH * aet.abs_(r))), ) (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), _ = scan( _step, - sequences=[tt.arange(0, 300)], + sequences=[aet.arange(0, 300)], outputs_info=[ e - for e in tt.cast((pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), "float64") + for e in aet.cast( + (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), "float64" + ) ], ) @@ -528,28 +532,28 @@ def incomplete_beta_ps(a, b, value): Use when b*x is small and value not too close to 1. 
Based on Cephes library by Steve Moshier (incbet.c) """ - one = tt.constant(1, dtype="float64") + one = aet.constant(1, dtype="float64") ai = one / a u = (one - b) * value t1 = u / (a + one) t = u threshold = np.MachAr().eps * ai - s = tt.constant(0, dtype="float64") + s = aet.constant(0, dtype="float64") def _step(i, t, s): t *= (i - b) * value / i step = t / (a + i) s += step - return ((t, s), until(tt.abs_(step) < threshold)) + return ((t, s), until(aet.abs_(step) < threshold)) (t, s), _ = scan( - _step, sequences=[tt.arange(2, 302)], outputs_info=[e for e in tt.cast((t, s), "float64")] + _step, sequences=[aet.arange(2, 302)], outputs_info=[e for e in aet.cast((t, s), "float64")] ) s = s[-1] + t1 + ai - t = gammaln(a + b) - gammaln(a) - gammaln(b) + a * tt.log(value) + tt.log(s) - return tt.exp(t) + t = gammaln(a + b) - gammaln(a) - gammaln(b) + a * aet.log(value) + aet.log(s) + return aet.exp(t) def incomplete_beta(a, b, value): @@ -557,37 +561,37 @@ def incomplete_beta(a, b, value): Power series and continued fraction expansions chosen for best numerical convergence across the board based on inputs. """ - machep = tt.constant(np.MachAr().eps, dtype="float64") - one = tt.constant(1, dtype="float64") + machep = aet.constant(np.MachAr().eps, dtype="float64") + one = aet.constant(1, dtype="float64") w = one - value ps = incomplete_beta_ps(a, b, value) - flip = tt.gt(value, (a / (a + b))) + flip = aet.gt(value, (a / (a + b))) aa, bb = a, b - a = tt.switch(flip, bb, aa) - b = tt.switch(flip, aa, bb) - xc = tt.switch(flip, value, w) - x = tt.switch(flip, w, value) + a = aet.switch(flip, bb, aa) + b = aet.switch(flip, aa, bb) + xc = aet.switch(flip, value, w) + x = aet.switch(flip, w, value) tps = incomplete_beta_ps(a, b, x) - tps = tt.switch(tt.le(tps, machep), one - machep, one - tps) + tps = aet.switch(aet.le(tps, machep), one - machep, one - tps) # Choose which continued fraction expansion for best convergence. - small = tt.lt(x * (a + b - 2.0) - (a - one), 0.0) + small = aet.lt(x * (a + b - 2.0) - (a - one), 0.0) cfe = incomplete_beta_cfe(a, b, x, small) - w = tt.switch(small, cfe, cfe / xc) + w = aet.switch(small, cfe, cfe / xc) # Direct incomplete beta accounting for flipped a, b. 
- t = tt.exp( - a * tt.log(x) + b * tt.log(xc) + gammaln(a + b) - gammaln(a) - gammaln(b) + tt.log(w / a) + t = aet.exp( + a * aet.log(x) + b * aet.log(xc) + gammaln(a + b) - gammaln(a) - gammaln(b) + aet.log(w / a) ) - t = tt.switch(flip, tt.switch(tt.le(t, machep), one - machep, one - t), t) - return tt.switch( - tt.and_(flip, tt.and_(tt.le((b * x), one), tt.le(x, 0.95))), + t = aet.switch(flip, aet.switch(aet.le(t, machep), one - machep, one - t), t) + return aet.switch( + aet.and_(flip, aet.and_(aet.le((b * x), one), aet.le(x, 0.95))), tps, - tt.switch(tt.and_(tt.le(b * value, one), tt.le(value, 0.95)), ps, t), + aet.switch(aet.and_(aet.le(b * value, one), aet.le(value, 0.95)), ps, t), ) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index c24a9d9df6e..d0ef10b236c 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -27,12 +27,16 @@ if TYPE_CHECKING: from typing import Optional, Callable +import aesara +import aesara.graph.basic +import aesara.tensor as aet import numpy as np -import theano -import theano.graph.basic -import theano.tensor as tt -from theano import function +from aesara import function +from aesara.compile.sharedvalue import SharedVariable +from aesara.graph.basic import Constant +from aesara.tensor.type import TensorType as AesaraTensorType +from aesara.tensor.var import TensorVariable from pymc3.distributions.shape_utils import ( broadcast_dist_samples_shape, @@ -49,7 +53,7 @@ build_named_node_tree, ) from pymc3.util import get_repr_for_variable, get_var_name -from pymc3.vartypes import string_types, theano_constant +from pymc3.vartypes import string_types __all__ = [ "DensityDist", @@ -164,13 +168,13 @@ def getattr_value(self, val): if isinstance(val, string_types): val = getattr(self, val) - if isinstance(val, tt.TensorVariable): + if isinstance(val, TensorVariable): return val.tag.test_value - if isinstance(val, tt.sharedvar.SharedVariable): + if isinstance(val, SharedVariable): return val.get_value() - if isinstance(val, theano_constant): + if isinstance(val, Constant): return val.value return val @@ -264,7 +268,7 @@ def logp_sum(self, *args, **kwargs): Subclasses can use this to improve the speed of logp evaluations if only the sum of the logp values is needed. 
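incomplete_beta, assembled above from the power-series and continued-fraction helpers, targets the regularized incomplete beta function I_x(a, b); the `flip` logic relies on the reflection identity I_x(a, b) = 1 - I_{1-x}(b, a). SciPy confirms the identity numerically (illustrative only; a, b, and the grid are arbitrary):

import numpy as np
from scipy import special

a, b = 2.5, 7.0                                       # arbitrary example shape parameters
x = np.linspace(0.05, 0.95, 10)

lhs = special.betainc(a, b, x)
rhs = 1.0 - special.betainc(b, a, 1.0 - x)

assert np.allclose(lhs, rhs)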
""" - return tt.sum(self.logp(*args, **kwargs)) + return aet.sum(self.logp(*args, **kwargs)) __latex__ = _repr_latex_ @@ -272,7 +276,7 @@ def logp_sum(self, *args, **kwargs): def TensorType(dtype, shape, broadcastable=None): if broadcastable is None: broadcastable = np.atleast_1d(shape) == 1 - return tt.TensorType(str(dtype), broadcastable) + return AesaraTensorType(str(dtype), broadcastable) class NoDistribution(Distribution): @@ -311,7 +315,7 @@ def logp(self, x): ------- TensorVariable """ - return tt.zeros_like(x) + return aet.zeros_like(x) def _distr_parameters_for_repr(self): return [] @@ -322,7 +326,7 @@ class Discrete(Distribution): def __init__(self, shape=(), dtype=None, defaults=("mode",), *args, **kwargs): if dtype is None: - if theano.config.floatX == "float32": + if aesara.config.floatX == "float32": dtype = "int16" else: dtype = "int64" @@ -340,7 +344,7 @@ class Continuous(Distribution): def __init__(self, shape=(), dtype=None, defaults=("median", "mean", "mode"), *args, **kwargs): if dtype is None: - dtype = theano.config.floatX + dtype = aesara.config.floatX super().__init__(shape, dtype, defaults=defaults, *args, **kwargs) @@ -371,7 +375,7 @@ def __init__( logp: callable A callable that has the following signature ``logp(value)`` and - returns a theano tensor that represents the distribution's log + returns a aesara tensor that represents the distribution's log probability density. shape: tuple (Optional): defaults to `()` The shape of the distribution. The default value indicates a scalar. @@ -526,7 +530,7 @@ def __init__( """ if dtype is None: - dtype = theano.config.floatX + dtype = aesara.config.floatX super().__init__(shape, dtype, testval, *args, **kwargs) self.logp = logp if type(self.logp) == types.MethodType: @@ -608,7 +612,7 @@ def random(self, point=None, size=None, **kwargs): "DensityDist random method cannot " "adapt to shape changes in the distribution's " "shape, which sometimes are necessary for sampling " - "when the model uses pymc3.Data or theano shared " + "when the model uses pymc3.Data or aesara shared " "tensors, or when the DensityDist has observed " "values.\n" "This check can be disabled by passing " @@ -673,9 +677,7 @@ def __init__(self): def is_fast_drawable(var): - return isinstance( - var, (numbers.Number, np.ndarray, theano_constant, tt.sharedvar.SharedVariable) - ) + return isinstance(var, (numbers.Number, np.ndarray, Constant, SharedVariable)) def draw_values(params, point=None, size=None): @@ -690,7 +692,7 @@ def draw_values(params, point=None, size=None): c) parameter can be fixed using tag.test_value (last resort) 3) The parameter is a tensor variable/constant. Can be evaluated using - theano.function, but a variable may contain nodes which + aesara.function, but a variable may contain nodes which a) are named parameters in the point b) are RVs with a random method @@ -756,20 +758,19 @@ def draw_values(params, point=None, size=None): if (next_, size) in drawn: # If the node already has a givens value, skip it continue - elif isinstance(next_, (theano_constant, tt.sharedvar.SharedVariable)): - # If the node is a theano.tensor.TensorConstant or a - # theano.tensor.sharedvar.SharedVariable, its value will be - # available automatically in _compile_theano_function so - # we can skip it. 
Furthermore, if this node was treated as a - # TensorVariable that should be compiled by theano in - # _compile_theano_function, it would raise a `TypeError: - # ('Constants not allowed in param list', ...)` for - # TensorConstant, and a `TypeError: Cannot use a shared - # variable (...) as explicit input` for SharedVariable. - # ObservedRV and MultiObservedRV instances are ViewOPs - # of TensorConstants or SharedVariables, we must add them - # to the stack or risk evaluating deterministics with the - # wrong values (issue #3354) + elif isinstance(next_, (Constant, SharedVariable)): + # If the node is a aesara.tensor.TensorConstant or a + # SharedVariable, its value will be available automatically in + # _compile_aesara_function so we can skip it. Furthermore, if + # this node was treated as a TensorVariable that should be + # compiled by aesara in _compile_aesara_function, it would + # raise a `TypeError: ('Constants not allowed in param list', + # ...)` for TensorConstant, and a `TypeError: Cannot use a + # shared variable (...) as explicit input` for SharedVariable. + # ObservedRV and MultiObservedRV instances are ViewOPs of + # TensorConstants or SharedVariables, we must add them to the + # stack or risk evaluating deterministics with the wrong values + # (issue #3354) stack.extend( [ node @@ -791,7 +792,7 @@ def draw_values(params, point=None, size=None): value = _draw_value(next_, point=point, givens=temp_givens, size=size) givens[next_.name] = (next_, value) drawn[(next_, size)] = value - except theano.graph.fg.MissingInputError: + except aesara.graph.fg.MissingInputError: # The node failed, so we must add the node's parents to # the stack of nodes to try to draw from. We exclude the # nodes in the `params` list. @@ -834,17 +835,17 @@ def draw_values(params, point=None, size=None): value = _draw_value(param, point=point, givens=givens.values(), size=size) evaluated[param_idx] = drawn[(param, size)] = value givens[param.name] = (param, value) - except theano.graph.fg.MissingInputError: + except aesara.graph.fg.MissingInputError: missing_inputs.add(param_idx) return [evaluated[j] for j in params] # set the order back @memoize -def _compile_theano_function(param, vars, givens=None): - """Compile theano function for a given parameter and input variables. +def _compile_aesara_function(param, vars, givens=None): + """Compile aesara function for a given parameter and input variables. - This function is memoized to avoid repeating costly theano compilations + This function is memoized to avoid repeating costly aesara compilations when repeatedly drawing values, which is done when generating posterior predictive samples. @@ -852,11 +853,11 @@ def _compile_theano_function(param, vars, givens=None): ---------- param: Model variable from which to draw value vars: Children variables of `param` - givens: Variables to be replaced in the Theano graph + givens: Variables to be replaced in the Aesara graph Returns ------- - A compiled theano function that takes the values of `vars` as input + A compiled aesara function that takes the values of `vars` as input positional args """ f = function( @@ -867,32 +868,32 @@ def _compile_theano_function(param, vars, givens=None): on_unused_input="ignore", allow_input_downcast=True, ) - return vectorize_theano_function(f, inputs=vars, output=param) + return vectorize_aesara_function(f, inputs=vars, output=param) -def vectorize_theano_function(f, inputs, output): - """Takes a compiled theano function and wraps it with a vectorized version. 
- Theano compiled functions expect inputs and outputs of a fixed number of +def vectorize_aesara_function(f, inputs, output): + """Takes a compiled aesara function and wraps it with a vectorized version. + Aesara compiled functions expect inputs and outputs of a fixed number of dimensions. In our context, these usually come from deterministics which are compiled against a given RV, with its core shape. If we draw i.i.d. samples from said RV, we would not be able to compute the deterministic over the i.i.d sampled dimensions (i.e. those that are not the core - dimensions of the RV). To deal with this problem, we wrap the theano + dimensions of the RV). To deal with this problem, we wrap the aesara compiled function with numpy.vectorize, providing the correct signature for the core dimensions. The extra dimensions, will be interpreted as i.i.d. sampled axis and will be broadcast following the usual rules. Parameters ---------- - f: theano compiled function - inputs: list of theano variables used as inputs for the function - givens: theano variable which is the output of the function + f: aesara compiled function + inputs: list of aesara variables used as inputs for the function + givens: aesara variable which is the output of the function Notes ----- - If inputs is an empty list (theano function with no inputs needed), then + If inputs is an empty list (aesara function with no inputs needed), then the same `f` is returned. - Only functions that return a single theano variable's value can be + Only functions that return a single aesara variable's value can be vectorized. Returns @@ -928,27 +929,27 @@ def _draw_value(param, point=None, givens=None, size=None): Parameters ---------- - param: number, array like, theano variable or pymc3 random variable + param: number, array like, aesara variable or pymc3 random variable The value or distribution. Constants or shared variables - will be converted to an array and returned. Theano variables + will be converted to an array and returned. Aesara variables are evaluated. If `param` is a pymc3 random variables, draw a new value from it and return that, unless a value is specified in `point`. point: dict, optional A dictionary from pymc3 variable names to their values. givens: dict, optional - A dictionary from theano variables to their values. These values - are used to evaluate `param` if it is a theano variable. + A dictionary from aesara variables to their values. These values + are used to evaluate `param` if it is a aesara variable. 
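The vectorization strategy described above wraps a fixed-dimension compiled function with numpy.vectorize and an explicit core signature, so extra leading axes are treated as i.i.d. sample dimensions and broadcast by the usual rules. A minimal stand-in sketch (illustrative; core_fn merely imitates a compiled function whose core input is a length-3 vector):

import numpy as np

def core_fn(x):
    # stands in for a compiled function that expects a single length-3 core vector
    return x.sum()

vectorized = np.vectorize(core_fn, signature="(n)->()")

draws = np.random.rand(4, 5, 3)                       # two i.i.d. sample axes before the core axis
out = vectorized(draws)
assert out.shape == (4, 5)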
size: int, optional Number of samples """ if isinstance(param, (numbers.Number, np.ndarray)): return param - elif isinstance(param, theano_constant): + elif isinstance(param, Constant): return param.value - elif isinstance(param, tt.sharedvar.SharedVariable): + elif isinstance(param, SharedVariable): return param.get_value() - elif isinstance(param, (tt.TensorVariable, MultiObservedRV)): + elif isinstance(param, (TensorVariable, MultiObservedRV)): if point and hasattr(param, "model") and param.name in point: return point[param.name] elif hasattr(param, "random") and param.random is not None: @@ -971,7 +972,7 @@ def _draw_value(param, point=None, givens=None, size=None): return dist_tmp.random(point=point, size=size) except (ValueError, TypeError): # reset shape to account for shape changes - # with theano.shared inputs + # with aesara.shared inputs dist_tmp.shape = np.array([]) # We want to draw values to infer the dist_shape, # we don't want to store these drawn values to the context @@ -995,14 +996,14 @@ def _draw_value(param, point=None, givens=None, size=None): variables = values = [] # We only truly care if the ancestors of param that were given # value have the matching dshape and val.shape - param_ancestors = set(theano.graph.basic.ancestors([param], blockers=list(variables))) + param_ancestors = set(aesara.graph.basic.ancestors([param], blockers=list(variables))) inputs = [(var, val) for var, val in zip(variables, values) if var in param_ancestors] if inputs: input_vars, input_vals = list(zip(*inputs)) else: input_vars = [] input_vals = [] - func = _compile_theano_function(param, input_vars) + func = _compile_aesara_function(param, input_vars) output = func(*input_vals) return output raise ValueError("Unexpected type in draw_value: %s" % type(param)) diff --git a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py index 756269d3306..f423f298de2 100644 --- a/pymc3/distributions/mixture.py +++ b/pymc3/distributions/mixture.py @@ -14,10 +14,11 @@ from collections.abc import Iterable +import aesara +import aesara.tensor as aet import numpy as np -import theano -import theano.tensor as tt +from pymc3.aesaraf import _conversion_map, take_along_axis from pymc3.distributions.continuous import Normal, get_tau_sigma from pymc3.distributions.dist_math import bound, random_choice from pymc3.distributions.distribution import ( @@ -34,7 +35,6 @@ to_tuple, ) from pymc3.math import logsumexp -from pymc3.theanof import _conversion_map, take_along_axis __all__ = ["Mixture", "NormalMixture", "MixtureSameFamily"] @@ -143,15 +143,15 @@ def __init__(self, w, comp_dists, *args, **kwargs): ) shape = kwargs.pop("shape", ()) - self.w = w = tt.as_tensor_variable(w) + self.w = w = aet.as_tensor_variable(w) self.comp_dists = comp_dists defaults = kwargs.pop("defaults", []) if all_discrete(comp_dists): - default_dtype = _conversion_map[theano.config.floatX] + default_dtype = _conversion_map[aesara.config.floatX] else: - default_dtype = theano.config.floatX + default_dtype = aesara.config.floatX try: self.mean = (w * self._comp_means()).sum(axis=-1) @@ -166,9 +166,9 @@ def __init__(self, w, comp_dists, *args, **kwargs): if isinstance(comp_dists, Distribution): comp_mode_logps = comp_dists.logp(comp_dists.mode) else: - comp_mode_logps = tt.stack([cd.logp(cd.mode) for cd in comp_dists]) + comp_mode_logps = aet.stack([cd.logp(cd.mode) for cd in comp_dists]) - mode_idx = tt.argmax(tt.log(w) + comp_mode_logps, axis=-1) + mode_idx = aet.argmax(aet.log(w) + comp_mode_logps, axis=-1) self.mode = 
self._comp_modes()[mode_idx] if "mode" not in defaults: @@ -253,7 +253,7 @@ def _comp_logp(self, value): val_shape = tuple(value.shape.eval()) except AttributeError: val_shape = value.shape - except theano.graph.fg.MissingInputError: + except aesara.graph.fg.MissingInputError: val_shape = None try: self_shape = tuple(self.shape) @@ -292,26 +292,30 @@ def _comp_logp(self, value): if ndim <= 1: ndim = len(comp_dists.shape) - 1 if ndim < len(comp_dists.shape): - value_ = tt.shape_padright(value, len(comp_dists.shape) - ndim) + value_ = aet.shape_padright(value, len(comp_dists.shape) - ndim) else: value_ = value return comp_dists.logp(value_) else: - return tt.squeeze( - tt.stack([comp_dist.logp(value) for comp_dist in comp_dists], axis=-1) + return aet.squeeze( + aet.stack([comp_dist.logp(value) for comp_dist in comp_dists], axis=-1) ) def _comp_means(self): try: - return tt.as_tensor_variable(self.comp_dists.mean) + return aet.as_tensor_variable(self.comp_dists.mean) except AttributeError: - return tt.squeeze(tt.stack([comp_dist.mean for comp_dist in self.comp_dists], axis=-1)) + return aet.squeeze( + aet.stack([comp_dist.mean for comp_dist in self.comp_dists], axis=-1) + ) def _comp_modes(self): try: - return tt.as_tensor_variable(self.comp_dists.mode) + return aet.as_tensor_variable(self.comp_dists.mode) except AttributeError: - return tt.squeeze(tt.stack([comp_dist.mode for comp_dist in self.comp_dists], axis=-1)) + return aet.squeeze( + aet.stack([comp_dist.mode for comp_dist in self.comp_dists], axis=-1) + ) def _comp_samples(self, point=None, size=None, comp_dist_shapes=None, broadcast_shape=None): if self.comp_is_distribution: @@ -418,7 +422,7 @@ def logp(self, value): ---------- value: numeric Value(s) for which log-probability is calculated. 
If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -427,10 +431,10 @@ def logp(self, value): w = self.w return bound( - logsumexp(tt.log(w) + self._comp_logp(value), axis=-1, keepdims=False), + logsumexp(aet.log(w) + self._comp_logp(value), axis=-1, keepdims=False), w >= 0, w <= 1, - tt.allclose(w.sum(axis=-1), 1), + aet.allclose(w.sum(axis=-1), 1), broadcast_conditions=False, ) @@ -632,8 +636,8 @@ def __init__(self, w, mu, sigma=None, tau=None, sd=None, comp_shape=(), *args, * sigma = sd _, sigma = get_tau_sigma(tau=tau, sigma=sigma) - self.mu = mu = tt.as_tensor_variable(mu) - self.sigma = self.sd = sigma = tt.as_tensor_variable(sigma) + self.mu = mu = aet.as_tensor_variable(mu) + self.sigma = self.sd = sigma = aet.as_tensor_variable(sigma) super().__init__(w, Normal.dist(mu, sigma=sigma, shape=comp_shape), *args, **kwargs) @@ -675,7 +679,7 @@ class MixtureSameFamily(Distribution): """ def __init__(self, w, comp_dists, mixture_axis=-1, *args, **kwargs): - self.w = tt.as_tensor_variable(w) + self.w = aet.as_tensor_variable(w) if not isinstance(comp_dists, Distribution): raise TypeError( "The MixtureSameFamily distribution only accepts Distribution " @@ -697,19 +701,19 @@ def __init__(self, w, comp_dists, mixture_axis=-1, *args, **kwargs): # Compute the mode so we don't always have to pass a testval defaults = kwargs.pop("defaults", []) event_shape = self.comp_dists.shape[mixture_axis + 1 :] - _w = tt.shape_padleft( - tt.shape_padright(w, len(event_shape)), + _w = aet.shape_padleft( + aet.shape_padright(w, len(event_shape)), len(self.comp_dists.shape) - w.ndim - len(event_shape), ) mode = take_along_axis( self.comp_dists.mode, - tt.argmax(_w, keepdims=True), + aet.argmax(_w, keepdims=True), axis=mixture_axis, ) self.mode = mode[(..., 0) + (slice(None),) * len(event_shape)] if not all_discrete(comp_dists): - mean = tt.as_tensor_variable(self.comp_dists.mean) + mean = aet.as_tensor_variable(self.comp_dists.mean) self.mean = (_w * mean).sum(axis=mixture_axis) if "mean" not in defaults: defaults.append("mean") @@ -725,7 +729,7 @@ def logp(self, value): ---------- value : numeric Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or theano tensor + values are desired the values must be provided in a numpy array or aesara tensor Returns ------- @@ -742,7 +746,7 @@ def logp(self, value): # We first have to pad the shape of w to the right with ones # so that it can broadcast with the event_shape. - w = tt.shape_padright(w, len(event_shape)) + w = aet.shape_padright(w, len(event_shape)) # Second, we have to add the mixture_axis to the value tensor # To insert the mixture axis at the correct location, we use the @@ -751,14 +755,14 @@ def logp(self, value): # than the ones present in the comp_dists. 
comp_dists_ndim = len(comp_dists.shape) - value = tt.shape_padaxis(value, axis=mixture_axis - comp_dists_ndim) + value = aet.shape_padaxis(value, axis=mixture_axis - comp_dists_ndim) comp_logp = comp_dists.logp(value) return bound( - logsumexp(tt.log(w) + comp_logp, axis=mixture_axis, keepdims=False), + logsumexp(aet.log(w) + comp_logp, axis=mixture_axis, keepdims=False), w >= 0, w <= 1, - tt.allclose(w.sum(axis=mixture_axis - comp_dists_ndim), 1), + aet.allclose(w.sum(axis=mixture_axis - comp_dists_ndim), 1), broadcast_conditions=False, ) diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 3fcdb8dbdaf..c23b9f191b6 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -17,20 +17,27 @@ import warnings +import aesara +import aesara.tensor as aet import numpy as np import scipy -import theano -import theano.tensor as tt +from aesara.graph.basic import Apply +from aesara.graph.op import Op, get_test_value +from aesara.graph.utils import TestValueError +from aesara.tensor.nlinalg import det, eigh, matrix_inverse, trace +from aesara.tensor.slinalg import ( + Cholesky, + Solve, + solve_lower_triangular, + solve_upper_triangular, +) +from aesara.tensor.type import TensorType from scipy import linalg, stats -from theano.graph.basic import Apply -from theano.graph.op import Op, get_test_value -from theano.graph.utils import TestValueError -from theano.tensor.nlinalg import det, eigh, matrix_inverse, trace -from theano.tensor.slinalg import Cholesky import pymc3 as pm +from pymc3.aesaraf import floatX, intX from pymc3.distributions import transforms from pymc3.distributions.continuous import ChiSquared, Normal from pymc3.distributions.dist_math import bound, factln, logpow @@ -46,7 +53,6 @@ from pymc3.exceptions import ShapeError from pymc3.math import kron_diag, kron_dot, kron_solve_lower, kronecker from pymc3.model import Deterministic -from pymc3.theanof import floatX, intX __all__ = [ "MvNormal", @@ -75,8 +81,8 @@ def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True, *args, ** raise ValueError( "Incompatible parameterization. Specify exactly one of tau, cov, or chol." ) - self.mu = mu = tt.as_tensor_variable(mu) - self.solve_lower = tt.slinalg.Solve(A_structure="lower_triangular") + self.mu = mu = aet.as_tensor_variable(mu) + self.solve_lower = Solve(A_structure="lower_triangular") # Step methods and advi do not catch LinAlgErrors at the # moment. 
We work around that by using a cholesky op # that returns a nan as first entry instead of raising @@ -86,7 +92,7 @@ def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True, *args, ** if cov is not None: self.k = cov.shape[0] self._cov_type = "cov" - cov = tt.as_tensor_variable(cov) + cov = aet.as_tensor_variable(cov) if cov.ndim != 2: raise ValueError("cov must be two dimensional.") self.chol_cov = cholesky(cov) @@ -95,7 +101,7 @@ def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True, *args, ** elif tau is not None: self.k = tau.shape[0] self._cov_type = "tau" - tau = tt.as_tensor_variable(tau) + tau = aet.as_tensor_variable(tau) if tau.ndim != 2: raise ValueError("tau must be two dimensional.") self.chol_tau = cholesky(tau) @@ -106,7 +112,7 @@ def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True, *args, ** self._cov_type = "chol" if chol.ndim != 2: raise ValueError("chol must be two dimensional.") - self.chol_cov = tt.as_tensor_variable(chol) + self.chol_cov = aet.as_tensor_variable(chol) self._n = self.chol_cov.shape[-1] def _quaddist(self, value): @@ -137,16 +143,16 @@ def _quaddist(self, value): def _quaddist_chol(self, delta): chol_cov = self.chol_cov - diag = tt.nlinalg.diag(chol_cov) + diag = aet.nlinalg.diag(chol_cov) # Check if the covariance matrix is positive definite. - ok = tt.all(diag > 0) + ok = aet.all(diag > 0) # If not, replace the diagonal. We return -inf later, but # need to prevent solve_lower from throwing an exception. - chol_cov = tt.switch(ok, chol_cov, 1) + chol_cov = aet.switch(ok, chol_cov, 1) delta_trans = self.solve_lower(chol_cov, delta.T).T quaddist = (delta_trans ** 2).sum(axis=-1) - logdet = tt.sum(tt.log(diag)) + logdet = aet.sum(aet.log(diag)) return quaddist, logdet, ok def _quaddist_cov(self, delta): @@ -154,16 +160,16 @@ def _quaddist_cov(self, delta): def _quaddist_tau(self, delta): chol_tau = self.chol_tau - diag = tt.nlinalg.diag(chol_tau) + diag = aet.nlinalg.diag(chol_tau) # Check if the precision matrix is positive definite. - ok = tt.all(diag > 0) + ok = aet.all(diag > 0) # If not, replace the diagonal. We return -inf later, but # need to prevent solve_lower from throwing an exception. 
- chol_tau = tt.switch(ok, chol_tau, 1) + chol_tau = aet.switch(ok, chol_tau, 1) - delta_trans = tt.dot(delta, chol_tau) + delta_trans = aet.dot(delta, chol_tau) quaddist = (delta_trans ** 2).sum(axis=-1) - logdet = -tt.sum(tt.log(diag)) + logdet = -aet.sum(aet.log(diag)) return quaddist, logdet, ok def _cov_param_for_repr(self): @@ -235,7 +241,7 @@ class MvNormal(_QuadFormBase): chol, _, _ = pm.LKJCholeskyCov('chol_cov', n=3, eta=2, sd_dist=sd_dist, compute_corr=True) vals_raw = pm.Normal('vals_raw', mu=0, sigma=1, shape=(5, 3)) - vals = pm.Deterministic('vals', tt.dot(chol, vals_raw.T).T) + vals = pm.Deterministic('vals', aet.dot(chol, vals_raw.T).T) """ def __init__(self, mu, cov=None, tau=None, chol=None, lower=True, *args, **kwargs): @@ -362,7 +368,7 @@ def __init__( raise ValueError("Specify only one of cov and Sigma") cov = Sigma super().__init__(mu=mu, cov=cov, tau=tau, chol=chol, lower=lower, *args, **kwargs) - self.nu = nu = tt.as_tensor_variable(nu) + self.nu = nu = aet.as_tensor_variable(nu) self.mean = self.median = self.mode = self.mu = self.mu def random(self, point=None, size=None): @@ -423,7 +429,7 @@ def logp(self, value): - gammaln(self.nu / 2.0) - 0.5 * k * floatX(np.log(self.nu * np.pi)) ) - inner = -(self.nu + k) / 2.0 * tt.log1p(quaddist / self.nu) + inner = -(self.nu + k) / 2.0 * aet.log1p(quaddist / self.nu) return bound(norm + inner - logdet, ok) def _distr_parameters_for_repr(self): @@ -472,10 +478,10 @@ def __init__(self, a, transform=transforms.stick_breaking, *args, **kwargs): super().__init__(transform=transform, *args, **kwargs) - self.a = a = tt.as_tensor_variable(a) - self.mean = a / tt.sum(a) + self.a = a = aet.as_tensor_variable(a) + self.mean = a / aet.sum(a) - self.mode = tt.switch(tt.all(a > 1), (a - 1) / tt.sum(a - 1), np.nan) + self.mode = aet.switch(aet.all(a > 1), (a - 1) / aet.sum(a - 1), np.nan) def random(self, point=None, size=None): """ @@ -519,10 +525,10 @@ def logp(self, value): # only defined for sum(value) == 1 return bound( - tt.sum(logpow(value, a - 1) - gammaln(a), axis=-1) + gammaln(tt.sum(a, axis=-1)), - tt.all(value >= 0), - tt.all(value <= 1), - tt.all(a > 0), + aet.sum(logpow(value, a - 1) - gammaln(a), axis=-1) + gammaln(aet.sum(a, axis=-1)), + aet.all(value >= 0), + aet.all(value <= 1), + aet.all(a > 0), broadcast_conditions=False, ) @@ -566,21 +572,21 @@ class Multinomial(Discrete): def __init__(self, n, p, *args, **kwargs): super().__init__(*args, **kwargs) - p = p / tt.sum(p, axis=-1, keepdims=True) + p = p / aet.sum(p, axis=-1, keepdims=True) if len(self.shape) > 1: - self.n = tt.shape_padright(n) - self.p = p if p.ndim > 1 else tt.shape_padleft(p) + self.n = aet.shape_padright(n) + self.p = p if p.ndim > 1 else aet.shape_padleft(p) else: # n is a scalar, p is a 1d array - self.n = tt.as_tensor_variable(n) - self.p = tt.as_tensor_variable(p) + self.n = aet.as_tensor_variable(n) + self.p = aet.as_tensor_variable(p) self.mean = self.n * self.p - mode = tt.cast(tt.round(self.mean), "int32") - diff = self.n - tt.sum(mode, axis=-1, keepdims=True) - inc_bool_arr = tt.abs_(diff) > 0 - mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()]) + mode = aet.cast(aet.round(self.mean), "int32") + diff = self.n - aet.sum(mode, axis=-1, keepdims=True) + inc_bool_arr = aet.abs_(diff) > 0 + mode = aet.inc_subtensor(mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()]) self.mode = mode def _random(self, n, p, size=None, raw_size=None): @@ -663,12 +669,12 @@ def logp(self, x): p = self.p return bound( - 
factln(n) + tt.sum(-factln(x) + logpow(p, x), axis=-1, keepdims=True), - tt.all(x >= 0), - tt.all(tt.eq(tt.sum(x, axis=-1, keepdims=True), n)), - tt.all(p <= 1), - tt.all(tt.eq(tt.sum(p, axis=-1), 1)), - tt.all(tt.ge(n, 0)), + factln(n) + aet.sum(-factln(x) + logpow(p, x), axis=-1, keepdims=True), + aet.all(x >= 0), + aet.all(aet.eq(aet.sum(x, axis=-1, keepdims=True), n)), + aet.all(p <= 1), + aet.all(aet.eq(aet.sum(p, axis=-1), 1)), + aet.all(aet.ge(n, 0)), broadcast_conditions=False, ) @@ -714,22 +720,22 @@ def __init__(self, n, a, shape, *args, **kwargs): n = intX(n) a = floatX(a) if len(self.shape) > 1: - self.n = tt.shape_padright(n) - self.a = tt.as_tensor_variable(a) if a.ndim > 1 else tt.shape_padleft(a) + self.n = aet.shape_padright(n) + self.a = aet.as_tensor_variable(a) if a.ndim > 1 else aet.shape_padleft(a) else: # n is a scalar, p is a 1d array - self.n = tt.as_tensor_variable(n) - self.a = tt.as_tensor_variable(a) + self.n = aet.as_tensor_variable(n) + self.a = aet.as_tensor_variable(a) p = self.a / self.a.sum(-1, keepdims=True) self.mean = self.n * p # Mode is only an approximation. Exact computation requires a complex # iterative algorithm as described in https://doi.org/10.1016/j.spl.2009.09.013 - mode = tt.cast(tt.round(self.mean), "int32") - diff = self.n - tt.sum(mode, axis=-1, keepdims=True) - inc_bool_arr = tt.abs_(diff) > 0 - mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()]) + mode = aet.cast(aet.round(self.mean), "int32") + diff = self.n - aet.sum(mode, axis=-1, keepdims=True) + inc_bool_arr = aet.abs_(diff) > 0 + mode = aet.inc_subtensor(mode[inc_bool_arr.nonzero()], diff[inc_bool_arr.nonzero()]) self._defaultval = mode def _random(self, n, a, size=None): @@ -816,10 +822,10 @@ def logp(self, value): # and that each observation value_i sums to n_i. 
return bound( result, - tt.all(tt.ge(value, 0)), - tt.all(tt.gt(a, 0)), - tt.all(tt.ge(n, 0)), - tt.all(tt.eq(value.sum(axis=-1, keepdims=True), n)), + aet.all(aet.ge(value, 0)), + aet.all(aet.gt(a, 0)), + aet.all(aet.ge(n, 0)), + aet.all(aet.eq(value.sum(axis=-1, keepdims=True), n)), broadcast_conditions=False, ) @@ -847,9 +853,9 @@ class PosDefMatrix(Op): # Compulsory if itypes and otypes are not defined def make_node(self, x): - x = tt.as_tensor_variable(x) + x = aet.as_tensor_variable(x) assert x.ndim == 2 - o = tt.TensorType(dtype="int8", broadcastable=[])() + o = TensorType(dtype="int8", broadcastable=[])() return Apply(self, [x], [o]) # Python implementation: @@ -868,7 +874,7 @@ def infer_shape(self, fgraph, node, shapes): def grad(self, inp, grads): (x,) = inp - return [x.zeros_like(theano.config.floatX)] + return [x.zeros_like(aesara.config.floatX)] def __str__(self): return "MatrixIsPositiveDefinite" @@ -925,11 +931,11 @@ def __init__(self, nu, V, *args, **kwargs): "https://github.com/pymc-devs/pymc3/issues/538.", UserWarning, ) - self.nu = nu = tt.as_tensor_variable(nu) - self.p = p = tt.as_tensor_variable(V.shape[0]) - self.V = V = tt.as_tensor_variable(V) + self.nu = nu = aet.as_tensor_variable(nu) + self.p = p = aet.as_tensor_variable(V.shape[0]) + self.V = V = aet.as_tensor_variable(V) self.mean = nu * V - self.mode = tt.switch(tt.ge(nu, p + 1), (nu - p - 1) * V, np.nan) + self.mode = aet.switch(aet.ge(nu, p + 1), (nu - p - 1) * V, np.nan) def random(self, point=None, size=None): """ @@ -975,15 +981,15 @@ def logp(self, X): return bound( ( - (nu - p - 1) * tt.log(IXI) + (nu - p - 1) * aet.log(IXI) - trace(matrix_inverse(V).dot(X)) - - nu * p * tt.log(2) - - nu * tt.log(IVI) + - nu * p * aet.log(2) + - nu * aet.log(IVI) - 2 * multigammaln(nu / 2.0, p) ) / 2, matrix_pos_def(X), - tt.eq(X, X.T), + aet.eq(X, X.T), nu > (p - 1), broadcast_conditions=False, ) @@ -1053,44 +1059,44 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv diag_testval = None tril_testval = None - c = tt.sqrt( + c = aet.sqrt( ChiSquared("%s_c" % name, nu - np.arange(2, 2 + n_diag), shape=n_diag, testval=diag_testval) ) pm._log.info("Added new variable %s_c to model diagonal of Wishart." % name) z = Normal("%s_z" % name, 0.0, 1.0, shape=n_tril, testval=tril_testval) pm._log.info("Added new variable %s_z to model off-diagonals of Wishart." 
% name) # Construct A matrix - A = tt.zeros(S.shape, dtype=np.float32) - A = tt.set_subtensor(A[diag_idx], c) - A = tt.set_subtensor(A[tril_idx], z) + A = aet.zeros(S.shape, dtype=np.float32) + A = aet.set_subtensor(A[diag_idx], c) + A = aet.set_subtensor(A[tril_idx], z) # L * A * A.T * L.T ~ Wishart(L*L.T, nu) if return_cholesky: - return Deterministic(name, tt.dot(L, A)) + return Deterministic(name, aet.dot(L, A)) else: - return Deterministic(name, tt.dot(tt.dot(tt.dot(L, A), A.T), L.T)) + return Deterministic(name, aet.dot(aet.dot(aet.dot(L, A), A.T), L.T)) def _lkj_normalizing_constant(eta, n): if eta == 1: - result = gammaln(2.0 * tt.arange(1, int((n - 1) / 2) + 1)).sum() + result = gammaln(2.0 * aet.arange(1, int((n - 1) / 2) + 1)).sum() if n % 2 == 1: result += ( - 0.25 * (n ** 2 - 1) * tt.log(np.pi) - - 0.25 * (n - 1) ** 2 * tt.log(2.0) + 0.25 * (n ** 2 - 1) * aet.log(np.pi) + - 0.25 * (n - 1) ** 2 * aet.log(2.0) - (n - 1) * gammaln(int((n + 1) / 2)) ) else: result += ( - 0.25 * n * (n - 2) * tt.log(np.pi) - + 0.25 * (3 * n ** 2 - 4 * n) * tt.log(2.0) + 0.25 * n * (n - 2) * aet.log(np.pi) + + 0.25 * (3 * n ** 2 - 4 * n) * aet.log(2.0) + n * gammaln(n / 2) - (n - 1) * gammaln(n) ) else: result = -(n - 1) * gammaln(eta + 0.5 * (n - 1)) - k = tt.arange(1, n) - result += (0.5 * k * tt.log(np.pi) + gammaln(eta + 0.5 * (n - 1 - k))).sum() + k = aet.arange(1, n) + result += (0.5 * k * aet.log(np.pi) + gammaln(eta + 0.5 * (n - 1 - k))).sum() return result @@ -1100,8 +1106,8 @@ class _LKJCholeskyCov(Continuous): """ def __init__(self, eta, n, sd_dist, *args, **kwargs): - self.n = tt.as_tensor_variable(n) - self.eta = tt.as_tensor_variable(eta) + self.n = aet.as_tensor_variable(n) + self.eta = aet.as_tensor_variable(eta) if "transform" in kwargs and kwargs["transform"] is not None: raise ValueError("Invalid parameter: transform.") @@ -1143,22 +1149,22 @@ def logp(self, x): eta = self.eta diag_idxs = self.diag_idxs - cumsum = tt.cumsum(x ** 2) - variance = tt.zeros(n) - variance = tt.inc_subtensor(variance[0], x[0] ** 2) - variance = tt.inc_subtensor(variance[1:], cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]]) - sd_vals = tt.sqrt(variance) + cumsum = aet.cumsum(x ** 2) + variance = aet.zeros(n) + variance = aet.inc_subtensor(variance[0], x[0] ** 2) + variance = aet.inc_subtensor(variance[1:], cumsum[diag_idxs[1:]] - cumsum[diag_idxs[:-1]]) + sd_vals = aet.sqrt(variance) logp_sd = self.sd_dist.logp(sd_vals).sum() corr_diag = x[diag_idxs] / sd_vals - logp_lkj = (2 * eta - 3 + n - tt.arange(n)) * tt.log(corr_diag) - logp_lkj = tt.sum(logp_lkj) + logp_lkj = (2 * eta - 3 + n - aet.arange(n)) * aet.log(corr_diag) + logp_lkj = aet.sum(logp_lkj) # Compute the log det jacobian of the second transformation # described in the docstring. 
- idx = tt.arange(n) - det_invjac = tt.log(corr_diag) - idx * tt.log(sd_vals) + idx = aet.arange(n) + det_invjac = aet.log(corr_diag) - idx * aet.log(sd_vals) det_invjac = det_invjac.sum() norm = _lkj_normalizing_constant(eta, n) @@ -1348,10 +1354,10 @@ def LKJCholeskyCov(name, eta, n, sd_dist, compute_corr=False, store_in_trace=Tru # Or transform an uncorrelated normal: vals_raw = pm.Normal('vals_raw', mu=0, sigma=1, shape=10) - vals = tt.dot(chol, vals_raw) + vals = aet.dot(chol, vals_raw) # Or compute the covariance matrix - cov = tt.dot(chol, chol.T) + cov = aet.dot(chol, chol.T) **Implementation** In the unconstrained space all values of the cholesky factor are stored untransformed, except for the diagonal entries, where @@ -1411,9 +1417,9 @@ def LKJCholeskyCov(name, eta, n, sd_dist, compute_corr=False, store_in_trace=Tru else: chol = pm.expand_packed_triangular(n, packed_chol, lower=True) # compute covariance matrix - cov = tt.dot(chol, chol.T) + cov = aet.dot(chol, chol.T) # extract standard deviations and rho - stds = tt.sqrt(tt.diag(cov)) + stds = aet.sqrt(aet.diag(cov)) inv_stds = 1 / stds corr = inv_stds[None, :] * cov * inv_stds[:, None] if store_in_trace: @@ -1562,14 +1568,14 @@ def logp(self, x): eta = self.eta X = x[self.tri_index] - X = tt.fill_diagonal(X, 1) + X = aet.fill_diagonal(X, 1) result = _lkj_normalizing_constant(eta, n) - result += (eta - 1.0) * tt.log(det(X)) + result += (eta - 1.0) * aet.log(det(X)) return bound( result, - tt.all(X <= 1), - tt.all(X >= -1), + aet.all(X <= 1), + aet.all(X >= -1), matrix_pos_def(X), eta > 0, broadcast_conditions=False, @@ -1662,7 +1668,7 @@ class MatrixNormal(Continuous): # Setup left covariance matrix scale = pm.Lognormal('scale', mu=np.log(true_scale), sigma=0.5) - rowcov = tt.nlinalg.diag([scale**(2*i) for i in range(m)]) + rowcov = aet.nlinalg.diag([scale**(2*i) for i in range(m)]) vals = pm.MatrixNormal('vals', mu=mu, colchol=colchol, rowcov=rowcov, observed=data, shape=(m, n)) @@ -1687,10 +1693,10 @@ def __init__( assert len(shape) == 2, "shape must have length 2: mxn" self.shape = shape super().__init__(shape=shape, *args, **kwargs) - self.mu = tt.as_tensor_variable(mu) + self.mu = aet.as_tensor_variable(mu) self.mean = self.median = self.mode = self.mu - self.solve_lower = tt.slinalg.solve_lower_triangular - self.solve_upper = tt.slinalg.solve_upper_triangular + self.solve_lower = solve_lower_triangular + self.solve_upper = solve_upper_triangular def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau): cholesky = Cholesky(lower=True, on_error="raise") @@ -1705,7 +1711,7 @@ def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau): if rowcov is not None: self.m = rowcov.shape[0] self._rowcov_type = "cov" - rowcov = tt.as_tensor_variable(rowcov) + rowcov = aet.as_tensor_variable(rowcov) if rowcov.ndim != 2: raise ValueError("rowcov must be two dimensional.") self.rowchol_cov = cholesky(rowcov) @@ -1714,7 +1720,7 @@ def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau): raise ValueError("rowtau not supported at this time") self.m = rowtau.shape[0] self._rowcov_type = "tau" - rowtau = tt.as_tensor_variable(rowtau) + rowtau = aet.as_tensor_variable(rowtau) if rowtau.ndim != 2: raise ValueError("rowtau must be two dimensional.") self.rowchol_tau = cholesky(rowtau) @@ -1724,7 +1730,7 @@ def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau): self._rowcov_type = "chol" if rowchol.ndim != 2: raise ValueError("rowchol must be two dimensional.") - 
self.rowchol_cov = tt.as_tensor_variable(rowchol) + self.rowchol_cov = aet.as_tensor_variable(rowchol) # Among-column matrices if len([i for i in [coltau, colcov, colchol] if i is not None]) != 1: @@ -1736,7 +1742,7 @@ def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau): if colcov is not None: self.n = colcov.shape[0] self._colcov_type = "cov" - colcov = tt.as_tensor_variable(colcov) + colcov = aet.as_tensor_variable(colcov) if colcov.ndim != 2: raise ValueError("colcov must be two dimensional.") self.colchol_cov = cholesky(colcov) @@ -1745,7 +1751,7 @@ def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau): raise ValueError("coltau not supported at this time") self.n = coltau.shape[0] self._colcov_type = "tau" - coltau = tt.as_tensor_variable(coltau) + coltau = aet.as_tensor_variable(coltau) if coltau.ndim != 2: raise ValueError("coltau must be two dimensional.") self.colchol_tau = cholesky(coltau) @@ -1755,7 +1761,7 @@ def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau): self._colcov_type = "chol" if colchol.ndim != 2: raise ValueError("colchol must be two dimensional.") - self.colchol_cov = tt.as_tensor_variable(colchol) + self.colchol_cov = aet.as_tensor_variable(colchol) def random(self, point=None, size=None): """ @@ -1802,15 +1808,15 @@ def _trquaddist(self, value): # Find exponent piece by piece right_quaddist = self.solve_lower(rowchol_cov, delta) - quaddist = tt.nlinalg.matrix_dot(right_quaddist.T, right_quaddist) + quaddist = aet.nlinalg.matrix_dot(right_quaddist.T, right_quaddist) quaddist = self.solve_lower(colchol_cov, quaddist) quaddist = self.solve_upper(colchol_cov.T, quaddist) - trquaddist = tt.nlinalg.trace(quaddist) + trquaddist = aet.nlinalg.trace(quaddist) - coldiag = tt.nlinalg.diag(colchol_cov) - rowdiag = tt.nlinalg.diag(rowchol_cov) - half_collogdet = tt.sum(tt.log(coldiag)) # logdet(M) = 2*Tr(log(L)) - half_rowlogdet = tt.sum(tt.log(rowdiag)) # Using Cholesky: M = L L^T + coldiag = aet.nlinalg.diag(colchol_cov) + rowdiag = aet.nlinalg.diag(rowchol_cov) + half_collogdet = aet.sum(aet.log(coldiag)) # logdet(M) = 2*Tr(log(L)) + half_rowlogdet = aet.sum(aet.log(rowdiag)) # Using Cholesky: M = L L^T return trquaddist, half_collogdet, half_rowlogdet def logp(self, value): @@ -1869,7 +1875,7 @@ class KroneckerNormal(Continuous): :math:`[(v_1, Q_1), (v_2, Q_2), ...]` such that :math:`K_i = Q_i \text{diag}(v_i) Q_i'`. For example:: - v_i, Q_i = tt.nlinalg.eigh(K_i) + v_i, Q_i = aet.nlinalg.eigh(K_i) sigma: scalar, variable Standard deviation of the Gaussian white noise. 
@@ -1930,7 +1936,7 @@ class KroneckerNormal(Continuous): def __init__(self, mu, covs=None, chols=None, evds=None, sigma=None, *args, **kwargs): self._setup(covs, chols, evds, sigma) super().__init__(*args, **kwargs) - self.mu = tt.as_tensor_variable(mu) + self.mu = aet.as_tensor_variable(mu) self.mean = self.median = self.mode = self.mu def _setup(self, covs, chols, evds, sigma): @@ -1952,21 +1958,21 @@ def _setup(self, covs, chols, evds, sigma): else: # Otherwise use cholesky as usual self.chols = list(map(self.cholesky, self.covs)) - self.chol_diags = list(map(tt.nlinalg.diag, self.chols)) - self.sizes = tt.as_tensor_variable([chol.shape[0] for chol in self.chols]) - self.N = tt.prod(self.sizes) + self.chol_diags = list(map(aet.nlinalg.diag, self.chols)) + self.sizes = aet.as_tensor_variable([chol.shape[0] for chol in self.chols]) + self.N = aet.prod(self.sizes) elif chols is not None: self._cov_type = "chol" if self.is_noisy: # A strange case... # Noise requires eigendecomposition - covs = [tt.dot(chol, chol.T) for chol in chols] + covs = [aet.dot(chol, chol.T) for chol in chols] eigh_map = map(eigh, covs) self._setup_evd(eigh_map) else: self.chols = chols - self.chol_diags = list(map(tt.nlinalg.diag, self.chols)) - self.sizes = tt.as_tensor_variable([chol.shape[0] for chol in self.chols]) - self.N = tt.prod(self.sizes) + self.chol_diags = list(map(aet.nlinalg.diag, self.chols)) + self.sizes = aet.as_tensor_variable([chol.shape[0] for chol in self.chols]) + self.N = aet.prod(self.sizes) else: self._cov_type = "evd" self._setup_evd(evds) @@ -1974,10 +1980,10 @@ def _setup(self, covs, chols, evds, sigma): def _setup_evd(self, eigh_iterable): self._isEVD = True eigs_sep, Qs = zip(*eigh_iterable) # Unzip - self.Qs = list(map(tt.as_tensor_variable, Qs)) - self.QTs = list(map(tt.transpose, self.Qs)) + self.Qs = list(map(aet.as_tensor_variable, Qs)) + self.QTs = list(map(aet.transpose, self.Qs)) - self.eigs_sep = list(map(tt.as_tensor_variable, eigs_sep)) + self.eigs_sep = list(map(aet.as_tensor_variable, eigs_sep)) self.eigs = kron_diag(*self.eigs_sep) # Combine separate eigs if self.is_noisy: self.eigs += self.sigma ** 2 @@ -1989,28 +1995,28 @@ def _setup_random(self): if self._cov_type == "cov": cov = kronecker(*self.covs) if self.is_noisy: - cov = cov + self.sigma ** 2 * tt.identity_like(cov) + cov = cov + self.sigma ** 2 * aet.identity_like(cov) self.mv_params["cov"] = cov elif self._cov_type == "chol": if self.is_noisy: covs = [] for eig, Q in zip(self.eigs_sep, self.Qs): - cov_i = tt.dot(Q, tt.dot(tt.diag(eig), Q.T)) + cov_i = aet.dot(Q, aet.dot(aet.diag(eig), Q.T)) covs.append(cov_i) cov = kronecker(*covs) if self.is_noisy: - cov = cov + self.sigma ** 2 * tt.identity_like(cov) + cov = cov + self.sigma ** 2 * aet.identity_like(cov) self.mv_params["chol"] = self.cholesky(cov) else: self.mv_params["chol"] = kronecker(*self.chols) elif self._cov_type == "evd": covs = [] for eig, Q in zip(self.eigs_sep, self.Qs): - cov_i = tt.dot(Q, tt.dot(tt.diag(eig), Q.T)) + cov_i = aet.dot(Q, aet.dot(aet.diag(eig), Q.T)) covs.append(cov_i) cov = kronecker(*covs) if self.is_noisy: - cov = cov + self.sigma ** 2 * tt.identity_like(cov) + cov = cov + self.sigma ** 2 * aet.identity_like(cov) self.mv_params["cov"] = cov def random(self, point=None, size=None): @@ -2050,16 +2056,16 @@ def _quaddist(self, value): delta = value - self.mu if self._isEVD: sqrt_quad = kron_dot(self.QTs, delta.T) - sqrt_quad = sqrt_quad / tt.sqrt(self.eigs[:, None]) - logdet = tt.sum(tt.log(self.eigs)) + sqrt_quad = sqrt_quad / 
aet.sqrt(self.eigs[:, None]) + logdet = aet.sum(aet.log(self.eigs)) else: sqrt_quad = kron_solve_lower(self.chols, delta.T) logdet = 0 for chol_size, chol_diag in zip(self.sizes, self.chol_diags): - logchol = tt.log(chol_diag) * self.N / chol_size - logdet += tt.sum(2 * logchol) + logchol = aet.log(chol_diag) * self.N / chol_size + logdet += aet.sum(2 * logchol) # Square each sample - quad = tt.batched_dot(sqrt_quad.T, sqrt_quad.T) + quad = aet.batched_dot(sqrt_quad.T, sqrt_quad.T) if onedim: quad = quad[0] return quad, logdet @@ -2079,7 +2085,7 @@ def logp(self, value): TensorVariable """ quad, logdet = self._quaddist(value) - return -(quad + logdet + self.N * tt.log(2 * np.pi)) / 2.0 + return -(quad + logdet + self.N * aet.log(2 * np.pi)) / 2.0 def _distr_parameters_for_repr(self): return ["mu"] diff --git a/pymc3/distributions/posterior_predictive.py b/pymc3/distributions/posterior_predictive.py index 31aa3e40f58..1125ae93577 100644 --- a/pymc3/distributions/posterior_predictive.py +++ b/pymc3/distributions/posterior_predictive.py @@ -9,18 +9,20 @@ from contextlib import AbstractContextManager from typing import TYPE_CHECKING, Any, Callable, Dict, List, cast, overload +import aesara.graph.basic +import aesara.graph.fg import numpy as np -import theano.graph.basic -import theano.graph.fg -import theano.tensor as tt +from aesara.compile.sharedvalue import SharedVariable +from aesara.graph.basic import Constant +from aesara.tensor.var import TensorVariable from arviz import InferenceData from typing_extensions import Literal, Protocol from xarray import Dataset from pymc3.backends.base import MultiTrace from pymc3.distributions.distribution import ( - _compile_theano_function, + _compile_aesara_function, _DrawValuesContext, _DrawValuesContextBlocker, is_fast_drawable, @@ -35,7 +37,6 @@ modelcontext, ) from pymc3.util import chains_and_samples, dataset_to_point_list, get_var_name -from pymc3.vartypes import theano_constant # Failing tests: # test_mixture_random_shape::test_mixture_random_shape @@ -375,13 +376,13 @@ def draw_values(self) -> list[np.ndarray]: if (next_, samples) in drawn: # If the node already has a givens value, skip it continue - elif isinstance(next_, (theano_constant, tt.sharedvar.SharedVariable)): - # If the node is a theano.tensor.TensorConstant or a - # theano.tensor.sharedvar.SharedVariable, its value will be - # available automatically in _compile_theano_function so + elif isinstance(next_, (Constant, SharedVariable)): + # If the node is an aesara.tensor.TensorConstant or an + # aesara.tensor.sharedvar.SharedVariable, its value will be + # available automatically in _compile_aesara_function so # we can skip it. Furthermore, if this node was treated as a - # TensorVariable that should be compiled by theano in - # _compile_theano_function, it would raise a `TypeError: + # TensorVariable that should be compiled by aesara in + # _compile_aesara_function, it would raise a `TypeError: # ('Constants not allowed in param list', ...)` for # TensorConstant, and a `TypeError: Cannot use a shared # variable (...) as explicit input` for SharedVariable. @@ -411,7 +412,7 @@ def draw_values(self) -> list[np.ndarray]: assert isinstance(value, np.ndarray) givens[next_.name] = (next_, value) drawn[(next_, samples)] = value - except theano.graph.fg.MissingInputError: + except aesara.graph.fg.MissingInputError: # The node failed, so we must add the node's parents to # the stack of nodes to try to draw from. We exclude the # nodes in the `params` list.
@@ -456,7 +457,7 @@ def draw_values(self) -> list[np.ndarray]: assert isinstance(value, np.ndarray) self.evaluated[param_idx] = drawn[(param, samples)] = value givens[param.name] = (param, value) - except theano.graph.fg.MissingInputError: + except aesara.graph.fg.MissingInputError: missing_inputs.add(param_idx) return [self.evaluated[j] for j in params] @@ -527,9 +528,9 @@ def draw_value(self, param, trace: _TraceDict | None = None, givens=None): Parameters ---------- - param: number, array like, theano variable or pymc3 random variable + param: number, array like, aesara variable or pymc3 random variable The value or distribution. Constants or shared variables - will be converted to an array and returned. Theano variables + will be converted to an array and returned. Aesara variables are evaluated. If `param` is a pymc3 random variable, draw values from it and return that (as ``np.ndarray``), unless a value is specified in the ``trace``. trace: _TraceDict, optional A dictionary from pymc3 variable names to samples of their values used to provide context for evaluating ``param``. givens: dict, optional - A dictionary from theano variables to their values. These values - are used to evaluate ``param`` if it is a theano variable. + A dictionary from aesara variables to their values. These values + are used to evaluate ``param`` if it is an aesara variable. """ samples = self.samples @@ -569,11 +570,11 @@ def random_sample( if isinstance(param, (numbers.Number, np.ndarray)): return param - elif isinstance(param, theano_constant): + elif isinstance(param, Constant): return param.value - elif isinstance(param, tt.sharedvar.SharedVariable): + elif isinstance(param, SharedVariable): return param.get_value() - elif isinstance(param, (tt.TensorVariable, MultiObservedRV)): + elif isinstance(param, (TensorVariable, MultiObservedRV)): if hasattr(param, "model") and trace and param.name in trace.varnames: return trace[param.name] elif hasattr(param, "random") and param.random is not None: @@ -605,7 +606,7 @@ def random_sample( ) except (ValueError, TypeError): # reset shape to account for shape changes - # with theano.shared inputs + # with aesara.shared inputs dist_tmp.shape = () # We want to draw values to infer the dist_shape, # we don't want to store these drawn values to the context @@ -651,7 +652,7 @@ def random_sample( # We only truly care if the ancestors of param that were given # value have the matching dshape and val.shape param_ancestors = set( - theano.graph.basic.ancestors([param], blockers=list(variables)) + aesara.graph.basic.ancestors([param], blockers=list(variables)) ) inputs = [ (var, val) for var, val in zip(variables, values) if var in param_ancestors ] @@ -661,7 +662,7 @@ def random_sample( else: input_vars = [] input_vals = [] - func = _compile_theano_function(param, input_vars) + func = _compile_aesara_function(param, input_vars) if not input_vars: assert input_vals == [] # AFAICT if there are now vars, there can't be vals output = func(*input_vals) @@ -685,7 +686,7 @@ def _param_shape(var_desig, model: Model) -> tuple[int, ...]: if hasattr(v, "observations"): try: # To get shape of _observed_ data container `pm.Data` - # (wrapper for theano.SharedVariable) we evaluate it. + # (wrapper for SharedVariable) we evaluate it.
shape = tuple(v.observations.shape.eval()) except AttributeError: shape = v.observations.shape diff --git a/pymc3/distributions/special.py b/pymc3/distributions/special.py index ba4662b2df1..8b218fea784 100644 --- a/pymc3/distributions/special.py +++ b/pymc3/distributions/special.py @@ -12,16 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import aesara.tensor as aet import numpy as np -import theano.tensor as tt -from theano import scalar -from theano.scalar.basic_scipy import GammaLn, Psi +from aesara import scalar +from aesara.scalar.basic_scipy import GammaLn, Psi +from aesara.tensor.elemwise import Elemwise __all__ = ["gammaln", "multigammaln", "psi", "log_i0"] scalar_gammaln = GammaLn(scalar.upgrade_to_float, name="scalar_gammaln") -gammaln = tt.Elemwise(scalar_gammaln, name="gammaln") +gammaln = Elemwise(scalar_gammaln, name="gammaln") def multigammaln(a, p): @@ -33,17 +34,17 @@ def multigammaln(a, p): p: int degrees of freedom. p > 0 """ - i = tt.arange(1, p + 1) - return p * (p - 1) * tt.log(np.pi) / 4.0 + tt.sum(gammaln(a + (1.0 - i) / 2.0), axis=0) + i = aet.arange(1, p + 1) + return p * (p - 1) * aet.log(np.pi) / 4.0 + aet.sum(gammaln(a + (1.0 - i) / 2.0), axis=0) def log_i0(x): """ Calculates the logarithm of the 0 order modified Bessel function of the first kind"" """ - return tt.switch( - tt.lt(x, 5), - tt.log1p( + return aet.switch( + aet.lt(x, 5), + aet.log1p( x ** 2.0 / 4.0 + x ** 4.0 / 64.0 + x ** 6.0 / 2304.0 @@ -52,8 +53,8 @@ def log_i0(x): + x ** 12.0 / 2123366400.0 ), x - - 0.5 * tt.log(2.0 * np.pi * x) - + tt.log1p( + - 0.5 * aet.log(2.0 * np.pi * x) + + aet.log1p( 1.0 / (8.0 * x) + 9.0 / (128.0 * x ** 2.0) + 225.0 / (3072.0 * x ** 3.0) @@ -63,4 +64,4 @@ def log_i0(x): scalar_psi = Psi(scalar.upgrade_to_float, name="scalar_psi") -psi = tt.Elemwise(scalar_psi, name="psi") +psi = Elemwise(scalar_psi, name="psi") diff --git a/pymc3/distributions/timeseries.py b/pymc3/distributions/timeseries.py index e3e1aa15bc4..ecd693df2ff 100644 --- a/pymc3/distributions/timeseries.py +++ b/pymc3/distributions/timeseries.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import aesara.tensor as aet import numpy as np -import theano.tensor as tt +from aesara import scan from scipy import stats -from theano import scan from pymc3.distributions import distribution, multivariate from pymc3.distributions.continuous import Flat, Normal, get_tau_sigma @@ -47,10 +47,10 @@ class AR1(distribution.Continuous): def __init__(self, k, tau_e, *args, **kwargs): super().__init__(*args, **kwargs) - self.k = k = tt.as_tensor_variable(k) - self.tau_e = tau_e = tt.as_tensor_variable(tau_e) + self.k = k = aet.as_tensor_variable(k) + self.tau_e = tau_e = aet.as_tensor_variable(tau_e) self.tau = tau_e * (1 - k ** 2) - self.mode = tt.as_tensor_variable(0.0) + self.mode = aet.as_tensor_variable(0.0) def logp(self, x): """ @@ -74,7 +74,7 @@ def logp(self, x): boundary = Normal.dist(0.0, tau=tau).logp innov_like = Normal.dist(k * x_im1, tau=tau_e).logp(x_i) - return boundary(x[0]) + tt.sum(innov_like) + return boundary(x[0]) + aet.sum(innov_like) class AR(distribution.Continuous): @@ -116,10 +116,10 @@ def __init__( sigma = sd tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) - self.sigma = self.sd = tt.as_tensor_variable(sigma) - self.tau = tt.as_tensor_variable(tau) + self.sigma = self.sd = aet.as_tensor_variable(sigma) + self.tau = aet.as_tensor_variable(tau) - self.mean = tt.as_tensor_variable(0.0) + self.mean = aet.as_tensor_variable(0.0) if isinstance(rho, list): p = len(rho) @@ -140,7 +140,7 @@ def __init__( self.p = p self.constant = constant - self.rho = rho = tt.as_tensor_variable(rho) + self.rho = rho = aet.as_tensor_variable(rho) self.init = init def logp(self, value): @@ -157,7 +157,7 @@ def logp(self, value): TensorVariable """ if self.constant: - x = tt.add( + x = aet.add( *[self.rho[i + 1] * value[self.p - (i + 1) : -(i + 1)] for i in range(self.p)] ) eps = value[self.p :] - self.rho[0] - x @@ -165,7 +165,7 @@ def logp(self, value): if self.p == 1: x = self.rho * value[:-1] else: - x = tt.add( + x = aet.add( *[self.rho[i] * value[self.p - (i + 1) : -(i + 1)] for i in range(self.p)] ) eps = value[self.p :] - x @@ -173,7 +173,7 @@ def logp(self, value): innov_like = Normal.dist(mu=0.0, tau=self.tau).logp(eps) init_like = self.init.logp(value[: self.p]) - return tt.sum(innov_like) + tt.sum(init_like) + return aet.sum(innov_like) + aet.sum(init_like) class GaussianRandomWalk(distribution.Continuous): @@ -181,7 +181,7 @@ class GaussianRandomWalk(distribution.Continuous): Note that this is mainly a user-friendly wrapper to enable an easier specification of GRW. You are not restricted to use only Normal innovations but can use any - distribution: just use `theano.tensor.cumsum()` to create the random walk behavior. + distribution: just use `aesara.tensor.cumsum()` to create the random walk behavior. 
Parameters ---------- @@ -209,12 +209,12 @@ def __init__(self, tau=None, init=Flat.dist(), sigma=None, mu=0.0, sd=None, *arg if sd is not None: sigma = sd tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) - self.tau = tt.as_tensor_variable(tau) - sigma = tt.as_tensor_variable(sigma) + self.tau = aet.as_tensor_variable(tau) + sigma = aet.as_tensor_variable(sigma) self.sigma = self.sd = sigma - self.mu = tt.as_tensor_variable(mu) + self.mu = aet.as_tensor_variable(mu) self.init = init - self.mean = tt.as_tensor_variable(0.0) + self.mean = aet.as_tensor_variable(0.0) def _mu_and_sigma(self, mu, sigma): """Helper to get mu and sigma if they are high dimensional.""" @@ -242,7 +242,7 @@ def logp(self, x): x_i = x[1:] mu, sigma = self._mu_and_sigma(self.mu, self.sigma) innov_like = Normal.dist(mu=x_im1 + mu, sigma=sigma).logp(x_i) - return self.init.logp(x[0]) + tt.sum(innov_like) + return self.init.logp(x[0]) + aet.sum(innov_like) return self.init.logp(x) def random(self, point=None, size=None): @@ -323,17 +323,17 @@ class GARCH11(distribution.Continuous): def __init__(self, omega, alpha_1, beta_1, initial_vol, *args, **kwargs): super().__init__(*args, **kwargs) - self.omega = omega = tt.as_tensor_variable(omega) - self.alpha_1 = alpha_1 = tt.as_tensor_variable(alpha_1) - self.beta_1 = beta_1 = tt.as_tensor_variable(beta_1) - self.initial_vol = tt.as_tensor_variable(initial_vol) - self.mean = tt.as_tensor_variable(0.0) + self.omega = omega = aet.as_tensor_variable(omega) + self.alpha_1 = alpha_1 = aet.as_tensor_variable(alpha_1) + self.beta_1 = beta_1 = aet.as_tensor_variable(beta_1) + self.initial_vol = aet.as_tensor_variable(initial_vol) + self.mean = aet.as_tensor_variable(0.0) def get_volatility(self, x): x = x[:-1] def volatility_update(x, vol, w, a, b): - return tt.sqrt(w + a * tt.square(x) + b * tt.square(vol)) + return aet.sqrt(w + a * aet.square(x) + b * aet.square(vol)) vol, _ = scan( fn=volatility_update, @@ -341,7 +341,7 @@ def volatility_update(x, vol, w, a, b): outputs_info=[self.initial_vol], non_sequences=[self.omega, self.alpha_1, self.beta_1], ) - return tt.concatenate([[self.initial_vol], vol]) + return aet.concatenate([[self.initial_vol], vol]) def logp(self, x): """ @@ -357,7 +357,7 @@ def logp(self, x): TensorVariable """ vol = self.get_volatility(x) - return tt.sum(Normal.dist(0.0, sigma=vol).logp(x)) + return aet.sum(Normal.dist(0.0, sigma=vol).logp(x)) def _distr_parameters_for_repr(self): return ["omega", "alpha_1", "beta_1"] @@ -379,7 +379,7 @@ class EulerMaruyama(distribution.Continuous): def __init__(self, dt, sde_fn, sde_pars, *args, **kwds): super().__init__(*args, **kwds) - self.dt = dt = tt.as_tensor_variable(dt) + self.dt = dt = aet.as_tensor_variable(dt) self.sde_fn = sde_fn self.sde_pars = sde_pars @@ -399,8 +399,8 @@ def logp(self, x): xt = x[:-1] f, g = self.sde_fn(x[:-1], *self.sde_pars) mu = xt + self.dt * f - sd = tt.sqrt(self.dt) * g - return tt.sum(Normal.dist(mu=mu, sigma=sd).logp(x[1:])) + sd = aet.sqrt(self.dt) * g + return aet.sum(Normal.dist(mu=mu, sigma=sd).logp(x[1:])) def _distr_parameters_for_repr(self): return ["dt"] @@ -437,7 +437,7 @@ def __init__( self.init = init self.innovArgs = (mu, cov, tau, chol, lower) self.innov = multivariate.MvNormal.dist(*self.innovArgs, shape=self.shape) - self.mean = tt.as_tensor_variable(0.0) + self.mean = aet.as_tensor_variable(0.0) def logp(self, x): """ @@ -551,7 +551,7 @@ class MvStudentTRandomWalk(MvGaussianRandomWalk): def __init__(self, nu, *args, **kwargs): super().__init__(*args, **kwargs) - self.nu = 
tt.as_tensor_variable(nu) + self.nu = aet.as_tensor_variable(nu) self.innov = multivariate.MvStudentT.dist(self.nu, None, *self.innovArgs) def _distr_parameters_for_repr(self): diff --git a/pymc3/distributions/transforms.py b/pymc3/distributions/transforms.py index 880301182ce..b17e7b27f46 100644 --- a/pymc3/distributions/transforms.py +++ b/pymc3/distributions/transforms.py @@ -14,16 +14,18 @@ import warnings +import aesara.tensor as aet import numpy as np -import theano.tensor as tt +from aesara.tensor.subtensor import advanced_set_subtensor1 +from aesara.tensor.type import TensorType from scipy.special import logit as nplogit +from pymc3.aesaraf import floatX, gradient from pymc3.distributions import distribution from pymc3.distributions.distribution import draw_values from pymc3.math import invlogit, logit, logsumexp from pymc3.model import FreeRV -from pymc3.theanof import floatX, gradient __all__ = [ "Transform", @@ -131,8 +133,8 @@ def __str__(self): class ElemwiseTransform(Transform): def jacobian_det(self, x): - grad = tt.reshape(gradient(tt.sum(self.backward(x)), [x]), x.shape) - return tt.log(tt.abs_(grad)) + grad = aet.reshape(gradient(aet.sum(self.backward(x)), [x]), x.shape) + return aet.log(aet.abs_(grad)) class TransformedDistribution(distribution.Distribution): @@ -159,7 +161,7 @@ def __init__(self, dist, transform, *args, **kwargs): if transform.name == "stickbreaking": b = np.hstack(((np.atleast_1d(self.shape) == 1)[:-1], False)) # force the last dim not broadcastable - self.type = tt.TensorType(v.dtype, b) + self.type = TensorType(v.dtype, b) def logp(self, x): """ @@ -212,10 +214,10 @@ class Log(ElemwiseTransform): name = "log" def backward(self, x): - return tt.exp(x) + return aet.exp(x) def forward(self, x): - return tt.log(x) + return aet.log(x) def forward_val(self, x, point=None): return np.log(x) @@ -231,7 +233,7 @@ class LogExpM1(ElemwiseTransform): name = "log_exp_m1" def backward(self, x): - return tt.nnet.softplus(x) + return aet.nnet.softplus(x) def forward(self, x): """Inverse operation of softplus. 
@@ -239,13 +241,13 @@ def forward(self, x): y = Log(Exp(x) - 1) = Log(1 - Exp(-x)) + x """ - return tt.log(1.0 - tt.exp(-x)) + x + return aet.log(1.0 - aet.exp(-x)) + x def forward_val(self, x, point=None): return np.log(1.0 - np.exp(-x)) + x def jacobian_det(self, x): - return -tt.nnet.softplus(-x) + return -aet.nnet.softplus(-x) log_exp_m1 = LogExpM1() @@ -273,18 +275,18 @@ class Interval(ElemwiseTransform): name = "interval" def __init__(self, a, b): - self.a = tt.as_tensor_variable(a) - self.b = tt.as_tensor_variable(b) + self.a = aet.as_tensor_variable(a) + self.b = aet.as_tensor_variable(b) def backward(self, x): a, b = self.a, self.b - sigmoid_x = tt.nnet.sigmoid(x) + sigmoid_x = aet.nnet.sigmoid(x) r = sigmoid_x * b + (1 - sigmoid_x) * a return r def forward(self, x): a, b = self.a, self.b - return tt.log(x - a) - tt.log(b - x) + return aet.log(x - a) - aet.log(b - x) def forward_val(self, x, point=None): # 2017-06-19 @@ -294,8 +296,8 @@ def forward_val(self, x, point=None): return floatX(np.log(x - a) - np.log(b - x)) def jacobian_det(self, x): - s = tt.nnet.softplus(-x) - return tt.log(self.b - self.a) - 2 * s - x + s = aet.nnet.softplus(-x) + return aet.log(self.b - self.a) - 2 * s - x interval = Interval @@ -307,16 +309,16 @@ class LowerBound(ElemwiseTransform): name = "lowerbound" def __init__(self, a): - self.a = tt.as_tensor_variable(a) + self.a = aet.as_tensor_variable(a) def backward(self, x): a = self.a - r = tt.exp(x) + a + r = aet.exp(x) + a return r def forward(self, x): a = self.a - return tt.log(x - a) + return aet.log(x - a) def forward_val(self, x, point=None): # 2017-06-19 @@ -342,16 +344,16 @@ class UpperBound(ElemwiseTransform): name = "upperbound" def __init__(self, b): - self.b = tt.as_tensor_variable(b) + self.b = aet.as_tensor_variable(b) def backward(self, x): b = self.b - r = b - tt.exp(x) + r = b - aet.exp(x) return r def forward(self, x): b = self.b - return tt.log(b - x) + return aet.log(b - x) def forward_val(self, x, point=None): # 2017-06-19 @@ -375,15 +377,15 @@ class Ordered(Transform): name = "ordered" def backward(self, y): - x = tt.zeros(y.shape) - x = tt.inc_subtensor(x[..., 0], y[..., 0]) - x = tt.inc_subtensor(x[..., 1:], tt.exp(y[..., 1:])) - return tt.cumsum(x, axis=-1) + x = aet.zeros(y.shape) + x = aet.inc_subtensor(x[..., 0], y[..., 0]) + x = aet.inc_subtensor(x[..., 1:], aet.exp(y[..., 1:])) + return aet.cumsum(x, axis=-1) def forward(self, x): - y = tt.zeros(x.shape) - y = tt.inc_subtensor(y[..., 0], x[..., 0]) - y = tt.inc_subtensor(y[..., 1:], tt.log(x[..., 1:] - x[..., :-1])) + y = aet.zeros(x.shape) + y = aet.inc_subtensor(y[..., 0], x[..., 0]) + y = aet.inc_subtensor(y[..., 1:], aet.log(x[..., 1:] - x[..., :-1])) return y def forward_val(self, x, point=None): @@ -393,7 +395,7 @@ def forward_val(self, x, point=None): return y def jacobian_det(self, y): - return tt.sum(y[..., 1:], axis=-1) + return aet.sum(y[..., 1:], axis=-1) ordered = Ordered() @@ -412,8 +414,8 @@ class SumTo1(Transform): name = "sumto1" def backward(self, y): - remaining = 1 - tt.sum(y[..., :], axis=-1, keepdims=True) - return tt.concatenate([y[..., :], remaining], axis=-1) + remaining = 1 - aet.sum(y[..., :], axis=-1, keepdims=True) + return aet.concatenate([y[..., :], remaining], axis=-1) def forward(self, x): return x[..., :-1] @@ -422,8 +424,8 @@ def forward_val(self, x, point=None): return x[..., :-1] def jacobian_det(self, x): - y = tt.zeros(x.shape) - return tt.sum(y, axis=-1) + y = aet.zeros(x.shape) + return aet.sum(y, axis=-1) sum_to_1 = SumTo1() @@ 
-450,8 +452,8 @@ def __init__(self, eps=None): def forward(self, x_): x = x_.T n = x.shape[0] - lx = tt.log(x) - shift = tt.sum(lx, 0, keepdims=True) / n + lx = aet.log(x) + shift = aet.sum(lx, 0, keepdims=True) / n y = lx[:-1] - shift return floatX(y.T) @@ -465,20 +467,20 @@ def forward_val(self, x_, point=None): def backward(self, y_): y = y_.T - y = tt.concatenate([y, -tt.sum(y, 0, keepdims=True)]) + y = aet.concatenate([y, -aet.sum(y, 0, keepdims=True)]) # "softmax" with vector support and no deprication warning: - e_y = tt.exp(y - tt.max(y, 0, keepdims=True)) - x = e_y / tt.sum(e_y, 0, keepdims=True) + e_y = aet.exp(y - aet.max(y, 0, keepdims=True)) + x = e_y / aet.sum(e_y, 0, keepdims=True) return floatX(x.T) def jacobian_det(self, y_): y = y_.T Km1 = y.shape[0] + 1 - sy = tt.sum(y, 0, keepdims=True) - r = tt.concatenate([y + sy, tt.zeros(sy.shape)]) + sy = aet.sum(y, 0, keepdims=True) + r = aet.concatenate([y + sy, aet.zeros(sy.shape)]) sr = logsumexp(r, 0, keepdims=True) - d = tt.log(Km1) + (Km1 * sy) - (Km1 * sr) - return tt.sum(d, 0).T + d = aet.log(Km1) + (Km1 * sy) - (Km1 * sr) + return aet.sum(d, 0).T stick_breaking = StickBreaking() @@ -490,16 +492,16 @@ class Circular(ElemwiseTransform): name = "circular" def backward(self, y): - return tt.arctan2(tt.sin(y), tt.cos(y)) + return aet.arctan2(aet.sin(y), aet.cos(y)) def forward(self, x): - return tt.as_tensor_variable(x) + return aet.as_tensor_variable(x) def forward_val(self, x, point=None): return x def jacobian_det(self, x): - return tt.zeros(x.shape) + return aet.zeros(x.shape) circular = Circular() @@ -512,17 +514,17 @@ def __init__(self, n): self.diag_idxs = np.arange(1, n + 1).cumsum() - 1 def backward(self, x): - return tt.advanced_set_subtensor1(x, tt.exp(x[self.diag_idxs]), self.diag_idxs) + return advanced_set_subtensor1(x, aet.exp(x[self.diag_idxs]), self.diag_idxs) def forward(self, y): - return tt.advanced_set_subtensor1(y, tt.log(y[self.diag_idxs]), self.diag_idxs) + return advanced_set_subtensor1(y, aet.log(y[self.diag_idxs]), self.diag_idxs) def forward_val(self, y, point=None): y[..., self.diag_idxs] = np.log(y[..., self.diag_idxs]) return y def jacobian_det(self, y): - return tt.sum(y[self.diag_idxs]) + return aet.sum(y[self.diag_idxs]) class Chain(Transform): @@ -549,7 +551,7 @@ def backward(self, y): return x def jacobian_det(self, y): - y = tt.as_tensor_variable(y) + y = aet.as_tensor_variable(y) det_list = [] ndim0 = y.ndim for transf in reversed(self.transform_list): diff --git a/pymc3/glm/families.py b/pymc3/glm/families.py index 23ca136cf85..57232e28d1a 100644 --- a/pymc3/glm/families.py +++ b/pymc3/glm/families.py @@ -16,8 +16,8 @@ from copy import copy +import aesara.tensor as aet import numpy as np -import theano.tensor as tt from pymc3 import distributions as pm_dists from pymc3.model import modelcontext @@ -36,9 +36,9 @@ def __call__(self, x): identity = Identity() -logit = tt.nnet.sigmoid -inverse = tt.inv -exp = tt.exp +logit = aet.nnet.sigmoid +inverse = aet.inv +exp = aet.exp class Family: @@ -80,7 +80,7 @@ def create_likelihood(self, name, y_est, y_data, model=None): Parameters ---------- - y_est: theano.tensor + y_est: aesara.tensor Estimate of dependent variable y_data: array Observed dependent variable diff --git a/pymc3/glm/linear.py b/pymc3/glm/linear.py index 81c916c1185..9ec2a2b7313 100644 --- a/pymc3/glm/linear.py +++ b/pymc3/glm/linear.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import aesara.tensor as aet import numpy as np -import theano.tensor as tt from pymc3.distributions import Flat, Normal from pymc3.glm import families @@ -39,7 +39,7 @@ class LinearComponent(Model): use `Regressor` key for defining default prior for all regressors defaults to Normal.dist(mu=0, tau=1.0E-6) vars: dict - random variables instead of creating new ones - offset: scalar, or numpy/theano array with the same shape as y + offset: scalar, or numpy/aesara array with the same shape as y this can be used to specify an a priori known component to be included in the linear predictor during fitting. """ @@ -73,7 +73,7 @@ def __init__( x, labels = any_to_tensor_and_labels(x, labels) # now we have x, shape and labels if intercept: - x = tt.concatenate([tt.ones((x.shape[0], 1), x.dtype), x], axis=1) + x = aet.concatenate([aet.ones((x.shape[0], 1), x.dtype), x], axis=1) labels = ["Intercept"] + labels coeffs = list() for name in labels: @@ -94,7 +94,7 @@ def __init__( ), ) coeffs.append(v) - self.coeffs = tt.stack(coeffs, axis=0) + self.coeffs = aet.stack(coeffs, axis=0) self.y_est = x.dot(self.coeffs) + offset @classmethod @@ -149,7 +149,7 @@ class GLM(LinearComponent): init: dict - test_vals for coefficients vars: dict - random variables instead of creating new ones family: pymc3..families object - offset: scalar, or numpy/theano array with the same shape as y + offset: scalar, or numpy/aesara array with the same shape as y this can be used to specify an a priori known component to be included in the linear predictor during fitting. """ diff --git a/pymc3/glm/utils.py b/pymc3/glm/utils.py index 889284b3179..64318925818 100644 --- a/pymc3/glm/utils.py +++ b/pymc3/glm/utils.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
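Reviewer aside, example only: the GLM/LinearComponent hunks above keep the documented behaviour of ``offset`` ("scalar, or numpy/aesara array with the same shape as y"); only the tensor namespace changes. A rough usage sketch, assuming the pymc3 3.x GLM signature, with made-up data:

    import numpy as np
    import pymc3 as pm

    x = np.random.randn(100, 2)
    exposure = np.log(np.full(100, 2.0))           # a priori known component
    y = x @ np.array([1.0, -0.5]) + exposure + 0.1 * np.random.randn(100)

    with pm.Model():
        pm.GLM(x, y, labels=["x1", "x2"], offset=exposure)
        # trace = pm.sample()  # sampling omitted here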
+import aesara.tensor as aet import numpy as np import pandas as pd -import theano.tensor as tt + +from aesara.graph.basic import Variable def any_to_tensor_and_labels(x, labels=None): @@ -33,7 +35,7 @@ def any_to_tensor_and_labels(x, labels=None): Parameters ---------- - x: np.ndarray | pd.DataFrame | tt.Variable | dict | list + x: np.ndarray | pd.DataFrame | Variable | dict | list labels: list - names for columns of output tensor Returns @@ -76,13 +78,13 @@ def any_to_tensor_and_labels(x, labels=None): for k, v in x.items(): res.append(v) labels.append(k) - x = tt.stack(res, axis=1) + x = aet.stack(res, axis=1) if x.ndim == 1: x = x[:, None] # case when it can appear to be some # array like value like lists of lists # numpy deals with it - elif not isinstance(x, tt.Variable): + elif not isinstance(x, Variable): x = np.asarray(x) if x.ndim == 0: raise ValueError("Cannot use scalars") @@ -92,7 +94,7 @@ def any_to_tensor_and_labels(x, labels=None): # but user passes labels trusting seems # to be a good option elif labels is not None: - x = tt.as_tensor_variable(x) + x = aet.as_tensor_variable(x) if x.ndim == 0: raise ValueError("Cannot use scalars") elif x.ndim == 1: @@ -100,15 +102,15 @@ def any_to_tensor_and_labels(x, labels=None): else: # trust input pass # we should check that we can extract labels - if labels is None and not isinstance(x, tt.Variable): + if labels is None and not isinstance(x, Variable): labels = ["x%d" % i for i in range(x.shape[1])] - # for theano variables we should have labels from user + # for aesara variables we should have labels from user elif labels is None: raise ValueError("Please provide labels as " "we cannot infer shape of input") else: # trust labels, user knows what he is doing pass # it's time to check shapes if we can - if not isinstance(x, tt.Variable): + if not isinstance(x, Variable): if not len(labels) == x.shape[1]: raise ValueError( "Please provide full list " @@ -126,8 +128,8 @@ def any_to_tensor_and_labels(x, labels=None): elif not isinstance(labels, list): labels = list(labels) # as output we need tensor - if not isinstance(x, tt.Variable): - x = tt.as_tensor_variable(x) + if not isinstance(x, Variable): + x = aet.as_tensor_variable(x) # finally check dimensions if x.ndim == 0: raise ValueError("Cannot use scalars") diff --git a/pymc3/gp/cov.py b/pymc3/gp/cov.py index 7a01a9eec51..4a02827a5d5 100644 --- a/pymc3/gp/cov.py +++ b/pymc3/gp/cov.py @@ -18,9 +18,12 @@ from numbers import Number from operator import add, mul +import aesara +import aesara.tensor as aet import numpy as np -import theano -import theano.tensor as tt + +from aesara.tensor.sharedvar import TensorSharedVariable +from aesara.tensor.var import TensorConstant, TensorVariable __all__ = [ "Constant", @@ -96,9 +99,9 @@ def _slice(self, X, Xs): " the number of columns to use. 
Ignore otherwise.", UserWarning, ) - X = tt.as_tensor_variable(X[:, self.active_dims]) + X = aet.as_tensor_variable(X[:, self.active_dims]) if Xs is not None: - Xs = tt.as_tensor_variable(Xs[:, self.active_dims]) + Xs = aet.as_tensor_variable(Xs[:, self.active_dims]) return X, Xs def __add__(self, other): @@ -115,10 +118,10 @@ def __rmul__(self, other): def __pow__(self, other): if ( - isinstance(other, theano.compile.SharedVariable) + isinstance(other, aesara.compile.SharedVariable) and other.get_value().squeeze().shape == () ): - other = tt.squeeze(other) + other = aet.squeeze(other) return Exponentiated(self, other) elif isinstance(other, Number): return Exponentiated(self, other) @@ -179,13 +182,13 @@ def merge_factors(self, X, Xs=None, diag=False): elif isinstance( factor, ( - tt.TensorConstant, - tt.TensorVariable, - tt.sharedvar.TensorSharedVariable, + TensorConstant, + TensorVariable, + TensorSharedVariable, ), ): if factor.ndim == 2 and diag: - factor_list.append(tt.diag(factor)) + factor_list.append(aet.diag(factor)) else: factor_list.append(factor) else: @@ -264,13 +267,13 @@ def __init__(self, c): self.c = c def diag(self, X): - return tt.alloc(self.c, X.shape[0]) + return aet.alloc(self.c, X.shape[0]) def full(self, X, Xs=None): if Xs is None: - return tt.alloc(self.c, X.shape[0], X.shape[0]) + return aet.alloc(self.c, X.shape[0], X.shape[0]) else: - return tt.alloc(self.c, X.shape[0], Xs.shape[0]) + return aet.alloc(self.c, X.shape[0], Xs.shape[0]) class WhiteNoise(Covariance): @@ -287,13 +290,13 @@ def __init__(self, sigma): self.sigma = sigma def diag(self, X): - return tt.alloc(tt.square(self.sigma), X.shape[0]) + return aet.alloc(aet.square(self.sigma), X.shape[0]) def full(self, X, Xs=None): if Xs is None: - return tt.diag(self.diag(X)) + return aet.diag(self.diag(X)) else: - return tt.alloc(0.0, X.shape[0], Xs.shape[0]) + return aet.alloc(0.0, X.shape[0], Xs.shape[0]) class Circular(Covariance): @@ -330,25 +333,25 @@ class Circular(Covariance): def __init__(self, input_dim, period, tau=4, active_dims=None): super().__init__(input_dim, active_dims) - self.c = tt.as_tensor_variable(period / 2) + self.c = aet.as_tensor_variable(period / 2) self.tau = tau def dist(self, X, Xs): if Xs is None: - Xs = tt.transpose(X) + Xs = aet.transpose(X) else: - Xs = tt.transpose(Xs) - return tt.abs_((X - Xs + self.c) % (self.c * 2) - self.c) + Xs = aet.transpose(Xs) + return aet.abs_((X - Xs + self.c) % (self.c * 2) - self.c) def weinland(self, t): - return (1 + self.tau * t / self.c) * tt.clip(1 - t / self.c, 0, np.inf) ** self.tau + return (1 + self.tau * t / self.c) * aet.clip(1 - t / self.c, 0, np.inf) ** self.tau def full(self, X, Xs=None): X, Xs = self._slice(X, Xs) return self.weinland(self.dist(X, Xs)) def diag(self, X): - return tt.alloc(1.0, X.shape[0]) + return aet.alloc(1.0, X.shape[0]) class Stationary(Covariance): @@ -371,29 +374,29 @@ def __init__(self, input_dim, ls=None, ls_inv=None, active_dims=None): ls = 1.0 / np.asarray(ls_inv) else: ls = 1.0 / ls_inv - self.ls = tt.as_tensor_variable(ls) + self.ls = aet.as_tensor_variable(ls) def square_dist(self, X, Xs): - X = tt.mul(X, 1.0 / self.ls) - X2 = tt.sum(tt.square(X), 1) + X = aet.mul(X, 1.0 / self.ls) + X2 = aet.sum(aet.square(X), 1) if Xs is None: - sqd = -2.0 * tt.dot(X, tt.transpose(X)) + ( - tt.reshape(X2, (-1, 1)) + tt.reshape(X2, (1, -1)) + sqd = -2.0 * aet.dot(X, aet.transpose(X)) + ( + aet.reshape(X2, (-1, 1)) + aet.reshape(X2, (1, -1)) ) else: - Xs = tt.mul(Xs, 1.0 / self.ls) - Xs2 = tt.sum(tt.square(Xs), 1) - 
sqd = -2.0 * tt.dot(X, tt.transpose(Xs)) + ( - tt.reshape(X2, (-1, 1)) + tt.reshape(Xs2, (1, -1)) + Xs = aet.mul(Xs, 1.0 / self.ls) + Xs2 = aet.sum(aet.square(Xs), 1) + sqd = -2.0 * aet.dot(X, aet.transpose(Xs)) + ( + aet.reshape(X2, (-1, 1)) + aet.reshape(Xs2, (1, -1)) ) - return tt.clip(sqd, 0.0, np.inf) + return aet.clip(sqd, 0.0, np.inf) def euclidean_dist(self, X, Xs): r2 = self.square_dist(X, Xs) - return tt.sqrt(r2 + 1e-12) + return aet.sqrt(r2 + 1e-12) def diag(self, X): - return tt.alloc(1.0, X.shape[0]) + return aet.alloc(1.0, X.shape[0]) def full(self, X, Xs=None): raise NotImplementedError @@ -429,8 +432,8 @@ def full(self, X, Xs=None): f1 = X.dimshuffle(0, "x", 1) f2 = Xs.dimshuffle("x", 0, 1) r = np.pi * (f1 - f2) / self.period - r = tt.sum(tt.square(tt.sin(r) / self.ls), 2) - return tt.exp(-0.5 * r) + r = aet.sum(aet.square(aet.sin(r) / self.ls), 2) + return aet.exp(-0.5 * r) class ExpQuad(Stationary): @@ -445,7 +448,7 @@ class ExpQuad(Stationary): def full(self, X, Xs=None): X, Xs = self._slice(X, Xs) - return tt.exp(-0.5 * self.square_dist(X, Xs)) + return aet.exp(-0.5 * self.square_dist(X, Xs)) class RatQuad(Stationary): @@ -463,7 +466,7 @@ def __init__(self, input_dim, alpha, ls=None, ls_inv=None, active_dims=None): def full(self, X, Xs=None): X, Xs = self._slice(X, Xs) - return tt.power( + return aet.power( (1.0 + 0.5 * self.square_dist(X, Xs) * (1.0 / self.alpha)), -1.0 * self.alpha, ) @@ -483,7 +486,9 @@ class Matern52(Stationary): def full(self, X, Xs=None): X, Xs = self._slice(X, Xs) r = self.euclidean_dist(X, Xs) - return (1.0 + np.sqrt(5.0) * r + 5.0 / 3.0 * tt.square(r)) * tt.exp(-1.0 * np.sqrt(5.0) * r) + return (1.0 + np.sqrt(5.0) * r + 5.0 / 3.0 * aet.square(r)) * aet.exp( + -1.0 * np.sqrt(5.0) * r + ) class Matern32(Stationary): @@ -499,7 +504,7 @@ class Matern32(Stationary): def full(self, X, Xs=None): X, Xs = self._slice(X, Xs) r = self.euclidean_dist(X, Xs) - return (1.0 + np.sqrt(3.0) * r) * tt.exp(-np.sqrt(3.0) * r) + return (1.0 + np.sqrt(3.0) * r) * aet.exp(-np.sqrt(3.0) * r) class Matern12(Stationary): @@ -512,7 +517,7 @@ class Matern12(Stationary): def full(self, X, Xs=None): X, Xs = self._slice(X, Xs) r = self.euclidean_dist(X, Xs) - return tt.exp(-r) + return aet.exp(-r) class Exponential(Stationary): @@ -526,7 +531,7 @@ class Exponential(Stationary): def full(self, X, Xs=None): X, Xs = self._slice(X, Xs) - return tt.exp(-0.5 * self.euclidean_dist(X, Xs)) + return aet.exp(-0.5 * self.euclidean_dist(X, Xs)) class Cosine(Stationary): @@ -539,7 +544,7 @@ class Cosine(Stationary): def full(self, X, Xs=None): X, Xs = self._slice(X, Xs) - return tt.cos(2.0 * np.pi * self.euclidean_dist(X, Xs)) + return aet.cos(2.0 * np.pi * self.euclidean_dist(X, Xs)) class Linear(Covariance): @@ -556,20 +561,20 @@ def __init__(self, input_dim, c, active_dims=None): def _common(self, X, Xs=None): X, Xs = self._slice(X, Xs) - Xc = tt.sub(X, self.c) + Xc = aet.sub(X, self.c) return X, Xc, Xs def full(self, X, Xs=None): X, Xc, Xs = self._common(X, Xs) if Xs is None: - return tt.dot(Xc, tt.transpose(Xc)) + return aet.dot(Xc, aet.transpose(Xc)) else: - Xsc = tt.sub(Xs, self.c) - return tt.dot(Xc, tt.transpose(Xsc)) + Xsc = aet.sub(Xs, self.c) + return aet.dot(Xc, aet.transpose(Xsc)) def diag(self, X): X, Xc, _ = self._common(X, None) - return tt.sum(tt.square(Xc), 1) + return aet.sum(aet.square(Xc), 1) class Polynomial(Linear): @@ -587,17 +592,17 @@ def __init__(self, input_dim, c, d, offset, active_dims=None): def full(self, X, Xs=None): linear = super().full(X, Xs) - return 
tt.power(linear + self.offset, self.d) + return aet.power(linear + self.offset, self.d) def diag(self, X): linear = super().diag(X) - return tt.power(linear + self.offset, self.d) + return aet.power(linear + self.offset, self.d) class WarpedInput(Covariance): r""" Warp the inputs of any kernel using an arbitrary function - defined using Theano. + defined using Aesara. .. math:: k(x, x') = k(w(x), w(x')) @@ -606,7 +611,7 @@ class WarpedInput(Covariance): ---------- cov_func: Covariance warp_func: callable - Theano function of X and additional optional arguments. + Aesara function of X and additional optional arguments. args: optional, tuple or list of scalars or PyMC3 variables Additional inputs (besides X or Xs) to warp_func. """ @@ -636,7 +641,7 @@ def diag(self, X): class Gibbs(Covariance): r""" The Gibbs kernel. Use an arbitrary lengthscale function defined - using Theano. Only tested in one dimension. + using Aesara. Only tested in one dimension. .. math:: k(x, x') = \sqrt{\frac{2\ell(x)\ell(x')}{\ell^2(x) + \ell^2(x')}} @@ -646,7 +651,7 @@ class Gibbs(Covariance): Parameters ---------- lengthscale_func: callable - Theano function of X and additional optional arguments. + Aesara function of X and additional optional arguments. args: optional, tuple or list of scalars or PyMC3 variables Additional inputs (besides X or Xs) to lengthscale_func. """ @@ -665,39 +670,39 @@ def __init__(self, input_dim, lengthscale_func, args=None, active_dims=None): self.args = args def square_dist(self, X, Xs=None): - X2 = tt.sum(tt.square(X), 1) + X2 = aet.sum(aet.square(X), 1) if Xs is None: - sqd = -2.0 * tt.dot(X, tt.transpose(X)) + ( - tt.reshape(X2, (-1, 1)) + tt.reshape(X2, (1, -1)) + sqd = -2.0 * aet.dot(X, aet.transpose(X)) + ( + aet.reshape(X2, (-1, 1)) + aet.reshape(X2, (1, -1)) ) else: - Xs2 = tt.sum(tt.square(Xs), 1) - sqd = -2.0 * tt.dot(X, tt.transpose(Xs)) + ( - tt.reshape(X2, (-1, 1)) + tt.reshape(Xs2, (1, -1)) + Xs2 = aet.sum(aet.square(Xs), 1) + sqd = -2.0 * aet.dot(X, aet.transpose(Xs)) + ( + aet.reshape(X2, (-1, 1)) + aet.reshape(Xs2, (1, -1)) ) - return tt.clip(sqd, 0.0, np.inf) + return aet.clip(sqd, 0.0, np.inf) def full(self, X, Xs=None): X, Xs = self._slice(X, Xs) - rx = self.lfunc(tt.as_tensor_variable(X), self.args) + rx = self.lfunc(aet.as_tensor_variable(X), self.args) if Xs is None: - rz = self.lfunc(tt.as_tensor_variable(X), self.args) + rz = self.lfunc(aet.as_tensor_variable(X), self.args) r2 = self.square_dist(X, X) else: - rz = self.lfunc(tt.as_tensor_variable(Xs), self.args) + rz = self.lfunc(aet.as_tensor_variable(Xs), self.args) r2 = self.square_dist(X, Xs) - rx2 = tt.reshape(tt.square(rx), (-1, 1)) - rz2 = tt.reshape(tt.square(rz), (1, -1)) - return tt.sqrt((2.0 * tt.outer(rx, rz)) / (rx2 + rz2)) * tt.exp(-1.0 * r2 / (rx2 + rz2)) + rx2 = aet.reshape(aet.square(rx), (-1, 1)) + rz2 = aet.reshape(aet.square(rz), (1, -1)) + return aet.sqrt((2.0 * aet.outer(rx, rz)) / (rx2 + rz2)) * aet.exp(-1.0 * r2 / (rx2 + rz2)) def diag(self, X): - return tt.alloc(1.0, X.shape[0]) + return aet.alloc(1.0, X.shape[0]) class ScaledCov(Covariance): r""" Construct a kernel by multiplying a base kernel with a scaling - function defined using Theano. The scaling function is + function defined using Aesara. The scaling function is non-negative, and can be parameterized. .. math:: @@ -708,7 +713,7 @@ class ScaledCov(Covariance): cov_func: Covariance Base kernel or covariance function scaling_func: callable - Theano function of X and additional optional arguments. 
+ Aesara function of X and additional optional arguments. args: optional, tuple or list of scalars or PyMC3 variables Additional inputs (besides X or Xs) to lengthscale_func. """ @@ -726,17 +731,17 @@ def __init__(self, input_dim, cov_func, scaling_func, args=None, active_dims=Non def diag(self, X): X, _ = self._slice(X, None) cov_diag = self.cov_func(X, diag=True) - scf_diag = tt.square(tt.flatten(self.scaling_func(X, self.args))) + scf_diag = aet.square(aet.flatten(self.scaling_func(X, self.args))) return cov_diag * scf_diag def full(self, X, Xs=None): X, Xs = self._slice(X, Xs) scf_x = self.scaling_func(X, self.args) if Xs is None: - return tt.outer(scf_x, scf_x) * self.cov_func(X) + return aet.outer(scf_x, scf_x) * self.cov_func(X) else: scf_xs = self.scaling_func(Xs, self.args) - return tt.outer(scf_x, scf_xs) * self.cov_func(X, Xs) + return aet.outer(scf_x, scf_xs) * self.cov_func(X, Xs) class Coregion(Covariance): @@ -780,27 +785,27 @@ def __init__(self, input_dim, W=None, kappa=None, B=None, active_dims=None): if make_B and B is not None: raise ValueError("Exactly one of (W, kappa) and B must be provided to Coregion") if make_B: - self.W = tt.as_tensor_variable(W) - self.kappa = tt.as_tensor_variable(kappa) - self.B = tt.dot(self.W, self.W.T) + tt.diag(self.kappa) + self.W = aet.as_tensor_variable(W) + self.kappa = aet.as_tensor_variable(kappa) + self.B = aet.dot(self.W, self.W.T) + aet.diag(self.kappa) elif B is not None: - self.B = tt.as_tensor_variable(B) + self.B = aet.as_tensor_variable(B) else: raise ValueError("Exactly one of (W, kappa) and B must be provided to Coregion") def full(self, X, Xs=None): X, Xs = self._slice(X, Xs) - index = tt.cast(X, "int32") + index = aet.cast(X, "int32") if Xs is None: index2 = index.T else: - index2 = tt.cast(Xs, "int32").T + index2 = aet.cast(Xs, "int32").T return self.B[index, index2] def diag(self, X): X, _ = self._slice(X, None) - index = tt.cast(X, "int32") - return tt.diag(self.B)[index.ravel()] + index = aet.cast(X, "int32") + return aet.diag(self.B)[index.ravel()] def handle_args(func, args): diff --git a/pymc3/gp/gp.py b/pymc3/gp/gp.py index 654bf536cfa..43a52b2d168 100644 --- a/pymc3/gp/gp.py +++ b/pymc3/gp/gp.py @@ -15,10 +15,10 @@ import functools import warnings +import aesara.tensor as aet import numpy as np -import theano.tensor as tt -from theano.tensor.nlinalg import eigh +from aesara.tensor.nlinalg import eigh import pymc3 as pm @@ -195,9 +195,9 @@ def _build_conditional(self, Xnew, X, f, cov_total, mean_total): L = cholesky(stabilize(Kxx)) A = solve_lower(L, Kxs) v = solve_lower(L, f - mean_total(X)) - mu = self.mean_func(Xnew) + tt.dot(tt.transpose(A), v) + mu = self.mean_func(Xnew) + aet.dot(aet.transpose(A), v) Kss = self.cov_func(Xnew) - cov = Kss - tt.dot(tt.transpose(A), A) + cov = Kss - aet.dot(aet.transpose(A), A) return mu, cov def conditional(self, name, Xnew, given=None, **kwargs): @@ -281,7 +281,7 @@ def _build_prior(self, name, X, reparameterize=True, **kwargs): if reparameterize: chi2 = pm.ChiSquared(name + "_chi2_", self.nu) v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, shape=shape, **kwargs) - f = pm.Deterministic(name, (tt.sqrt(self.nu) / chi2) * (mu + cholesky(cov).dot(v))) + f = pm.Deterministic(name, (aet.sqrt(self.nu) / chi2) * (mu + cholesky(cov).dot(v))) else: f = pm.MvStudentT(name, nu=self.nu, mu=mu, cov=cov, shape=shape, **kwargs) return f @@ -318,10 +318,10 @@ def _build_conditional(self, Xnew, X, f): Kss = self.cov_func(Xnew) L = cholesky(stabilize(Kxx)) A = solve_lower(L, Kxs) - cov = 
Kss - tt.dot(tt.transpose(A), A) + cov = Kss - aet.dot(aet.transpose(A), A) v = solve_lower(L, f - self.mean_func(X)) - mu = self.mean_func(Xnew) + tt.dot(tt.transpose(A), v) - beta = tt.dot(v, v) + mu = self.mean_func(Xnew) + aet.dot(aet.transpose(A), v) + beta = aet.dot(v, v) nu2 = self.nu + X.shape[0] covT = (self.nu + beta - 2) / (nu2 - 2) * cov return nu2, mu, covT @@ -476,16 +476,16 @@ def _build_conditional(self, Xnew, pred_noise, diag, X, y, noise, cov_total, mea L = cholesky(stabilize(Kxx) + Knx) A = solve_lower(L, Kxs) v = solve_lower(L, rxx) - mu = self.mean_func(Xnew) + tt.dot(tt.transpose(A), v) + mu = self.mean_func(Xnew) + aet.dot(aet.transpose(A), v) if diag: Kss = self.cov_func(Xnew, diag=True) - var = Kss - tt.sum(tt.square(A), 0) + var = Kss - aet.sum(aet.square(A), 0) if pred_noise: var += noise(Xnew, diag=True) return mu, var else: Kss = self.cov_func(Xnew) - cov = Kss - tt.dot(tt.transpose(A), A) + cov = Kss - aet.dot(aet.transpose(A), A) if pred_noise: cov += noise(Xnew) return mu, cov if pred_noise else stabilize(cov) @@ -664,32 +664,32 @@ def __add__(self, other): # in marginal_likelihood instead of lambda. This makes pickling # possible. def _build_marginal_likelihood_logp(self, y, X, Xu, sigma): - sigma2 = tt.square(sigma) + sigma2 = aet.square(sigma) Kuu = self.cov_func(Xu) Kuf = self.cov_func(Xu, X) Luu = cholesky(stabilize(Kuu)) A = solve_lower(Luu, Kuf) - Qffd = tt.sum(A * A, 0) + Qffd = aet.sum(A * A, 0) if self.approx == "FITC": Kffd = self.cov_func(X, diag=True) - Lamd = tt.clip(Kffd - Qffd, 0.0, np.inf) + sigma2 + Lamd = aet.clip(Kffd - Qffd, 0.0, np.inf) + sigma2 trace = 0.0 elif self.approx == "VFE": - Lamd = tt.ones_like(Qffd) * sigma2 + Lamd = aet.ones_like(Qffd) * sigma2 trace = (1.0 / (2.0 * sigma2)) * ( - tt.sum(self.cov_func(X, diag=True)) - tt.sum(tt.sum(A * A, 0)) + aet.sum(self.cov_func(X, diag=True)) - aet.sum(aet.sum(A * A, 0)) ) else: # DTC - Lamd = tt.ones_like(Qffd) * sigma2 + Lamd = aet.ones_like(Qffd) * sigma2 trace = 0.0 A_l = A / Lamd - L_B = cholesky(tt.eye(Xu.shape[0]) + tt.dot(A_l, tt.transpose(A))) + L_B = cholesky(aet.eye(Xu.shape[0]) + aet.dot(A_l, aet.transpose(A))) r = y - self.mean_func(X) r_l = r / Lamd - c = solve_lower(L_B, tt.dot(A, r_l)) - constant = 0.5 * X.shape[0] * tt.log(2.0 * np.pi) - logdet = 0.5 * tt.sum(tt.log(Lamd)) + tt.sum(tt.log(tt.diag(L_B))) - quadratic = 0.5 * (tt.dot(r, r_l) - tt.dot(c, c)) + c = solve_lower(L_B, aet.dot(A, r_l)) + constant = 0.5 * X.shape[0] * aet.log(2.0 * np.pi) + logdet = 0.5 * aet.sum(aet.log(Lamd)) + aet.sum(aet.log(aet.diag(L_B))) + quadratic = 0.5 * (aet.dot(r, r_l) - aet.dot(c, c)) return -1.0 * (constant + logdet + quadratic + trace) def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kwargs): @@ -743,36 +743,38 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kw return pm.DensityDist(name, logp, shape=shape, **kwargs) def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total): - sigma2 = tt.square(sigma) + sigma2 = aet.square(sigma) Kuu = cov_total(Xu) Kuf = cov_total(Xu, X) Luu = cholesky(stabilize(Kuu)) A = solve_lower(Luu, Kuf) - Qffd = tt.sum(A * A, 0) + Qffd = aet.sum(A * A, 0) if self.approx == "FITC": Kffd = cov_total(X, diag=True) - Lamd = tt.clip(Kffd - Qffd, 0.0, np.inf) + sigma2 + Lamd = aet.clip(Kffd - Qffd, 0.0, np.inf) + sigma2 else: # VFE or DTC - Lamd = tt.ones_like(Qffd) * sigma2 + Lamd = aet.ones_like(Qffd) * sigma2 A_l = A / Lamd - L_B = cholesky(tt.eye(Xu.shape[0]) + 
tt.dot(A_l, tt.transpose(A))) + L_B = cholesky(aet.eye(Xu.shape[0]) + aet.dot(A_l, aet.transpose(A))) r = y - mean_total(X) r_l = r / Lamd - c = solve_lower(L_B, tt.dot(A, r_l)) + c = solve_lower(L_B, aet.dot(A, r_l)) Kus = self.cov_func(Xu, Xnew) As = solve_lower(Luu, Kus) - mu = self.mean_func(Xnew) + tt.dot(tt.transpose(As), solve_upper(tt.transpose(L_B), c)) + mu = self.mean_func(Xnew) + aet.dot(aet.transpose(As), solve_upper(aet.transpose(L_B), c)) C = solve_lower(L_B, As) if diag: Kss = self.cov_func(Xnew, diag=True) - var = Kss - tt.sum(tt.square(As), 0) + tt.sum(tt.square(C), 0) + var = Kss - aet.sum(aet.square(As), 0) + aet.sum(aet.square(C), 0) if pred_noise: var += sigma2 return mu, var else: - cov = self.cov_func(Xnew) - tt.dot(tt.transpose(As), As) + tt.dot(tt.transpose(C), C) + cov = ( + self.cov_func(Xnew) - aet.dot(aet.transpose(As), As) + aet.dot(aet.transpose(C), C) + ) if pred_noise: - cov += sigma2 * tt.identity_like(cov) + cov += sigma2 * aet.identity_like(cov) return mu, cov if pred_noise else stabilize(cov) def _get_given_vals(self, given): @@ -891,7 +893,7 @@ def _build_prior(self, name, Xs, **kwargs): chols = [cholesky(stabilize(cov(X))) for cov, X in zip(self.cov_funcs, Xs)] # remove reparameterization option v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, shape=self.N, **kwargs) - f = pm.Deterministic(name, mu + tt.flatten(kron_dot(chols, v))) + f = pm.Deterministic(name, mu + aet.flatten(kron_dot(chols, v))) return f def prior(self, name, Xs, **kwargs): @@ -925,15 +927,15 @@ def _build_conditional(self, Xnew): delta = f - self.mean_func(X) covs = [stabilize(cov(Xi)) for cov, Xi in zip(self.cov_funcs, Xs)] chols = [cholesky(cov) for cov in covs] - cholTs = [tt.transpose(chol) for chol in chols] + cholTs = [aet.transpose(chol) for chol in chols] Kss = self.cov_func(Xnew) Kxs = self.cov_func(X, Xnew) - Ksx = tt.transpose(Kxs) + Ksx = aet.transpose(Kxs) alpha = kron_solve_lower(chols, delta) alpha = kron_solve_upper(cholTs, alpha) - mu = tt.dot(Ksx, alpha).ravel() + self.mean_func(Xnew) + mu = aet.dot(Ksx, alpha).ravel() + self.mean_func(Xnew) A = kron_solve_lower(chols, Kxs) - cov = stabilize(Kss - tt.dot(tt.transpose(A), A)) + cov = stabilize(Kss - aet.dot(aet.transpose(A), A)) return mu, cov def conditional(self, name, Xnew, **kwargs): @@ -1103,7 +1105,7 @@ def _build_conditional(self, Xnew, pred_noise, diag): delta = y - self.mean_func(X) Kns = [f(x) for f, x in zip(self.cov_funcs, Xs)] eigs_sep, Qs = zip(*map(eigh, Kns)) # Unzip - QTs = list(map(tt.transpose, Qs)) + QTs = list(map(aet.transpose, Qs)) eigs = kron_diag(*eigs_sep) # Combine separate eigs if sigma is not None: eigs += sigma ** 2 @@ -1117,21 +1119,21 @@ def _build_conditional(self, Xnew, pred_noise, diag): alpha = kron_dot(QTs, delta) alpha = alpha / eigs[:, None] alpha = kron_dot(Qs, alpha) - mu = tt.dot(Kmn, alpha).ravel() + self.mean_func(Xnew) + mu = aet.dot(Kmn, alpha).ravel() + self.mean_func(Xnew) # Build conditional cov A = kron_dot(QTs, Knm) - A = A / tt.sqrt(eigs[:, None]) + A = A / aet.sqrt(eigs[:, None]) if diag: - Asq = tt.sum(tt.square(A), 0) + Asq = aet.sum(aet.square(A), 0) cov = Km - Asq if pred_noise: cov += sigma else: - Asq = tt.dot(A.T, A) + Asq = aet.dot(A.T, A) cov = Km - Asq if pred_noise: - cov += sigma * tt.identity_like(cov) + cov += sigma * aet.identity_like(cov) return mu, cov def conditional(self, name, Xnew, pred_noise=False, **kwargs): diff --git a/pymc3/gp/mean.py b/pymc3/gp/mean.py index d2e93fdfe5f..47d38d9897a 100644 --- a/pymc3/gp/mean.py +++ 
b/pymc3/gp/mean.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import theano.tensor as tt +import aesara.tensor as aet __all__ = ["Zero", "Constant", "Linear"] @@ -46,7 +46,7 @@ class Zero(Mean): """ def __call__(self, X): - return tt.alloc(0.0, X.shape[0]) + return aet.alloc(0.0, X.shape[0]) class Constant(Mean): @@ -64,7 +64,7 @@ def __init__(self, c=0): self.c = c def __call__(self, X): - return tt.alloc(1.0, X.shape[0]) * self.c + return aet.alloc(1.0, X.shape[0]) * self.c class Linear(Mean): @@ -85,7 +85,7 @@ def __init__(self, coeffs, intercept=0): self.A = coeffs def __call__(self, X): - return tt.squeeze(tt.dot(X, self.A) + self.b) + return aet.squeeze(aet.dot(X, self.A) + self.b) class Add(Mean): @@ -95,7 +95,7 @@ def __init__(self, first_mean, second_mean): self.m2 = second_mean def __call__(self, X): - return tt.add(self.m1(X), self.m2(X)) + return aet.add(self.m1(X), self.m2(X)) class Prod(Mean): @@ -105,4 +105,4 @@ def __init__(self, first_mean, second_mean): self.m2 = second_mean def __call__(self, X): - return tt.mul(self.m1(X), self.m2(X)) + return aet.mul(self.m1(X), self.m2(X)) diff --git a/pymc3/gp/util.py b/pymc3/gp/util.py index 698c300564b..1ad05f6eab9 100644 --- a/pymc3/gp/util.py +++ b/pymc3/gp/util.py @@ -14,16 +14,16 @@ import warnings +import aesara.tensor as aet import numpy as np -import theano.tensor as tt -import theano.tensor.slinalg # pylint: disable=unused-import +from aesara.tensor.slinalg import Solve, cholesky # pylint: disable=unused-import +from aesara.tensor.var import TensorConstant from scipy.cluster.vq import kmeans -cholesky = tt.slinalg.cholesky -solve_lower = tt.slinalg.Solve(A_structure="lower_triangular") -solve_upper = tt.slinalg.Solve(A_structure="upper_triangular") -solve = tt.slinalg.Solve(A_structure="general") +solve_lower = Solve(A_structure="lower_triangular") +solve_upper = Solve(A_structure="upper_triangular") +solve = Solve(A_structure="general") def infer_shape(X, n_points=None): @@ -37,12 +37,12 @@ def infer_shape(X, n_points=None): def stabilize(K): """ adds small diagonal to a covariance matrix """ - return K + 1e-6 * tt.identity_like(K) + return K + 1e-6 * aet.identity_like(K) def kmeans_inducing_points(n_inducing, X): # first whiten X - if isinstance(X, tt.TensorConstant): + if isinstance(X, TensorConstant): X = X.value elif isinstance(X, (np.ndarray, tuple, list)): X = np.asarray(X) diff --git a/pymc3/math.py b/pymc3/math.py index aff54d13b71..b90b85e09e7 100644 --- a/pymc3/math.py +++ b/pymc3/math.py @@ -16,20 +16,19 @@ from functools import partial, reduce +import aesara +import aesara.sparse +import aesara.tensor as aet +import aesara.tensor.slinalg # pylint: disable=unused-import import numpy as np import scipy as sp import scipy.sparse # pylint: disable=unused-import -import theano -import theano.sparse -import theano.tensor as tt -import theano.tensor.slinalg # pylint: disable=unused-import -from scipy.linalg import block_diag as scipy_block_diag -from theano.graph.basic import Apply -from theano.graph.op import Op +from aesara.graph.basic import Apply +from aesara.graph.op import Op # pylint: disable=unused-import -from theano.tensor import ( +from aesara.tensor import ( abs_, and_, ceil, @@ -71,10 +70,11 @@ where, zeros_like, ) -from theano.tensor.nlinalg import det, extract_diag, matrix_dot, matrix_inverse, trace -from theano.tensor.nnet import sigmoid +from aesara.tensor.nlinalg import det, extract_diag, matrix_dot, matrix_inverse, trace 
+from aesara.tensor.nnet import sigmoid +from scipy.linalg import block_diag as scipy_block_diag -from pymc3.theanof import floatX, ix_, largest_common_dtype +from pymc3.aesaraf import floatX, ix_, largest_common_dtype # pylint: enable=unused-import @@ -93,7 +93,7 @@ def kronecker(*Ks): np.ndarray : Block matrix Kroncker product of the argument matrices. """ - return reduce(tt.slinalg.kron, Ks) + return reduce(aet.slinalg.kron, Ks) def cartesian(*arrays): @@ -140,17 +140,17 @@ def kron_vector_op(v): raise ValueError(f"m must have ndim <= 2, not {m.ndim}") res = kron_vector_op(m) res_shape = res.shape - return tt.reshape(res, (res_shape[1], res_shape[0])).T + return aet.reshape(res, (res_shape[1], res_shape[0])).T # Define kronecker functions that work on 1D and 2D arrays -kron_dot = partial(kron_matrix_op, op=tt.dot) -kron_solve_lower = partial(kron_matrix_op, op=tt.slinalg.solve_lower_triangular) -kron_solve_upper = partial(kron_matrix_op, op=tt.slinalg.solve_upper_triangular) +kron_dot = partial(kron_matrix_op, op=aet.dot) +kron_solve_lower = partial(kron_matrix_op, op=aet.slinalg.solve_lower_triangular) +kron_solve_upper = partial(kron_matrix_op, op=aet.slinalg.solve_upper_triangular) def flat_outer(a, b): - return tt.outer(a, b).ravel() + return aet.outer(a, b).ravel() def kron_diag(*diags): @@ -166,24 +166,24 @@ def kron_diag(*diags): def tround(*args, **kwargs): """ - Temporary function to silence round warning in Theano. Please remove + Temporary function to silence round warning in Aesara. Please remove when the warning disappears. """ kwargs["mode"] = "half_to_even" - return tt.round(*args, **kwargs) + return aet.round(*args, **kwargs) def logsumexp(x, axis=None, keepdims=True): # Adapted from https://github.com/Theano/Theano/issues/1563 - x_max = tt.max(x, axis=axis, keepdims=True) - x_max = tt.switch(tt.isinf(x_max), 0, x_max) - res = tt.log(tt.sum(tt.exp(x - x_max), axis=axis, keepdims=True)) + x_max + x_max = aet.max(x, axis=axis, keepdims=True) + x_max = aet.switch(aet.isinf(x_max), 0, x_max) + res = aet.log(aet.sum(aet.exp(x - x_max), axis=axis, keepdims=True)) + x_max return res if keepdims else res.squeeze() def logaddexp(a, b): diff = b - a - return tt.switch(diff > 0, b + tt.log1p(tt.exp(-diff)), a + tt.log1p(tt.exp(diff))) + return aet.switch(diff > 0, b + aet.log1p(aet.exp(-diff)), a + aet.log1p(aet.exp(diff))) def logdiffexp(a, b): @@ -198,7 +198,7 @@ def logdiffexp_numpy(a, b): def invlogit(x, eps=sys.float_info.epsilon): """The inverse of the logit function, 1 / (1 + exp(-x)).""" - return (1.0 - 2.0 * eps) / (1.0 + tt.exp(-x)) + eps + return (1.0 - 2.0 * eps) / (1.0 + aet.exp(-x)) + eps def logbern(log_p): @@ -208,7 +208,7 @@ def logbern(log_p): def logit(p): - return tt.log(p / (floatX(1) - p)) + return aet.log(p / (floatX(1) - p)) def log1pexp(x): @@ -216,7 +216,7 @@ def log1pexp(x): This function is numerically more stable than the naive approach. 
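Quick aside, example only: the logsumexp helper above is the usual max-shift trick; a small NumPy check of why the shift matters, with made-up inputs:

    import numpy as np

    x = np.array([1000.0, 1000.5, 999.0])                 # exp(x) overflows in float64

    naive = np.log(np.sum(np.exp(x)))                     # inf, with an overflow warning
    x_max = np.max(x)
    stable = np.log(np.sum(np.exp(x - x_max))) + x_max    # ~1001.1, finite

    print(naive, stable)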
""" - return tt.nnet.softplus(x) + return aet.nnet.softplus(x) def log1mexp(x): @@ -234,7 +234,9 @@ def log1mexp(x): package" """ - return tt.switch(tt.lt(x, 0.6931471805599453), tt.log(-tt.expm1(-x)), tt.log1p(-tt.exp(-x))) + return aet.switch( + aet.lt(x, 0.6931471805599453), aet.log(-aet.expm1(-x)), aet.log1p(-aet.exp(-x)) + ) def log1mexp_numpy(x): @@ -253,7 +255,7 @@ def log1mexp_numpy(x): def flatten_list(tensors): - return tt.concatenate([var.ravel() for var in tensors]) + return aet.concatenate([var.ravel() for var in tensors]) class LogDet(Op): @@ -268,8 +270,8 @@ class LogDet(Op): """ def make_node(self, x): - x = theano.tensor.as_tensor_variable(x) - o = theano.tensor.scalar(dtype=x.dtype) + x = aesara.tensor.as_tensor_variable(x) + o = aesara.tensor.scalar(dtype=x.dtype) return Apply(self, [x], [o]) def perform(self, node, inputs, outputs, params=None): @@ -319,7 +321,7 @@ def expand_packed_triangular(n, packed, lower=True, diagonal_only=False): ---------- n: int The number of rows of the triangular matrix. - packed: theano.vector + packed: aesara.vector The matrix in packed format. lower: bool, default=True If true, assume that the matrix is lower triangular. @@ -338,13 +340,13 @@ def expand_packed_triangular(n, packed, lower=True, diagonal_only=False): diag_idxs = np.arange(2, n + 2)[::-1].cumsum() - n - 1 return packed[diag_idxs] elif lower: - out = tt.zeros((n, n), dtype=theano.config.floatX) + out = aet.zeros((n, n), dtype=aesara.config.floatX) idxs = np.tril_indices(n) - return tt.set_subtensor(out[idxs], packed) + return aet.set_subtensor(out[idxs], packed) elif not lower: - out = tt.zeros((n, n), dtype=theano.config.floatX) + out = aet.zeros((n, n), dtype=aesara.config.floatX) idxs = np.triu_indices(n) - return tt.set_subtensor(out[idxs], packed) + return aet.set_subtensor(out[idxs], packed) class BatchedDiag(Op): @@ -355,11 +357,11 @@ class BatchedDiag(Op): __props__ = () def make_node(self, diag): - diag = tt.as_tensor_variable(diag) + diag = aet.as_tensor_variable(diag) if diag.type.ndim != 2: raise TypeError("data argument must be a matrix", diag.type) - return Apply(self, [diag], [tt.tensor3(dtype=diag.dtype)]) + return Apply(self, [diag], [aet.tensor3(dtype=diag.dtype)]) def perform(self, node, ins, outs, params=None): (C,) = ins @@ -375,7 +377,7 @@ def perform(self, node, ins, outs, params=None): def grad(self, inputs, gout): (gz,) = gout - idx = tt.arange(gz.shape[-1]) + idx = aet.arange(gz.shape[-1]) return [gz[..., idx, idx]] def infer_shape(self, fgraph, nodes, shapes): @@ -383,14 +385,14 @@ def infer_shape(self, fgraph, nodes, shapes): def batched_diag(C): - C = tt.as_tensor(C) + C = aet.as_tensor(C) dim = C.shape[-1] if C.ndim == 2: # diag -> matrices return BatchedDiag()(C) elif C.ndim == 3: # matrices -> diag - idx = tt.arange(dim) + idx = aet.arange(dim) return C[..., idx, idx] else: raise ValueError("Input should be 2 or 3 dimensional") @@ -408,13 +410,13 @@ def __init__(self, sparse=False, format="csr"): def make_node(self, *matrices): if not matrices: raise ValueError("no matrices to allocate") - matrices = list(map(tt.as_tensor, matrices)) + matrices = list(map(aet.as_tensor, matrices)) if any(mat.type.ndim != 2 for mat in matrices): raise TypeError("all data arguments must be matrices") if self.sparse: - out_type = theano.sparse.matrix(self.format, dtype=largest_common_dtype(matrices)) + out_type = aesara.sparse.matrix(self.format, dtype=largest_common_dtype(matrices)) else: - out_type = theano.tensor.matrix(dtype=largest_common_dtype(matrices)) + 
out_type = aesara.tensor.matrix(dtype=largest_common_dtype(matrices)) return Apply(self, matrices, [out_type]) def perform(self, node, inputs, output_storage, params=None): @@ -425,13 +427,13 @@ def perform(self, node, inputs, output_storage, params=None): output_storage[0][0] = scipy_block_diag(*inputs).astype(dtype) def grad(self, inputs, gout): - shapes = tt.stack([i.shape for i in inputs]) + shapes = aet.stack([i.shape for i in inputs]) index_end = shapes.cumsum(0) index_begin = index_end - shapes slices = [ ix_( - tt.arange(index_begin[i, 0], index_end[i, 0]), - tt.arange(index_begin[i, 1], index_end[i, 1]), + aet.arange(index_begin[i, 0], index_end[i, 0]), + aet.arange(index_begin[i, 1], index_end[i, 1]), ) for i in range(len(inputs)) ] @@ -439,7 +441,7 @@ def grad(self, inputs, gout): def infer_shape(self, fgraph, nodes, shapes): first, second = zip(*shapes) - return [(tt.add(*first), tt.add(*second))] + return [(aet.add(*first), aet.add(*second))] def block_diagonal(matrices, sparse=False, format="csr"): diff --git a/pymc3/model.py b/pymc3/model.py index 349affcfa01..a5a0a635c8b 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -20,25 +20,27 @@ from sys import modules from typing import TYPE_CHECKING, Any, List, Optional, Type, TypeVar, Union, cast +import aesara +import aesara.graph.basic +import aesara.sparse as sparse +import aesara.tensor as aet import numpy as np import scipy.sparse as sps -import theano -import theano.graph.basic -import theano.sparse as sparse -import theano.tensor as tt +from aesara.compile.sharedvalue import SharedVariable +from aesara.gradient import grad +from aesara.graph.basic import Apply, Variable +from aesara.tensor.type import TensorType as AesaraTensorType +from aesara.tensor.var import TensorVariable from pandas import Series -from theano.compile import SharedVariable -from theano.graph.basic import Apply -from theano.tensor.var import TensorVariable import pymc3 as pm +from pymc3.aesaraf import floatX, generator, gradient, hessian, inputvars from pymc3.blocking import ArrayOrdering, DictToArrayBijection from pymc3.exceptions import ImputationWarning from pymc3.math import flatten_list from pymc3.memoize import WithMemoization, memoize -from pymc3.theanof import floatX, generator, gradient, hessian, inputvars from pymc3.util import get_transformed_name, get_var_name from pymc3.vartypes import continuous_types, discrete_types, isgenerator, typefilter @@ -59,13 +61,13 @@ class PyMC3Variable(TensorVariable): - """Class to wrap Theano TensorVariable for custom behavior.""" + """Class to wrap Aesara TensorVariable for custom behavior.""" # Implement matrix multiplication infix operator: X @ w - __matmul__ = tt.dot + __matmul__ = aet.dot def __rmatmul__(self, other): - return tt.dot(other, self) + return aet.dot(other, self) def _str_repr(self, name=None, dist=None, formatting="plain"): if getattr(self, "distribution", None) is None: @@ -143,28 +145,28 @@ def incorporate_methods(source, destination, methods, wrapper=None, override=Fal def get_named_nodes_and_relations(graph): - """Get the named nodes in a theano graph (i.e., nodes whose name + """Get the named nodes in a aesara graph (i.e., nodes whose name attribute is not None) along with their relationships (i.e., the node's named parents, and named children, while skipping unnamed intermediate nodes) Parameters ---------- - graph: a theano node + graph: a aesara node Returns: -------- leaf_dict: Dict[str, node] A dictionary of name:node pairs, of the named nodes that - have no named ancestors in 
the provided theano graph. + have no named ancestors in the provided aesara graph. descendents: Dict[node, Set[node]] - Each key is a theano named node, and the corresponding value - is the set of theano named nodes that are descendents with no + Each key is a aesara named node, and the corresponding value + is the set of aesara named nodes that are descendents with no intervening named nodes in the supplied ``graph``. ancestors: Dict[node, Set[node]] A dictionary of node:set([ancestors]) pairs. Each key - is a theano named node, and the corresponding value is the set - of theano named nodes that are ancestors with no intervening named + is a aesara named node, and the corresponding value is the set + of aesara named nodes that are ancestors with no intervening named nodes in the supplied ``graph``. """ @@ -222,28 +224,28 @@ def _get_named_nodes_and_relations(graph, descendent, descendents, ancestors): def build_named_node_tree(graphs): """Build the combined descence/ancestry tree of named nodes (i.e., nodes - whose name attribute is not None) in a list (or iterable) of theano graphs. + whose name attribute is not None) in a list (or iterable) of aesara graphs. The relationship tree does not include unnamed intermediate nodes present in the supplied graphs. Parameters ---------- - graphs - iterable of theano graphs + graphs - iterable of aesara graphs Returns: -------- leaf_dict: Dict[str, node] A dictionary of name:node pairs, of the named nodes that - have no named ancestors in the provided theano graphs. + have no named ancestors in the provided aesara graphs. descendents: Dict[node, Set[node]] A dictionary of node:set([parents]) pairs. Each key is - a theano named node, and the corresponding value is the set of - theano named nodes that are descendents with no intervening named + a aesara named node, and the corresponding value is the set of + aesara named nodes that are descendents with no intervening named nodes in the supplied ``graphs``. ancestors: Dict[node, Set[node]] A dictionary of node:set([ancestors]) pairs. Each key - is a theano named node, and the corresponding value is the set - of theano named nodes that are ancestors with no intervening named + is a aesara named node, and the corresponding value is the set + of aesara named nodes that are ancestors with no intervening named nodes in the supplied ``graphs``. 
""" @@ -282,16 +284,16 @@ def __new__(cls, name, bases, dct, **kargs): # pylint: disable=unused-argument def __enter__(self): self.__class__.context_class.get_contexts().append(self) - # self._theano_config is set in Model.__new__ + # self._aesara_config is set in Model.__new__ self._config_context = None - if hasattr(self, "_theano_config"): - self._config_context = theano.config.change_flags(**self._theano_config) + if hasattr(self, "_aesara_config"): + self._config_context = aesara.config.change_flags(**self._aesara_config) self._config_context.__enter__() return self def __exit__(self, typ, value, traceback): # pylint: disable=unused-argument self.__class__.context_class.get_contexts().pop() - # self._theano_config is set in Model.__new__ + # self._aesara_config is set in Model.__new__ if self._config_context: self._config_context.__exit__(typ, value, traceback) @@ -468,7 +470,7 @@ def fastd2logp_nojac(self, vars=None): @property def logpt(self): - """Theano scalar of log-probability of the model""" + """Aesara scalar of log-probability of the model""" if getattr(self, "total_size", None) is not None: logp = self.logp_sum_unscaledt * self.scaling else: @@ -479,11 +481,11 @@ def logpt(self): @property def logp_nojact(self): - """Theano scalar of log-probability, excluding jacobian terms.""" + """Aesara scalar of log-probability, excluding jacobian terms.""" if getattr(self, "total_size", None) is not None: - logp = tt.sum(self.logp_nojac_unscaledt) * self.scaling + logp = aet.sum(self.logp_nojac_unscaledt) * self.scaling else: - logp = tt.sum(self.logp_nojac_unscaledt) + logp = aet.sum(self.logp_nojac_unscaledt) if self.name is not None: logp.name = "__logp_%s" % self.name return logp @@ -578,20 +580,20 @@ def tree_contains(self, item): class ValueGradFunction: - """Create a theano function that computes a value and its gradient. + """Create a aesara function that computes a value and its gradient. Parameters ---------- - costs: list of theano variables - We compute the weighted sum of the specified theano values, and the gradient + costs: list of aesara variables + We compute the weighted sum of the specified aesara values, and the gradient of that sum. The weights can be specified with `ValueGradFunction.set_weights`. - grad_vars: list of named theano variables or None + grad_vars: list of named aesara variables or None The arguments with respect to which the gradient is computed. - extra_vars: list of named theano variables or None + extra_vars: list of named aesara variables or None Other arguments of the function that are assumed constant. They are stored in shared variables and can be set using `set_extra_values`. - dtype: str, default=theano.config.floatX + dtype: str, default=aesara.config.floatX The dtype of the arrays. casting: {'no', 'equiv', 'save', 'same_kind', 'unsafe'}, default='no' Casting rule for casting `grad_args` to the array dtype. @@ -601,14 +603,14 @@ class ValueGradFunction: compute_grads: bool, default=True If False, return only the logp, not the gradient. kwargs - Extra arguments are passed on to `theano.function`. + Extra arguments are passed on to `aesara.function`. Attributes ---------- size: int The number of elements in the parameter array. - profile: theano profiling object or None - The profiling object of the theano function that computes value and + profile: aesara profiling object or None + The profiling object of the aesara function that computes value and gradient. This is None unless `profile=True` was set in the kwargs. 
""" @@ -640,14 +642,14 @@ def __init__( self._extra_var_names = {var.name for var in extra_vars} if dtype is None: - dtype = theano.config.floatX + dtype = aesara.config.floatX self.dtype = dtype self._n_costs = len(costs) if self._n_costs == 0: raise ValueError("At least one cost is required.") weights = np.ones(self._n_costs - 1, dtype=self.dtype) - self._weights = theano.shared(weights, "__weights") + self._weights = aesara.shared(weights, "__weights") cost = costs[0] for i, val in enumerate(costs[1:]): @@ -674,7 +676,7 @@ def __init__( givens = [] self._extra_vars_shared = {} for var in extra_vars: - shared = theano.shared(var.tag.test_value, var.name + "_shared__") + shared = aesara.shared(var.tag.test_value, var.name + "_shared__") # test TensorType compatibility if hasattr(var.tag.test_value, "shape"): testtype = TensorType(var.dtype, var.tag.test_value.shape) @@ -689,15 +691,15 @@ def __init__( ) if compute_grads: - grad = tt.grad(self._cost_joined, self._vars_joined) - grad.name = "__grad" - outputs = [self._cost_joined, grad] + grad_out = grad(self._cost_joined, self._vars_joined) + grad_out.name = "__grad" + outputs = [self._cost_joined, grad_out] else: outputs = self._cost_joined inputs = [self._vars_joined] - self._theano_function = theano.function(inputs, outputs, givens=givens, **kwargs) + self._aesara_function = aesara.function(inputs, outputs, givens=givens, **kwargs) def set_weights(self, values): if values.shape != (self._n_costs - 1,): @@ -732,7 +734,7 @@ def __call__(self, array, grad_out=None, extra_vars=None): else: out = grad_out - output = self._theano_function(array) + output = self._aesara_function(array) if grad_out is None: return output else: @@ -741,8 +743,8 @@ def __call__(self, array, grad_out=None, extra_vars=None): @property def profile(self): - """Profiling information of the underlying theano function.""" - return self._theano_function.profile + """Profiling information of the underlying aesara function.""" + return self._aesara_function.profile def dict_to_array(self, point): """Convert a dictionary with values for grad_vars to an array.""" @@ -774,7 +776,7 @@ def array_to_full_dict(self, array): return point def _build_joined(self, cost, args, vmap): - args_joined = tt.vector("__args_joined") + args_joined = aet.vector("__args_joined") args_joined.tag.test_value = np.zeros(self.size, dtype=self.dtype) joined_slices = {} @@ -784,7 +786,7 @@ def _build_joined(self, cost, args, vmap): joined_slices[vmap.var] = sliced replace = {var: joined_slices[var.name] for var in args} - return args_joined, theano.clone(cost, replace=replace) + return args_joined, aesara.clone_replace(cost, replace=replace) class Model(Factor, WithMemoization, metaclass=ContextMeta): @@ -806,10 +808,10 @@ class Model(Factor, WithMemoization, metaclass=ContextMeta): defined within instance will be passed to the parent instance. So that 'nested' model contributes to the variables and likelihood factors of parent model. - theano_config: dict - A dictionary of theano config values that should be set + aesara_config: dict + A dictionary of aesara config values that should be set temporarily in the model context. See the documentation - of theano for a complete list. Set config key + of aesara for a complete list. Set config key ``compute_test_value`` to `raise` if it is None. 
check_bounds: bool Ensure that input parameters to distributions are in a valid @@ -854,7 +856,7 @@ def __init__(self, mean=0, sigma=1, name='', model=None): Deterministic('v3_sq', self.v3 ** 2) # Potentials too - Potential('p1', tt.constant(1)) + Potential('p1', aet.constant(1)) # After defining a class CustomModel you can use it in several # ways @@ -896,13 +898,13 @@ def __new__(cls, *args, **kwargs): instance._parent = kwargs.get("model") else: instance._parent = cls.get_context(error_if_none=False) - theano_config = kwargs.get("theano_config", None) - if theano_config is None or "compute_test_value" not in theano_config: - theano_config = {"compute_test_value": "raise"} - instance._theano_config = theano_config + aesara_config = kwargs.get("aesara_config", None) + if aesara_config is None or "compute_test_value" not in aesara_config: + aesara_config = {"compute_test_value": "raise"} + instance._aesara_config = aesara_config return instance - def __init__(self, name="", model=None, theano_config=None, coords=None, check_bounds=True): + def __init__(self, name="", model=None, aesara_config=None, coords=None, check_bounds=True): self.name = name self.coords = {} self.RV_dims = {} @@ -970,7 +972,7 @@ def dlogp_array(self): return self.bijection.mapf(self.fastdlogp(vars)) def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): - """Compile a theano function that computes logp and gradient. + """Compile a aesara function that computes logp and gradient. Parameters ---------- @@ -990,10 +992,10 @@ def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): if tempered: with self: - free_RVs_logp = tt.sum( - [tt.sum(var.logpt) for var in self.free_RVs + self.potentials] + free_RVs_logp = aet.sum( + [aet.sum(var.logpt) for var in self.free_RVs + self.potentials] ) - observed_RVs_logp = tt.sum([tt.sum(var.logpt) for var in self.observed_RVs]) + observed_RVs_logp = aet.sum([aet.sum(var.logpt) for var in self.observed_RVs]) costs = [free_RVs_logp, observed_RVs_logp] else: @@ -1004,10 +1006,10 @@ def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): @property def logpt(self): - """Theano scalar of log-probability of the model""" + """Aesara scalar of log-probability of the model""" with self: factors = [var.logpt for var in self.basic_RVs] + self.potentials - logp = tt.sum([tt.sum(factor) for factor in factors]) + logp = aet.sum([aet.sum(factor) for factor in factors]) if self.name: logp.name = "__logp_%s" % self.name else: @@ -1016,14 +1018,14 @@ def logpt(self): @property def logp_nojact(self): - """Theano scalar of log-probability of the model but without the jacobian + """Aesara scalar of log-probability of the model but without the jacobian if transformed Random Variable is presented. Note that If there is no transformed variable in the model, logp_nojact will be the same as logpt as there is no need for Jacobian correction. 
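Aside, example only: with this change the model-level config override is passed as ``aesara_config`` instead of ``theano_config``; at the call site that looks roughly like the sketch below ("ignore" is one of the standard compute_test_value modes):

    import pymc3 as pm

    with pm.Model(aesara_config={"compute_test_value": "ignore"}) as model:
        x = pm.Normal("x", 0.0, 1.0)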
""" with self: factors = [var.logp_nojact for var in self.basic_RVs] + self.potentials - logp = tt.sum([tt.sum(factor) for factor in factors]) + logp = aet.sum([aet.sum(factor) for factor in factors]) if self.name: logp.name = "__logp_nojac_%s" % self.name else: @@ -1032,18 +1034,18 @@ def logp_nojact(self): @property def varlogpt(self): - """Theano scalar of log-probability of the unobserved random variables + """Aesara scalar of log-probability of the unobserved random variables (excluding deterministic).""" with self: factors = [var.logpt for var in self.free_RVs] - return tt.sum(factors) + return aet.sum(factors) @property def datalogpt(self): with self: factors = [var.logpt for var in self.observed_RVs] - factors += [tt.sum(factor) for factor in self.potentials] - return tt.sum(factors) + factors += [aet.sum(factor) for factor in self.potentials] + return aet.sum(factors) @property def vars(self): @@ -1237,20 +1239,20 @@ def __getitem__(self, key): raise e def makefn(self, outs, mode=None, *args, **kwargs): - """Compiles a Theano function which returns ``outs`` and takes the variable + """Compiles a Aesara function which returns ``outs`` and takes the variable ancestors of ``outs`` as inputs. Parameters ---------- - outs: Theano variable or iterable of Theano variables - mode: Theano compilation mode + outs: Aesara variable or iterable of Aesara variables + mode: Aesara compilation mode Returns ------- - Compiled Theano function + Compiled Aesara function """ with self: - return theano.function( + return aesara.function( self.vars, outs, allow_input_downcast=True, @@ -1262,43 +1264,43 @@ def makefn(self, outs, mode=None, *args, **kwargs): ) def fn(self, outs, mode=None, *args, **kwargs): - """Compiles a Theano function which returns the values of ``outs`` + """Compiles a Aesara function which returns the values of ``outs`` and takes values of model vars as arguments. Parameters ---------- - outs: Theano variable or iterable of Theano variables - mode: Theano compilation mode + outs: Aesara variable or iterable of Aesara variables + mode: Aesara compilation mode Returns ------- - Compiled Theano function + Compiled Aesara function """ return LoosePointFunc(self.makefn(outs, mode, *args, **kwargs), self) def fastfn(self, outs, mode=None, *args, **kwargs): - """Compiles a Theano function which returns ``outs`` and takes values + """Compiles a Aesara function which returns ``outs`` and takes values of model vars as a dict as an argument. Parameters ---------- - outs: Theano variable or iterable of Theano variables - mode: Theano compilation mode + outs: Aesara variable or iterable of Aesara variables + mode: Aesara compilation mode Returns ------- - Compiled Theano function as point function. + Compiled Aesara function as point function. """ f = self.makefn(outs, mode, *args, **kwargs) return FastPointFunc(f) def profile(self, outs, n=1000, point=None, profile=True, *args, **kwargs): - """Compiles and profiles a Theano function which returns ``outs`` and + """Compiles and profiles a Aesara function which returns ``outs`` and takes values of model vars as a dict as an argument. 
Parameters ---------- - outs: Theano variable or iterable of Theano variables + outs: Aesara variable or iterable of Aesara variables n: int, default 1000 Number of iterations to run point: point @@ -1335,7 +1337,7 @@ def flatten(self, vars=None, order=None, inputvar=None): if None, then all model.free_RVs are used for flattening input order: ArrayOrdering Optional, use predefined ordering - inputvar: tt.vector + inputvar: aet.vector Optional, use predefined inputvar Returns @@ -1347,8 +1349,8 @@ def flatten(self, vars=None, order=None, inputvar=None): if order is None: order = ArrayOrdering(vars) if inputvar is None: - inputvar = tt.vector("flat_view", dtype=theano.config.floatX) - if theano.config.compute_test_value != "off": + inputvar = aet.vector("flat_view", dtype=aesara.config.floatX) + if aesara.config.compute_test_value != "off": if vars: inputvar.tag.test_value = flatten_list(vars).tag.test_value else: @@ -1482,34 +1484,34 @@ def set_data(new_data, model=None): def fn(outs, mode=None, model=None, *args, **kwargs): - """Compiles a Theano function which returns the values of ``outs`` and + """Compiles a Aesara function which returns the values of ``outs`` and takes values of model vars as arguments. Parameters ---------- - outs: Theano variable or iterable of Theano variables - mode: Theano compilation mode + outs: Aesara variable or iterable of Aesara variables + mode: Aesara compilation mode Returns ------- - Compiled Theano function + Compiled Aesara function """ model = modelcontext(model) return model.fn(outs, mode, *args, **kwargs) def fastfn(outs, mode=None, model=None): - """Compiles a Theano function which returns ``outs`` and takes values of model + """Compiles a Aesara function which returns ``outs`` and takes values of model vars as a dict as an argument. Parameters ---------- - outs: Theano variable or iterable of Theano variables - mode: Theano compilation mode + outs: Aesara variable or iterable of Aesara variables + mode: Aesara compilation mode Returns ------- - Compiled Theano function as point function. + Compiled Aesara function as point function. """ model = modelcontext(model) return model.fastfn(outs, mode) @@ -1619,12 +1621,12 @@ def _get_scaling(total_size, shape, ndim): begin_coef = [floatX(t) / shp_begin[i] for i, t in enumerate(begin) if t is not None] end_coef = [floatX(t) / shp_end[i] for i, t in enumerate(end) if t is not None] coefs = begin_coef + end_coef - coef = tt.prod(coefs) + coef = aet.prod(coefs) else: raise TypeError( "Unrecognized `total_size` type, expected int or list of ints, got %r" % total_size ) - return tt.as_tensor(floatX(coef)) + return aet.as_tensor(floatX(coef)) class FreeRV(Factor, PyMC3Variable): @@ -1648,8 +1650,8 @@ def __init__( """ Parameters ---------- - type: theano type (optional) - owner: theano owner (optional) + type: aesara type (optional) + owner: aesara owner (optional) name: str distribution: Distribution model: Model @@ -1692,7 +1694,7 @@ def init_value(self): def pandas_to_array(data): """Convert a pandas object to a NumPy array. - XXX: When `data` is a generator, this will return a Theano tensor! + XXX: When `data` is a generator, this will return a Aesara tensor! 
""" if hasattr(data, "to_numpy") and hasattr(data, "isnull"): @@ -1720,7 +1722,7 @@ def pandas_to_array(data): else: # no masking required ret = data - elif isinstance(data, theano.graph.basic.Variable): + elif isinstance(data, Variable): ret = data elif sps.issparse(data): ret = data @@ -1762,9 +1764,9 @@ def as_tensor(data, name, model, distribution): parent_dist=distribution, ) missing_values = FreeRV(name=name + "_missing", distribution=fakedist, model=model) - constant = tt.as_tensor_variable(data.filled()) + constant = aet.as_tensor_variable(data.filled()) - dataTensor = tt.set_subtensor(constant[data.mask.nonzero()], missing_values) + dataTensor = aet.set_subtensor(constant[data.mask.nonzero()], missing_values) dataTensor.missing_values = missing_values return dataTensor elif sps.issparse(data): @@ -1772,7 +1774,7 @@ def as_tensor(data, name, model, distribution): data.missing_values = None return data else: - data = tt.as_tensor_variable(data, name=name) + data = aet.as_tensor_variable(data, name=name) data.missing_values = None return data @@ -1796,8 +1798,8 @@ def __init__( """ Parameters ---------- - type: theano type (optional) - owner: theano owner (optional) + type: aesara type (optional) + owner: aesara owner (optional) name: str distribution: Distribution model: Model @@ -1806,12 +1808,12 @@ def __init__( """ from pymc3.distributions import TensorType - if hasattr(data, "type") and isinstance(data.type, tt.TensorType): + if hasattr(data, "type") and isinstance(data.type, AesaraTensorType): type = data.type if type is None: data = pandas_to_array(data) - if isinstance(data, theano.graph.basic.Variable): + if isinstance(data, Variable): type = data.type else: type = TensorType(distribution.dtype, data.shape) @@ -1834,8 +1836,8 @@ def __init__( self.distribution = distribution # make this RV a view on the combined missing/nonmissing array - Apply(theano.compile.view_op, inputs=[data], outputs=[self]) - self.tag.test_value = theano.compile.view_op(data).tag.test_value.astype(self.dtype) + Apply(aesara.compile.view_op, inputs=[data], outputs=[self]) + self.tag.test_value = aesara.compile.view_op(data).tag.test_value.astype(self.dtype) self.scaling = _get_scaling(total_size, data.shape, data.ndim) @property @@ -1853,8 +1855,8 @@ def __init__(self, name, data, distribution, total_size=None, model=None): """ Parameters ---------- - type: theano type (optional) - owner: theano owner (optional) + type: aesara type (optional) + owner: aesara owner (optional) name: str distribution: Distribution model: Model @@ -1893,7 +1895,7 @@ def __ne__(self, other): def _walk_up_rv(rv, formatting="plain"): - """Walk up theano graph to get inputs for deterministic RV.""" + """Walk up aesara graph to get inputs for deterministic RV.""" all_rvs = [] parents = list(itertools.chain(*[j.inputs for j in rv.get_parents()])) if parents: @@ -1906,7 +1908,7 @@ def _walk_up_rv(rv, formatting="plain"): return all_rvs -class DeterministicWrapper(tt.TensorVariable): +class DeterministicWrapper(TensorVariable): def _str_repr(self, formatting="plain"): if "latex" in formatting: if formatting == "latex_with_params": @@ -1935,7 +1937,7 @@ def Deterministic(name, var, model=None, dims=None): Parameters ---------- name: str - var: theano variables + var: aesara variables Returns ------- @@ -1956,7 +1958,7 @@ def Potential(name, var, model=None): Parameters ---------- name: str - var: theano variables + var: aesara variables Returns ------- @@ -1974,8 +1976,8 @@ class TransformedRV(PyMC3Variable): Parameters ---------- 
- type: theano type (optional) - owner: theano owner (optional) + type: aesara type (optional) + owner: aesara owner (optional) name: str distribution: Distribution model: Model @@ -2014,7 +2016,7 @@ def __init__( normalRV = transform.backward(self.transformed) - Apply(theano.compile.view_op, inputs=[normalRV], outputs=[self]) + Apply(aesara.compile.view_op, inputs=[normalRV], outputs=[self]) self.tag.test_value = normalRV.tag.test_value self.scaling = _get_scaling(total_size, self.shape, self.ndim) incorporate_methods( diff --git a/pymc3/model_graph.py b/pymc3/model_graph.py index cd3feb30709..433dcfa54f3 100644 --- a/pymc3/model_graph.py +++ b/pymc3/model_graph.py @@ -13,19 +13,19 @@ # limitations under the License. from collections import deque -from typing import Dict, Iterator, Optional, Set +from typing import Dict, Iterator, NewType, Optional, Set -VarName = str - -from theano.compile import SharedVariable -from theano.graph.basic import walk -from theano.tensor import Tensor +from aesara.compile import SharedVariable +from aesara.graph.basic import walk +from aesara.tensor.var import TensorVariable import pymc3 as pm from pymc3.model import ObservedRV from pymc3.util import get_default_varnames, get_var_name +VarName = NewType("VarName", str) + class ModelGraph: def __init__(self, model): @@ -46,17 +46,17 @@ def get_deterministics(self, var): deterministics.append(v) return deterministics - def _get_ancestors(self, var: Tensor, func) -> Set[Tensor]: + def _get_ancestors(self, var: TensorVariable, func) -> Set[TensorVariable]: """Get all ancestors of a function, doing some accounting for deterministics.""" # this contains all of the variables in the model EXCEPT var... vars = set(self.var_list) vars.remove(var) - blockers = set() # type: Set[Tensor] - retval = set() # type: Set[Tensor] + blockers = set() # type: Set[TensorVariable] + retval = set() # type: Set[TensorVariable] - def _expand(node) -> Optional[Iterator[Tensor]]: + def _expand(node) -> Optional[Iterator[TensorVariable]]: if node in blockers: return None elif node in vars: @@ -87,7 +87,7 @@ def _filter_parents(self, var, parents) -> Set[VarName]: raise AssertionError("Do not know what to do with {}".format(get_var_name(p))) return keep - def get_parents(self, var: Tensor) -> Set[VarName]: + def get_parents(self, var: TensorVariable) -> Set[VarName]: """Get the named nodes that are direct inputs to the var""" if hasattr(var, "transformed"): func = var.transformed.logpt @@ -167,7 +167,7 @@ def get_plates(self): if hasattr(v, "observations"): try: # To get shape of _observed_ data container `pm.Data` - # (wrapper for theano.SharedVariable) we evaluate it. + # (wrapper for aesara.SharedVariable) we evaluate it. 
shape = tuple(v.observations.shape.eval()) except AttributeError: shape = v.observations.shape diff --git a/pymc3/ode/ode.py b/pymc3/ode/ode.py index 2eba398404f..5563bf898c1 100644 --- a/pymc3/ode/ode.py +++ b/pymc3/ode/ode.py @@ -14,19 +14,20 @@ import logging +import aesara +import aesara.tensor as aet import numpy as np import scipy -import theano -import theano.tensor as tt -from theano.graph.basic import Apply -from theano.graph.op import Op, get_test_value +from aesara.graph.basic import Apply +from aesara.graph.op import Op, get_test_value +from aesara.tensor.type import TensorType from pymc3.exceptions import DtypeError, ShapeError from pymc3.ode import utils _log = logging.getLogger("pymc3") -floatX = theano.config.floatX +floatX = aesara.config.floatX class DifferentialEquation(Op): @@ -65,12 +66,12 @@ def odefunc(y, t, p): ode_model = DifferentialEquation(func=odefunc, times=times, n_states=1, n_theta=1, t0=0) """ _itypes = [ - tt.TensorType(floatX, (False,)), # y0 as 1D floatX vector - tt.TensorType(floatX, (False,)), # theta as 1D floatX vector + TensorType(floatX, (False,)), # y0 as 1D floatX vector + TensorType(floatX, (False,)), # theta as 1D floatX vector ] _otypes = [ - tt.TensorType(floatX, (False, False)), # model states as floatX of shape (T, S) - tt.TensorType( + TensorType(floatX, (False, False)), # model states as floatX of shape (T, S) + TensorType( floatX, (False, False, False) ), # sensitivities as floatX of shape (T, S, len(y0) + len(theta)) ] @@ -153,8 +154,8 @@ def __call__(self, y0, theta, return_sens=False, **kwargs): ) # convert inputs to tensors (and check their types) - y0 = tt.cast(tt.unbroadcast(tt.as_tensor_variable(y0), 0), floatX) - theta = tt.cast(tt.unbroadcast(tt.as_tensor_variable(theta), 0), floatX) + y0 = aet.cast(aet.unbroadcast(aet.as_tensor_variable(y0), 0), floatX) + theta = aet.cast(aet.unbroadcast(aet.as_tensor_variable(theta), 0), floatX) inputs = [y0, theta] for i, (input_val, itype) in enumerate(zip(inputs, self._itypes)): if not input_val.type == itype: @@ -165,7 +166,7 @@ def __call__(self, y0, theta, return_sens=False, **kwargs): # use default implementation to prepare symbolic outputs (via make_node) states, sens = super().__call__(y0, theta, **kwargs) - if theano.config.compute_test_value != "off": + if aesara.config.compute_test_value != "off": # compute test values from input test values test_states, test_sens = self._simulate( y0=get_test_value(y0), theta=get_test_value(theta) @@ -234,8 +235,8 @@ def grad(self, inputs, output_grads): # for each parameter, multiply sensitivities with the output gradient and sum the result # sens is (n_times, n_states, n_p) # ograds is (n_times, n_states) - grads = [tt.sum(sens[:, :, p] * ograds) for p in range(self.n_p)] + grads = [aet.sum(sens[:, :, p] * ograds) for p in range(self.n_p)] # return separate gradient tensors for y0 and theta inputs - result = tt.stack(grads[: self.n_states]), tt.stack(grads[self.n_states :]) + result = aet.stack(grads[: self.n_states]), aet.stack(grads[self.n_states :]) return result diff --git a/pymc3/ode/utils.py b/pymc3/ode/utils.py index 141c5503f19..474ed901baf 100644 --- a/pymc3/ode/utils.py +++ b/pymc3/ode/utils.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
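The ode.py hunk above keeps the documented `DifferentialEquation` usage intact under the renamed imports; the following is a minimal illustrative sketch of that documented API, where the decay model, time grid, and observations are assumed examples rather than part of the patch.

# Illustrative sketch, not part of the patch: exercises the DifferentialEquation
# API shown in the ode.py hunk above; odefunc, times, and the observations are
# assumed example inputs.
import numpy as np
import pymc3 as pm

from pymc3.ode import DifferentialEquation


def odefunc(y, t, p):
    # one-state linear ODE: dy/dt = p[0] * y
    return [p[0] * y[0]]


times = np.arange(0.5, 5, 0.5)
ode_model = DifferentialEquation(func=odefunc, times=times, n_states=1, n_theta=1, t0=0)

with pm.Model():
    theta = pm.HalfNormal("theta", 1.0)
    y0 = pm.HalfNormal("y0", 1.0)
    states = ode_model(y0=[y0], theta=[theta])  # symbolic (T, 1) trajectory
    pm.Normal("obs", mu=states[:, 0], sigma=0.1, observed=np.ones(len(times)))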
+import aesara +import aesara.tensor as aet import numpy as np -import theano -import theano.tensor as tt def make_sens_ic(n_states, n_theta, floatX): @@ -83,20 +83,20 @@ def augment_system(ode_func, n_states, n_theta): """ # Present state of the system - t_y = tt.vector("y", dtype="float64") + t_y = aet.vector("y", dtype="float64") t_y.tag.test_value = np.ones((n_states,), dtype="float64") # Parameter(s). Should be vector to allow for generaliztion to multiparameter # systems of ODEs. Is m dimensional because it includes all initial conditions as well as ode parameters - t_p = tt.vector("p", dtype="float64") + t_p = aet.vector("p", dtype="float64") t_p.tag.test_value = np.ones((n_states + n_theta,), dtype="float64") # Time. Allow for non-automonous systems of ODEs to be analyzed - t_t = tt.scalar("t", dtype="float64") + t_t = aet.scalar("t", dtype="float64") t_t.tag.test_value = 2.459 # Present state of the gradients: # Will always be 0 unless the parameter is the inital condition # Entry i,j is partial of y[i] wrt to p[j] - dydp_vec = tt.vector("dydp", dtype="float64") + dydp_vec = aet.vector("dydp", dtype="float64") dydp_vec.tag.test_value = make_sens_ic(n_states, n_theta, "float64") dydp = dydp_vec.reshape((n_states, n_states + n_theta)) @@ -106,19 +106,19 @@ def augment_system(ode_func, n_states, n_theta): # Stack the results of the ode_func into a single tensor variable if not isinstance(yhat, (list, tuple)): yhat = (yhat,) - t_yhat = tt.stack(yhat, axis=0) + t_yhat = aet.stack(yhat, axis=0) # Now compute gradients - J = tt.jacobian(t_yhat, t_y) + J = aet.jacobian(t_yhat, t_y) - Jdfdy = tt.dot(J, dydp) + Jdfdy = aet.dot(J, dydp) - grad_f = tt.jacobian(t_yhat, t_p) + grad_f = aet.jacobian(t_yhat, t_p) # This is the time derivative of dydp ddt_dydp = (Jdfdy + grad_f).flatten() - system = theano.function( + system = aesara.function( inputs=[t_y, t_t, t_p, dydp_vec], outputs=[t_yhat, ddt_dydp], on_unused_input="ignore" ) diff --git a/pymc3/parallel_sampling.py b/pymc3/parallel_sampling.py index bdfe1a274b7..4cd39921b24 100644 --- a/pymc3/parallel_sampling.py +++ b/pymc3/parallel_sampling.py @@ -27,7 +27,7 @@ from fastprogress.fastprogress import progress_bar -from pymc3 import theanof +from pymc3 import aesaraf from pymc3.exceptions import SamplingError logger = logging.getLogger("pymc3") @@ -99,7 +99,7 @@ def __init__( self._step_method_is_pickled = step_method_is_pickled self._shared_point = shared_point self._seed = seed - self._tt_seed = seed + 1 + self._aet_seed = seed + 1 self._draws = draws self._tune = tune self._pickle_backend = pickle_backend @@ -170,7 +170,7 @@ def _recv_msg(self): def _start_loop(self): np.random.seed(self._seed) - theanof.set_tt_rng(self._tt_seed) + aesaraf.set_aet_rng(self._aet_seed) draw = 0 tuning = True diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 481d20ff034..98a2e8f3e86 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -25,10 +25,10 @@ from copy import copy, deepcopy from typing import Any, Dict, Iterable, List, Optional, Set, Union, cast +import aesara.gradient as tg import arviz import numpy as np import packaging -import theano.gradient as tg import xarray from arviz import InferenceData diff --git a/pymc3/sampling_jax.py b/pymc3/sampling_jax.py index 522bca7b12e..4f10414caf2 100644 --- a/pymc3/sampling_jax.py +++ b/pymc3/sampling_jax.py @@ -9,13 +9,13 @@ xla_flags = re.sub(r"xla_force_host_platform_device_count=.+\s", "", xla_flags).split() os.environ["XLA_FLAGS"] = " 
".join(["--xla_force_host_platform_device_count={}".format(100)]) +import aesara.graph.fg import arviz as az import jax import numpy as np import pandas as pd -import theano.graph.fg -from theano.link.jax.jax_dispatch import jax_funcify +from aesara.link.jax.jax_dispatch import jax_funcify import pymc3 as pm @@ -24,9 +24,9 @@ warnings.warn("This module is experimental.") # Disable C compilation by default -# theano.config.cxx = "" +# aesara.config.cxx = "" # This will make the JAX Linker the default -# theano.config.mode = "JAX" +# aesara.config.mode = "JAX" def sample_tfp_nuts( @@ -47,7 +47,7 @@ def sample_tfp_nuts( seed = jax.random.PRNGKey(random_seed) - fgraph = theano.graph.fg.FunctionGraph(model.free_RVs, [model.logpt]) + fgraph = aesara.graph.fg.FunctionGraph(model.free_RVs, [model.logpt]) fns = jax_funcify(fgraph) logp_fn_jax = fns[0] @@ -133,7 +133,7 @@ def sample_numpyro_nuts( seed = jax.random.PRNGKey(random_seed) - fgraph = theano.graph.fg.FunctionGraph(model.free_RVs, [model.logpt]) + fgraph = aesara.graph.fg.FunctionGraph(model.free_RVs, [model.logpt]) fns = jax_funcify(fgraph) logp_fn_jax = fns[0] @@ -199,7 +199,7 @@ def _transform_samples(samples, model, keep_untransformed=False): ops_to_compute = [x for x in model.unobserved_RVs if x.name in names_to_compute] # Create function graph for these: - fgraph = theano.graph.fg.FunctionGraph(model.free_RVs, ops_to_compute) + fgraph = aesara.graph.fg.FunctionGraph(model.free_RVs, ops_to_compute) # Jaxify, which returns a list of functions, one for each op jax_fns = jax_funcify(fgraph) diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py index 2e7e369ad32..25f278d1886 100644 --- a/pymc3/smc/smc.py +++ b/pymc3/smc/smc.py @@ -14,22 +14,22 @@ from collections import OrderedDict +import aesara.tensor as aet import numpy as np -import theano.tensor as tt +from aesara import function as aesara_function from scipy.special import logsumexp from scipy.stats import multivariate_normal -from theano import function as theano_function -from pymc3.backends.ndarray import NDArray -from pymc3.model import Point, modelcontext -from pymc3.sampling import sample_prior_predictive -from pymc3.theanof import ( +from pymc3.aesaraf import ( floatX, inputvars, join_nonshared_inputs, make_shared_replacements, ) +from pymc3.backends.ndarray import NDArray +from pymc3.model import Point, modelcontext +from pymc3.sampling import sample_prior_predictive class SMC: @@ -111,8 +111,8 @@ def setup_kernel(self): if self.kernel == "abc": factors = [var.logpt for var in self.model.free_RVs] - factors += [tt.sum(factor) for factor in self.model.potentials] - self.prior_logp_func = logp_forw([tt.sum(factors)], self.variables, shared) + factors += [aet.sum(factor) for factor in self.model.potentials] + self.prior_logp_func = logp_forw([aet.sum(factors)], self.variables, shared) simulator = self.model.observed_RVs[0] distance = simulator.distribution.distance sum_stat = simulator.distribution.sum_stat @@ -271,7 +271,7 @@ def posterior_to_trace(self): def logp_forw(out_vars, vars, shared): - """Compile Theano function of the model and the input and output variables. + """Compile Aesara function of the model and the input and output variables. 
Parameters ---------- @@ -280,10 +280,10 @@ def logp_forw(out_vars, vars, shared): vars: List containing :class:`pymc3.Distribution` for the input variables shared: List - containing :class:`theano.tensor.Tensor` for depended shared data + containing :class:`aesara.tensor.Tensor` for depended shared data """ out_list, inarray0 = join_nonshared_inputs(out_vars, vars, shared) - f = theano_function([inarray0], out_list[0]) + f = aesara_function([inarray0], out_list[0]) f.trust_input = True return f diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index c3e1cf6f8bb..7992153f710 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -19,10 +19,10 @@ from numpy.random import uniform +from pymc3.aesaraf import inputvars from pymc3.blocking import ArrayOrdering, DictToArrayBijection from pymc3.model import PyMC3Variable, modelcontext from pymc3.step_methods.compound import CompoundStep -from pymc3.theanof import inputvars from pymc3.util import get_var_name __all__ = ["ArrayStep", "ArrayStepShared", "metrop_select", "Competence"] @@ -137,7 +137,7 @@ class ArrayStep(BlockedStep): ---------- vars: list List of variables for sampler. - fs: list of logp theano functions + fs: list of logp aesara functions allvars: Boolean (default False) blocked: Boolean (default True) """ @@ -177,7 +177,7 @@ def __init__(self, vars, shared, blocked=True): Parameters ---------- vars: list of sampling variables - shared: dict of theano variable -> shared variable + shared: dict of aesara variable -> shared variable blocked: Boolean (default True) """ self.vars = vars @@ -212,7 +212,7 @@ def __init__(self, vars, shared, blocked=True): Parameters ---------- vars: list of sampling variables - shared: dict of theano variable -> shared variable + shared: dict of aesara variable -> shared variable blocked: Boolean (default True) """ self.population = None @@ -244,14 +244,14 @@ def link_population(self, population, chain_index): class GradientSharedStep(BlockedStep): def __init__( - self, vars, model=None, blocked=True, dtype=None, logp_dlogp_func=None, **theano_kwargs + self, vars, model=None, blocked=True, dtype=None, logp_dlogp_func=None, **aesara_kwargs ): model = modelcontext(model) self.vars = vars self.blocked = blocked if logp_dlogp_func is None: - func = model.logp_dlogp_function(vars, dtype=dtype, **theano_kwargs) + func = model.logp_dlogp_function(vars, dtype=dtype, **aesara_kwargs) else: func = logp_dlogp_func @@ -263,8 +263,8 @@ def __init__( except ValueError: if logp_dlogp_func is not None: raise - theano_kwargs.update(mode="FAST_COMPILE") - func = model.logp_dlogp_function(vars, dtype=dtype, **theano_kwargs) + aesara_kwargs.update(mode="FAST_COMPILE") + func = model.logp_dlogp_function(vars, dtype=dtype, **aesara_kwargs) self._logp_dlogp_func = func diff --git a/pymc3/step_methods/elliptical_slice.py b/pymc3/step_methods/elliptical_slice.py index f1c1bb40d33..0a8d432644f 100644 --- a/pymc3/step_methods/elliptical_slice.py +++ b/pymc3/step_methods/elliptical_slice.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
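The SMC and array-step hunks above both build a joint log-probability by summing `logpt` terms and potentials before compiling; a hedged sketch of that pattern on an assumed toy model (the model and the printout are illustrative only).

# Illustrative sketch, not part of the patch: the aet.sum-over-factors pattern
# from the SMC hunk above, on an assumed toy model.
import aesara.tensor as aet
import pymc3 as pm

from pymc3.aesaraf import inputvars

with pm.Model() as model:
    x = pm.Normal("x", 0.0, 1.0)
    pm.Potential("penalty", -aet.sum(x ** 2))

factors = [var.logpt for var in model.free_RVs]
factors += [aet.sum(factor) for factor in model.potentials]
joint_logp = aet.sum(factors)   # symbolic joint log-probability, as in logp_forw
print(inputvars([joint_logp]))  # the free variables feeding that graph, here [x]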
+import aesara.tensor as aet import numpy as np import numpy.random as nr -import theano.tensor as tt +from pymc3.aesaraf import inputvars from pymc3.distributions import draw_values from pymc3.model import modelcontext from pymc3.step_methods.arraystep import ArrayStep, Competence -from pymc3.theanof import inputvars __all__ = ["EllipticalSlice"] @@ -44,7 +44,7 @@ def get_chol(cov, chol): raise ValueError("Must pass exactly one of cov or chol") if cov is not None: - chol = tt.slinalg.cholesky(cov) + chol = aet.slinalg.cholesky(cov) return chol @@ -86,7 +86,7 @@ class EllipticalSlice(ArrayStep): def __init__(self, vars=None, prior_cov=None, prior_chol=None, model=None, **kwargs): self.model = modelcontext(model) chol = get_chol(prior_cov, prior_chol) - self.prior_chol = tt.as_tensor_variable(chol) + self.prior_chol = aet.as_tensor_variable(chol) if vars is None: vars = self.model.cont_vars diff --git a/pymc3/step_methods/gibbs.py b/pymc3/step_methods/gibbs.py index 2646a8a9e82..f109d49b263 100644 --- a/pymc3/step_methods/gibbs.py +++ b/pymc3/step_methods/gibbs.py @@ -19,6 +19,8 @@ """ from warnings import warn +from aesara.graph.basic import graph_inputs +from aesara.tensor import add from numpy import ( arange, array, @@ -31,8 +33,6 @@ searchsorted, ) from numpy.random import uniform -from theano.graph.basic import graph_inputs -from theano.tensor import add from pymc3.distributions.discrete import Categorical from pymc3.model import modelcontext diff --git a/pymc3/step_methods/hmc/base_hmc.py b/pymc3/step_methods/hmc/base_hmc.py index 323503fe491..7228b8a9c64 100644 --- a/pymc3/step_methods/hmc/base_hmc.py +++ b/pymc3/step_methods/hmc/base_hmc.py @@ -19,13 +19,13 @@ import numpy as np +from pymc3.aesaraf import floatX, inputvars from pymc3.backends.report import SamplerWarning, WarningType from pymc3.exceptions import SamplingError from pymc3.model import Point, modelcontext from pymc3.step_methods import arraystep, step_sizes from pymc3.step_methods.hmc import integration from pymc3.step_methods.hmc.quadpotential import QuadPotentialDiagAdapt, quad_potential -from pymc3.theanof import floatX, inputvars from pymc3.tuning import guess_scaling logger = logging.getLogger("pymc3") @@ -57,13 +57,13 @@ def __init__( t0=10, adapt_step_size=True, step_rand=None, - **theano_kwargs + **aesara_kwargs ): """Set up Hamiltonian samplers with common structures. Parameters ---------- - vars: list of theano variables + vars: list of aesara variables scaling: array_like, ndim = {1,2} Scaling for momentum distribution. 1d arrays interpreted matrix diagonal. @@ -77,7 +77,7 @@ def __init__( potential: Potential, optional An object that represents the Hamiltonian with methods `velocity`, `energy`, and `random` methods. 
- **theano_kwargs: passed to theano functions + **aesara_kwargs: passed to aesara functions """ self._model = modelcontext(model) @@ -85,7 +85,7 @@ def __init__( vars = self._model.cont_vars vars = inputvars(vars) - super().__init__(vars, blocked=blocked, model=model, dtype=dtype, **theano_kwargs) + super().__init__(vars, blocked=blocked, model=model, dtype=dtype, **aesara_kwargs) self.adapt_step_size = adapt_step_size self.Emax = Emax diff --git a/pymc3/step_methods/hmc/hmc.py b/pymc3/step_methods/hmc/hmc.py index 613160c27e3..522a40d94fb 100644 --- a/pymc3/step_methods/hmc/hmc.py +++ b/pymc3/step_methods/hmc/hmc.py @@ -59,7 +59,7 @@ def __init__(self, vars=None, path_length=2.0, max_steps=1024, **kwargs): Parameters ---------- - vars: list of theano variables + vars: list of aesara variables path_length: float, default=2 total length to travel step_rand: function float -> float, default=unif diff --git a/pymc3/step_methods/hmc/nuts.py b/pymc3/step_methods/hmc/nuts.py index 4a00ec98739..8d7b9a69ad8 100644 --- a/pymc3/step_methods/hmc/nuts.py +++ b/pymc3/step_methods/hmc/nuts.py @@ -16,13 +16,13 @@ import numpy as np +from pymc3.aesaraf import floatX from pymc3.backends.report import SamplerWarning, WarningType from pymc3.distributions import BART from pymc3.math import logbern, logdiffexp_numpy from pymc3.step_methods.arraystep import Competence from pymc3.step_methods.hmc.base_hmc import BaseHMC, DivergenceInfo, HMCStepData from pymc3.step_methods.hmc.integration import IntegrationError -from pymc3.theanof import floatX from pymc3.vartypes import continuous_types __all__ = ["NUTS"] @@ -114,7 +114,7 @@ def __init__(self, vars=None, max_treedepth=10, early_max_treedepth=8, **kwargs) Parameters ---------- - vars: list of Theano variables, default all continuous vars + vars: list of Aesara variables, default all continuous vars Emax: float, default 1000 Maximum energy change allowed during leapfrog steps. Larger deviations will abort the integration. 
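The HMC/NUTS hunks above only rename keyword plumbing and docstrings; as a usage-level sketch, the documented NUTS arguments read the same after the rename (the toy model and sampler settings below are assumed, not part of the patch).

# Illustrative sketch, not part of the patch: the NUTS keywords documented in the
# hunk above, applied to an assumed toy model.
import pymc3 as pm

with pm.Model():
    x = pm.Normal("x", 0.0, 1.0)
    # `vars` lists the (Aesara) variables the sampler updates; `max_treedepth`
    # is the NUTS parameter described above.
    step = pm.NUTS(vars=[x], max_treedepth=10)
    trace = pm.sample(draws=500, tune=500, step=step, cores=1)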
diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py index 4c2e6acc7a3..f77f1f99883 100644 --- a/pymc3/step_methods/hmc/quadpotential.py +++ b/pymc3/step_methods/hmc/quadpotential.py @@ -14,14 +14,14 @@ import warnings +import aesara import numpy as np import scipy.linalg -import theano from numpy.random import normal from scipy.sparse import issparse -from pymc3.theanof import floatX +from pymc3.aesaraf import floatX __all__ = [ "quad_potential", @@ -170,7 +170,7 @@ def __init__( ) if dtype is None: - dtype = theano.config.floatX + dtype = aesara.config.floatX if initial_diag is None: initial_diag = np.ones(n, dtype=dtype) @@ -189,7 +189,7 @@ def __init__( def reset(self): self._var = np.array(self._initial_diag, dtype=self.dtype, copy=True) - self._var_theano = theano.shared(self._var) + self._var_aesara = aesara.shared(self._var) self._stds = np.sqrt(self._initial_diag) self._inv_stds = floatX(1.0) / self._stds self._foreground_var = _WeightedVariance( @@ -222,7 +222,7 @@ def _update_from_weightvar(self, weightvar): weightvar.current_variance(out=self._var) np.sqrt(self._var, out=self._stds) np.divide(1, self._stds, out=self._inv_stds) - self._var_theano.set_value(self._var) + self._var_aesara.set_value(self._var) def update(self, sample, grad, tune): """Inform the potential about a new sample during tuning.""" @@ -304,7 +304,7 @@ def _update(self, var): self._var[:] = var np.sqrt(self._var, out=self._stds) np.divide(1, self._stds, out=self._inv_stds) - self._var_theano.set_value(self._var) + self._var_aesara.set_value(self._var) def update(self, sample, grad, tune): """Inform the potential about a new sample during tuning.""" @@ -384,7 +384,7 @@ def __init__(self, v, dtype=None): Diagonal of covariance matrix for the potential vector """ if dtype is None: - dtype = theano.config.floatX + dtype = aesara.config.floatX self.dtype = dtype v = v.astype(self.dtype) s = v ** 0.5 @@ -428,7 +428,7 @@ def __init__(self, A, dtype=None): Inverse of covariance matrix for the potential vector """ if dtype is None: - dtype = theano.config.floatX + dtype = aesara.config.floatX self.dtype = dtype self.L = floatX(scipy.linalg.cholesky(A, lower=True)) @@ -468,7 +468,7 @@ def __init__(self, cov, dtype=None): scaling matrix for the potential vector """ if dtype is None: - dtype = theano.config.floatX + dtype = aesara.config.floatX self.dtype = dtype self._cov = np.array(cov, dtype=self.dtype, copy=True) self._chol = scipy.linalg.cholesky(self._cov, lower=True) @@ -525,7 +525,7 @@ def __init__( ) if dtype is None: - dtype = theano.config.floatX + dtype = aesara.config.floatX if initial_cov is None: initial_cov = np.eye(n, dtype=dtype) @@ -658,7 +658,7 @@ def current_mean(self): if chol_available: __all__ += ["QuadPotentialSparse"] - import theano.sparse + import aesara.sparse class QuadPotentialSparse(QuadPotential): def __init__(self, A): @@ -676,8 +676,8 @@ def __init__(self, A): def velocity(self, x): """Compute the current velocity at a position in parameter space.""" - A = theano.sparse.as_sparse(self.A) - return theano.sparse.dot(A, x) + A = aesara.sparse.as_sparse(self.A) + return aesara.sparse.dot(A, x) def random(self): """Draw random value from QuadPotential.""" diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index 76804db2f8f..0878b2b7728 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # 
limitations under the License. +import aesara import numpy as np import numpy.random as nr import scipy.linalg -import theano import pymc3 as pm +from pymc3.aesaraf import floatX from pymc3.distributions import draw_values from pymc3.step_methods.arraystep import ( ArrayStep, @@ -27,7 +28,6 @@ PopulationArrayStepShared, metrop_select, ) -from pymc3.theanof import floatX __all__ = [ "Metropolis", @@ -142,7 +142,7 @@ def __init__( model: PyMC Model Optional model for sampling step. Defaults to None (taken from context). mode: string or `Mode` instance. - compilation mode passed to Theano functions + compilation mode passed to Aesara functions """ model = pm.modelcontext(model) @@ -571,7 +571,7 @@ class DEMetropolis(PopulationArrayStepShared): model: PyMC Model Optional model for sampling step. Defaults to None (taken from context). mode: string or `Mode` instance. - compilation mode passed to Theano functions + compilation mode passed to Aesara functions References ---------- @@ -713,7 +713,7 @@ class DEMetropolisZ(ArrayStepShared): model: PyMC Model Optional model for sampling step. Defaults to None (taken from context). mode: string or `Mode` instance. - compilation mode passed to Theano functions + compilation mode passed to Aesara functions References ---------- @@ -887,6 +887,6 @@ def delta_logp(logp, vars, shared): logp1 = pm.CallableTensor(logp0)(inarray1) - f = theano.function([inarray1, inarray0], logp1 - logp0) + f = aesara.function([inarray1, inarray0], logp1 - logp0) f.trust_input = True return f diff --git a/pymc3/step_methods/mlda.py b/pymc3/step_methods/mlda.py index 559f894f300..8edf54209b0 100644 --- a/pymc3/step_methods/mlda.py +++ b/pymc3/step_methods/mlda.py @@ -17,10 +17,11 @@ from typing import List, Optional, Type, Union +import aesara import arviz as az import numpy as np -import theano -import theano.tensor as tt + +from aesara.tensor.sharedvar import TensorSharedVariable import pymc3 as pm @@ -254,7 +255,7 @@ class MLDA(ArrayStepShared): (taken from context). This model should be the finest of all multilevel models. mode : string or `Mode` instance. - Compilation mode passed to Theano functions + Compilation mode passed to Aesara functions subsampling_rates : integer or list of integers One interger for all levels or a list with one number for each level (excluding the finest level). @@ -275,7 +276,7 @@ class MLDA(ArrayStepShared): the PyMC3 model (also demonstrated in the example notebook): - Include a `pm.Data()` variable with the name `Q` in the model description of all levels. - - Use a Theano Op to calculate the forward model (or the + - Use a Aesara Op to calculate the forward model (or the combination of a forward model and a likelihood). This Op should have a `perform()` method which (in addition to all the other calculations), calculates the quantity of interest @@ -300,7 +301,7 @@ class MLDA(ArrayStepShared): extra variables mu_B and Sigma_B, which will capture the bias between different levels. All these variables should be instantiated using the pm.Data method. - - Use a Theano Op to define the forward model (and + - Use a Aesara Op to define the forward model (and optionally the likelihood) for all levels. The Op needs to store the result of each forward model calculation to the variable model_output of the PyMC3 model, @@ -419,7 +420,7 @@ def __init__( "for storing the fine Q." "Use pm.Data() to define it." 
) - if not isinstance(self.model.Q, tt.sharedvar.TensorSharedVariable): + if not isinstance(self.model.Q, TensorSharedVariable): raise TypeError( "The variable 'Q' in the model definition is not of type " "'TensorSharedVariable'. Use pm.Data() to define the" @@ -454,8 +455,8 @@ def __init__( "Use pm.Data() to define it." ) if not ( - isinstance(self.model_below.mu_B, tt.sharedvar.TensorSharedVariable) - and isinstance(self.model_below.Sigma_B, tt.sharedvar.TensorSharedVariable) + isinstance(self.model_below.mu_B, TensorSharedVariable) + and isinstance(self.model_below.Sigma_B, TensorSharedVariable) ): raise TypeError( "At least one of the variables 'mu_B' and 'Sigma_B' " @@ -549,12 +550,12 @@ def __init__( self.accepted = 0 - # Construct theano function for current-level model likelihood + # Construct aesara function for current-level model likelihood # (for use in acceptance) shared = pm.make_shared_replacements(vars, model) self.delta_logp = delta_logp_inverse(model.logpt, vars, shared) - # Construct theano function for below-level model likelihood + # Construct aesara function for below-level model likelihood # (for use in acceptance) model_below = pm.modelcontext(self.model_below) vars_below = [var for var in model_below.vars if var.name in self.var_names] @@ -964,7 +965,7 @@ def delta_logp_inverse(logp, vars, shared): logp1 = pm.CallableTensor(logp0)(inarray1) - f = theano.function([inarray1, inarray0], -logp0 + logp1) + f = aesara.function([inarray1, inarray0], -logp0 + logp1) f.trust_input = True return f diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py index c3bac3ade93..9649a9cb8fb 100644 --- a/pymc3/step_methods/pgbart.py +++ b/pymc3/step_methods/pgbart.py @@ -16,13 +16,13 @@ import numpy as np -from theano import function as theano_function +from aesara import function as aesara_function +from pymc3.aesaraf import inputvars, join_nonshared_inputs, make_shared_replacements from pymc3.distributions import BART from pymc3.distributions.tree import Tree from pymc3.model import modelcontext from pymc3.step_methods.arraystep import ArrayStepShared, Competence -from pymc3.theanof import inputvars, join_nonshared_inputs, make_shared_replacements _log = logging.getLogger("pymc3") @@ -274,7 +274,7 @@ def set_particle_to_step(self, t): def logp(out_vars, vars, shared): - """Compile Theano function of the model and the input and output variables. + """Compile Aesara function of the model and the input and output variables. 
Parameters ---------- @@ -283,9 +283,9 @@ def logp(out_vars, vars, shared): vars: List containing :class:`pymc3.Distribution` for the input variables shared: List - containing :class:`theano.tensor.Tensor` for depended shared data + containing :class:`aesara.tensor.Tensor` for depended shared data """ out_list, inarray0 = join_nonshared_inputs(out_vars, vars, shared) - f = theano_function([inarray0], out_list[0]) + f = aesara_function([inarray0], out_list[0]) f.trust_input = True return f diff --git a/pymc3/step_methods/sgmcmc.py b/pymc3/step_methods/sgmcmc.py index 1620f21b0e8..80246db758e 100644 --- a/pymc3/step_methods/sgmcmc.py +++ b/pymc3/step_methods/sgmcmc.py @@ -16,12 +16,12 @@ from collections import OrderedDict -import theano -import theano.tensor as tt +import aesara +import aesara.tensor as aet +from pymc3.aesaraf import aet_rng, make_shared_replacements from pymc3.model import inputvars, modelcontext from pymc3.step_methods.arraystep import ArrayStepShared -from pymc3.theanof import make_shared_replacements, tt_rng __all__ = [] @@ -45,8 +45,8 @@ def _check_minibatches(minibatch_tensors, minibatches): def prior_dlogp(vars, model, flat_view): """Returns the gradient of the prior on the parameters as a vector of size D x 1""" - terms = tt.concatenate([theano.grad(var.logpt, var).flatten() for var in vars], axis=0) - dlogp = theano.clone(terms, flat_view.replacements, strict=False) + terms = aet.concatenate([aesara.grad(var.logpt, var).flatten() for var in vars], axis=0) + dlogp = aesara.clone_replace(terms, flat_view.replacements, strict=False) return dlogp @@ -63,12 +63,14 @@ def elemwise_dlogL(vars, model, flat_view): # calculate fisher information terms = [] for var in vars: - output, _ = theano.scan( - lambda i, logX=logL, v=var: theano.grad(logX[i], v).flatten(), - sequences=[tt.arange(logL.shape[0])], + output, _ = aesara.scan( + lambda i, logX=logL, v=var: aesara.grad(logX[i], v).flatten(), + sequences=[aet.arange(logL.shape[0])], ) terms.append(output) - dlogL = theano.clone(tt.concatenate(terms, axis=1), flat_view.replacements, strict=False) + dlogL = aesara.clone_replace( + aet.concatenate(terms, axis=1), flat_view.replacements, strict=False + ) return dlogL @@ -106,7 +108,7 @@ class BaseStochasticGradient(ArrayStepShared): Defining a BaseStochasticGradient needs custom implementation of the following methods: - :code: `.mk_training_fn()` - Returns a theano function which is called for each sampling step + Returns a aesara function which is called for each sampling step - :code: `._initialize_values()` Returns None it creates class variables which are required for the training fn """ @@ -145,9 +147,9 @@ def __init__( # set random stream self.random = None if random_seed is None: - self.random = tt_rng() + self.random = aet_rng() else: - self.random = tt_rng(random_seed) + self.random = aet_rng(random_seed) self.step_size = step_size @@ -169,7 +171,7 @@ def __init__( # Replace input shared variables with tensors def is_shared(t): - return isinstance(t, theano.compile.sharedvalue.SharedVariable) + return isinstance(t, aesara.compile.sharedvalue.SharedVariable) tensors = [(t.type() if is_shared(t) else t) for t in minibatch_tensors] updates = OrderedDict( diff --git a/pymc3/step_methods/slicer.py b/pymc3/step_methods/slicer.py index ef68dec9939..b0320a9effd 100644 --- a/pymc3/step_methods/slicer.py +++ b/pymc3/step_methods/slicer.py @@ -17,9 +17,9 @@ import numpy as np import numpy.random as nr +from pymc3.aesaraf import inputvars from pymc3.model import modelcontext from 
pymc3.step_methods.arraystep import ArrayStep, Competence -from pymc3.theanof import inputvars from pymc3.vartypes import continuous_types __all__ = ["Slice"] diff --git a/pymc3/tests/backend_fixtures.py b/pymc3/tests/backend_fixtures.py index 6fd0b1318cd..9ef8d03a7d5 100644 --- a/pymc3/tests/backend_fixtures.py +++ b/pymc3/tests/backend_fixtures.py @@ -16,10 +16,10 @@ import os import shutil +import aesara import numpy as np import numpy.testing as npt import pytest -import theano from pymc3.backends import base from pymc3.tests import models @@ -250,7 +250,7 @@ def record_point(self, val): else: self.strace.record(point=point) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") def test_standard_close(self): for idx in range(self.draws): self.record_point(idx) @@ -293,14 +293,14 @@ class SelectionTestCase(ModelBackendSampledTestCase): - shape """ - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") def test_get_values_default(self): for varname in self.test_point.keys(): expected = np.concatenate([self.expected[chain][varname] for chain in [0, 1]]) result = self.mtrace.get_values(varname) npt.assert_equal(result, expected) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") def test_get_values_nocombine_burn_keyword(self): burn = 2 for varname in self.test_point.keys(): @@ -311,7 +311,7 @@ def test_get_values_nocombine_burn_keyword(self): def test_len(self): assert len(self.mtrace) == self.draws - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") def test_dtypes(self): for varname in self.test_point.keys(): assert ( @@ -515,7 +515,7 @@ def test_chain_length(self): assert self.mtrace0.nchains == self.mtrace1.nchains assert len(self.mtrace0) == len(self.mtrace1) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") def test_dtype(self): for varname in self.test_point.keys(): assert ( diff --git a/pymc3/tests/conftest.py b/pymc3/tests/conftest.py index e9d38d163ff..1be0184c0ec 100644 --- a/pymc3/tests/conftest.py +++ b/pymc3/tests/conftest.py @@ -12,31 +12,31 @@ # See the License for the specific language governing permissions and # limitations under the License. 
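The backend fixtures above repeatedly guard precision-sensitive tests with the same conditional `xfail`; a small sketch of that pattern follows, with an assumed stand-in test body.

# Illustrative sketch, not part of the patch: the float32-conditional xfail
# pattern used throughout the backend fixtures above; the test body is assumed.
import aesara
import numpy as np
import pytest


@pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
def test_precision_sensitive_sum():
    x = np.full(10000, 0.1, dtype=aesara.config.floatX)
    np.testing.assert_allclose(x.sum(), 1000.0, rtol=1e-10)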
+import aesara import numpy as np import pytest -import theano import pymc3 as pm @pytest.fixture(scope="function", autouse=True) -def theano_config(): - config = theano.config.change_flags(compute_test_value="raise") +def aesara_config(): + config = aesara.config.change_flags(compute_test_value="raise") with config: yield @pytest.fixture(scope="function", autouse=True) def exception_verbosity(): - config = theano.config.change_flags(exception_verbosity="high") + config = aesara.config.change_flags(exception_verbosity="high") with config: yield @pytest.fixture(scope="function", autouse=False) def strict_float32(): - if theano.config.floatX == "float32": - config = theano.config.change_flags(warn_float64="raise") + if aesara.config.floatX == "float32": + config = aesara.config.change_flags(warn_float64="raise") with config: yield else: @@ -47,4 +47,4 @@ def strict_float32(): def seeded_test(): # TODO: use this instead of SeededTest np.random.seed(42) - pm.set_tt_rng(42) + pm.set_aet_rng(42) diff --git a/pymc3/tests/helpers.py b/pymc3/tests/helpers.py index 6e56fad9d02..9806fb0b8e0 100644 --- a/pymc3/tests/helpers.py +++ b/pymc3/tests/helpers.py @@ -16,13 +16,13 @@ from logging.handlers import BufferingHandler +import aesara import numpy.random as nr -import theano -from theano.gradient import verify_grad as tt_verify_grad -from theano.sandbox.rng_mrg import MRG_RandomStream as RandomStream +from aesara.gradient import verify_grad as aet_verify_grad +from aesara.sandbox.rng_mrg import MRG_RandomStream as RandomStream -from pymc3.theanof import set_tt_rng, tt_rng +from pymc3.aesaraf import aet_rng, set_aet_rng class SeededTest: @@ -34,11 +34,11 @@ def setup_class(cls): def setup_method(self): nr.seed(self.random_seed) - self.old_tt_rng = tt_rng() - set_tt_rng(RandomStream(self.random_seed)) + self.old_aet_rng = aet_rng() + set_aet_rng(RandomStream(self.random_seed)) def teardown_method(self): - set_tt_rng(self.old_tt_rng) + set_aet_rng(self.old_aet_rng) class LoggingHandler(BufferingHandler): @@ -104,7 +104,7 @@ def match_value(self, k, dv, v): def select_by_precision(float64, float32): """Helper function to choose reasonable decimal cutoffs for different floatX modes.""" - decimal = float64 if theano.config.floatX == "float64" else float32 + decimal = float64 if aesara.config.floatX == "float64" else float32 return decimal @@ -116,4 +116,4 @@ def not_raises(): def verify_grad(op, pt, n_tests=2, rng=None, *args, **kwargs): if rng is None: rng = nr.RandomState(411342) - tt_verify_grad(op, pt, n_tests, rng, *args, **kwargs) + aet_verify_grad(op, pt, n_tests, rng, *args, **kwargs) diff --git a/pymc3/tests/models.py b/pymc3/tests/models.py index 5f607348591..49f9cd1e7a3 100644 --- a/pymc3/tests/models.py +++ b/pymc3/tests/models.py @@ -14,23 +14,23 @@ from itertools import product +import aesara +import aesara.tensor as aet import numpy as np -import theano -import theano.tensor as tt -from theano.compile.ops import as_op +from aesara.compile.ops import as_op import pymc3 as pm from pymc3 import Categorical, Metropolis, Model, Normal -from pymc3.theanof import floatX_array +from pymc3.aesaraf import floatX_array def simple_model(): mu = -2.1 tau = 1.3 with Model() as model: - Normal("x", mu, tau=tau, shape=2, testval=tt.ones(2) * 0.1) + Normal("x", mu, tau=tau, shape=2, testval=aet.ones(2) * 0.1) return model.test_point, model, (mu, tau ** -0.5) @@ -50,13 +50,13 @@ def multidimensional_model(): mu = -2.1 tau = 1.3 with Model() as model: - Normal("x", mu, tau=tau, shape=(3, 2), testval=0.1 * 
tt.ones((3, 2))) + Normal("x", mu, tau=tau, shape=(3, 2), testval=0.1 * aet.ones((3, 2))) return model.test_point, model, (mu, tau ** -0.5) def simple_arbitrary_det(): - scalar_type = tt.dscalar if theano.config.floatX == "float64" else tt.fscalar + scalar_type = aet.dscalar if aesara.config.floatX == "float64" else aet.fscalar @as_op(itypes=[scalar_type], otypes=[scalar_type]) def arbitrary_det(value): @@ -82,7 +82,7 @@ def simple_2model(): p = 0.4 with Model() as model: x = pm.Normal("x", mu, tau=tau, testval=0.1) - pm.Deterministic("logx", tt.log(x)) + pm.Deterministic("logx", aet.log(x)) pm.Bernoulli("y", p) return model.test_point, model @@ -92,7 +92,7 @@ def simple_2model_continuous(): tau = 1.3 with Model() as model: x = pm.Normal("x", mu, tau=tau, testval=0.1) - pm.Deterministic("logx", tt.log(x)) + pm.Deterministic("logx", aet.log(x)) pm.Beta("y", alpha=1, beta=1, shape=2) return model.test_point, model @@ -104,8 +104,8 @@ def mv_simple(): with pm.Model() as model: pm.MvNormal( "x", - tt.constant(mu), - tau=tt.constant(tau), + aet.constant(mu), + tau=aet.constant(tau), shape=3, testval=floatX_array([0.1, 1.0, 0.8]), ) @@ -121,8 +121,8 @@ def mv_simple_coarse(): with pm.Model() as model: pm.MvNormal( "x", - tt.constant(mu), - tau=tt.constant(tau), + aet.constant(mu), + tau=aet.constant(tau), shape=3, testval=floatX_array([0.1, 1.0, 0.8]), ) @@ -138,8 +138,8 @@ def mv_simple_very_coarse(): with pm.Model() as model: pm.MvNormal( "x", - tt.constant(mu), - tau=tt.constant(tau), + aet.constant(mu), + tau=aet.constant(tau), shape=3, testval=floatX_array([0.1, 1.0, 0.8]), ) @@ -153,7 +153,7 @@ def mv_simple_discrete(): n = 5 p = floatX_array([0.15, 0.85]) with pm.Model() as model: - pm.Multinomial("x", n, tt.constant(p), shape=d, testval=np.array([1, 4])) + pm.Multinomial("x", n, aet.constant(p), shape=d, testval=np.array([1, 4])) mu = n * p # covariance matrix C = np.zeros((d, d)) diff --git a/pymc3/tests/sampler_fixtures.py b/pymc3/tests/sampler_fixtures.py index fcf66f15569..aacb3fb3aba 100644 --- a/pymc3/tests/sampler_fixtures.py +++ b/pymc3/tests/sampler_fixtures.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
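The fixture models above pin starting points with `aesara.tensor` test values while the conftest fixture enables test-value computation; a hedged standalone sketch combining the two, with assumed numbers.

# Illustrative sketch, not part of the patch: the testval pattern from the
# fixture models above; compute_test_value is enabled the same way the
# conftest fixture does it. The mu/tau/shape numbers are assumed.
import aesara
import aesara.tensor as aet
import pymc3 as pm

with aesara.config.change_flags(compute_test_value="raise"):
    with pm.Model() as model:
        pm.Normal("x", mu=-2.1, tau=1.3, shape=(3, 2), testval=0.1 * aet.ones((3, 2)))
    print(model.test_point["x"])  # (3, 2) array of 0.1 starting values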
+import aesara.tensor as aet import arviz as az import numpy as np import numpy.testing as npt -import theano.tensor as tt from scipy import stats @@ -124,9 +124,9 @@ def make_model(cls): sd_dist = pm.Lognormal.dist(mu=sd_mu, sigma=sd_mu / 10.0, shape=5) chol_packed = pm.LKJCholeskyCov("chol_packed", eta=3, n=5, sd_dist=sd_dist) chol = pm.expand_packed_triangular(5, chol_packed, lower=True) - cov = tt.dot(chol, chol.T) - stds = tt.sqrt(tt.diag(cov)) - pm.Deterministic("log_stds", tt.log(stds)) + cov = aet.dot(chol, chol.T) + stds = aet.sqrt(aet.diag(cov)) + pm.Deterministic("log_stds", aet.log(stds)) corr = cov / stds[None, :] / stds[:, None] corr_entries_unit = (corr[np.tril_indices(5, -1)] + 1) / 2 pm.Deterministic("corr_entries_unit", corr_entries_unit) diff --git a/pymc3/tests/test_theanof.py b/pymc3/tests/test_aesaraf.py similarity index 90% rename from pymc3/tests/test_theanof.py rename to pymc3/tests/test_aesaraf.py index d54aed680d8..1b591e0a859 100644 --- a/pymc3/tests/test_theanof.py +++ b/pymc3/tests/test_aesaraf.py @@ -14,15 +14,17 @@ from itertools import product +import aesara +import aesara.tensor as aet import numpy as np import pytest -import theano -import theano.tensor as tt -from pymc3.theanof import _conversion_map, take_along_axis +from aesara.tensor.type import TensorType + +from pymc3.aesaraf import _conversion_map, take_along_axis from pymc3.vartypes import int_types -FLOATX = str(theano.config.floatX) +FLOATX = str(aesara.config.floatX) INTX = str(_conversion_map[FLOATX]) @@ -78,8 +80,8 @@ def setup_class(self): def _input_tensors(self, shape): ndim = len(shape) - arr = tt.TensorType(FLOATX, [False] * ndim)("arr") - indices = tt.TensorType(INTX, [False] * ndim)("indices") + arr = TensorType(FLOATX, [False] * ndim)("arr") + indices = TensorType(INTX, [False] * ndim)("indices") arr.tag.test_value = np.zeros(shape, dtype=FLOATX) indices.tag.test_value = np.zeros(shape, dtype=INTX) return arr, indices @@ -107,7 +109,7 @@ def get_output_tensors(self, shape, axis): return out def _function(self, arr, indices, out): - return theano.function([arr, indices], [out]) + return aesara.function([arr, indices], [out]) def get_function(self, shape, axis): ndim = len(shape) @@ -181,13 +183,13 @@ def test_take_along_axis_grad(self, shape, axis, samples): _axis = len(shape) + axis else: _axis = axis - # Setup the theano function + # Setup the aesara function t_arr, t_indices = self.get_input_tensors(shape) - t_out2 = theano.grad( - tt.sum(self._output_tensor(t_arr ** 2, t_indices, axis)), + t_out2 = aesara.grad( + aet.sum(self._output_tensor(t_arr ** 2, t_indices, axis)), t_arr, ) - func = theano.function([t_arr, t_indices], [t_out2]) + func = aesara.function([t_arr, t_indices], [t_out2]) # Test that the gradient gives the same output as what is expected arr, indices = self.get_input_values(shape, axis, samples) @@ -209,16 +211,16 @@ def test_axis_failure(self, axis): take_along_axis(arr, indices, axis=axis) def test_ndim_failure(self): - arr = tt.TensorType(FLOATX, [False] * 3)("arr") - indices = tt.TensorType(INTX, [False] * 2)("indices") + arr = TensorType(FLOATX, [False] * 3)("arr") + indices = TensorType(INTX, [False] * 2)("indices") arr.tag.test_value = np.zeros((1,) * arr.ndim, dtype=FLOATX) indices.tag.test_value = np.zeros((1,) * indices.ndim, dtype=INTX) with pytest.raises(ValueError): take_along_axis(arr, indices) def test_dtype_failure(self): - arr = tt.TensorType(FLOATX, [False] * 3)("arr") - indices = tt.TensorType(FLOATX, [False] * 3)("indices") + arr = 
TensorType(FLOATX, [False] * 3)("arr") + indices = TensorType(FLOATX, [False] * 3)("indices") arr.tag.test_value = np.zeros((1,) * arr.ndim, dtype=FLOATX) indices.tag.test_value = np.zeros((1,) * indices.ndim, dtype=FLOATX) with pytest.raises(IndexError): diff --git a/pymc3/tests/test_data_container.py b/pymc3/tests/test_data_container.py index 966ce47cd6a..fb4a3557493 100644 --- a/pymc3/tests/test_data_container.py +++ b/pymc3/tests/test_data_container.py @@ -16,12 +16,12 @@ import pandas as pd import pytest -from theano import shared +from aesara import shared import pymc3 as pm +from pymc3.aesaraf import floatX from pymc3.tests.helpers import SeededTest -from pymc3.theanof import floatX class TestData(SeededTest): diff --git a/pymc3/tests/test_dist_math.py b/pymc3/tests/test_dist_math.py index de9bbd5b7e5..f3b193b8a16 100644 --- a/pymc3/tests/test_dist_math.py +++ b/pymc3/tests/test_dist_math.py @@ -13,16 +13,17 @@ # limitations under the License. import sys +import aesara +import aesara.tensor as aet import numpy as np import numpy.testing as npt import pytest -import theano -import theano.tensor as tt from scipy import interpolate, stats import pymc3 as pm +from pymc3.aesaraf import floatX from pymc3.distributions import Discrete from pymc3.distributions.dist_math import ( MvNormalLogp, @@ -34,28 +35,27 @@ i0e, ) from pymc3.tests.helpers import verify_grad -from pymc3.theanof import floatX def test_bound(): - logp = tt.ones((10, 10)) - cond = tt.ones((10, 10)) + logp = aet.ones((10, 10)) + cond = aet.ones((10, 10)) assert np.all(bound(logp, cond).eval() == logp.eval()) - logp = tt.ones((10, 10)) - cond = tt.zeros((10, 10)) + logp = aet.ones((10, 10)) + cond = aet.zeros((10, 10)) assert np.all(bound(logp, cond).eval() == (-np.inf * logp).eval()) - logp = tt.ones((10, 10)) + logp = aet.ones((10, 10)) cond = True assert np.all(bound(logp, cond).eval() == logp.eval()) - logp = tt.ones(3) + logp = aet.ones(3) cond = np.array([1, 0, 1]) assert not np.all(bound(logp, cond).eval() == 1) assert np.prod(bound(logp, cond).eval()) == -np.inf - logp = tt.ones((2, 3)) + logp = aet.ones((2, 3)) cond = np.array([[1, 1, 1], [1, 0, 1]]) assert not np.all(bound(logp, cond).eval() == 1) assert np.prod(bound(logp, cond).eval()) == -np.inf @@ -63,7 +63,7 @@ def test_bound(): def test_check_bounds_false(): with pm.Model(check_bounds=False): - logp = tt.ones(3) + logp = aet.ones(3) cond = np.array([1, 0, 1]) assert np.all(bound(logp, cond).eval() == logp.eval()) @@ -71,21 +71,21 @@ def test_check_bounds_false(): def test_alltrue_scalar(): assert alltrue_scalar([]).eval() assert alltrue_scalar([True]).eval() - assert alltrue_scalar([tt.ones(10)]).eval() - assert alltrue_scalar([tt.ones(10), 5 * tt.ones(101)]).eval() - assert alltrue_scalar([np.ones(10), 5 * tt.ones(101)]).eval() - assert alltrue_scalar([np.ones(10), True, 5 * tt.ones(101)]).eval() - assert alltrue_scalar([np.array([1, 2, 3]), True, 5 * tt.ones(101)]).eval() + assert alltrue_scalar([aet.ones(10)]).eval() + assert alltrue_scalar([aet.ones(10), 5 * aet.ones(101)]).eval() + assert alltrue_scalar([np.ones(10), 5 * aet.ones(101)]).eval() + assert alltrue_scalar([np.ones(10), True, 5 * aet.ones(101)]).eval() + assert alltrue_scalar([np.array([1, 2, 3]), True, 5 * aet.ones(101)]).eval() assert not alltrue_scalar([False]).eval() - assert not alltrue_scalar([tt.zeros(10)]).eval() + assert not alltrue_scalar([aet.zeros(10)]).eval() assert not alltrue_scalar([True, False]).eval() - assert not alltrue_scalar([np.array([0, -1]), tt.ones(60)]).eval() - 
assert not alltrue_scalar([np.ones(10), False, 5 * tt.ones(101)]).eval() + assert not alltrue_scalar([np.array([0, -1]), aet.ones(60)]).eval() + assert not alltrue_scalar([np.ones(10), False, 5 * aet.ones(101)]).eval() def test_alltrue_shape(): - vals = [True, tt.ones(10), tt.zeros(5)] + vals = [True, aet.ones(10), aet.zeros(5)] assert alltrue_scalar(vals).eval().shape == () @@ -102,11 +102,11 @@ def logp(self, value): p = self.p return bound( - factln(n) - factln(value).sum() + (value * tt.log(p)).sum(), + factln(n) - factln(value).sum() + (value * aet.log(p)).sum(), value >= 0, 0 <= p, p <= 1, - tt.isclose(p.sum(), 1), + aet.isclose(p.sum(), 1), broadcast_conditions=False, ) @@ -123,11 +123,11 @@ def logp(self, value): p = self.p return bound( - factln(n) - factln(value).sum() + (value * tt.log(p)).sum(), - tt.all(value >= 0), - tt.all(0 <= p), - tt.all(p <= 1), - tt.isclose(p.sum(), 1), + factln(n) - factln(value).sum() + (value * aet.log(p)).sum(), + aet.all(value >= 0), + aet.all(0 <= p), + aet.all(p <= 1), + aet.isclose(p.sum(), 1), broadcast_conditions=False, ) @@ -156,30 +156,30 @@ def test_logp(self): chol_val = floatX(np.array([[1, 0.9], [0, 2]])) cov_val = floatX(np.dot(chol_val, chol_val.T)) - cov = tt.matrix("cov") + cov = aet.matrix("cov") cov.tag.test_value = cov_val delta_val = floatX(np.random.randn(5, 2)) - delta = tt.matrix("delta") + delta = aet.matrix("delta") delta.tag.test_value = delta_val expect = stats.multivariate_normal(mean=np.zeros(2), cov=cov_val) expect = expect.logpdf(delta_val).sum() logp = MvNormalLogp()(cov, delta) - logp_f = theano.function([cov, delta], logp) + logp_f = aesara.function([cov, delta], logp) logp = logp_f(cov_val, delta_val) npt.assert_allclose(logp, expect) - @theano.config.change_flags(compute_test_value="ignore") + @aesara.config.change_flags(compute_test_value="ignore") def test_grad(self): np.random.seed(42) def func(chol_vec, delta): - chol = tt.stack( + chol = aet.stack( [ - tt.stack([tt.exp(0.1 * chol_vec[0]), 0]), - tt.stack([chol_vec[1], 2 * tt.exp(chol_vec[2])]), + aet.stack([aet.exp(0.1 * chol_vec[0]), 0]), + aet.stack([chol_vec[1], 2 * aet.exp(chol_vec[2])]), ] ) - cov = tt.dot(chol, chol.T) + cov = aet.dot(chol, chol.T) return MvNormalLogp()(cov, delta) chol_vec_val = floatX(np.array([0.5, 1.0, -0.1])) @@ -190,46 +190,46 @@ def func(chol_vec, delta): delta_val = floatX(np.random.randn(5, 2)) verify_grad(func, [chol_vec_val, delta_val]) - @pytest.mark.skip(reason="Fix in theano not released yet: Theano#5908") - @theano.config.change_flags(compute_test_value="ignore") + @pytest.mark.skip(reason="Fix in aesara not released yet: Theano#5908") + @aesara.config.change_flags(compute_test_value="ignore") def test_hessian(self): - chol_vec = tt.vector("chol_vec") + chol_vec = aet.vector("chol_vec") chol_vec.tag.test_value = np.array([0.1, 2, 3]) - chol = tt.stack( + chol = aet.stack( [ - tt.stack([tt.exp(0.1 * chol_vec[0]), 0]), - tt.stack([chol_vec[1], 2 * tt.exp(chol_vec[2])]), + aet.stack([aet.exp(0.1 * chol_vec[0]), 0]), + aet.stack([chol_vec[1], 2 * aet.exp(chol_vec[2])]), ] ) - cov = tt.dot(chol, chol.T) - delta = tt.matrix("delta") + cov = aet.dot(chol, chol.T) + delta = aet.matrix("delta") delta.tag.test_value = np.ones((5, 2)) logp = MvNormalLogp()(cov, delta) - g_cov, g_delta = tt.grad(logp, [cov, delta]) - tt.grad(g_delta.sum() + g_cov.sum(), [delta, cov]) + g_cov, g_delta = aet.grad(logp, [cov, delta]) + aet.grad(g_delta.sum() + g_cov.sum(), [delta, cov]) class TestSplineWrapper: - 
@theano.config.change_flags(compute_test_value="ignore") + @aesara.config.change_flags(compute_test_value="ignore") def test_grad(self): x = np.linspace(0, 1, 100) y = x * x spline = SplineWrapper(interpolate.InterpolatedUnivariateSpline(x, y, k=1)) verify_grad(spline, [0.5]) - @theano.config.change_flags(compute_test_value="ignore") + @aesara.config.change_flags(compute_test_value="ignore") def test_hessian(self): x = np.linspace(0, 1, 100) y = x * x spline = SplineWrapper(interpolate.InterpolatedUnivariateSpline(x, y, k=1)) - x_var = tt.dscalar("x") - (g_x,) = tt.grad(spline(x_var), [x_var]) + x_var = aet.dscalar("x") + (g_x,) = aet.grad(spline(x_var), [x_var]) with pytest.raises(NotImplementedError): - tt.grad(g_x, [x_var]) + aet.grad(g_x, [x_var]) class TestI0e: - @theano.config.change_flags(compute_test_value="ignore") + @aesara.config.change_flags(compute_test_value="ignore") def test_grad(self): verify_grad(i0e, [0.5]) verify_grad(i0e, [-2.0]) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 06efc90b8d8..f26b6743b88 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -15,14 +15,15 @@ import itertools import sys +import aesara +import aesara.tensor as aet import numpy as np import numpy.random as nr import pytest import scipy.stats import scipy.stats.distributions as sp -import theano -import theano.tensor as tt +from aesara.tensor.var import TensorVariable from numpy import array, exp, inf, log from numpy.testing import assert_allclose, assert_almost_equal, assert_equal from packaging.version import parse @@ -32,6 +33,7 @@ import pymc3 as pm +from pymc3.aesaraf import floatX from pymc3.blocking import DictToVarBijection from pymc3.distributions import ( AR1, @@ -98,7 +100,6 @@ from pymc3.math import kronecker, logsumexp from pymc3.model import Deterministic, Model, Point from pymc3.tests.helpers import select_by_precision -from pymc3.theanof import floatX from pymc3.vartypes import continuous_types SCIPY_VERSION = parse(scipy_version) @@ -126,7 +127,7 @@ class Domain: def __init__(self, vals, dtype=None, edges=None, shape=None): avals = array(vals, dtype=dtype) if dtype is None and not str(avals.dtype).startswith("int"): - avals = avals.astype(theano.config.floatX) + avals = avals.astype(aesara.config.floatX) vals = [array(v, dtype=avals.dtype) for v in vals] if edges is None: @@ -941,7 +942,7 @@ def test_chi_squared(self): ) @pytest.mark.xfail( - condition=(theano.config.floatX == "float32"), + condition=(aesara.config.floatX == "float32"), reason="Poor CDF in SciPy. See scipy/scipy#869 for details.", ) def test_wald_scipy(self): @@ -1240,12 +1241,12 @@ def test_fun(value, mu, sigma): ) @pytest.mark.xfail( - condition=(theano.config.floatX == "float32"), + condition=(aesara.config.floatX == "float32"), reason="Fails on float32 due to numerical issues", ) def test_gamma_logcdf(self): - # pymc-devs/Theano-PyMC#224: skip_paramdomain_outside_edge_test has to be set - # True to avoid triggering a C-level assertion in the Theano GammaQ function + # pymc-devs/aesara#224: skip_paramdomain_outside_edge_test has to be set + # True to avoid triggering a C-level assertion in the Aesara GammaQ function # in gamma.c file. 
Can be set back to False (default) once that issue is solved self.check_logcdf( Gamma, @@ -1256,7 +1257,7 @@ def test_gamma_logcdf(self): ) @pytest.mark.xfail( - condition=(theano.config.floatX == "float32"), + condition=(aesara.config.floatX == "float32"), reason="Fails on float32 due to numerical issues", ) def test_inverse_gamma(self): @@ -1266,8 +1267,8 @@ def test_inverse_gamma(self): {"alpha": Rplus, "beta": Rplus}, lambda value, alpha, beta: sp.invgamma.logpdf(value, alpha, scale=beta), ) - # pymc-devs/Theano-PyMC#224: skip_paramdomain_outside_edge_test has to be set - # True to avoid triggering a C-level assertion in the Theano GammaQ function + # pymc-devs/aesara#224: skip_paramdomain_outside_edge_test has to be set + # True to avoid triggering a C-level assertion in the Aesara GammaQ function # in gamma.c file. Can be set back to False (default) once that issue is solved self.check_logcdf( InverseGamma, @@ -1278,7 +1279,7 @@ def test_inverse_gamma(self): ) @pytest.mark.xfail( - condition=(theano.config.floatX == "float32"), + condition=(aesara.config.floatX == "float32"), reason="Fails on float32 due to scaling issues", ) def test_inverse_gamma_alt_params(self): @@ -1309,7 +1310,7 @@ def test_pareto(self): ) @pytest.mark.xfail( - condition=(theano.config.floatX == "float32"), + condition=(aesara.config.floatX == "float32"), reason="Fails on float32 due to inf issues", ) def test_weibull(self): @@ -1366,7 +1367,7 @@ def test_binomial(self): ) # Too lazy to propagate decimal parameter through the whole chain of deps - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") @pytest.mark.xfail( condition=(SCIPY_VERSION < parse("1.4.0")), reason="betabinom is new in Scipy 1.4.0" ) @@ -1474,7 +1475,7 @@ def test_constantdist(self): self.check_logp(Constant, I, {"c": I}, lambda value, c: np.log(c == value)) # Too lazy to propagate decimal parameter through the whole chain of deps - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") def test_zeroinflatedpoisson(self): self.checkd( ZeroInflatedPoisson, @@ -1488,7 +1489,7 @@ def test_zeroinflatedpoisson(self): ) # Too lazy to propagate decimal parameter through the whole chain of deps - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") def test_zeroinflatednegativebinomial(self): self.checkd( ZeroInflatedNegativeBinomial, @@ -1503,7 +1504,7 @@ def test_zeroinflatednegativebinomial(self): ) # Too lazy to propagate decimal parameter through the whole chain of deps - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") def test_zeroinflatedbinomial(self): self.checkd( ZeroInflatedBinomial, @@ -1570,28 +1571,28 @@ def MvNormalUpper(*args, **kwargs): ) @pytest.mark.xfail( - condition=(theano.config.floatX == "float32"), + condition=(aesara.config.floatX == "float32"), reason="Fails on float32 due to inf issues", ) def test_mvnormal_indef(self): cov_val = np.array([[1, 0.5], [0.5, -2]]) - cov = tt.matrix("cov") + cov = aet.matrix("cov") cov.tag.test_value = np.eye(2) mu = floatX(np.zeros(2)) - x = tt.vector("x") + 
x = aet.vector("x") x.tag.test_value = np.zeros(2) logp = MvNormal.dist(mu=mu, cov=cov).logp(x) - f_logp = theano.function([cov, x], logp) + f_logp = aesara.function([cov, x], logp) assert f_logp(cov_val, np.ones(2)) == -np.inf - dlogp = tt.grad(logp, cov) - f_dlogp = theano.function([cov, x], dlogp) + dlogp = aet.grad(logp, cov) + f_dlogp = aesara.function([cov, x], dlogp) assert not np.all(np.isfinite(f_dlogp(cov_val, np.ones(2)))) logp = MvNormal.dist(mu=mu, tau=cov).logp(x) - f_logp = theano.function([cov, x], logp) + f_logp = aesara.function([cov, x], logp) assert f_logp(cov_val, np.ones(2)) == -np.inf - dlogp = tt.grad(logp, cov) - f_dlogp = theano.function([cov, x], dlogp) + dlogp = aet.grad(logp, cov) + f_dlogp = aesara.function([cov, x], dlogp) assert not np.all(np.isfinite(f_dlogp(cov_val, np.ones(2)))) def test_mvnormal_init_fail(self): @@ -1778,13 +1779,13 @@ def test_dirichlet_with_batch_shapes(self, dist_shape): assert_almost_equal(pymc3_res[idx], scipy_res) def test_dirichlet_shape(self): - a = tt.as_tensor_variable(np.r_[1, 2]) + a = aet.as_tensor_variable(np.r_[1, 2]) with pytest.warns(DeprecationWarning): dir_rv = Dirichlet.dist(a) assert dir_rv.shape == (2,) - with pytest.warns(DeprecationWarning), theano.change_flags(compute_test_value="ignore"): - dir_rv = Dirichlet.dist(tt.vector()) + with pytest.warns(DeprecationWarning), aesara.change_flags(compute_test_value="ignore"): + dir_rv = Dirichlet.dist(aet.vector()) def test_dirichlet_2D(self): self.check_logp( @@ -1925,16 +1926,16 @@ def test_multinomial_vec_2d_p(self): def test_batch_multinomial(self): n = 10 vals = np.zeros((4, 5, 3), dtype="int32") - p = np.zeros_like(vals, dtype=theano.config.floatX) + p = np.zeros_like(vals, dtype=aesara.config.floatX) inds = np.random.randint(vals.shape[-1], size=vals.shape[:-1])[..., None] np.put_along_axis(vals, inds, n, axis=-1) np.put_along_axis(p, inds, 1, axis=-1) dist = Multinomial.dist(n=n, p=p, shape=vals.shape) - value = tt.tensor3(dtype="int32") + value = aet.tensor3(dtype="int32") value.tag.test_value = np.zeros_like(vals, dtype="int32") - logp = tt.exp(dist.logp(value)) - f = theano.function(inputs=[value], outputs=logp) + logp = aet.exp(dist.logp(value)) + f = aesara.function(inputs=[value], outputs=logp) assert_almost_equal( f(vals), np.ones(vals.shape[:-1] + (1,)), @@ -2063,7 +2064,7 @@ def test_batch_dirichlet_multinomial(self): # except for one category / dimension which is given the value of 1000 n = 5 vals = np.zeros((4, 5, 3), dtype="int32") - a = np.zeros_like(vals, dtype=theano.config.floatX) + 0.001 + a = np.zeros_like(vals, dtype=aesara.config.floatX) + 0.001 inds = np.random.randint(vals.shape[-1], size=vals.shape[:-1])[..., None] np.put_along_axis(vals, inds, n, axis=-1) np.put_along_axis(a, inds, 1000, axis=-1) @@ -2213,7 +2214,7 @@ def test_ex_gaussian_cdf_outside_edges(self): skip_paramdomain_inside_edge_test=True, # Valid values are tested above ) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") def test_vonmises(self): self.check_logp( VonMises, @@ -2278,7 +2279,7 @@ def test_rice(self): lambda value, b, sigma: sp.rice.logpdf(value, b=b, loc=0, scale=sigma), ) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") def test_moyal(self): self.check_logp( Moyal, @@ -2293,7 +2294,7 @@ 
def test_moyal(self): lambda value, mu, sigma: floatX(sp.moyal.logcdf(value, mu, sigma)), ) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") def test_interpolated(self): for mu in R.vals: for sigma in Rplus.vals: @@ -2352,8 +2353,8 @@ def test_bound(): a = ArrayNormal("c", shape=2) assert_equal(a.tag.test_value, np.array([1.5, 2.5])) - lower = tt.vector("lower") - lower.tag.test_value = np.array([1, 2]).astype(theano.config.floatX) + lower = aet.vector("lower") + lower.tag.test_value = np.array([1, 2]).astype(aesara.config.floatX) upper = 3 ArrayNormal = Bound(Normal, lower=lower, upper=upper) dist = ArrayNormal.dist(mu=0, sigma=1, shape=2) @@ -2421,7 +2422,7 @@ def setup_class(self): nb2 = pm.NegativeBinomial("nb_with_p_n", p=pm.Uniform("nbp"), n=10) # Expected value of outcome - mu = Deterministic("mu", floatX(alpha + tt.dot(X, b))) + mu = Deterministic("mu", floatX(alpha + aet.dot(X, b))) # add a bounded variable as well bound_var = Bound(Normal, lower=1.0)("bound_var", mu=0, sigma=10) @@ -2582,7 +2583,7 @@ def test_issue_3051(self, dims, dist_cls, kwargs): X = np.random.normal(size=(20, dims)) actual_t = d.logp(X) - assert isinstance(actual_t, tt.TensorVariable) + assert isinstance(actual_t, TensorVariable) actual_a = actual_t.eval() assert isinstance(actual_a, np.ndarray) assert actual_a.shape == (X.shape[0],) diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index a56f3f3b7b2..684f1898ac6 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -17,12 +17,12 @@ from contextlib import ExitStack as does_not_raise +import aesara import numpy as np import numpy.random as nr import numpy.testing as npt import pytest import scipy.stats as st -import theano from scipy import linalg from scipy.special import expit @@ -1127,7 +1127,7 @@ def ref_rand(size, mu, sigma): pymc3_random(pm.Moyal, {"mu": R, "sigma": Rplus}, ref_rand=ref_rand) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") def test_interpolated(self): for mu in R.vals: for sigma in Rplus.vals: diff --git a/pymc3/tests/test_distributions_timeseries.py b/pymc3/tests/test_distributions_timeseries.py index b1401bd90e1..8319cde6544 100644 --- a/pymc3/tests/test_distributions_timeseries.py +++ b/pymc3/tests/test_distributions_timeseries.py @@ -15,6 +15,7 @@ import numpy as np import pytest +from pymc3.aesaraf import floatX from pymc3.distributions.continuous import Flat, Normal from pymc3.distributions.timeseries import AR, AR1, GARCH11, EulerMaruyama from pymc3.model import Model @@ -24,7 +25,6 @@ sample_posterior_predictive, ) from pymc3.tests.helpers import select_by_precision -from pymc3.theanof import floatX pytestmark = pytest.mark.usefixtures("seeded_test") diff --git a/pymc3/tests/test_examples.py b/pymc3/tests/test_examples.py index d79093b3927..5cb6c9c8c20 100644 --- a/pymc3/tests/test_examples.py +++ b/pymc3/tests/test_examples.py @@ -12,20 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import aesara +import aesara.tensor as aet import arviz as az import matplotlib import numpy as np import pandas as pd import pytest -import theano -import theano.tensor as tt from packaging import version import pymc3 as pm +from pymc3.aesaraf import floatX from pymc3.tests.helpers import SeededTest -from pymc3.theanof import floatX if version.parse(matplotlib.__version__) < version.parse("3.3"): matplotlib.use("Agg", warn=False) @@ -68,7 +68,7 @@ def build_model(self): with pm.Model() as model: effects = pm.Normal("effects", mu=0, sigma=100, shape=len(P.columns)) - logit_p = tt.dot(floatX(np.array(P)), effects) + logit_p = aet.dot(floatX(np.array(P)), effects) pm.Bernoulli("s", logit_p=logit_p, observed=floatX(data.switch.values)) return model @@ -186,13 +186,13 @@ def build_disaster_model(masked=False): # Allocate appropriate Poisson rates to years before and after current # switchpoint location idx = np.arange(years) - rate = tt.switch(switchpoint >= idx, early_mean, late_mean) + rate = aet.switch(switchpoint >= idx, early_mean, late_mean) # Data likelihood pm.Poisson("disasters", rate, observed=disasters_data) return model -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") +@pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") class TestDisasterModel(SeededTest): # Time series of recorded coal mining disasters in the UK from 1851 to 1962 def test_disaster_model(self): @@ -294,7 +294,7 @@ def test_run(self): @pytest.mark.xfail( - condition=(theano.config.floatX == "float32"), + condition=(aesara.config.floatX == "float32"), reason="Fails on float32 due to starting inf at starting logP", ) class TestRSV(SeededTest): diff --git a/pymc3/tests/test_gp.py b/pymc3/tests/test_gp.py index 893aeeaf77c..77f4261bc43 100644 --- a/pymc3/tests/test_gp.py +++ b/pymc3/tests/test_gp.py @@ -16,11 +16,11 @@ from functools import reduce from operator import add +import aesara +import aesara.tensor as aet import numpy as np import numpy.testing as npt import pytest -import theano -import theano.tensor as tt import pymc3 as pm @@ -34,7 +34,7 @@ def test_value(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: zero_mean = pm.gp.mean.Zero() - M = theano.function([], zero_mean(X))() + M = aesara.function([], zero_mean(X))() assert np.all(M == 0) assert M.shape == (10,) @@ -44,7 +44,7 @@ def test_value(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: const_mean = pm.gp.mean.Constant(6) - M = theano.function([], const_mean(X))() + M = aesara.function([], const_mean(X))() assert np.all(M == 6) assert M.shape == (10,) @@ -54,7 +54,7 @@ def test_value(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: linear_mean = pm.gp.mean.Linear(2, 0.5) - M = theano.function([], linear_mean(X))() + M = aesara.function([], linear_mean(X))() npt.assert_allclose(M[1], 0.7222, atol=1e-3) assert M.shape == (10,) @@ -66,7 +66,7 @@ def test_add(self): mean1 = pm.gp.mean.Linear(coeffs=2, intercept=0.5) mean2 = pm.gp.mean.Constant(2) mean = mean1 + mean2 + mean2 - M = theano.function([], mean(X))() + M = aesara.function([], mean(X))() npt.assert_allclose(M[1], 0.7222 + 2 + 2, atol=1e-3) def test_prod(self): @@ -75,7 +75,7 @@ def test_prod(self): mean1 = pm.gp.mean.Linear(coeffs=2, intercept=0.5) mean2 = pm.gp.mean.Constant(2) mean = mean1 * mean2 * mean2 - M = theano.function([], mean(X))() + M = aesara.function([], mean(X))() npt.assert_allclose(M[1], 0.7222 * 2 * 2, atol=1e-3) def 
test_add_multid(self): @@ -86,7 +86,7 @@ def test_add_multid(self): mean1 = pm.gp.mean.Linear(coeffs=A, intercept=b) mean2 = pm.gp.mean.Constant(2) mean = mean1 + mean2 + mean2 - M = theano.function([], mean(X))() + M = aesara.function([], mean(X))() npt.assert_allclose(M[1], 10.8965 + 2 + 2, atol=1e-3) def test_prod_multid(self): @@ -97,7 +97,7 @@ def test_prod_multid(self): mean1 = pm.gp.mean.Linear(coeffs=A, intercept=b) mean2 = pm.gp.mean.Constant(2) mean = mean1 * mean2 * mean2 - M = theano.function([], mean(X))() + M = aesara.function([], mean(X))() npt.assert_allclose(M[1], 10.8965 * 2 * 2, atol=1e-3) @@ -108,10 +108,10 @@ def test_symadd_cov(self): cov1 = pm.gp.cov.ExpQuad(1, 0.1) cov2 = pm.gp.cov.ExpQuad(1, 0.1) cov = cov1 + cov2 - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 2 * 0.53940, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_rightadd_scalar(self): @@ -119,10 +119,10 @@ def test_rightadd_scalar(self): with pm.Model() as model: a = 1 cov = pm.gp.cov.ExpQuad(1, 0.1) + a - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 1.53940, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_leftadd_scalar(self): @@ -130,10 +130,10 @@ def test_leftadd_scalar(self): with pm.Model() as model: a = 1 cov = a + pm.gp.cov.ExpQuad(1, 0.1) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 1.53940, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_rightadd_matrix(self): @@ -141,21 +141,21 @@ def test_rightadd_matrix(self): M = 2 * np.ones((10, 10)) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(1, 0.1) + M - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 2.53940, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_leftadd_matrixt(self): X = np.linspace(0, 1, 10)[:, None] - M = 2 * tt.ones((10, 10)) + M = 2 * aet.ones((10, 10)) with pm.Model() as model: cov = M + pm.gp.cov.ExpQuad(1, 0.1) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 2.53940, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_leftprod_matrix(self): @@ -164,8 +164,8 @@ def test_leftprod_matrix(self): with pm.Model() as model: cov = M + pm.gp.cov.ExpQuad(1, 0.1) cov_true = pm.gp.cov.ExpQuad(1, 0.1) + M - K = theano.function([], cov(X))() - K_true = theano.function([], cov_true(X))() + K = aesara.function([], cov(X))() + K_true = aesara.function([], cov_true(X))() assert np.allclose(K, K_true) def test_inv_rightadd(self): @@ -181,10 +181,10 @@ def test_symprod_cov(self): cov1 = pm.gp.cov.ExpQuad(1, 0.1) cov2 = pm.gp.cov.ExpQuad(1, 0.1) cov = cov1 * cov2 - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.53940 * 0.53940, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, 
diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_rightprod_scalar(self): @@ -192,10 +192,10 @@ def test_rightprod_scalar(self): with pm.Model() as model: a = 2 cov = pm.gp.cov.ExpQuad(1, 0.1) * a - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 2 * 0.53940, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_leftprod_scalar(self): @@ -203,10 +203,10 @@ def test_leftprod_scalar(self): with pm.Model() as model: a = 2 cov = a * pm.gp.cov.ExpQuad(1, 0.1) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 2 * 0.53940, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_rightprod_matrix(self): @@ -214,10 +214,10 @@ def test_rightprod_matrix(self): M = 2 * np.ones((10, 10)) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(1, 0.1) * M - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 2 * 0.53940, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_leftprod_matrix(self): @@ -226,8 +226,8 @@ def test_leftprod_matrix(self): with pm.Model() as model: cov = M * pm.gp.cov.ExpQuad(1, 0.1) cov_true = pm.gp.cov.ExpQuad(1, 0.1) * M - K = theano.function([], cov(X))() - K_true = theano.function([], cov_true(X))() + K = aesara.function([], cov(X))() + K_true = aesara.function([], cov_true(X))() assert np.allclose(K, K_true) def test_multiops(self): @@ -244,12 +244,12 @@ def test_multiops(self): + pm.gp.cov.ExpQuad(1, 0.1) + 3 ) - K1 = theano.function([], cov1(X))() - K2 = theano.function([], cov2(X))() + K1 = aesara.function([], cov1(X))() + K2 = aesara.function([], cov2(X))() assert np.allclose(K1, K2) # check diagonal - K1d = theano.function([], cov1(X, diag=True))() - K2d = theano.function([], cov2(X, diag=True))() + K1d = aesara.function([], cov1(X, diag=True))() + K2d = aesara.function([], cov2(X, diag=True))() npt.assert_allclose(np.diag(K1), K2d, atol=1e-5) npt.assert_allclose(np.diag(K2), K1d, atol=1e-5) @@ -265,10 +265,10 @@ def test_symexp_cov(self): with pm.Model() as model: cov1 = pm.gp.cov.ExpQuad(1, 0.1) cov = cov1 ** 2 - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_covexp_numpy(self): @@ -276,32 +276,32 @@ def test_covexp_numpy(self): with pm.Model() as model: a = np.array([[2]]) cov = pm.gp.cov.ExpQuad(1, 0.1) ** a - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) - def test_covexp_theano(self): + def test_covexp_aesara(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: - a = tt.alloc(2.0, 1, 1) + a = aet.alloc(2.0, 1, 1) cov = pm.gp.cov.ExpQuad(1, 0.1) ** a - K = theano.function([], cov(X))() + K = 
aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_covexp_shared(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: - a = theano.shared(2.0) + a = aesara.shared(2.0) cov = pm.gp.cov.ExpQuad(1, 0.1) ** a - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_invalid_covexp(self): @@ -321,11 +321,11 @@ def test_symprod_cov(self): cov1 = pm.gp.cov.ExpQuad(1, 0.1) cov2 = pm.gp.cov.ExpQuad(1, 0.1) cov = pm.gp.cov.Kron([cov1, cov2]) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 1 * 0.53940, atol=1e-3) npt.assert_allclose(K[0, 11], 0.53940 * 0.53940, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_multiops(self): @@ -342,8 +342,8 @@ def test_multiops(self): ) cov2 = pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(2, 0.1) cov = pm.gp.cov.Kron([cov1, cov2]) - K_true = kronecker(theano.function([], cov1(X1))(), theano.function([], cov2(X2))()).eval() - K = theano.function([], cov(X))() + K_true = kronecker(aesara.function([], cov1(X1))(), aesara.function([], cov2(X2))()).eval() + K = aesara.function([], cov(X))() npt.assert_allclose(K_true, K) @@ -352,30 +352,30 @@ def test_slice1(self): X = np.linspace(0, 1, 30).reshape(10, 3) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(3, 0.1, active_dims=[0, 0, 1]) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.20084298, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_slice2(self): X = np.linspace(0, 1, 30).reshape(10, 3) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(3, ls=[0.1, 0.1], active_dims=[1, 2]) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.34295549, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_slice3(self): X = np.linspace(0, 1, 30).reshape(10, 3) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(3, ls=np.array([0.1, 0.1]), active_dims=[1, 2]) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.34295549, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_diffslice(self): @@ -384,10 +384,10 @@ def test_diffslice(self): cov = pm.gp.cov.ExpQuad(3, ls=0.1, active_dims=[1, 0, 0]) + pm.gp.cov.ExpQuad( 3, ls=[0.1, 0.2, 0.3] ) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.683572, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_raises(self): @@ -402,7 +402,7 @@ def test_stable(self): X 
= np.random.uniform(low=320.0, high=400.0, size=[2000, 2]) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(2, 0.1) - dists = theano.function([], cov.square_dist(X, X))() + dists = aesara.function([], cov.square_dist(X, X))() assert not np.any(dists < 0) @@ -411,44 +411,44 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.ExpQuad(1, 0.1) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.53940, atol=1e-3) - K = theano.function([], cov(X, X))() + K = aesara.function([], cov(X, X))() npt.assert_allclose(K[0, 1], 0.53940, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_2d(self): X = np.linspace(0, 1, 10).reshape(5, 2) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(2, 0.5) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.820754, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_2dard(self): X = np.linspace(0, 1, 10).reshape(5, 2) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(2, np.array([1, 2])) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.969607, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_inv_lengthscale(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.ExpQuad(1, ls_inv=10) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.53940, atol=1e-3) - K = theano.function([], cov(X, X))() + K = aesara.function([], cov(X, X))() npt.assert_allclose(K[0, 1], 0.53940, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -457,14 +457,14 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.WhiteNoise(sigma=0.5) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.0, atol=1e-3) npt.assert_allclose(K[0, 0], 0.5 ** 2, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) # check predict - K = theano.function([], cov(X, X))() + K = aesara.function([], cov(X, X))() npt.assert_allclose(K[0, 1], 0.0, atol=1e-3) # white noise predicting should return all zeros npt.assert_allclose(K[0, 0], 0.0, atol=1e-3) @@ -475,14 +475,14 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Constant(2.5) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 2.5, atol=1e-3) npt.assert_allclose(K[0, 0], 2.5, atol=1e-3) - K = theano.function([], cov(X, X))() + K = aesara.function([], cov(X, X))() npt.assert_allclose(K[0, 1], 2.5, atol=1e-3) npt.assert_allclose(K[0, 0], 2.5, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -491,12 +491,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with 
pm.Model() as model: cov = pm.gp.cov.RatQuad(1, ls=0.1, alpha=0.5) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.66896, atol=1e-3) - K = theano.function([], cov(X, X))() + K = aesara.function([], cov(X, X))() npt.assert_allclose(K[0, 1], 0.66896, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -505,12 +505,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Exponential(1, 0.1) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.57375, atol=1e-3) - K = theano.function([], cov(X, X))() + K = aesara.function([], cov(X, X))() npt.assert_allclose(K[0, 1], 0.57375, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -519,12 +519,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Matern52(1, 0.1) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.46202, atol=1e-3) - K = theano.function([], cov(X, X))() + K = aesara.function([], cov(X, X))() npt.assert_allclose(K[0, 1], 0.46202, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -533,12 +533,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Matern32(1, 0.1) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.42682, atol=1e-3) - K = theano.function([], cov(X, X))() + K = aesara.function([], cov(X, X))() npt.assert_allclose(K[0, 1], 0.42682, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -547,11 +547,11 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Matern12(1, 0.1) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.32919, atol=1e-3) - K = theano.function([], cov(X, X))() + K = aesara.function([], cov(X, X))() npt.assert_allclose(K[0, 1], 0.32919, atol=1e-3) - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -560,12 +560,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Cosine(1, 0.1) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.766, atol=1e-3) - K = theano.function([], cov(X, X))() + K = aesara.function([], cov(X, X))() npt.assert_allclose(K[0, 1], 0.766, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -574,12 +574,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Periodic(1, 0.1, 0.1) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.00288, atol=1e-3) - K = theano.function([], cov(X, X))() + K = aesara.function([], cov(X, X))() npt.assert_allclose(K[0, 1], 0.00288, 
atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -588,12 +588,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Linear(1, 0.5) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.19444, atol=1e-3) - K = theano.function([], cov(X, X))() + K = aesara.function([], cov(X, X))() npt.assert_allclose(K[0, 1], 0.19444, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -602,12 +602,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Polynomial(1, 0.5, 2, 0) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.03780, atol=1e-3) - K = theano.function([], cov(X, X))() + K = aesara.function([], cov(X, X))() npt.assert_allclose(K[0, 1], 0.03780, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -616,17 +616,17 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] def warp_func(x, a, b, c): - return x + (a * tt.tanh(b * (x - c))) + return x + (a * aet.tanh(b * (x - c))) with pm.Model() as model: cov_m52 = pm.gp.cov.Matern52(1, 0.2) cov = pm.gp.cov.WarpedInput(1, warp_func=warp_func, args=(1, 10, 1), cov_func=cov_m52) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 0.79593, atol=1e-3) - K = theano.function([], cov(X, X))() + K = aesara.function([], cov(X, X))() npt.assert_allclose(K[0, 1], 0.79593, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_raises(self): @@ -642,16 +642,16 @@ def test_1d(self): X = np.linspace(0, 2, 10)[:, None] def tanh_func(x, x1, x2, w, x0): - return (x1 + x2) / 2.0 - (x1 - x2) / 2.0 * tt.tanh((x - x0) / w) + return (x1 + x2) / 2.0 - (x1 - x2) / 2.0 * aet.tanh((x - x0) / w) with pm.Model() as model: cov = pm.gp.cov.Gibbs(1, tanh_func, args=(0.05, 0.6, 0.4, 1.0)) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[2, 3], 0.136683, atol=1e-4) - K = theano.function([], cov(X, X))() + K = aesara.function([], cov(X, X))() npt.assert_allclose(K[2, 3], 0.136683, atol=1e-4) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_raises(self): @@ -673,12 +673,12 @@ def scaling_func(x, a, b): with pm.Model() as model: cov_m52 = pm.gp.cov.Matern52(1, 0.2) cov = pm.gp.cov.ScaledCov(1, scaling_func=scaling_func, args=(2, -1), cov_func=cov_m52) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], 3.00686, atol=1e-3) - K = theano.function([], cov(X, X))() + K = aesara.function([], cov(X, X))() npt.assert_allclose(K[0, 1], 3.00686, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_raises(self): @@ -1200,12 +1200,12 @@ def test_1d_tau1(self): etalon = 0.600881 with pm.Model(): cov = pm.gp.cov.Circular(1, 1, tau=5) - K = 
theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], etalon, atol=1e-3) - K = theano.function([], cov(X, X))() + K = aesara.function([], cov(X, X))() npt.assert_allclose(K[0, 1], etalon, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_1d_tau2(self): @@ -1213,10 +1213,10 @@ def test_1d_tau2(self): etalon = 0.691239 with pm.Model(): cov = pm.gp.cov.Circular(1, 1, tau=4) - K = theano.function([], cov(X))() + K = aesara.function([], cov(X))() npt.assert_allclose(K[0, 1], etalon, atol=1e-3) - K = theano.function([], cov(X, X))() + K = aesara.function([], cov(X, X))() npt.assert_allclose(K[0, 1], etalon, atol=1e-3) # check diagonal - Kd = theano.function([], cov(X, diag=True))() + Kd = aesara.function([], cov(X, diag=True))() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) diff --git a/pymc3/tests/test_hmc.py b/pymc3/tests/test_hmc.py index 057c3170750..1a113343cc8 100644 --- a/pymc3/tests/test_hmc.py +++ b/pymc3/tests/test_hmc.py @@ -19,9 +19,9 @@ import pymc3 +from pymc3.aesaraf import floatX from pymc3.step_methods.hmc.base_hmc import BaseHMC from pymc3.tests import models -from pymc3.theanof import floatX logger = logging.getLogger("pymc3") diff --git a/pymc3/tests/test_math.py b/pymc3/tests/test_math.py index b31319021fd..b82459602db 100644 --- a/pymc3/tests/test_math.py +++ b/pymc3/tests/test_math.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import aesara +import aesara.tensor as aet import numpy as np import numpy.testing as npt import pytest -import theano -import theano.tensor as tt from scipy.special import logsumexp as scipy_logsumexp +from pymc3.aesaraf import floatX from pymc3.math import ( LogDet, cartesian, @@ -36,7 +37,6 @@ probit, ) from pymc3.tests.helpers import SeededTest, verify_grad -from pymc3.theanof import floatX def test_kronecker(): @@ -45,7 +45,7 @@ def test_kronecker(): [a, b, c] = [np.random.rand(3, 3 + i) for i in range(3)] custom = kronecker(a, b, c) # Custom version - nested = tt.slinalg.kron(a, tt.slinalg.kron(b, c)) + nested = aet.slinalg.kron(a, aet.slinalg.kron(b, c)) np.testing.assert_array_almost_equal(custom.eval(), nested.eval()) # Standard nested version @@ -83,7 +83,7 @@ def test_kron_dot(): x = np.random.rand(tot_size).reshape((tot_size, 1)) # Construct entire kronecker product then multiply big = kronecker(*Ks) - slow_ans = tt.dot(big, x) + slow_ans = aet.dot(big, x) # Use tricks to avoid construction of entire kronecker product fast_ans = kron_dot(Ks, x) np.testing.assert_array_almost_equal(slow_ans.eval(), fast_ans.eval()) @@ -98,7 +98,7 @@ def test_kron_solve_lower(): x = np.random.rand(tot_size).reshape((tot_size, 1)) # Construct entire kronecker product then solve big = kronecker(*Ls) - slow_ans = tt.slinalg.solve_lower_triangular(big, x) + slow_ans = aet.slinalg.solve_lower_triangular(big, x) # Use tricks to avoid construction of entire kronecker product fast_ans = kron_solve_lower(Ls, x) np.testing.assert_array_almost_equal(slow_ans.eval(), fast_ans.eval()) @@ -170,10 +170,10 @@ def setup_method(self): self.op_class = LogDet self.op = logdet - @theano.config.change_flags(compute_test_value="ignore") + @aesara.config.change_flags(compute_test_value="ignore") def validate(self, input_mat): - x = theano.tensor.matrix() - f = theano.function([x], self.op(x)) + x = aesara.tensor.matrix() + f = 
aesara.function([x], self.op(x)) out = f(input_mat) svd_diag = np.linalg.svd(input_mat, compute_uv=False) numpy_out = np.sum(np.log(np.abs(svd_diag))) @@ -185,24 +185,24 @@ def validate(self, input_mat): verify_grad(self.op, [input_mat]) @pytest.mark.skipif( - theano.config.device in ["cuda", "gpu"], + aesara.config.device in ["cuda", "gpu"], reason="No logDet implementation on GPU.", ) def test_basic(self): # Calls validate with different params test_case_1 = np.random.randn(3, 3) / np.sqrt(3) test_case_2 = np.random.randn(10, 10) / np.sqrt(10) - self.validate(test_case_1.astype(theano.config.floatX)) - self.validate(test_case_2.astype(theano.config.floatX)) + self.validate(test_case_1.astype(aesara.config.floatX)) + self.validate(test_case_2.astype(aesara.config.floatX)) def test_expand_packed_triangular(): with pytest.raises(ValueError): - x = tt.matrix("x") - x.tag.test_value = np.array([[1.0]], dtype=theano.config.floatX) + x = aet.matrix("x") + x.tag.test_value = np.array([[1.0]], dtype=aesara.config.floatX) expand_packed_triangular(5, x) N = 5 - packed = tt.vector("packed") + packed = aet.vector("packed") packed.tag.test_value = floatX(np.zeros(N * (N + 1) // 2)) with pytest.raises(TypeError): expand_packed_triangular(packed.shape[0], packed) diff --git a/pymc3/tests/test_minibatches.py b/pymc3/tests/test_minibatches.py index 34dadaa8eba..9629a0765c7 100644 --- a/pymc3/tests/test_minibatches.py +++ b/pymc3/tests/test_minibatches.py @@ -15,18 +15,18 @@ import itertools import pickle +import aesara import numpy as np import pytest -import theano +from aesara import tensor as aet from scipy import stats as stats -from theano import tensor as tt import pymc3 as pm -from pymc3 import GeneratorAdapter, Normal, floatX, generator, tt_rng +from pymc3 import GeneratorAdapter, Normal, aet_rng, floatX, generator +from pymc3.aesaraf import GeneratorOp from pymc3.tests.helpers import select_by_precision -from pymc3.theanof import GeneratorOp class _DataSampler: @@ -35,7 +35,7 @@ class _DataSampler: """ def __init__(self, data, batchsize=50, random_seed=42, dtype="floatX"): - self.dtype = theano.config.floatX if dtype == "floatX" else dtype + self.dtype = aesara.config.floatX if dtype == "floatX" else dtype self.rng = np.random.RandomState(random_seed) self.data = data self.n = batchsize @@ -77,7 +77,7 @@ def test_basic(self): generator = GeneratorAdapter(integers()) gop = GeneratorOp(generator)() assert gop.tag.test_value == np.float32(0) - f = theano.function([], gop) + f = aesara.function([], gop) assert f() == np.float32(0) assert f() == np.float32(1) for _ in range(2, 100): @@ -89,7 +89,7 @@ def test_ndim(self): res = list(itertools.islice(integers_ndim(ndim), 0, 2)) generator = GeneratorAdapter(integers_ndim(ndim)) gop = GeneratorOp(generator)() - f = theano.function([], gop) + f = aesara.function([], gop) assert ndim == res[0].ndim np.testing.assert_equal(f(), res[0]) np.testing.assert_equal(f(), res[1]) @@ -97,9 +97,9 @@ def test_ndim(self): def test_cloning_available(self): gop = generator(integers()) res = gop ** 2 - shared = theano.shared(floatX(10)) - res1 = theano.clone(res, {gop: shared}) - f = theano.function([], res1) + shared = aesara.shared(floatX(10)) + res1 = aesara.clone_replace(res, {gop: shared}) + f = aesara.function([], res1) assert f() == np.float32(100) def test_default_value(self): @@ -108,7 +108,7 @@ def gen(): yield floatX(np.ones((10, 10)) * i) gop = generator(gen(), np.ones((10, 10)) * 10) - f = theano.function([], gop) + f = aesara.function([], gop) 
np.testing.assert_equal(np.ones((10, 10)) * 0, f()) np.testing.assert_equal(np.ones((10, 10)) * 1, f()) np.testing.assert_equal(np.ones((10, 10)) * 10, f()) @@ -121,7 +121,7 @@ def gen(): yield floatX(np.ones((10, 10)) * i) gop = generator(gen()) - f = theano.function([], gop) + f = aesara.function([], gop) np.testing.assert_equal(np.ones((10, 10)) * 0, f()) np.testing.assert_equal(np.ones((10, 10)) * 1, f()) with pytest.raises(StopIteration): @@ -139,12 +139,12 @@ def test_pickling(self, datagen): def test_gen_cloning_with_shape_change(self, datagen): gen = generator(datagen) - gen_r = tt_rng().normal(size=gen.shape).T + gen_r = aet_rng().normal(size=gen.shape).T X = gen.dot(gen_r) - res, _ = theano.scan(lambda x: x.sum(), X, n_steps=X.shape[0]) + res, _ = aesara.scan(lambda x: x.sum(), X, n_steps=X.shape[0]) assert res.eval().shape == (50,) - shared = theano.shared(datagen.data.astype(gen.dtype)) - res2 = theano.clone(res, {gen: shared ** 2}) + shared = aesara.shared(datagen.data.astype(gen.dtype)) + res2 = aesara.clone_replace(res, {gen: shared ** 2}) assert res2.eval().shape == (1000,) @@ -170,11 +170,11 @@ class TestScaling: def test_density_scaling(self): with pm.Model() as model1: Normal("n", observed=[[1]], total_size=1) - p1 = theano.function([], model1.logpt) + p1 = aesara.function([], model1.logpt) with pm.Model() as model2: Normal("n", observed=[[1]], total_size=2) - p2 = theano.function([], model2.logpt) + p2 = aesara.function([], model2.logpt) assert p1() * 2 == p2() def test_density_scaling_with_genarator(self): @@ -189,12 +189,12 @@ def true_dens(): # We have same size models with pm.Model() as model1: Normal("n", observed=gen1(), total_size=100) - p1 = theano.function([], model1.logpt) + p1 = aesara.function([], model1.logpt) with pm.Model() as model2: gen_var = generator(gen2()) Normal("n", observed=gen_var, total_size=100) - p2 = theano.function([], model2.logpt) + p2 = aesara.function([], model2.logpt) for i in range(10): _1, _2, _t = p1(), p2(), next(t) @@ -208,12 +208,12 @@ def test_gradient_with_scaling(self): genvar = generator(gen1()) m = Normal("m") Normal("n", observed=genvar, total_size=1000) - grad1 = theano.function([m], tt.grad(model1.logpt, m)) + grad1 = aesara.function([m], aet.grad(model1.logpt, m)) with pm.Model() as model2: m = Normal("m") - shavar = theano.shared(np.ones((1000, 100))) + shavar = aesara.shared(np.ones((1000, 100))) Normal("n", observed=shavar) - grad2 = theano.function([m], tt.grad(model2.logpt, m)) + grad2 = aesara.function([m], aet.grad(model2.logpt, m)) for i in range(10): shavar.set_value(np.ones((100, 100)) * i) @@ -224,27 +224,27 @@ def test_gradient_with_scaling(self): def test_multidim_scaling(self): with pm.Model() as model0: Normal("n", observed=[[1, 1], [1, 1]], total_size=[]) - p0 = theano.function([], model0.logpt) + p0 = aesara.function([], model0.logpt) with pm.Model() as model1: Normal("n", observed=[[1, 1], [1, 1]], total_size=[2, 2]) - p1 = theano.function([], model1.logpt) + p1 = aesara.function([], model1.logpt) with pm.Model() as model2: Normal("n", observed=[[1], [1]], total_size=[2, 2]) - p2 = theano.function([], model2.logpt) + p2 = aesara.function([], model2.logpt) with pm.Model() as model3: Normal("n", observed=[[1, 1]], total_size=[2, 2]) - p3 = theano.function([], model3.logpt) + p3 = aesara.function([], model3.logpt) with pm.Model() as model4: Normal("n", observed=[[1]], total_size=[2, 2]) - p4 = theano.function([], model4.logpt) + p4 = aesara.function([], model4.logpt) with pm.Model() as model5: Normal("n", 
observed=[[1]], total_size=[2, Ellipsis, 2]) - p5 = theano.function([], model5.logpt) + p5 = aesara.function([], model5.logpt) _p0 = p0() assert ( np.allclose(_p0, p1()) @@ -287,11 +287,11 @@ def test_mixed2(self): def test_free_rv(self): with pm.Model() as model4: Normal("n", observed=[[1, 1], [1, 1]], total_size=[2, 2]) - p4 = theano.function([], model4.logpt) + p4 = aesara.function([], model4.logpt) with pm.Model() as model5: Normal("n", total_size=[2, Ellipsis, 2], shape=(1, 1), broadcastable=(False, False)) - p5 = theano.function([model5.n], model5.logpt) + p5 = aesara.function([model5.n], model5.logpt) assert p4() == p5(pm.floatX([[1]])) assert p4() == p5(pm.floatX([[1, 1], [1, 1]])) @@ -327,15 +327,15 @@ def test_special4(self): def test_cloning_available(self): gop = pm.Minibatch(np.arange(100), 1) res = gop ** 2 - shared = theano.shared(np.array([10])) - res1 = theano.clone(res, {gop: shared}) - f = theano.function([], res1) + shared = aesara.shared(np.array([10])) + res1 = aesara.clone_replace(res, {gop: shared}) + f = aesara.function([], res1) assert f() == np.array([100]) def test_align(self): m = pm.Minibatch(np.arange(1000), 1, random_seed=1) n = pm.Minibatch(np.arange(1000), 1, random_seed=1) - f = theano.function([], [m, n]) + f = aesara.function([], [m, n]) n.eval() # not aligned a, b = zip(*(f() for _ in range(1000))) assert a != b diff --git a/pymc3/tests/test_mixture.py b/pymc3/tests/test_mixture.py index 94b272bf43d..a6646b812b9 100644 --- a/pymc3/tests/test_mixture.py +++ b/pymc3/tests/test_mixture.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import aesara import numpy as np import pytest import scipy.stats as st -import theano +from aesara import tensor as aet from numpy.testing import assert_allclose from scipy.special import logsumexp -from theano import tensor as tt import pymc3 as pm @@ -37,9 +37,9 @@ Poisson, sample, ) +from pymc3.aesaraf import floatX from pymc3.distributions.shape_utils import to_tuple from pymc3.tests.helpers import SeededTest -from pymc3.theanof import floatX # Generate data @@ -248,7 +248,7 @@ def test_mixture_of_mvn(self): st.multivariate_normal.logpdf(obs, mu2, cov2), ) ).T - complogp = y.distribution._comp_logp(theano.shared(obs)).eval() + complogp = y.distribution._comp_logp(aesara.shared(obs)).eval() assert_allclose(complogp, complogp_st) # check logp of mixture @@ -264,7 +264,7 @@ def test_mixture_of_mvn(self): assert_allclose(model.logp(testpoint), mixlogp_st.sum() + priorlogp) def test_mixture_of_mixture(self): - if theano.config.floatX == "float32": + if aesara.config.floatX == "float32": rtol = 1e-4 else: rtol = 1e-7 @@ -290,7 +290,7 @@ def test_mixture_of_mixture(self): test_point = model.test_point def mixmixlogp(value, point): - floatX = theano.config.floatX + floatX = aesara.config.floatX priorlogp = ( st.dirichlet.logpdf( x=point["g_w"], @@ -392,7 +392,7 @@ def setup_method(self, *args, **kwargs): super().setup_method(*args, **kwargs) self.nd = 3 self.npop = 3 - self.mus = tt.as_tensor_variable( + self.mus = aet.as_tensor_variable( np.tile( np.reshape( np.arange(self.npop), @@ -446,7 +446,7 @@ def test_2d_w(self): shape=nd, ) z = pm.Categorical("z", p=np.ones(npop) / npop, shape=nd) - mu = tt.as_tensor_variable([mus[i, z[i]] for i in range(nd)]) + mu = aet.as_tensor_variable([mus[i, z[i]] for i in range(nd)]) latent_m = pm.Normal("latent_m", mu=mu, sigma=1e-5, shape=nd) m_val = m.random(size=size) @@ -470,7 +470,7 @@ def 
samples_from_same_distribution(self, *args): assert p_marginal >= 0.05 and p_correlation >= 0.05 def logp_matches(self, mixture, latent_mix, z, npop, model): - if theano.config.floatX == "float32": + if aesara.config.floatX == "float32": rtol = 1e-4 else: rtol = 1e-7 @@ -523,7 +523,7 @@ def test_with_multinomial(self, batch_shape): assert prior["mixture"].shape == (self.n_samples, *batch_shape, 3) assert mixture.random(size=self.size).shape == (self.size, *batch_shape, 3) - if theano.config.floatX == "float32": + if aesara.config.floatX == "float32": rtol = 1e-4 else: rtol = 1e-7 @@ -558,7 +558,7 @@ def test_with_mvnormal(self): assert prior["mixture"].shape == (self.n_samples, 3) assert mixture.random(size=self.size).shape == (self.size, 3) - if theano.config.floatX == "float32": + if aesara.config.floatX == "float32": rtol = 1e-4 else: rtol = 1e-7 diff --git a/pymc3/tests/test_model.py b/pymc3/tests/test_model.py index 2e5a83c1c33..4d747e203da 100644 --- a/pymc3/tests/test_model.py +++ b/pymc3/tests/test_model.py @@ -15,12 +15,12 @@ import pickle import unittest +import aesara +import aesara.tensor as aet import numpy as np import numpy.testing as npt import pandas as pd import pytest -import theano -import theano.tensor as tt import pymc3 as pm @@ -39,8 +39,8 @@ def __init__(self, name="", model=None): self.v2 = pm.Normal("v2", mu=0, sigma=1) # 2) Potentials and Deterministic variables with method too # be sure that names will not overlap with other same models - pm.Deterministic("d", tt.constant(1)) - pm.Potential("p", tt.constant(1)) + pm.Deterministic("d", aet.constant(1)) + pm.Potential("p", aet.constant(1)) class DocstringModel(pm.Model): @@ -50,7 +50,7 @@ def __init__(self, mean=0, sigma=1, name="", model=None): Normal("v2", mu=mean, sigma=sigma) Normal("v3", mu=mean, sigma=HalfCauchy("sd", beta=10, testval=1.0)) Deterministic("v3_sq", self.v3 ** 2) - Potential("p1", tt.constant(1)) + Potential("p1", aet.constant(1)) class TestBaseModel: @@ -156,7 +156,7 @@ def test_observed_rv_fail(self): def test_observed_type(self): X_ = np.random.randn(100, 5) - X = pm.floatX(theano.shared(X_)) + X = pm.floatX(aesara.shared(X_)) with pm.Model(): x1 = pm.Normal("x1", observed=X_) x2 = pm.Normal("x2", observed=X) @@ -165,21 +165,21 @@ def test_observed_type(self): assert x2.type == X.type -class TestTheanoConfig: +class TestAesaraConfig: def test_set_testval_raise(self): - with theano.config.change_flags(compute_test_value="off"): + with aesara.config.change_flags(compute_test_value="off"): with pm.Model(): - assert theano.config.compute_test_value == "raise" - assert theano.config.compute_test_value == "off" + assert aesara.config.compute_test_value == "raise" + assert aesara.config.compute_test_value == "off" def test_nested(self): - with theano.config.change_flags(compute_test_value="off"): - with pm.Model(theano_config={"compute_test_value": "ignore"}): - assert theano.config.compute_test_value == "ignore" - with pm.Model(theano_config={"compute_test_value": "warn"}): - assert theano.config.compute_test_value == "warn" - assert theano.config.compute_test_value == "ignore" - assert theano.config.compute_test_value == "off" + with aesara.config.change_flags(compute_test_value="off"): + with pm.Model(aesara_config={"compute_test_value": "ignore"}): + assert aesara.config.compute_test_value == "ignore" + with pm.Model(aesara_config={"compute_test_value": "warn"}): + assert aesara.config.compute_test_value == "warn" + assert aesara.config.compute_test_value == "ignore" + assert 
aesara.config.compute_test_value == "off" def test_matrix_multiplication(): @@ -262,7 +262,7 @@ def test_empty_observed(): class TestValueGradFunction(unittest.TestCase): def test_no_extra(self): - a = tt.vector("a") + a = aet.vector("a") a.tag.test_value = np.zeros(3, dtype=a.dtype) a.dshape = (3,) a.dsize = 3 @@ -270,7 +270,7 @@ def test_no_extra(self): assert f_grad.size == 3 def test_invalid_type(self): - a = tt.ivector("a") + a = aet.ivector("a") a.tag.test_value = np.zeros(3, dtype=a.dtype) a.dshape = (3,) a.dsize = 3 @@ -279,19 +279,19 @@ def test_invalid_type(self): err.match("Invalid dtype") def setUp(self): - extra1 = tt.iscalar("extra1") + extra1 = aet.iscalar("extra1") extra1_ = np.array(0, dtype=extra1.dtype) extra1.tag.test_value = extra1_ extra1.dshape = tuple() extra1.dsize = 1 - val1 = tt.vector("val1") + val1 = aet.vector("val1") val1_ = np.zeros(3, dtype=val1.dtype) val1.tag.test_value = val1_ val1.dshape = (3,) val1.dsize = 3 - val2 = tt.matrix("val2") + val2 = aet.matrix("val2") val2_ = np.zeros((2, 3), dtype=val2.dtype) val2.tag.test_value = val2_ val2.dshape = (2, 3) @@ -366,8 +366,8 @@ def test_tensor_type_conversion(self): assert m["x2_missing"].type == gf._extra_vars_shared["x2_missing"].type - def test_theano_switch_broadcast_edge_cases(self): - # Tests against two subtle issues related to a previous bug in Theano where tt.switch would not + def test_aesara_switch_broadcast_edge_cases(self): + # Tests against two subtle issues related to a previous bug in Aesara where aet.switch would not # always broadcast tensors with single values https://github.com/pymc-devs/aesara/issues/270 # Known issue 1: https://github.com/pymc-devs/pymc3/issues/4389 diff --git a/pymc3/tests/test_model_graph.py b/pymc3/tests/test_model_graph.py index d68abafaabd..fe0d10955c2 100644 --- a/pymc3/tests/test_model_graph.py +++ b/pymc3/tests/test_model_graph.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import aesara as th import numpy as np -import theano as th import pymc3 as pm diff --git a/pymc3/tests/test_model_helpers.py b/pymc3/tests/test_model_helpers.py index 20745febad0..72bd1b058a7 100644 --- a/pymc3/tests/test_model_helpers.py +++ b/pymc3/tests/test_model_helpers.py @@ -12,15 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import aesara
+import aesara.sparse as sparse
+import aesara.tensor as aet
 import numpy as np
 import numpy.ma as ma
 import numpy.testing as npt
 import pandas as pd
 import pytest
 import scipy.sparse as sps
-import theano
-import theano.sparse as sparse
-import theano.tensor as tt
+
+from aesara.graph.basic import Variable
+from aesara.tensor.var import TensorConstant, TensorVariable
 
 import pymc3 as pm
 
@@ -37,7 +40,7 @@ def test_pandas_to_array(self, input_dtype):
         dense_input = np.arange(9).reshape((3, 3)).astype(input_dtype)
 
         input_name = "input_variable"
-        theano_graph_input = tt.as_tensor(dense_input, name=input_name)
+        aesara_graph_input = aet.as_tensor(dense_input, name=input_name)
         pandas_input = pd.DataFrame(dense_input)
 
         # All the even numbers are replaced with NaN
@@ -77,22 +80,22 @@ def test_pandas_to_array(self, input_dtype):
             assert func_output.shape == input_value.shape
             npt.assert_allclose(func_output, masked_array_input)
 
-        # Check function behavior with Theano graph variable
-        theano_output = func(theano_graph_input)
-        assert isinstance(theano_output, theano.graph.basic.Variable)
-        npt.assert_allclose(theano_output.eval(), theano_graph_input.eval())
-        intX = pm.theanof._conversion_map[theano.config.floatX]
-        if dense_input.dtype == intX or dense_input.dtype == theano.config.floatX:
-            assert theano_output.owner is None  # func should not have added new nodes
-            assert theano_output.name == input_name
+        # Check function behavior with Aesara graph variable
+        aesara_output = func(aesara_graph_input)
+        assert isinstance(aesara_output, Variable)
+        npt.assert_allclose(aesara_output.eval(), aesara_graph_input.eval())
+        intX = pm.aesaraf._conversion_map[aesara.config.floatX]
+        if dense_input.dtype == intX or dense_input.dtype == aesara.config.floatX:
+            assert aesara_output.owner is None  # func should not have added new nodes
+            assert aesara_output.name == input_name
         else:
-            assert theano_output.owner is not None  # func should have casted
-            assert theano_output.owner.inputs[0].name == input_name
+            assert aesara_output.owner is not None  # func should have casted
+            assert aesara_output.owner.inputs[0].name == input_name
             if "float" in input_dtype:
-                assert theano_output.dtype == theano.config.floatX
+                assert aesara_output.dtype == aesara.config.floatX
             else:
-                assert theano_output.dtype == intX
+                assert aesara_output.dtype == intX
 
         # Check function behavior with generator data
         generator_output = func(square_generator)
@@ -102,15 +105,15 @@ def test_pandas_to_array(self, input_dtype):
         # Make sure the returned object has .set_gen and .set_default methods
         assert hasattr(wrapped, "set_gen")
         assert hasattr(wrapped, "set_default")
-        # Make sure the returned object is a Theano TensorVariable
-        assert isinstance(wrapped, tt.TensorVariable)
+        # Make sure the returned object is an Aesara TensorVariable
+        assert isinstance(wrapped, TensorVariable)
 
     def test_as_tensor(self):
         """
         Check returned values for `data` given known inputs to `as_tensor()`.
 
         Note that ndarrays should return a TensorConstant and sparse inputs
-        should return a Sparse Theano object.
+        should return a Sparse Aesara object.
         """
         # Create the various inputs to the function
         input_name = "testing_inputs"
@@ -137,18 +140,18 @@ def test_as_tensor(self):
         for func_output in [dense_output, sparse_output]:
             assert func_output.missing_values is None
 
-        # Ensure that the Theano variable names are correctly set.
+        # Ensure that the Aesara variable names are correctly set.
# Note that the output for masked inputs do not have their names set # to the passed value. for func_output in [dense_output, sparse_output]: assert func_output.name == input_name # Ensure the that returned functions are all of the correct type - assert isinstance(dense_output, tt.TensorConstant) + assert isinstance(dense_output, TensorConstant) assert sparse.basic._is_sparse_variable(sparse_output) # Masked output is something weird. Just ensure it has missing values - # self.assertIsInstance(masked_output, tt.TensorConstant) + # self.assertIsInstance(masked_output, TensorConstant) assert masked_output.missing_values is not None return None diff --git a/pymc3/tests/test_models_utils.py b/pymc3/tests/test_models_utils.py index 84d25b3c2bf..c6f55f8b090 100644 --- a/pymc3/tests/test_models_utils.py +++ b/pymc3/tests/test_models_utils.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import aesara.tensor as aet import numpy as np import pandas as pd import pytest -import theano.tensor as tt from pymc3.glm import utils @@ -51,7 +51,7 @@ def test_dict_input(self): m, l = utils.any_to_tensor_and_labels(self.data.to_dict("list")) self.assertMatrixLabels(m, l, mt=self.data[l].values, lt=l) - inp = {k: tt.as_tensor_variable(v.values) for k, v in self.data.to_dict("series").items()} + inp = {k: aet.as_tensor_variable(v.values) for k, v in self.data.to_dict("series").items()} m, l = utils.any_to_tensor_and_labels(inp) self.assertMatrixLabels(m, l, mt=self.data[l].values, lt=l) @@ -63,18 +63,18 @@ def test_list_input(self): def test_tensor_input(self): m, l = utils.any_to_tensor_and_labels( - tt.as_tensor_variable(self.data.values.tolist()), labels=["x0", "x1"] + aet.as_tensor_variable(self.data.values.tolist()), labels=["x0", "x1"] ) self.assertMatrixLabels(m, l, lt=["x0", "x1"]) m, l = utils.any_to_tensor_and_labels( - tt.as_tensor_variable(self.data.values.tolist()), labels=["x2", "x3"] + aet.as_tensor_variable(self.data.values.tolist()), labels=["x2", "x3"] ) self.assertMatrixLabels(m, l, lt=["x2", "x3"]) def test_user_mistakes(self): # no labels for tensor variable with pytest.raises(ValueError): - utils.any_to_tensor_and_labels(tt.as_tensor_variable(self.data.values.tolist())) + utils.any_to_tensor_and_labels(aet.as_tensor_variable(self.data.values.tolist())) # len of labels is bad with pytest.raises(ValueError): utils.any_to_tensor_and_labels(self.data.values.tolist(), labels=["x"]) diff --git a/pymc3/tests/test_ode.py b/pymc3/tests/test_ode.py index 1d336bfba67..efdaa31812b 100644 --- a/pymc3/tests/test_ode.py +++ b/pymc3/tests/test_ode.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import aesara import numpy as np import pytest -import theano from scipy.integrate import odeint from scipy.stats import norm @@ -26,13 +26,13 @@ def test_gradients(): - """Tests the computation of the sensitivities from the theano computation graph""" + """Tests the computation of the sensitivities from the aesara computation graph""" # ODE system for which to compute gradients def ode_func(y, t, p): return np.exp(-t) - p[0] * y[0] - # Computation of graidients with Theano + # Computation of graidients with Aesara augmented_ode_func = augment_system(ode_func, 1, 1 + 1) # This is the new system, ODE + Sensitivities, which will be integrated @@ -210,22 +210,22 @@ def system(y, t, p): ode_model = DifferentialEquation(func=system, t0=0, times=times, n_states=1, n_theta=1) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") def test_too_many_params(self): with pytest.raises(pm.ShapeError): self.ode_model(theta=[1, 1], y0=[0]) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") def test_too_many_y0(self): with pytest.raises(pm.ShapeError): self.ode_model(theta=[1], y0=[0, 0]) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") def test_too_few_params(self): with pytest.raises(pm.ShapeError): self.ode_model(theta=[], y0=[1]) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") def test_too_few_y0(self): with pytest.raises(pm.ShapeError): self.ode_model(theta=[1], y0=[]) diff --git a/pymc3/tests/test_parallel_sampling.py b/pymc3/tests/test_parallel_sampling.py index e458c609a8b..bd1a37abcff 100644 --- a/pymc3/tests/test_parallel_sampling.py +++ b/pymc3/tests/test_parallel_sampling.py @@ -14,12 +14,13 @@ import multiprocessing import os +import aesara +import aesara.tensor as aet import numpy as np import pytest -import theano -import theano.tensor as tt -from theano.compile.ops import as_op +from aesara.compile.ops import as_op +from aesara.tensor.type import TensorType import pymc3 as pm import pymc3.parallel_sampling as ps @@ -60,10 +61,10 @@ def test_bad_unpickle(): assert "could not be unpickled" in str(exc_info.getrepr(style="short")) -tt_vector = tt.TensorType(theano.config.floatX, [False]) +aet_vector = TensorType(aesara.config.floatX, [False]) -@as_op([tt_vector, tt.iscalar], [tt_vector]) +@as_op([aet_vector, aet.iscalar], [aet_vector]) def _crash_remote_process(a, master_pid): if os.getpid() != master_pid: os.exit(0) @@ -80,8 +81,8 @@ def test_remote_pipe_closed(): master_pid = os.getpid() with pm.Model(): x = pm.Normal("x", shape=2, mu=0.1) - tt_pid = tt.as_tensor_variable(np.array(master_pid, dtype="int32")) - pm.Normal("y", mu=_crash_remote_process(x, tt_pid), shape=2) + aet_pid = aet.as_tensor_variable(np.array(master_pid, dtype="int32")) + pm.Normal("y", mu=_crash_remote_process(x, aet_pid), shape=2) step = pm.Metropolis() with pytest.raises(RuntimeError, match="Chain [0-9] failed"): diff --git a/pymc3/tests/test_posdef_sym.py b/pymc3/tests/test_posdef_sym.py index a7aa714357f..cfb406ca1df 100644 --- a/pymc3/tests/test_posdef_sym.py +++ 
b/pymc3/tests/test_posdef_sym.py @@ -12,19 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import aesara import numpy as np -import theano from pymc3.distributions import multivariate as mv def test_posdef_symmetric1(): - data = np.array([[1.0, 0], [0, 1]], dtype=theano.config.floatX) + data = np.array([[1.0, 0], [0, 1]], dtype=aesara.config.floatX) assert mv.posdef(data) == 1 def test_posdef_symmetric2(): - data = np.array([[1.0, 2], [2, 1]], dtype=theano.config.floatX) + data = np.array([[1.0, 2], [2, 1]], dtype=aesara.config.floatX) assert mv.posdef(data) == 0 @@ -33,11 +33,11 @@ def test_posdef_symmetric3(): Is this correct? """ - data = np.array([[1.0, 1], [1, 1]], dtype=theano.config.floatX) + data = np.array([[1.0, 1], [1, 1]], dtype=aesara.config.floatX) assert mv.posdef(data) == 0 def test_posdef_symmetric4(): - d = np.array([[1, 0.99, 1], [0.99, 1, 0.999], [1, 0.999, 1]], theano.config.floatX) + d = np.array([[1, 0.99, 1], [0.99, 1, 0.999], [1, 0.999, 1]], aesara.config.floatX) assert mv.posdef(d) == 0 diff --git a/pymc3/tests/test_posteriors.py b/pymc3/tests/test_posteriors.py index 453ae98efd8..8ac068bd757 100644 --- a/pymc3/tests/test_posteriors.py +++ b/pymc3/tests/test_posteriors.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import aesara import pytest -import theano from pymc3.tests import sampler_fixtures as sf -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") +@pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") class TestNUTSUniform(sf.NutsFixture, sf.UniformFixture): n_samples = 10000 tune = 1000 diff --git a/pymc3/tests/test_quadpotential.py b/pymc3/tests/test_quadpotential.py index d91a80b5e90..aa89f37075b 100644 --- a/pymc3/tests/test_quadpotential.py +++ b/pymc3/tests/test_quadpotential.py @@ -19,8 +19,8 @@ import pymc3 +from pymc3.aesaraf import floatX from pymc3.step_methods.hmc import quadpotential -from pymc3.theanof import floatX def test_elemwise_posdef(): diff --git a/pymc3/tests/test_random.py b/pymc3/tests/test_random.py index 7a4ae42ce22..f88e6f75f96 100644 --- a/pymc3/tests/test_random.py +++ b/pymc3/tests/test_random.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import aesara +import aesara.tensor as aet import numpy as np import numpy.testing as npt import pytest -import theano -import theano.tensor as tt from numpy import random as nr @@ -30,15 +30,15 @@ def test_draw_value(): npt.assert_equal(_draw_value(np.array([5, 6])), [5, 6]) npt.assert_equal(_draw_value(np.array(5.0)), 5) - npt.assert_equal(_draw_value(tt.constant([5.0, 6.0])), [5, 6]) - assert _draw_value(tt.constant(5)) == 5 - npt.assert_equal(_draw_value(2 * tt.constant([5.0, 6.0])), [10, 12]) + npt.assert_equal(_draw_value(aet.constant([5.0, 6.0])), [5, 6]) + assert _draw_value(aet.constant(5)) == 5 + npt.assert_equal(_draw_value(2 * aet.constant([5.0, 6.0])), [10, 12]) - val = theano.shared(np.array([5.0, 6.0])) + val = aesara.shared(np.array([5.0, 6.0])) npt.assert_equal(_draw_value(val), [5, 6]) npt.assert_equal(_draw_value(2 * val), [10, 12]) - a = tt.scalar("a") + a = aet.scalar("a") a.tag.test_value = 6 npt.assert_equal(_draw_value(2 * a, givens=[(a, 1)]), 2) @@ -48,7 +48,7 @@ def test_draw_value(): assert isinstance(_draw_value(5), type(5)) with pm.Model(): - mu = 2 * tt.constant(np.array([5.0, 6.0])) + theano.shared(np.array(5)) + mu = 2 * aet.constant(np.array([5.0, 6.0])) + aesara.shared(np.array(5)) a = pm.Normal("a", mu=mu, sigma=5, shape=2) val1 = _draw_value(a) @@ -68,17 +68,17 @@ def test_vals(self): npt.assert_equal(draw_values([np.array([5, 6])])[0], [5, 6]) npt.assert_equal(draw_values([np.array(5.0)])[0], 5) - npt.assert_equal(draw_values([tt.constant([5.0, 6.0])])[0], [5, 6]) - assert draw_values([tt.constant(5)])[0] == 5 - npt.assert_equal(draw_values([2 * tt.constant([5.0, 6.0])])[0], [10, 12]) + npt.assert_equal(draw_values([aet.constant([5.0, 6.0])])[0], [5, 6]) + assert draw_values([aet.constant(5)])[0] == 5 + npt.assert_equal(draw_values([2 * aet.constant([5.0, 6.0])])[0], [10, 12]) - val = theano.shared(np.array([5.0, 6.0])) + val = aesara.shared(np.array([5.0, 6.0])) npt.assert_equal(draw_values([val])[0], [5, 6]) npt.assert_equal(draw_values([2 * val])[0], [10, 12]) def test_simple_model(self): with pm.Model(): - mu = 2 * tt.constant(np.array([5.0, 6.0])) + theano.shared(np.array(5)) + mu = 2 * aet.constant(np.array([5.0, 6.0])) + aesara.shared(np.array(5)) a = pm.Normal("a", mu=mu, sigma=5, shape=2) val1 = draw_values([a]) @@ -90,7 +90,7 @@ def test_simple_model(self): def test_dep_vars(self): with pm.Model(): - mu = 2 * tt.constant(np.array([5.0, 6.0])) + theano.shared(np.array(5)) + mu = 2 * aet.constant(np.array([5.0, 6.0])) + aesara.shared(np.array(5)) sd = pm.HalfNormal("sd", shape=2) tau = 1 / sd ** 2 a = pm.Normal("a", mu=mu, tau=tau, shape=2) @@ -116,7 +116,7 @@ def test_dep_vars(self): def test_graph_constant(self): # Issue 3595 pointed out that slice(None) can introduce - # theano.graph.basic.Constant into the compute graph, which wasn't + # aesara.graph.basic.Constant into the compute graph, which wasn't # handled correctly by draw_values n_d = 500 n_x = 2 diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py index f3f2872c442..243ece046ed 100644 --- a/pymc3/tests/test_sampling.py +++ b/pymc3/tests/test_sampling.py @@ -18,15 +18,15 @@ from itertools import combinations from typing import Tuple +import aesara +import aesara.tensor as aet import arviz as az import numpy as np import numpy.testing as npt import pytest -import theano -import theano.tensor as tt +from aesara import shared from scipy import stats -from theano import shared import pymc3 as pm @@ -36,7 +36,7 @@ from pymc3.tests.models import simple_init 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") +@pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") class TestSample(SeededTest): def setup_method(self): super().setup_method() @@ -348,7 +348,7 @@ def test_choose_chains(n_points, tune, expected_length, expected_n_traces): assert expected_n_traces == len(traces) -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") +@pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") class TestNamedSampling(SeededTest): def test_shared_named(self): G_var = shared(value=np.atleast_2d(1.0), broadcastable=(True, False), name="G") @@ -362,7 +362,7 @@ def test_shared_named(self): testval=np.atleast_2d(0), ) theta = pm.Normal( - "theta", mu=tt.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1) + "theta", mu=aet.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1) ) res = theta.random() assert np.isclose(res, 0.0) @@ -378,13 +378,13 @@ def test_shared_unnamed(self): testval=np.atleast_2d(0), ) theta = pm.Normal( - "theta", mu=tt.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1) + "theta", mu=aet.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1) ) res = theta.random() assert np.isclose(res, 0.0) def test_constant_named(self): - G_var = tt.constant(np.atleast_2d(1.0), name="G") + G_var = aet.constant(np.atleast_2d(1.0), name="G") with pm.Model(): theta0 = pm.Normal( "theta0", @@ -394,7 +394,7 @@ def test_constant_named(self): testval=np.atleast_2d(0), ) theta = pm.Normal( - "theta", mu=tt.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1) + "theta", mu=aet.dot(G_var, theta0), tau=np.atleast_2d(1e20), shape=(1, 1) ) res = theta.random() @@ -621,8 +621,8 @@ def test_model_not_drawable_prior(self): def test_model_shared_variable(self): x = np.random.randn(100) y = x > 0 - x_shared = theano.shared(x) - y_shared = theano.shared(y) + x_shared = aesara.shared(x) + y_shared = aesara.shared(y) with pm.Model() as model: coeff = pm.Normal("x", mu=0, sd=1) logistic = pm.Deterministic("p", pm.math.sigmoid(coeff * x_shared)) @@ -655,8 +655,8 @@ def test_model_shared_variable(self): npt.assert_allclose(post_pred["p"], expected_p) def test_deterministic_of_observed(self): - meas_in_1 = pm.theanof.floatX(2 + 4 * np.random.randn(10)) - meas_in_2 = pm.theanof.floatX(5 + 4 * np.random.randn(10)) + meas_in_1 = pm.aesaraf.floatX(2 + 4 * np.random.randn(10)) + meas_in_2 = pm.aesaraf.floatX(5 + 4 * np.random.randn(10)) nchains = 2 with pm.Model() as model: mu_in_1 = pm.Normal("mu_in_1", 0, 1) @@ -671,7 +671,7 @@ def test_deterministic_of_observed(self): trace = pm.sample(100, chains=nchains) np.random.seed(0) - rtol = 1e-5 if theano.config.floatX == "float64" else 1e-4 + rtol = 1e-5 if aesara.config.floatX == "float64" else 1e-4 np.random.seed(0) ppc = pm.sample_posterior_predictive( @@ -694,8 +694,8 @@ def test_deterministic_of_observed(self): npt.assert_allclose(ppc["in_1"] + ppc["in_2"], ppc["out"], rtol=rtol) def test_deterministic_of_observed_modified_interface(self): - meas_in_1 = pm.theanof.floatX(2 + 4 * np.random.randn(100)) - meas_in_2 = pm.theanof.floatX(5 + 4 * np.random.randn(100)) + meas_in_1 = pm.aesaraf.floatX(2 + 4 * np.random.randn(100)) + meas_in_2 = pm.aesaraf.floatX(5 + 4 * np.random.randn(100)) with pm.Model() as model: mu_in_1 = pm.Normal("mu_in_1", 0, 1) sigma_in_1 = pm.HalfNormal("sd_in_1", 1) @@ -718,7 +718,7 @@ def 
test_deterministic_of_observed_modified_interface(self): var_names=[x.name for x in (model.deterministics + model.basic_RVs)], ) - rtol = 1e-5 if theano.config.floatX == "float64" else 1e-3 + rtol = 1e-5 if aesara.config.floatX == "float64" else 1e-3 npt.assert_allclose(ppc["in_1"] + ppc["in_2"], ppc["out"], rtol=rtol) ppc = pm.fast_sample_posterior_predictive( @@ -728,7 +728,7 @@ def test_deterministic_of_observed_modified_interface(self): var_names=[x.name for x in (model.deterministics + model.basic_RVs)], ) - rtol = 1e-5 if theano.config.floatX == "float64" else 1e-3 + rtol = 1e-5 if aesara.config.floatX == "float64" else 1e-3 npt.assert_allclose(ppc["in_1"] + ppc["in_2"], ppc["out"], rtol=rtol) def test_variable_type(self): @@ -987,7 +987,7 @@ def test_transformed(self): phi = pm.Beta("phi", alpha=1.0, beta=1.0) kappa_log = pm.Exponential("logkappa", lam=5.0) - kappa = pm.Deterministic("kappa", tt.exp(kappa_log)) + kappa = pm.Deterministic("kappa", aet.exp(kappa_log)) thetas = pm.Beta("thetas", alpha=phi * kappa, beta=(1.0 - phi) * kappa, shape=n) @@ -1053,7 +1053,7 @@ def test_zeroinflatedpoisson(self): def test_bounded_dist(self): with pm.Model() as model: BoundedNormal = pm.Bound(pm.Normal, lower=0.0) - x = BoundedNormal("x", mu=tt.zeros((3, 1)), sd=1 * tt.ones((3, 1)), shape=(3, 1)) + x = BoundedNormal("x", mu=aet.zeros((3, 1)), sd=1 * aet.ones((3, 1)), shape=(3, 1)) with model: prior_trace = pm.sample_prior_predictive(5) diff --git a/pymc3/tests/test_shape_handling.py b/pymc3/tests/test_shape_handling.py index 070535969df..39cd181083a 100644 --- a/pymc3/tests/test_shape_handling.py +++ b/pymc3/tests/test_shape_handling.py @@ -15,7 +15,7 @@ import numpy as np import pytest -from theano import tensor as tt +from aesara import tensor as aet import pymc3 as pm @@ -106,7 +106,7 @@ def fixture_model(): cov = pm.InverseGamma("cov", alpha=1, beta=1) x = pm.Normal("x", mu=np.ones((dim,)), sigma=pm.math.sqrt(cov), shape=(n, dim)) eps = pm.HalfNormal("eps", np.ones((n, 1)), shape=(n, dim)) - mu = pm.Deterministic("mu", tt.sum(x + eps, axis=-1)) + mu = pm.Deterministic("mu", aet.sum(x + eps, axis=-1)) y = pm.Normal("y", mu=mu, sigma=1, shape=(n,)) return model, [cov, x, eps, y] diff --git a/pymc3/tests/test_shared.py b/pymc3/tests/test_shared.py index 723216362fb..247b5ebdb55 100644 --- a/pymc3/tests/test_shared.py +++ b/pymc3/tests/test_shared.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import aesara import numpy as np -import theano import pymc3 as pm @@ -24,7 +24,7 @@ class TestShared(SeededTest): def test_deterministic(self): with pm.Model() as model: data_values = np.array([0.5, 0.4, 5, 2]) - X = theano.shared(np.asarray(data_values, dtype=theano.config.floatX), borrow=True) + X = aesara.shared(np.asarray(data_values, dtype=aesara.config.floatX), borrow=True) pm.Normal("y", 0, 1, observed=X) model.logp(model.test_point) @@ -34,7 +34,7 @@ def test_sample(self): x_pred = np.linspace(-3, 3, 200) - x_shared = theano.shared(x) + x_shared = aesara.shared(x) with pm.Model() as model: b = pm.Normal("b", 0.0, 10.0) diff --git a/pymc3/tests/test_smc.py b/pymc3/tests/test_smc.py index 695ea461f73..b2ebdd65dde 100644 --- a/pymc3/tests/test_smc.py +++ b/pymc3/tests/test_smc.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import aesara.tensor as aet import numpy as np import pytest -import theano.tensor as tt import pymc3 as pm @@ -39,16 +39,16 @@ def setup_class(self): def two_gaussians(x): log_like1 = ( - -0.5 * n * tt.log(2 * np.pi) - - 0.5 * tt.log(dsigma) + -0.5 * n * aet.log(2 * np.pi) + - 0.5 * aet.log(dsigma) - 0.5 * (x - mu1).T.dot(isigma).dot(x - mu1) ) log_like2 = ( - -0.5 * n * tt.log(2 * np.pi) - - 0.5 * tt.log(dsigma) + -0.5 * n * aet.log(2 * np.pi) + - 0.5 * aet.log(dsigma) - 0.5 * (x - mu2).T.dot(isigma).dot(x - mu2) ) - return tt.log(w1 * tt.exp(log_like1) + w2 * tt.exp(log_like2)) + return aet.log(w1 * aet.exp(log_like1) + w2 * aet.exp(log_like2)) with pm.Model() as self.SMC_test: X = pm.Uniform("X", lower=-2, upper=2.0, shape=n) diff --git a/pymc3/tests/test_special_functions.py b/pymc3/tests/test_special_functions.py index e7e2e53cbc4..b293163ad63 100644 --- a/pymc3/tests/test_special_functions.py +++ b/pymc3/tests/test_special_functions.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import aesara.tensor as aet import numpy as np import scipy.special as ss -import theano.tensor as tt -from theano import function +from aesara import function import pymc3.distributions.special as ps @@ -26,10 +26,10 @@ def test_functions(): xvals = list(map(np.atleast_1d, [0.01, 0.1, 2, 100, 10000])) - x = tt.dvector("x") + x = aet.dvector("x") x.tag.test_value = xvals[0] - p = tt.iscalar("p") + p = aet.iscalar("p") p.tag.test_value = 1 gammaln = function([x], ps.gammaln(x)) @@ -55,10 +55,10 @@ def test_functions(): def t_multigamma(): xvals = list(map(np.atleast_1d, [0, 0.1, 2, 100])) - x = tt.dvector("x") + x = aet.dvector("x") x.tag.test_value = xvals[0] - p = tt.iscalar("p") + p = aet.iscalar("p") p.tag.test_value = 1 multigammaln = function([x, p], ps.multigammaln(x, p)) diff --git a/pymc3/tests/test_step.py b/pymc3/tests/test_step.py index 6da70f2a7ab..54b126ba0f3 100644 --- a/pymc3/tests/test_step.py +++ b/pymc3/tests/test_step.py @@ -18,17 +18,18 @@ from math import isclose +import aesara +import aesara.tensor as aet import arviz as az import numpy as np import numpy.testing as npt import pytest -import theano -import theano.tensor as tt +from aesara.compile.ops import as_op +from aesara.graph.op import Op from numpy.testing import assert_array_almost_equal -from theano.compile.ops import as_op -from theano.graph.op import Op +from pymc3.aesaraf import floatX from pymc3.data import Data from pymc3.distributions import ( Bernoulli, @@ -71,7 +72,6 @@ simple_2model_continuous, simple_categorical, ) -from pymc3.theanof import floatX class TestStepMethods: # yield test doesn't work subclassing object @@ -500,7 +500,7 @@ def setup_class(self): def teardown_class(self): shutil.rmtree(self.temp_dir) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") def test_sample_exact(self): for step_method in self.master_samples: self.check_trace(step_method) @@ -591,7 +591,7 @@ def test_step_continuous(self): self.check_stat(check, trace, step.__class__.__name__) def test_step_discrete(self): - if theano.config.floatX == "float32": + if aesara.config.floatX == "float32": return # Cannot use @skip because it only skips one iteration of the yield start, model, (mu, C) = mv_simple_discrete() unc = np.diag(C) ** 0.5 @@ -657,7 +657,7 @@ class TestCompoundStep: samplers = (Metropolis, Slice, HamiltonianMC, 
NUTS, DEMetropolis) @pytest.mark.skipif( - theano.config.floatX == "float32", reason="Test fails on 32 bit due to linalg issues" + aesara.config.floatX == "float32", reason="Test fails on 32 bit due to linalg issues" ) def test_non_blocked(self): """Test that samplers correctly create non-blocked compound steps.""" @@ -667,7 +667,7 @@ def test_non_blocked(self): assert isinstance(sampler(blocked=False), CompoundStep) @pytest.mark.skipif( - theano.config.floatX == "float32", reason="Test fails on 32 bit due to linalg issues" + aesara.config.floatX == "float32", reason="Test fails on 32 bit due to linalg issues" ) def test_blocked(self): _, model = simple_2model_continuous() @@ -716,17 +716,17 @@ def test_normal_nograd_op(self): with Model() as model: x = Normal("x", 0, 1) - # a custom Theano Op that does not have a grad: - is_64 = theano.config.floatX == "float64" - itypes = [tt.dscalar] if is_64 else [tt.fscalar] - otypes = [tt.dscalar] if is_64 else [tt.fscalar] + # a custom Aesara Op that does not have a grad: + is_64 = aesara.config.floatX == "float64" + itypes = [aet.dscalar] if is_64 else [aet.fscalar] + otypes = [aet.dscalar] if is_64 else [aet.fscalar] @as_op(itypes, otypes) def kill_grad(x): return x data = np.random.normal(size=(100,)) - Normal("y", mu=kill_grad(x), sigma=1, observed=data.astype(theano.config.floatX)) + Normal("y", mu=kill_grad(x), sigma=1, observed=data.astype(aesara.config.floatX)) steps = assign_step_methods(model, []) assert isinstance(steps, Slice) @@ -957,7 +957,7 @@ def test_custom_proposal_dist(self): pass -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") +@pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") class TestNutsCheckTrace: def test_multiple_samplers(self, caplog): with Model(): @@ -986,8 +986,8 @@ def test_bad_init_parallel(self): def test_linalg(self, caplog): with Model(): a = Normal("a", shape=2) - a = tt.switch(a > 0, np.inf, a) - b = tt.slinalg.solve(floatX(np.eye(2)), a) + a = aet.switch(a > 0, np.inf, a) + b = aet.slinalg.solve(floatX(np.eye(2)), a) Normal("c", mu=b, shape=2) caplog.clear() trace = sample(20, init=None, tune=5, chains=2) @@ -1440,7 +1440,7 @@ def test_aem_mu_sigma(self): """Test that AEM estimates mu_B and Sigma_B in the coarse models of a 3-level LR example correctly""" # create data for linear regression - if theano.config.floatX == "float32": + if aesara.config.floatX == "float32": p = "float32" else: p = "float64" @@ -1459,12 +1459,12 @@ def test_aem_mu_sigma(self): # forward model Op - here, just the regression equation class ForwardModel(Op): - if theano.config.floatX == "float32": - itypes = [tt.fvector] - otypes = [tt.fvector] + if aesara.config.floatX == "float32": + itypes = [aet.fvector] + otypes = [aet.fvector] else: - itypes = [tt.dvector] - otypes = [tt.dvector] + itypes = [aet.dvector] + otypes = [aet.dvector] def __init__(self, x, pymc3_model): self.x = x @@ -1494,7 +1494,7 @@ def perform(self, node, inputs, outputs): intercept = Normal("Intercept", 0, sigma=20) x_coeff = Normal("x", 0, sigma=20) - theta = tt.as_tensor_variable([intercept, x_coeff]) + theta = aet.as_tensor_variable([intercept, x_coeff]) mout.append(ForwardModel(x, coarse_model_0)) @@ -1514,7 +1514,7 @@ def perform(self, node, inputs, outputs): intercept = Normal("Intercept", 0, sigma=20) x_coeff = Normal("x", 0, sigma=20) - theta = tt.as_tensor_variable([intercept, x_coeff]) + theta = aet.as_tensor_variable([intercept, x_coeff]) mout.append(ForwardModel(x, 
coarse_model_1)) @@ -1533,7 +1533,7 @@ def perform(self, node, inputs, outputs): intercept = Normal("Intercept", 0, sigma=20) x_coeff = Normal("x", 0, sigma=20) - theta = tt.as_tensor_variable([intercept, x_coeff]) + theta = aet.as_tensor_variable([intercept, x_coeff]) mout.append(ForwardModel(x, model)) @@ -1569,7 +1569,7 @@ def test_variance_reduction(self): model with multiple levels where approximate levels have fewer data. """ # arithmetic precision - if theano.config.floatX == "float32": + if aesara.config.floatX == "float32": p = "float32" else: p = "float64" @@ -1601,12 +1601,12 @@ def test_variance_reduction(self): # define likelihoods with different Q class Likelihood1(Op): - if theano.config.floatX == "float32": - itypes = [tt.fvector] - otypes = [tt.fscalar] + if aesara.config.floatX == "float32": + itypes = [aet.fvector] + otypes = [aet.fscalar] else: - itypes = [tt.dvector] - otypes = [tt.dscalar] + itypes = [aet.dvector] + otypes = [aet.dscalar] def __init__(self, x, y, pymc3_model): self.x = x @@ -1624,12 +1624,12 @@ def perform(self, node, inputs, outputs): ) class Likelihood2(Op): - if theano.config.floatX == "float32": - itypes = [tt.fvector] - otypes = [tt.fscalar] + if aesara.config.floatX == "float32": + itypes = [aet.fvector] + otypes = [aet.fscalar] else: - itypes = [tt.dvector] - otypes = [tt.dscalar] + itypes = [aet.dvector] + otypes = [aet.dscalar] def __init__(self, x, y, pymc3_model): self.x = x @@ -1654,7 +1654,7 @@ def perform(self, node, inputs, outputs): coarse_models = [] with Model() as coarse_model_0: - if theano.config.floatX == "float32": + if aesara.config.floatX == "float32": Q = Data("Q", np.float32(0.0)) else: Q = Data("Q", np.float64(0.0)) @@ -1663,7 +1663,7 @@ def perform(self, node, inputs, outputs): intercept = Normal("Intercept", 0, sigma=20) x_coeff = Normal("x", 0, sigma=20) - theta = tt.as_tensor_variable([intercept, x_coeff]) + theta = aet.as_tensor_variable([intercept, x_coeff]) mout.append(f(x_coarse_0, y_coarse_0, coarse_model_0)) Potential("likelihood", mout[0](theta)) @@ -1671,7 +1671,7 @@ def perform(self, node, inputs, outputs): coarse_models.append(coarse_model_0) with Model() as coarse_model_1: - if theano.config.floatX == "float32": + if aesara.config.floatX == "float32": Q = Data("Q", np.float32(0.0)) else: Q = Data("Q", np.float64(0.0)) @@ -1680,7 +1680,7 @@ def perform(self, node, inputs, outputs): intercept = Normal("Intercept", 0, sigma=20) x_coeff = Normal("x", 0, sigma=20) - theta = tt.as_tensor_variable([intercept, x_coeff]) + theta = aet.as_tensor_variable([intercept, x_coeff]) mout.append(f(x_coarse_1, y_coarse_1, coarse_model_1)) Potential("likelihood", mout[1](theta)) @@ -1688,7 +1688,7 @@ def perform(self, node, inputs, outputs): coarse_models.append(coarse_model_1) with Model() as model: - if theano.config.floatX == "float32": + if aesara.config.floatX == "float32": Q = Data("Q", np.float32(0.0)) else: Q = Data("Q", np.float64(0.0)) @@ -1697,7 +1697,7 @@ def perform(self, node, inputs, outputs): intercept = Normal("Intercept", 0, sigma=20) x_coeff = Normal("x", 0, sigma=20) - theta = tt.as_tensor_variable([intercept, x_coeff]) + theta = aet.as_tensor_variable([intercept, x_coeff]) mout.append(f(x, y, model)) Potential("likelihood", mout[-1](theta)) diff --git a/pymc3/tests/test_transforms.py b/pymc3/tests/test_transforms.py index e9ab89938b6..844a9eb389e 100644 --- a/pymc3/tests/test_transforms.py +++ b/pymc3/tests/test_transforms.py @@ -12,14 +12,17 @@ # See the License for the specific language governing 
permissions and # limitations under the License. +import aesara +import aesara.tensor as aet import numpy as np import pytest -import theano -import theano.tensor as tt + +from aesara.tensor.var import TensorConstant import pymc3 as pm import pymc3.distributions.transforms as tr +from pymc3.aesaraf import jacobian from pymc3.tests.checks import close_to, close_to_logical from pymc3.tests.helpers import SeededTest from pymc3.tests.test_distributions import ( @@ -34,38 +37,37 @@ UnitSortedVector, Vector, ) -from pymc3.theanof import jacobian # some transforms (stick breaking) require additon of small slack in order to be numerically # stable. The minimal addable slack for float32 is higher thus we need to be less strict -tol = 1e-7 if theano.config.floatX == "float64" else 1e-6 +tol = 1e-7 if aesara.config.floatX == "float64" else 1e-6 -def check_transform(transform, domain, constructor=tt.dscalar, test=0): +def check_transform(transform, domain, constructor=aet.dscalar, test=0): x = constructor("x") x.tag.test_value = test # test forward and forward_val - forward_f = theano.function([x], transform.forward(x)) + forward_f = aesara.function([x], transform.forward(x)) # test transform identity - identity_f = theano.function([x], transform.backward(transform.forward(x))) + identity_f = aesara.function([x], transform.backward(transform.forward(x))) for val in domain.vals: close_to(val, identity_f(val), tol) close_to(transform.forward_val(val), forward_f(val), tol) def check_vector_transform(transform, domain): - return check_transform(transform, domain, tt.dvector, test=np.array([0, 0])) + return check_transform(transform, domain, aet.dvector, test=np.array([0, 0])) -def get_values(transform, domain=R, constructor=tt.dscalar, test=0): +def get_values(transform, domain=R, constructor=aet.dscalar, test=0): x = constructor("x") x.tag.test_value = test - f = theano.function([x], transform.backward(x)) + f = aesara.function([x], transform.backward(x)) return np.array([f(val) for val in domain.vals]) def check_jacobian_det( - transform, domain, constructor=tt.dscalar, test=0, make_comparable=None, elemwise=False + transform, domain, constructor=aet.dscalar, test=0, make_comparable=None, elemwise=False ): y = constructor("y") y.tag.test_value = test @@ -75,15 +77,15 @@ def check_jacobian_det( x = make_comparable(x) if not elemwise: - jac = tt.log(tt.nlinalg.det(jacobian(x, [y]))) + jac = aet.log(aet.nlinalg.det(jacobian(x, [y]))) else: - jac = tt.log(tt.abs_(tt.diag(jacobian(x, [y])))) + jac = aet.log(aet.abs_(aet.diag(jacobian(x, [y])))) # ljd = log jacobian det - actual_ljd = theano.function([y], jac) + actual_ljd = aesara.function([y], jac) - computed_ljd = theano.function( - [y], tt.as_tensor_variable(transform.jacobian_det(y)), on_unused_input="ignore" + computed_ljd = aesara.function( + [y], aet.as_tensor_variable(transform.jacobian_det(y)), on_unused_input="ignore" ) for yval in domain.vals: @@ -99,27 +101,27 @@ def test_stickbreaking(): check_vector_transform(tr.stick_breaking, Simplex(4)) check_transform( - tr.stick_breaking, MultiSimplex(3, 2), constructor=tt.dmatrix, test=np.zeros((2, 2)) + tr.stick_breaking, MultiSimplex(3, 2), constructor=aet.dmatrix, test=np.zeros((2, 2)) ) def test_stickbreaking_bounds(): - vals = get_values(tr.stick_breaking, Vector(R, 2), tt.dvector, np.array([0, 0])) + vals = get_values(tr.stick_breaking, Vector(R, 2), aet.dvector, np.array([0, 0])) close_to(vals.sum(axis=1), 1, tol) close_to_logical(vals > 0, True, tol) close_to_logical(vals < 1, True, tol) 
check_jacobian_det( - tr.stick_breaking, Vector(R, 2), tt.dvector, np.array([0, 0]), lambda x: x[:-1] + tr.stick_breaking, Vector(R, 2), aet.dvector, np.array([0, 0]), lambda x: x[:-1] ) def test_stickbreaking_accuracy(): val = np.array([-30]) - x = tt.dvector("x") + x = aet.dvector("x") x.tag.test_value = val - identity_f = theano.function([x], tr.stick_breaking.forward(tr.stick_breaking.backward(x))) + identity_f = aesara.function([x], tr.stick_breaking.forward(tr.stick_breaking.backward(x))) close_to(val, identity_f(val), tol) @@ -127,14 +129,16 @@ def test_sum_to_1(): check_vector_transform(tr.sum_to_1, Simplex(2)) check_vector_transform(tr.sum_to_1, Simplex(4)) - check_jacobian_det(tr.sum_to_1, Vector(Unit, 2), tt.dvector, np.array([0, 0]), lambda x: x[:-1]) + check_jacobian_det( + tr.sum_to_1, Vector(Unit, 2), aet.dvector, np.array([0, 0]), lambda x: x[:-1] + ) def test_log(): check_transform(tr.log, Rplusbig) check_jacobian_det(tr.log, Rplusbig, elemwise=True) - check_jacobian_det(tr.log, Vector(Rplusbig, 2), tt.dvector, [0, 0], elemwise=True) + check_jacobian_det(tr.log, Vector(Rplusbig, 2), aet.dvector, [0, 0], elemwise=True) vals = get_values(tr.log) close_to_logical(vals > 0, True, tol) @@ -144,7 +148,7 @@ def test_log_exp_m1(): check_transform(tr.log_exp_m1, Rplusbig) check_jacobian_det(tr.log_exp_m1, Rplusbig, elemwise=True) - check_jacobian_det(tr.log_exp_m1, Vector(Rplusbig, 2), tt.dvector, [0, 0], elemwise=True) + check_jacobian_det(tr.log_exp_m1, Vector(Rplusbig, 2), aet.dvector, [0, 0], elemwise=True) vals = get_values(tr.log_exp_m1) close_to_logical(vals > 0, True, tol) @@ -154,7 +158,7 @@ def test_logodds(): check_transform(tr.logodds, Unit) check_jacobian_det(tr.logodds, Unit, elemwise=True) - check_jacobian_det(tr.logodds, Vector(Unit, 2), tt.dvector, [0.5, 0.5], elemwise=True) + check_jacobian_det(tr.logodds, Vector(Unit, 2), aet.dvector, [0.5, 0.5], elemwise=True) vals = get_values(tr.logodds) close_to_logical(vals > 0, True, tol) @@ -166,7 +170,7 @@ def test_lowerbound(): check_transform(trans, Rplusbig) check_jacobian_det(trans, Rplusbig, elemwise=True) - check_jacobian_det(trans, Vector(Rplusbig, 2), tt.dvector, [0, 0], elemwise=True) + check_jacobian_det(trans, Vector(Rplusbig, 2), aet.dvector, [0, 0], elemwise=True) vals = get_values(trans) close_to_logical(vals > 0, True, tol) @@ -177,7 +181,7 @@ def test_upperbound(): check_transform(trans, Rminusbig) check_jacobian_det(trans, Rminusbig, elemwise=True) - check_jacobian_det(trans, Vector(Rminusbig, 2), tt.dvector, [-1, -1], elemwise=True) + check_jacobian_det(trans, Vector(Rminusbig, 2), aet.dvector, [-1, -1], elemwise=True) vals = get_values(trans) close_to_logical(vals < 0, True, tol) @@ -196,7 +200,7 @@ def test_interval(): close_to_logical(vals < b, True, tol) -@pytest.mark.skipif(theano.config.floatX == "float32", reason="Test fails on 32 bit") +@pytest.mark.skipif(aesara.config.floatX == "float32", reason="Test fails on 32 bit") def test_interval_near_boundary(): lb = -1.0 ub = 1e-7 @@ -219,26 +223,26 @@ def test_circular(): close_to_logical(vals > -np.pi, True, tol) close_to_logical(vals < np.pi, True, tol) - assert isinstance(trans.forward(1), tt.TensorConstant) + assert isinstance(trans.forward(1), TensorConstant) def test_ordered(): check_vector_transform(tr.ordered, SortedVector(6)) - check_jacobian_det(tr.ordered, Vector(R, 2), tt.dvector, np.array([0, 0]), elemwise=False) + check_jacobian_det(tr.ordered, Vector(R, 2), aet.dvector, np.array([0, 0]), elemwise=False) - vals = get_values(tr.ordered, 
Vector(R, 3), tt.dvector, np.zeros(3)) + vals = get_values(tr.ordered, Vector(R, 3), aet.dvector, np.zeros(3)) close_to_logical(np.diff(vals) >= 0, True, tol) -@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") +@pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") def test_chain(): chain_tranf = tr.Chain([tr.logodds, tr.ordered]) check_vector_transform(chain_tranf, UnitSortedVector(3)) - check_jacobian_det(chain_tranf, Vector(R, 4), tt.dvector, np.zeros(4), elemwise=False) + check_jacobian_det(chain_tranf, Vector(R, 4), aet.dvector, np.zeros(4), elemwise=False) - vals = get_values(chain_tranf, Vector(R, 5), tt.dvector, np.zeros(5)) + vals = get_values(chain_tranf, Vector(R, 5), aet.dvector, np.zeros(5)) close_to_logical(np.diff(vals) >= 0, True, tol) @@ -260,7 +264,7 @@ def check_transform_elementwise_logp(self, model): pt[x.name] = array dist = x.distribution logp_nojac = x0.distribution.logp(dist.transform_used.backward(array)) - jacob_det = dist.transform_used.jacobian_det(theano.shared(array)) + jacob_det = dist.transform_used.jacobian_det(aesara.shared(array)) assert x.logp_elemwiset.ndim == jacob_det.ndim elementwiselogp = logp_nojac + jacob_det @@ -277,7 +281,7 @@ def check_vectortransform_elementwise_logp(self, model, vect_opt=0): pt[x.name] = array dist = x.distribution logp_nojac = x0.distribution.logp(dist.transform_used.backward(array)) - jacob_det = dist.transform_used.jacobian_det(theano.shared(array)) + jacob_det = dist.transform_used.jacobian_det(aesara.shared(array)) assert x.logp_elemwiset.ndim == jacob_det.ndim if vect_opt == 0: @@ -369,7 +373,7 @@ def test_normal_ordered(self): (np.ones(3), (4, 3)), ], ) - @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") def test_half_normal_ordered(self, sd, shape): testval = np.sort(np.abs(np.random.randn(*shape))) model = self.build_model( diff --git a/pymc3/tests/test_types.py b/pymc3/tests/test_types.py index bd8eaa42df0..4adf8a62186 100644 --- a/pymc3/tests/test_types.py +++ b/pymc3/tests/test_types.py @@ -14,8 +14,8 @@ from copy import copy +import aesara import numpy as np -import theano from pymc3.distributions import Normal from pymc3.model import Model @@ -27,14 +27,14 @@ class TestType: samplers = (Metropolis, Slice, HamiltonianMC, NUTS) def setup_method(self): - # save theano config object - self.theano_config = copy(theano.config) + # save aesara config object + self.aesara_config = copy(aesara.config) def teardown_method(self): - # restore theano config - theano.config = self.theano_config + # restore aesara config + aesara.config = self.aesara_config - @theano.config.change_flags({"floatX": "float64", "warn_float64": "ignore"}) + @aesara.config.change_flags({"floatX": "float64", "warn_float64": "ignore"}) def test_float64(self): with Model() as model: x = Normal("x", testval=np.array(1.0, dtype="float64")) @@ -47,7 +47,7 @@ def test_float64(self): with model: sample(10, sampler()) - @theano.config.change_flags({"floatX": "float32", "warn_float64": "warn"}) + @aesara.config.change_flags({"floatX": "float32", "warn_float64": "warn"}) def test_float32(self): with Model() as model: x = Normal("x", testval=np.array(1.0, dtype="float32")) @@ -60,7 +60,7 @@ def test_float32(self): with model: sample(10, sampler()) - @theano.config.change_flags({"floatX": "float64", "warn_float64": "ignore"}) + 
@aesara.config.change_flags({"floatX": "float64", "warn_float64": "ignore"}) def test_float64_MLDA(self): data = np.random.randn(5) @@ -78,7 +78,7 @@ def test_float64_MLDA(self): with model: sample(10, MLDA(coarse_models=[coarse_model])) - @theano.config.change_flags({"floatX": "float32", "warn_float64": "warn"}) + @aesara.config.change_flags({"floatX": "float32", "warn_float64": "warn"}) def test_float32_MLDA(self): data = np.random.randn(5).astype("float32") diff --git a/pymc3/tests/test_updates.py b/pymc3/tests/test_updates.py index 9d8f644075c..77dff3f17ad 100644 --- a/pymc3/tests/test_updates.py +++ b/pymc3/tests/test_updates.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import aesara import numpy as np import pytest -import theano from pymc3.variational.updates import ( adadelta, @@ -28,12 +28,12 @@ sgd, ) -_a = theano.shared(1.0) +_a = aesara.shared(1.0) _b = _a * 2 -_m = theano.shared(np.empty((10,), theano.config.floatX)) +_m = aesara.shared(np.empty((10,), aesara.config.floatX)) _n = _m.sum() -_m2 = theano.shared(np.empty((10, 10, 10), theano.config.floatX)) +_m2 = aesara.shared(np.empty((10, 10, 10), aesara.config.floatX)) _n2 = _b + _n + _m2.sum() @@ -71,7 +71,7 @@ ids=["scalar", "matrix", "mixed"], ) def test_updates_fast(opt, loss_and_params, kwargs, getter): - with theano.config.change_flags(compute_test_value="ignore"): + with aesara.config.change_flags(compute_test_value="ignore"): loss, param = getter(loss_and_params) args = dict() args.update(**kwargs) diff --git a/pymc3/tests/test_variational_inference.py b/pymc3/tests/test_variational_inference.py index 1ef9b616290..8e115350b49 100644 --- a/pymc3/tests/test_variational_inference.py +++ b/pymc3/tests/test_variational_inference.py @@ -16,18 +16,18 @@ import io import operator +import aesara +import aesara.tensor as aet import numpy as np import pytest -import theano -import theano.tensor as tt import pymc3 as pm import pymc3.memoize import pymc3.util +from pymc3.aesaraf import intX from pymc3.tests import models from pymc3.tests.helpers import not_raises -from pymc3.theanof import intX from pymc3.variational import flows, opvi from pymc3.variational.approximations import ( Empirical, @@ -51,7 +51,7 @@ def test_callbacks_convergence(diff, ord): cb = pm.variational.callbacks.CheckParametersConvergence(every=1, diff=diff, ord=ord) class _approx: - params = (theano.shared(np.asarray([1, 2, 3])),) + params = (aesara.shared(np.asarray([1, 2, 3])),) approx = _approx() @@ -186,7 +186,7 @@ def test_sample_simple(three_var_approx, request): @pytest.fixture def aevb_initial(): - return theano.shared(np.random.rand(3, 7).astype("float32")) + return aesara.shared(np.random.rand(3, 7).astype("float32")) @pytest.fixture( @@ -251,7 +251,7 @@ def test_sample_aevb(three_var_aevb_approx, aevb_initial): def test_replacements_in_sample_node_aevb(three_var_aevb_approx, aevb_initial): - inp = tt.matrix(dtype="float32") + inp = aet.matrix(dtype="float32") three_var_aevb_approx.sample_node( three_var_aevb_approx.model.one, 2, more_replacements={aevb_initial: inp} ).eval({inp: np.random.rand(7, 7).astype("float32")}) @@ -265,14 +265,14 @@ def test_vae(): minibatch_size = 10 data = pm.floatX(np.random.rand(100)) x_mini = pm.Minibatch(data, minibatch_size) - x_inp = tt.vector() + x_inp = aet.vector() x_inp.tag.test_value = data[:minibatch_size] - ae = theano.shared(pm.floatX([0.1, 0.1])) - be = theano.shared(pm.floatX(1.0)) + ae = aesara.shared(pm.floatX([0.1, 0.1])) + 
be = aesara.shared(pm.floatX(1.0)) - ad = theano.shared(pm.floatX(1.0)) - bd = theano.shared(pm.floatX(1.0)) + ad = aesara.shared(pm.floatX(1.0)) + bd = aesara.shared(pm.floatX(1.0)) enc = x_inp.dimshuffle(0, "x") * ae.dimshuffle("x", 0) + be mu, rho = enc[:, 0], enc[:, 1] @@ -496,8 +496,8 @@ def test_elbo(): sigma = 1.0 y_obs = np.array([1.6, 1.4]) - post_mu = np.array([1.88], dtype=theano.config.floatX) - post_sigma = np.array([1], dtype=theano.config.floatX) + post_mu = np.array([1.88], dtype=aesara.config.floatX) + post_sigma = np.array([1], dtype=aesara.config.floatX) # Create a model for test with pm.Model() as model: mu = pm.Normal("mu", mu=mu0, sigma=sigma) @@ -505,13 +505,13 @@ def test_elbo(): # Create variational gradient tensor mean_field = MeanField(model=model) - with theano.config.change_flags(compute_test_value="off"): + with aesara.config.change_flags(compute_test_value="off"): elbo = -pm.operators.KL(mean_field)()(10000) mean_field.shared_params["mu"].set_value(post_mu) mean_field.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1)) - f = theano.function([], elbo) + f = aesara.function([], elbo) elbo_mc = f() # Exact value @@ -534,17 +534,17 @@ def test_scale_cost_to_minibatch_works(aux_total_size): y_obs = np.array([1.6, 1.4]) beta = len(y_obs) / float(aux_total_size) - # TODO: theano_config - # with pm.Model(theano_config=dict(floatX='float64')): + # TODO: aesara_config + # with pm.Model(aesara_config=dict(floatX='float64')): # did not not work as expected # there were some numeric problems, so float64 is forced - with theano.config.change_flags(floatX="float64", warn_float64="ignore"): + with aesara.config.change_flags(floatX="float64", warn_float64="ignore"): - assert theano.config.floatX == "float64" - assert theano.config.warn_float64 == "ignore" + assert aesara.config.floatX == "float64" + assert aesara.config.warn_float64 == "ignore" - post_mu = np.array([1.88], dtype=theano.config.floatX) - post_sigma = np.array([1], dtype=theano.config.floatX) + post_mu = np.array([1.88], dtype=aesara.config.floatX) + post_sigma = np.array([1], dtype=aesara.config.floatX) with pm.Model(): mu = pm.Normal("mu", mu=mu0, sigma=sigma) @@ -555,7 +555,7 @@ def test_scale_cost_to_minibatch_works(aux_total_size): mean_field_1.shared_params["mu"].set_value(post_mu) mean_field_1.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1)) - with theano.config.change_flags(compute_test_value="off"): + with aesara.config.change_flags(compute_test_value="off"): elbo_via_total_size_scaled = -pm.operators.KL(mean_field_1)()(10000) with pm.Model(): @@ -569,7 +569,7 @@ def test_scale_cost_to_minibatch_works(aux_total_size): mean_field_2.shared_params["mu"].set_value(post_mu) mean_field_2.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1)) - with theano.config.change_flags(compute_test_value="off"): + with aesara.config.change_flags(compute_test_value="off"): elbo_via_total_size_unscaled = -pm.operators.KL(mean_field_2)()(10000) np.testing.assert_allclose( @@ -587,10 +587,10 @@ def test_elbo_beta_kl(aux_total_size): y_obs = np.array([1.6, 1.4]) beta = len(y_obs) / float(aux_total_size) - with theano.config.change_flags(floatX="float64", warn_float64="ignore"): + with aesara.config.change_flags(floatX="float64", warn_float64="ignore"): - post_mu = np.array([1.88], dtype=theano.config.floatX) - post_sigma = np.array([1], dtype=theano.config.floatX) + post_mu = np.array([1.88], dtype=aesara.config.floatX) + post_sigma = np.array([1], dtype=aesara.config.floatX) with pm.Model(): 
mu = pm.Normal("mu", mu=mu0, sigma=sigma) @@ -601,7 +601,7 @@ def test_elbo_beta_kl(aux_total_size): mean_field_1.shared_params["mu"].set_value(post_mu) mean_field_1.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1)) - with theano.config.change_flags(compute_test_value="off"): + with aesara.config.change_flags(compute_test_value="off"): elbo_via_total_size_scaled = -pm.operators.KL(mean_field_1)()(10000) with pm.Model(): @@ -612,7 +612,7 @@ def test_elbo_beta_kl(aux_total_size): mean_field_3.shared_params["mu"].set_value(post_mu) mean_field_3.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1)) - with theano.config.change_flags(compute_test_value="off"): + with aesara.config.change_flags(compute_test_value="off"): elbo_via_beta_kl = -pm.operators.KL(mean_field_3, beta=beta)()(10000) np.testing.assert_allclose( @@ -750,7 +750,7 @@ def test_remove_scan_op(): inference = ADVI() buff = io.StringIO() inference.run_profiling(n=10).summary(buff) - assert "theano.scan.op.Scan" not in buff.getvalue() + assert "aesara.scan.op.Scan" not in buff.getvalue() buff.close() @@ -780,7 +780,7 @@ def test_clear_cache(): def another_simple_model(): _model = models.simple_model()[1] with _model: - pm.Potential("pot", tt.ones((10, 10))) + pm.Potential("pot", aet.ones((10, 10))) return _model @@ -831,8 +831,8 @@ def aevb_model(): pm.Normal("y", shape=(2,)) x = model.x y = model.y - mu = theano.shared(x.init_value) - rho = theano.shared(np.zeros_like(x.init_value)) + mu = aesara.shared(x.init_value) + rho = aesara.shared(np.zeros_like(x.init_value)) return {"model": model, "y": y, "x": x, "replace": dict(mu=mu, rho=rho)} @@ -911,13 +911,13 @@ def binomial_model_inference(binomial_model, inference_spec): def test_replacements(binomial_model_inference): - d = tt.bscalar() + d = aet.bscalar() d.tag.test_value = 1 approx = binomial_model_inference.approx p = approx.model.p p_t = p ** 3 p_s = approx.sample_node(p_t) - if theano.config.compute_test_value != "off": + if aesara.config.compute_test_value != "off": assert p_s.tag.test_value.shape == p_t.tag.test_value.shape sampled = [p_s.eval() for _ in range(100)] assert any(map(operator.ne, sampled[1:], sampled[:-1])) # stochastic @@ -934,13 +934,13 @@ def test_replacements(binomial_model_inference): def test_sample_replacements(binomial_model_inference): - i = tt.iscalar() + i = aet.iscalar() i.tag.test_value = 1 approx = binomial_model_inference.approx p = approx.model.p p_t = p ** 3 p_s = approx.sample_node(p_t, size=100) - if theano.config.compute_test_value != "off": + if aesara.config.compute_test_value != "off": assert p_s.tag.test_value.shape == (100,) + p_t.tag.test_value.shape sampled = p_s.eval() assert any(map(operator.ne, sampled[1:], sampled[:-1])) # stochastic @@ -961,7 +961,7 @@ def test_discrete_not_allowed(): with pm.Model(): mu = pm.Normal("mu", mu=0, sigma=10, shape=3) - z = pm.Categorical("z", p=tt.ones(3) / 3, shape=len(y)) + z = pm.Categorical("z", p=aet.ones(3) / 3, shape=len(y)) pm.Normal("y_obs", mu=mu[z], sigma=1.0, observed=y) with pytest.raises(opvi.ParametrizationError): pm.fit(n=1) # fails @@ -1016,34 +1016,34 @@ def init_(**kw): def test_flow_det(flow_spec): - z0 = tt.arange(0, 20).astype("float32") + z0 = aet.arange(0, 20).astype("float32") flow = flow_spec(dim=20, z0=z0.dimshuffle("x", 0)) - with theano.config.change_flags(compute_test_value="off"): + with aesara.config.change_flags(compute_test_value="off"): z1 = flow.forward.flatten() - J = tt.jacobian(z1, z0) - logJdet = tt.log(tt.abs_(tt.nlinalg.det(J))) + J = 
aet.jacobian(z1, z0) + logJdet = aet.log(aet.abs_(aet.nlinalg.det(J))) det = flow.logdet[0] np.testing.assert_allclose(logJdet.eval(), det.eval(), atol=0.0001) def test_flow_det_local(flow_spec): - z0 = tt.arange(0, 12).astype("float32") + z0 = aet.arange(0, 12).astype("float32") spec = flow_spec.cls.get_param_spec_for(d=12) params = dict() for k, shp in spec.items(): params[k] = np.random.randn(1, *shp).astype("float32") flow = flow_spec(dim=12, z0=z0.reshape((1, 1, 12)), **params) assert flow.batched - with theano.config.change_flags(compute_test_value="off"): + with aesara.config.change_flags(compute_test_value="off"): z1 = flow.forward.flatten() - J = tt.jacobian(z1, z0) - logJdet = tt.log(tt.abs_(tt.nlinalg.det(J))) + J = aet.jacobian(z1, z0) + logJdet = aet.log(aet.abs_(aet.nlinalg.det(J))) det = flow.logdet[0] np.testing.assert_allclose(logJdet.eval(), det.eval(), atol=0.0001) def test_flows_collect_chain(): - initial = tt.ones((3, 2)) + initial = aet.ones((3, 2)) flow1 = flows.PlanarFlow(dim=2, z0=initial) flow2 = flows.PlanarFlow(dim=2, z0=flow1) assert len(flow2.params) == 3 @@ -1067,4 +1067,4 @@ def test_flow_formula(formula, length, order): assert len(flows_list) == length if order is not None: assert flows_list == order - spec(dim=2, jitter=1)(tt.ones((3, 2))).eval() # should work + spec(dim=2, jitter=1)(aet.ones((3, 2))).eval() # should work diff --git a/pymc3/tuning/scaling.py b/pymc3/tuning/scaling.py index 49a59ff0d74..41d2af28203 100644 --- a/pymc3/tuning/scaling.py +++ b/pymc3/tuning/scaling.py @@ -16,9 +16,9 @@ from numpy import exp, log, sqrt +from pymc3.aesaraf import hessian_diag, inputvars from pymc3.blocking import ArrayOrdering, DictToArrayBijection from pymc3.model import Point, modelcontext -from pymc3.theanof import hessian_diag, inputvars from pymc3.util import get_var_name __all__ = ["find_hessian", "trace_cov", "guess_scaling"] diff --git a/pymc3/tuning/starting.py b/pymc3/tuning/starting.py index 2a800b2b4dd..fcdd4fe8c4d 100644 --- a/pymc3/tuning/starting.py +++ b/pymc3/tuning/starting.py @@ -19,8 +19,8 @@ """ import copy +import aesara.gradient as tg import numpy as np -import theano.gradient as tg from fastprogress.fastprogress import ProgressBar, progress_bar from numpy import isfinite, nan_to_num @@ -28,9 +28,9 @@ import pymc3 as pm +from pymc3.aesaraf import inputvars from pymc3.blocking import ArrayOrdering, DictToArrayBijection from pymc3.model import Point, modelcontext -from pymc3.theanof import inputvars from pymc3.util import ( check_start_vals, get_default_varnames, diff --git a/pymc3/util.py b/pymc3/util.py index 84b4f6c3e5f..f0429901f8e 100644 --- a/pymc3/util.py +++ b/pymc3/util.py @@ -22,7 +22,7 @@ import numpy as np import xarray -from theano.tensor import TensorVariable +from aesara.tensor.var import TensorVariable from pymc3.exceptions import SamplingError @@ -169,7 +169,7 @@ def get_repr_for_variable(variable, formatting="plain"): def get_var_name(var): """Get an appropriate, plain variable name for a variable. Necessary - because we override theano.tensor.TensorVariable.__str__ to give informative + because we override aesara.tensor.var.TensorVariable.__str__ to give informative string representations to our pymc3.PyMC3Variables, yet we want to use the plain name as e.g. keys in dicts. 
""" diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py index 896f7422c3d..4b6784f2efd 100644 --- a/pymc3/variational/approximations.py +++ b/pymc3/variational/approximations.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import aesara import numpy as np -import theano -from theano import tensor as tt +from aesara import tensor as aet +from aesara.graph.basic import Variable +from aesara.tensor.var import TensorVariable import pymc3 as pm @@ -53,13 +55,13 @@ def cov(self): if self.batched: return batched_diag(var) else: - return tt.diag(var) + return aet.diag(var) @node_property def std(self): return rho2sigma(self.rho) - @theano.config.change_flags(compute_test_value="off") + @aesara.config.change_flags(compute_test_value="off") def __init_group__(self, group): super().__init_group__(group) if not self._check_user_params(): @@ -82,8 +84,8 @@ def create_shared_params(self, start=None): start = np.tile(start, (self.bdim, 1)) rho = np.tile(rho, (self.bdim, 1)) return { - "mu": theano.shared(pm.floatX(start), "mu"), - "rho": theano.shared(pm.floatX(rho), "rho"), + "mu": aesara.shared(pm.floatX(start), "mu"), + "rho": aesara.shared(pm.floatX(rho), "rho"), } @node_property @@ -97,7 +99,7 @@ def symbolic_random(self): def symbolic_logq_not_scaled(self): z0 = self.symbolic_initial std = rho2sigma(self.rho) - logdet = tt.log(std) + logdet = aet.log(std) logq = pm.Normal.dist().logp(z0) - logdet return logq.sum(range(1, logq.ndim)) @@ -114,7 +116,7 @@ class FullRankGroup(Group): short_name = "full_rank" alias_names = frozenset(["fr"]) - @theano.config.change_flags(compute_test_value="off") + @aesara.config.change_flags(compute_test_value="off") def __init_group__(self, group): super().__init_group__(group) if not self._check_user_params(): @@ -133,21 +135,21 @@ def create_shared_params(self, start=None): else: start = self.bij.map(start) n = self.ddim - L_tril = np.eye(n)[np.tril_indices(n)].astype(theano.config.floatX) + L_tril = np.eye(n)[np.tril_indices(n)].astype(aesara.config.floatX) if self.batched: start = np.tile(start, (self.bdim, 1)) L_tril = np.tile(L_tril, (self.bdim, 1)) - return {"mu": theano.shared(start, "mu"), "L_tril": theano.shared(L_tril, "L_tril")} + return {"mu": aesara.shared(start, "mu"), "L_tril": aesara.shared(L_tril, "L_tril")} @node_property def L(self): if self.batched: - L = tt.zeros((self.ddim, self.ddim, self.bdim)) - L = tt.set_subtensor(L[self.tril_indices], self.params_dict["L_tril"].T) + L = aet.zeros((self.ddim, self.ddim, self.bdim)) + L = aet.set_subtensor(L[self.tril_indices], self.params_dict["L_tril"].T) L = L.dimshuffle(2, 0, 1) else: - L = tt.zeros((self.ddim, self.ddim)) - L = tt.set_subtensor(L[self.tril_indices], self.params_dict["L_tril"]) + L = aet.zeros((self.ddim, self.ddim)) + L = aet.set_subtensor(L[self.tril_indices], self.params_dict["L_tril"]) return L @node_property @@ -158,16 +160,16 @@ def mean(self): def cov(self): L = self.L if self.batched: - return tt.batched_dot(L, L.swapaxes(-1, -2)) + return aet.batched_dot(L, L.swapaxes(-1, -2)) else: return L.dot(L.T) @node_property def std(self): if self.batched: - return tt.sqrt(batched_diag(self.cov)) + return aet.sqrt(batched_diag(self.cov)) else: - return tt.sqrt(tt.diag(self.cov)) + return aet.sqrt(aet.diag(self.cov)) @property def num_tril_entries(self): @@ -189,7 +191,7 @@ def logq(z_b, mu_b, L_b): # it's gonna be so slow # scan is computed over batch and then summed up # 
output shape is (batch, samples) - return theano.scan(logq, [z.swapaxes(0, 1), self.mean, self.L])[0].sum(0) + return aesara.scan(logq, [z.swapaxes(0, 1), self.mean, self.L])[0].sum(0) else: return pm.MvNormal.dist(mu=self.mean, chol=self.L).logp(z) @@ -202,7 +204,7 @@ def symbolic_random(self): # initial: bxsxd # L: bxdxd initial = initial.swapaxes(0, 1) - return tt.batched_dot(initial, L.swapaxes(1, 2)).swapaxes(0, 1) + mu + return aet.batched_dot(initial, L.swapaxes(1, 2)).swapaxes(0, 1) + mu else: return initial.dot(L.T) + mu @@ -218,7 +220,7 @@ class EmpiricalGroup(Group): __param_spec__ = dict(histogram=("s", "d")) short_name = "empirical" - @theano.config.change_flags(compute_test_value="off") + @aesara.config.change_flags(compute_test_value="off") def __init_group__(self, group): super().__init_group__(group) self._check_trace() @@ -254,7 +256,7 @@ def create_shared_params(self, trace=None, size=None, jitter=1, start=None): for j in range(len(trace)): histogram[i] = self.bij.map(trace.point(j, t)) i += 1 - return dict(histogram=theano.shared(pm.floatX(histogram), "histogram")) + return dict(histogram=aesara.shared(pm.floatX(histogram), "histogram")) def _check_trace(self): trace = self._kwargs.get("trace", None) @@ -264,7 +266,7 @@ def _check_trace(self): def randidx(self, size=None): if size is None: size = (1,) - elif isinstance(size, tt.TensorVariable): + elif isinstance(size, TensorVariable): if size.ndim < 1: size = size[None] elif size.ndim > 1: @@ -278,16 +280,16 @@ def randidx(self, size=None): ).astype("int32") def _new_initial(self, size, deterministic, more_replacements=None): - theano_condition_is_here = isinstance(deterministic, tt.Variable) - if theano_condition_is_here: - return tt.switch( + aesara_condition_is_here = isinstance(deterministic, Variable) + if aesara_condition_is_here: + return aet.switch( deterministic, - tt.repeat(self.mean.dimshuffle("x", 0), size if size is not None else 1, -1), + aet.repeat(self.mean.dimshuffle("x", 0), size if size is not None else 1, -1), self.histogram[self.randidx(size)], ) else: if deterministic: - return tt.repeat(self.mean.dimshuffle("x", 0), size if size is not None else 1, -1) + return aet.repeat(self.mean.dimshuffle("x", 0), size if size is not None else 1, -1) else: return self.histogram[self.randidx(size)] @@ -310,10 +312,10 @@ def cov(self): @node_property def std(self): - return tt.sqrt(tt.diag(self.cov)) + return aet.sqrt(aet.diag(self.cov)) def __str__(self): - if isinstance(self.histogram, theano.compile.SharedVariable): + if isinstance(self.histogram, aesara.compile.SharedVariable): shp = ", ".join(map(str, self.histogram.shape.eval())) else: shp = "None, " + str(self.ddim) @@ -370,7 +372,7 @@ class NormalizingFlowGroup(Group): """ default_flow = "scale-loc" - @theano.config.change_flags(compute_test_value="off") + @aesara.config.change_flags(compute_test_value="off") def __init_group__(self, group): super().__init_group__(group) # objects to be resolved @@ -584,7 +586,7 @@ def evaluate_over_trace(self, node): Parameters ---------- - node: Theano Variables (or Theano expressions) + node: Aesara Variables (or Aesara expressions) Returns ------- @@ -593,9 +595,9 @@ def evaluate_over_trace(self, node): node = self.to_flat_input(node) def sample(post): - return theano.clone(node, {self.input: post}) + return aesara.clone_replace(node, {self.input: post}) - nodes, _ = theano.scan(sample, self.histogram) + nodes, _ = aesara.scan(sample, self.histogram) return nodes diff --git a/pymc3/variational/flows.py 
b/pymc3/variational/flows.py index 601c7351fa7..f78c32e69bb 100644 --- a/pymc3/variational/flows.py +++ b/pymc3/variational/flows.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import aesara import numpy as np -import theano -from theano import tensor as tt +from aesara import tensor as aet from pymc3.distributions.dist_math import rho2sigma from pymc3.memoize import WithMemoization @@ -161,14 +161,14 @@ def __init__(self, z0=None, dim=None, jitter=0.001, batch_size=None, local=False "Cannot infer dimension of flow, " "please provide dim or Flow instance as z0" ) if z0 is None: - self.z0 = tt.matrix() # type: tt.TensorVariable + self.z0 = aet.matrix() # type: TensorVariable else: - self.z0 = tt.as_tensor(z0) + self.z0 = aet.as_tensor(z0) self.parent = parent def add_param(self, user=None, name=None, ref=0.0, dtype="floatX"): if dtype == "floatX": - dtype = theano.config.floatX + dtype = aesara.config.floatX spec = self.__param_spec__[name] shape = tuple(eval(s, {"d": self.dim}) for s in spec) if user is None: @@ -178,7 +178,7 @@ def add_param(self, user=None, name=None, ref=0.0, dtype="floatX"): if self.batch_size is None: raise opvi.BatchedGroupError("Need batch size to infer parameter shape") shape = (self.batch_size,) + shape - return theano.shared( + return aesara.shared( np.asarray(np.random.normal(size=shape) * self.__jitter + ref).astype(dtype), name=name, ) @@ -189,7 +189,7 @@ def add_param(self, user=None, name=None, ref=0.0, dtype="floatX"): shape = (-1,) + shape else: shape = (self.batch_size,) + shape - return tt.as_tensor(user).reshape(shape) + return aet.as_tensor(user).reshape(shape) @property def params(self): @@ -205,14 +205,14 @@ def all_params(self): return params @property - @theano.config.change_flags(compute_test_value="off") + @aesara.config.change_flags(compute_test_value="off") def sum_logdets(self): dets = [self.logdet] current = self while not current.isroot: current = current.parent dets.append(current.logdet) - return tt.add(*dets) + return aet.add(*dets) @node_property def forward(self): @@ -222,9 +222,9 @@ def forward(self): def logdet(self): raise NotImplementedError - @theano.config.change_flags(compute_test_value="off") + @aesara.config.change_flags(compute_test_value="off") def forward_pass(self, z0): - ret = theano.clone(self.forward, {self.root.z0: z0}) + ret = aesara.clone_replace(self.forward, {self.root.z0: z0}) try: ret.tag.test_value = np.random.normal(size=z0.tag.test_value.shape).astype( self.z0.dtype @@ -297,7 +297,7 @@ def __call__(self, *args): class LinearFlow(AbstractFlow): __param_spec__ = dict(u=("d",), w=("d",), b=()) - @theano.config.change_flags(compute_test_value="off") + @aesara.config.change_flags(compute_test_value="off") def __init__(self, h, u=None, w=None, b=None, **kwargs): self.h = h super().__init__(**kwargs) @@ -325,7 +325,7 @@ def forward(self): if not self.batched: hwz = h(z.dot(w) + b) # s # sxd + (s \outer d) = sxd - z1 = z + tt.outer(hwz, u) # sxd + z1 = z + aet.outer(hwz, u) # sxd return z1 else: z = z.swapaxes(0, 1) @@ -334,7 +334,7 @@ def forward(self): # w bxd b = b.dimshuffle(0, "x") # b bx- - hwz = h(tt.batched_dot(z, w) + b) # bxs + hwz = h(aet.batched_dot(z, w) + b) # bxs # bxsxd + (bxsx- * bx-xd) = bxsxd hwz = hwz.dimshuffle(0, 1, "x") # bxsx- u = u.dimshuffle(0, "x", 1) # bx-xd @@ -352,8 +352,8 @@ def logdet(self): # f'(sxd \dot d + .) * -xd = sxd phi = deriv(z.dot(w) + b).dimshuffle(0, "x") * w.dimshuffle("x", 0) # \abs(. 
+ sxd \dot d) = s - det = tt.abs_(1.0 + phi.dot(u)) - return tt.log(det) + det = aet.abs_(1.0 + phi.dot(u)) + return aet.log(det) else: z = z.swapaxes(0, 1) b = b.dimshuffle(0, "x") @@ -362,20 +362,20 @@ def logdet(self): # w bxd # b bx-x- # f'(bxsxd \bdot bxd + bx-x-) * bx-xd = bxsxd - phi = deriv(tt.batched_dot(z, w) + b).dimshuffle(0, 1, "x") * w.dimshuffle(0, "x", 1) + phi = deriv(aet.batched_dot(z, w) + b).dimshuffle(0, 1, "x") * w.dimshuffle(0, "x", 1) # \abs(. + bxsxd \bdot bxd) = bxs - det = tt.abs_(1.0 + tt.batched_dot(phi, u)) # bxs - return tt.log(det).sum(0) # s + det = aet.abs_(1.0 + aet.batched_dot(phi, u)) # bxs + return aet.log(det).sum(0) # s class Tanh(FlowFn): - fn = tt.tanh - inv = tt.arctanh + fn = aet.tanh + inv = aet.arctanh @staticmethod def deriv(*args): (x,) = args - return 1.0 - tt.tanh(x) ** 2 + return 1.0 - aet.tanh(x) ** 2 @AbstractFlow.register @@ -390,7 +390,7 @@ def make_uw(self, u, w): # u_: d # w_: d wu = u.dot(w) # . - mwu = -1.0 + tt.nnet.softplus(wu) # . + mwu = -1.0 + aet.nnet.softplus(wu) # . # d + (. - .) * d / . u_h = u + (mwu - wu) * w / ((w ** 2).sum() + 1e-10) return u_h, w @@ -398,7 +398,7 @@ def make_uw(self, u, w): # u_: bxd # w_: bxd wu = (u * w).sum(-1, keepdims=True) # bx- - mwu = -1.0 + tt.nnet.softplus(wu) # bx- + mwu = -1.0 + aet.nnet.softplus(wu) # bx- # bxd + (bx- - bx-) * bxd / bx- = bxd u_h = u + (mwu - wu) * w / ((w ** 2).sum(-1, keepdims=True) + 1e-10) return u_h, w @@ -407,7 +407,7 @@ def make_uw(self, u, w): class ReferencePointFlow(AbstractFlow): __param_spec__ = dict(a=(), b=(), z_ref=("d",)) - @theano.config.change_flags(compute_test_value="off") + @aesara.config.change_flags(compute_test_value="off") def __init__(self, h, a=None, b=None, z_ref=None, **kwargs): super().__init__(**kwargs) a = self.add_param(a, "a") @@ -474,7 +474,7 @@ def logdet(self): r = (z - z_ref).norm(2, axis=-1, keepdims=True) # s har = h(a, r) dar = deriv(a, r) - logdet = tt.log((1.0 + b * har) ** (d - 1.0) * (1.0 + b * har + b * dar * r)) + logdet = aet.log((1.0 + b * har) ** (d - 1.0) * (1.0 + b * har + b * dar * r)) if self.batched: return logdet.sum([0, -1]) else: @@ -506,8 +506,8 @@ def __init__(self, **kwargs): super().__init__(Radial(), **kwargs) def make_ab(self, a, b): - a = tt.exp(a) - b = -a + tt.nnet.softplus(b) + a = aet.exp(a) + b = -a + aet.nnet.softplus(b) return a, b @@ -531,7 +531,7 @@ def forward(self): @node_property def logdet(self): - return tt.zeros((self.z0.shape[0],)) + return aet.zeros((self.z0.shape[0],)) @AbstractFlow.register @@ -539,7 +539,7 @@ class ScaleFlow(AbstractFlow): __param_spec__ = dict(rho=("d",)) short_name = "scale" - @theano.config.change_flags(compute_test_value="off") + @aesara.config.change_flags(compute_test_value="off") def __init__(self, rho=None, **kwargs): super().__init__(**kwargs) rho = self.add_param(rho, "rho") @@ -556,7 +556,7 @@ def forward(self): @node_property def logdet(self): - return tt.repeat(tt.sum(tt.log(self.scale)), self.z0.shape[0]) + return aet.repeat(aet.sum(aet.log(self.scale)), self.z0.shape[0]) @AbstractFlow.register @@ -564,18 +564,18 @@ class HouseholderFlow(AbstractFlow): __param_spec__ = dict(v=("d",)) short_name = "hh" - @theano.config.change_flags(compute_test_value="raise") + @aesara.config.change_flags(compute_test_value="raise") def __init__(self, v=None, **kwargs): super().__init__(**kwargs) v = self.add_param(v, "v") self.shared_params = dict(v=v) if self.batched: vv = v.dimshuffle(0, 1, "x") * v.dimshuffle(0, "x", 1) - I = tt.eye(self.dim).dimshuffle("x", 0, 1) + I = 
aet.eye(self.dim).dimshuffle("x", 0, 1) vvn = (1e-10 + (v ** 2).sum(-1)).dimshuffle(0, "x", "x") else: - vv = tt.outer(v, v) - I = tt.eye(self.dim) + vv = aet.outer(v, v) + I = aet.eye(self.dim) vvn = (v ** 2).sum(-1) + 1e-10 self.H = I - 2.0 * vv / vvn @@ -584,10 +584,10 @@ def forward(self): z = self.z0 # sxd H = self.H # dxd if self.batched: - return tt.batched_dot(z.swapaxes(0, 1), H).swapaxes(0, 1) + return aet.batched_dot(z.swapaxes(0, 1), H).swapaxes(0, 1) else: return z.dot(H) @node_property def logdet(self): - return tt.zeros((self.z0.shape[0],)) + return aet.zeros((self.z0.shape[0],)) diff --git a/pymc3/variational/inference.py b/pymc3/variational/inference.py index 85eb08e65c0..1b77104c60a 100644 --- a/pymc3/variational/inference.py +++ b/pymc3/variational/inference.py @@ -130,7 +130,7 @@ def fit(self, n=10000, score=None, callbacks=None, progressbar=True, **kwargs): total_grad_norm_constraint: `float` Bounds gradient norm, prevents exploding gradient problem fn_kwargs: `dict` - Add kwargs to theano.function (e.g. `{'profile': True}`) + Add kwargs to aesara.function (e.g. `{'profile': True}`) more_replacements: `dict` Apply custom replacements before calculating gradients @@ -423,7 +423,7 @@ class ADVI(KLqp): The tensors to which mini-bathced samples are supplied are handled separately by using callbacks in :func:`Inference.fit` method - that change storage of shared theano variable or by :func:`pymc3.generator` + that change storage of shared aesara variable or by :func:`pymc3.generator` that automatically iterates over minibatches and defined beforehand. - (optional) Parameters of deterministic mappings @@ -794,7 +794,7 @@ def fit( total_grad_norm_constraint: `float` Bounds gradient norm, prevents exploding gradient problem fn_kwargs: `dict` - Add kwargs to theano.function (e.g. `{'profile': True}`) + Add kwargs to aesara.function (e.g. `{'profile': True}`) more_replacements: `dict` Apply custom replacements before calculating gradients diff --git a/pymc3/variational/operators.py b/pymc3/variational/operators.py index 9a5c2fdc200..e69d9c447e4 100644 --- a/pymc3/variational/operators.py +++ b/pymc3/variational/operators.py @@ -11,9 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import theano +import aesara -from theano import tensor as tt +from aesara import tensor as aet import pymc3 as pm @@ -75,7 +75,7 @@ def __init__(self, op, tf): raise opvi.ParametrizationError("Op should be KSD") ObjectiveFunction.__init__(self, op, tf) - @theano.config.change_flags(compute_test_value="off") + @aesara.config.change_flags(compute_test_value="off") def __call__(self, nmc, **kwargs): op = self.op # type: KSD grad = op.apply(self.tf) @@ -88,7 +88,7 @@ def __call__(self, nmc, **kwargs): else: params = self.test_params + kwargs["more_tf_params"] grad *= pm.floatX(-1) - grads = tt.grad(None, params, known_grads={z: grad}) + grads = aet.grad(None, params, known_grads={z: grad}) return self.approx.set_size_and_deterministic( grads, nmc, 0, kwargs.get("more_replacements") ) diff --git a/pymc3/variational/opvi.py b/pymc3/variational/opvi.py index ebf4a9cda84..115c0abcaef 100644 --- a/pymc3/variational/opvi.py +++ b/pymc3/variational/opvi.py @@ -49,17 +49,19 @@ import itertools import warnings +import aesara +import aesara.tensor as aet import numpy as np -import theano -import theano.tensor as tt + +from aesara.graph.basic import Variable import pymc3 as pm +from pymc3.aesaraf import aet_rng, identity from pymc3.backends import NDArray from pymc3.blocking import ArrayOrdering, DictToArrayBijection, VarMap from pymc3.memoize import WithMemoization, memoize from pymc3.model import modelcontext -from pymc3.theanof import identity, tt_rng from pymc3.util import get_default_varnames, get_transformed from pymc3.variational.updates import adagrad_window @@ -116,7 +118,7 @@ def node_property(f): def wrapper(fn): return property( memoize( - theano.config.change_flags(compute_test_value="off")(append_name(f)(fn)), + aesara.config.change_flags(compute_test_value="off")(append_name(f)(fn)), bound=True, ) ) @@ -124,16 +126,16 @@ def wrapper(fn): return wrapper else: return property( - memoize(theano.config.change_flags(compute_test_value="off")(f), bound=True) + memoize(aesara.config.change_flags(compute_test_value="off")(f), bound=True) ) -@theano.config.change_flags(compute_test_value="ignore") +@aesara.config.change_flags(compute_test_value="ignore") def try_to_set_test_value(node_in, node_out, s): _s = s if s is None: s = 1 - s = theano.compile.view_op(tt.as_tensor(s)) + s = aesara.compile.view_op(aet.as_tensor(s)) if not isinstance(node_in, (list, tuple)): node_in = [node_in] if not isinstance(node_out, (list, tuple)): @@ -150,7 +152,7 @@ def try_to_set_test_value(node_in, node_out, s): o.tag.test_value = tv -class ObjectiveUpdates(theano.OrderedUpdates): +class ObjectiveUpdates(aesara.OrderedUpdates): """OrderedUpdates extension for storing loss""" loss = None @@ -291,7 +293,7 @@ def add_obj_updates( if self.op.returns_loss: updates.loss = obj_target - @theano.config.change_flags(compute_test_value="off") + @aesara.config.change_flags(compute_test_value="off") def step_function( self, obj_n_mc=None, @@ -335,13 +337,13 @@ def step_function( score: `bool` calculate loss on each step? Defaults to False for speed fn_kwargs: `dict` - Add kwargs to theano.function (e.g. `{'profile': True}`) + Add kwargs to aesara.function (e.g. 
`{'profile': True}`) more_replacements: `dict` Apply custom replacements before calculating gradients Returns ------- - `theano.function` + `aesara.function` """ if fn_kwargs is None: fn_kwargs = {} @@ -359,12 +361,12 @@ def step_function( total_grad_norm_constraint=total_grad_norm_constraint, ) if score: - step_fn = theano.function([], updates.loss, updates=updates, **fn_kwargs) + step_fn = aesara.function([], updates.loss, updates=updates, **fn_kwargs) else: - step_fn = theano.function([], None, updates=updates, **fn_kwargs) + step_fn = aesara.function([], None, updates=updates, **fn_kwargs) return step_fn - @theano.config.change_flags(compute_test_value="off") + @aesara.config.change_flags(compute_test_value="off") def score_function( self, sc_n_mc=None, more_replacements=None, fn_kwargs=None ): # pragma: no cover @@ -377,11 +379,11 @@ def score_function( more_replacements: Apply custom replacements before compiling a function fn_kwargs: `dict` - arbitrary kwargs passed to `theano.function` + arbitrary kwargs passed to `aesara.function` Returns ------- - theano.function + aesara.function """ if fn_kwargs is None: fn_kwargs = {} @@ -390,9 +392,9 @@ def score_function( if more_replacements is None: more_replacements = {} loss = self(sc_n_mc, more_replacements=more_replacements) - return theano.function([], loss, **fn_kwargs) + return aesara.function([], loss, **fn_kwargs) - @theano.config.change_flags(compute_test_value="off") + @aesara.config.change_flags(compute_test_value="off") def __call__(self, nmc, **kwargs): if "more_tf_params" in kwargs: m = -1.0 @@ -504,7 +506,7 @@ def collect_shared_to_list(params): return list( t[1] for t in sorted(params.items(), key=lambda t: t[0]) - if isinstance(t[1], theano.compile.SharedVariable) + if isinstance(t[1], aesara.compile.SharedVariable) ) elif params is None: return [] @@ -842,7 +844,7 @@ def __init__( self._vfam = vfam self._local = local self._batched = rowwise - self._rng = tt_rng(random_seed) + self._rng = aet_rng(random_seed) model = modelcontext(model) self.model = model self.group = group @@ -895,7 +897,7 @@ def _check_user_params(self, **kwargs): shape = (-1,) + shape elif self.batched: shape = (self.bdim,) + shape - self._user_params[name] = tt.as_tensor(param).reshape(shape) + self._user_params[name] = aet.as_tensor(param).reshape(shape) return True def _initial_type(self, name): @@ -910,9 +912,9 @@ def _initial_type(self, name): tensor """ if self.batched: - return tt.tensor3(name) + return aet.tensor3(name) else: - return tt.matrix(name) + return aet.matrix(name) def _input_type(self, name): R"""*Dev* - input type with given name. 
The correct type depends on `self.batched` @@ -926,11 +928,11 @@ def _input_type(self, name): tensor """ if self.batched: - return tt.matrix(name) + return aet.matrix(name) else: - return tt.vector(name) + return aet.vector(name) - @theano.config.change_flags(compute_test_value="off") + @aesara.config.change_flags(compute_test_value="off") def __init_group__(self, group): if not group: raise GroupError("Got empty group") @@ -1020,11 +1022,11 @@ def _new_initial_shape(self, size, dim, more_replacements=None): shape vector """ if self.batched: - bdim = tt.as_tensor(self.bdim) - bdim = theano.clone(bdim, more_replacements) - return tt.stack([size, bdim, dim]) + bdim = aet.as_tensor(self.bdim) + bdim = aesara.clone_replace(bdim, more_replacements) + return aet.stack([size, bdim, dim]) else: - return tt.stack([size, dim]) + return aet.stack([size, dim]) @node_property def bdim(self): @@ -1071,22 +1073,22 @@ def _new_initial(self, size, deterministic, more_replacements=None): """ if size is None: size = 1 - if not isinstance(deterministic, tt.Variable): + if not isinstance(deterministic, Variable): deterministic = np.int8(deterministic) dim, dist_name, dist_map = (self.ddim, self.initial_dist_name, self.initial_dist_map) dtype = self.symbolic_initial.dtype - dim = tt.as_tensor(dim) - size = tt.as_tensor(size) + dim = aet.as_tensor(dim) + size = aet.as_tensor(size) shape = self._new_initial_shape(size, dim, more_replacements) # apply optimizations if possible - if not isinstance(deterministic, tt.Variable): + if not isinstance(deterministic, Variable): if deterministic: - return tt.ones(shape, dtype) * dist_map + return aet.ones(shape, dtype) * dist_map else: return getattr(self._rng, dist_name)(size=shape) else: sample = getattr(self._rng, dist_name)(size=shape) - initial = tt.switch(deterministic, tt.ones(shape, dtype) * dist_map, sample) + initial = aet.switch(deterministic, aet.ones(shape, dtype) * dist_map, sample) return initial @node_property @@ -1111,7 +1113,7 @@ def symbolic_random2d(self): else: return self.symbolic_random - @theano.config.change_flags(compute_test_value="off") + @aesara.config.change_flags(compute_test_value="off") def set_size_and_deterministic(self, node, s, d, more_replacements=None): """*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior` or :func:`symbolic_single_sample` new random generator can be allocated and applied to node @@ -1119,7 +1121,7 @@ def set_size_and_deterministic(self, node, s, d, more_replacements=None): Parameters ---------- node: :class:`Variable` - Theano node with symbolically applied VI replacements + Aesara node with symbolically applied VI replacements s: scalar desired number of samples d: bool or int @@ -1132,13 +1134,13 @@ def set_size_and_deterministic(self, node, s, d, more_replacements=None): :class:`Variable` with applied replacements, ready to use """ flat2rand = self.make_size_and_deterministic_replacements(s, d, more_replacements) - node_out = theano.clone(node, flat2rand) + node_out = aesara.clone_replace(node, flat2rand) try_to_set_test_value(node, node_out, s) return node_out def to_flat_input(self, node): """*Dev* - replace vars with flattened view stored in `self.inputs`""" - return theano.clone(node, self.replacements) + return aesara.clone_replace(node, self.replacements) def symbolic_sample_over_posterior(self, node): """*Dev* - performs sampling of node applying independent samples from posterior each time. 
@@ -1146,12 +1148,12 @@ def symbolic_sample_over_posterior(self, node): """ node = self.to_flat_input(node) random = self.symbolic_random.astype(self.symbolic_initial.dtype) - random = tt.patternbroadcast(random, self.symbolic_initial.broadcastable) + random = aet.patternbroadcast(random, self.symbolic_initial.broadcastable) def sample(post): - return theano.clone(node, {self.input: post}) + return aesara.clone_replace(node, {self.input: post}) - nodes, _ = theano.scan(sample, random) + nodes, _ = aesara.scan(sample, random) return nodes def symbolic_single_sample(self, node): @@ -1161,8 +1163,8 @@ def symbolic_single_sample(self, node): """ node = self.to_flat_input(node) random = self.symbolic_random.astype(self.symbolic_initial.dtype) - random = tt.patternbroadcast(random, self.symbolic_initial.broadcastable) - return theano.clone(node, {self.input: random[0]}) + random = aet.patternbroadcast(random, self.symbolic_initial.broadcastable) + return aesara.clone_replace(node, {self.input: random[0]}) def make_size_and_deterministic_replacements(self, s, d, more_replacements=None): """*Dev* - creates correct replacements for initial depending on @@ -1182,15 +1184,15 @@ def make_size_and_deterministic_replacements(self, s, d, more_replacements=None) dict with replacements for initial """ initial = self._new_initial(s, d, more_replacements) - initial = tt.patternbroadcast(initial, self.symbolic_initial.broadcastable) + initial = aet.patternbroadcast(initial, self.symbolic_initial.broadcastable) if more_replacements: - initial = theano.clone(initial, more_replacements) + initial = aesara.clone_replace(initial, more_replacements) return {self.symbolic_initial: initial} @node_property def symbolic_normalizing_constant(self): """*Dev* - normalizing constant for `self.logq`, scales it to `minibatch_size` instead of `total_size`""" - t = self.to_flat_input(tt.max([v.scaling for v in self.group])) + t = self.to_flat_input(aet.max([v.scaling for v in self.group])) t = self.symbolic_single_sample(t) return pm.floatX(t) @@ -1282,7 +1284,7 @@ class Approximation(WithMemoization): """ def __init__(self, groups, model=None): - self._scale_cost_to_minibatch = theano.shared(np.int8(1)) + self._scale_cost_to_minibatch = aesara.shared(np.int8(1)) model = modelcontext(model) if not model.free_RVs: raise TypeError("Model does not have FreeRVs") @@ -1341,22 +1343,22 @@ def symbolic_normalizing_constant(self): """*Dev* - normalizing constant for `self.logq`, scales it to `minibatch_size` instead of `total_size`. 
Here the effect is controlled by `self.scale_cost_to_minibatch` """ - t = tt.max( + t = aet.max( self.collect("symbolic_normalizing_constant") + [var.scaling for var in self.model.observed_RVs] ) - t = tt.switch(self._scale_cost_to_minibatch, t, tt.constant(1, dtype=t.dtype)) + t = aet.switch(self._scale_cost_to_minibatch, t, aet.constant(1, dtype=t.dtype)) return pm.floatX(t) @node_property def symbolic_logq(self): """*Dev* - collects `symbolic_logq` for all groups""" - return tt.add(*self.collect("symbolic_logq")) + return aet.add(*self.collect("symbolic_logq")) @node_property def logq(self): """*Dev* - collects `logQ` for all groups""" - return tt.add(*self.collect("logq")) + return aet.add(*self.collect("logq")) @node_property def logq_norm(self): @@ -1365,7 +1367,7 @@ def logq_norm(self): @node_property def _sized_symbolic_varlogp_and_datalogp(self): - """*Dev* - computes sampled prior term from model via `theano.scan`""" + """*Dev* - computes sampled prior term from model via `aesara.scan`""" varlogp_s, datalogp_s = self.symbolic_sample_over_posterior( [self.model.varlogpt, self.model.datalogpt] ) @@ -1373,55 +1375,55 @@ def _sized_symbolic_varlogp_and_datalogp(self): @node_property def sized_symbolic_varlogp(self): - """*Dev* - computes sampled prior term from model via `theano.scan`""" + """*Dev* - computes sampled prior term from model via `aesara.scan`""" return self._sized_symbolic_varlogp_and_datalogp[0] # shape (s,) @node_property def sized_symbolic_datalogp(self): - """*Dev* - computes sampled data term from model via `theano.scan`""" + """*Dev* - computes sampled data term from model via `aesara.scan`""" return self._sized_symbolic_varlogp_and_datalogp[1] # shape (s,) @node_property def sized_symbolic_logp(self): - """*Dev* - computes sampled logP from model via `theano.scan`""" + """*Dev* - computes sampled logP from model via `aesara.scan`""" return self.sized_symbolic_varlogp + self.sized_symbolic_datalogp # shape (s,) @node_property def logp(self): - """*Dev* - computes :math:`E_{q}(logP)` from model via `theano.scan` that can be optimized later""" + """*Dev* - computes :math:`E_{q}(logP)` from model via `aesara.scan` that can be optimized later""" return self.varlogp + self.datalogp @node_property def varlogp(self): - """*Dev* - computes :math:`E_{q}(prior term)` from model via `theano.scan` that can be optimized later""" + """*Dev* - computes :math:`E_{q}(prior term)` from model via `aesara.scan` that can be optimized later""" return self.sized_symbolic_varlogp.mean(0) @node_property def datalogp(self): - """*Dev* - computes :math:`E_{q}(data term)` from model via `theano.scan` that can be optimized later""" + """*Dev* - computes :math:`E_{q}(data term)` from model via `aesara.scan` that can be optimized later""" return self.sized_symbolic_datalogp.mean(0) @node_property def _single_symbolic_varlogp_and_datalogp(self): - """*Dev* - computes sampled prior term from model via `theano.scan`""" + """*Dev* - computes sampled prior term from model via `aesara.scan`""" varlogp, datalogp = self.symbolic_single_sample([self.model.varlogpt, self.model.datalogpt]) return varlogp, datalogp @node_property def single_symbolic_varlogp(self): - """*Dev* - for single MC sample estimate of :math:`E_{q}(prior term)` `theano.scan` + """*Dev* - for single MC sample estimate of :math:`E_{q}(prior term)` `aesara.scan` is not needed and code can be optimized""" return self._single_symbolic_varlogp_and_datalogp[0] @node_property def single_symbolic_datalogp(self): - """*Dev* - for single MC 
sample estimate of :math:`E_{q}(data term)` `theano.scan` + """*Dev* - for single MC sample estimate of :math:`E_{q}(data term)` `aesara.scan` is not needed and code can be optimized""" return self._single_symbolic_varlogp_and_datalogp[1] @node_property def single_symbolic_logp(self): - """*Dev* - for single MC sample estimate of :math:`E_{q}(logP)` `theano.scan` + """*Dev* - for single MC sample estimate of :math:`E_{q}(logP)` `aesara.scan` is not needed and code can be optimized""" return self.single_symbolic_datalogp + self.single_symbolic_varlogp @@ -1472,7 +1474,7 @@ def make_size_and_deterministic_replacements(self, s, d, more_replacements=None) flat2rand.update(more_replacements) return flat2rand - @theano.config.change_flags(compute_test_value="off") + @aesara.config.change_flags(compute_test_value="off") def set_size_and_deterministic(self, node, s, d, more_replacements=None): """*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior` or :func:`symbolic_single_sample` new random generator can be allocated and applied to node @@ -1480,7 +1482,7 @@ def set_size_and_deterministic(self, node, s, d, more_replacements=None): Parameters ---------- node: :class:`Variable` - Theano node with symbolically applied VI replacements + Aesara node with symbolically applied VI replacements s: scalar desired number of samples d: bool or int @@ -1495,14 +1497,14 @@ def set_size_and_deterministic(self, node, s, d, more_replacements=None): _node = node optimizations = self.get_optimization_replacements(s, d) flat2rand = self.make_size_and_deterministic_replacements(s, d, more_replacements) - node = theano.clone(node, optimizations) - node = theano.clone(node, flat2rand) + node = aesara.clone_replace(node, optimizations) + node = aesara.clone_replace(node, flat2rand) try_to_set_test_value(_node, node, s) return node def to_flat_input(self, node): """*Dev* - replace vars with flattened view stored in `self.inputs`""" - return theano.clone(node, self.replacements) + return aesara.clone_replace(node, self.replacements) def symbolic_sample_over_posterior(self, node): """*Dev* - performs sampling of node applying independent samples from posterior each time. @@ -1511,9 +1513,9 @@ def symbolic_sample_over_posterior(self, node): node = self.to_flat_input(node) def sample(*post): - return theano.clone(node, dict(zip(self.inputs, post))) + return aesara.clone_replace(node, dict(zip(self.inputs, post))) - nodes, _ = theano.scan(sample, self.symbolic_randoms) + nodes, _ = aesara.scan(sample, self.symbolic_randoms) return nodes def symbolic_single_sample(self, node): @@ -1524,11 +1526,11 @@ def symbolic_single_sample(self, node): node = self.to_flat_input(node) post = [v[0] for v in self.symbolic_randoms] inp = self.inputs - return theano.clone(node, dict(zip(inp, post))) + return aesara.clone_replace(node, dict(zip(inp, post))) def get_optimization_replacements(self, s, d): """*Dev* - optimizations for logP. If sample size is static and equal to 1: - then `theano.scan` MC estimate is replaced with single sample without call to `theano.scan`. + then `aesara.scan` MC estimate is replaced with single sample without call to `aesara.scan`. 
""" repl = collections.OrderedDict() # avoid scan if size is constant and equal to one @@ -1537,13 +1539,13 @@ def get_optimization_replacements(self, s, d): repl[self.datalogp] = self.single_symbolic_datalogp return repl - @theano.config.change_flags(compute_test_value="off") + @aesara.config.change_flags(compute_test_value="off") def sample_node(self, node, size=None, deterministic=False, more_replacements=None): """Samples given node or nodes over shared posterior Parameters ---------- - node: Theano Variables (or Theano expressions) + node: Aesara Variables (or Aesara expressions) size: None or scalar number of samples more_replacements: `dict` @@ -1557,7 +1559,7 @@ def sample_node(self, node, size=None, deterministic=False, more_replacements=No sampled node(s) with replacements """ node_in = node - node = theano.clone(node, more_replacements) + node = aesara.clone_replace(node, more_replacements) if size is None: node_out = self.symbolic_single_sample(node) else: @@ -1567,7 +1569,7 @@ def sample_node(self, node, size=None, deterministic=False, more_replacements=No return node_out def rslice(self, name): - """*Dev* - vectorized sampling for named random variable without call to `theano.scan`. + """*Dev* - vectorized sampling for named random variable without call to `aesara.scan`. This node still needs :func:`set_size_and_deterministic` to be evaluated """ @@ -1588,13 +1590,13 @@ def vars_names(vs): @property @memoize(bound=True) - @theano.config.change_flags(compute_test_value="off") + @aesara.config.change_flags(compute_test_value="off") def sample_dict_fn(self): - s = tt.iscalar() + s = aet.iscalar() names = [v.name for v in self.model.free_RVs] sampled = [self.rslice(name) for name in names] sampled = self.set_size_and_deterministic(sampled, s, 0) - sample_fn = theano.function([s], sampled) + sample_fn = aesara.function([s], sampled) def inner(draws=100): _samples = sample_fn(draws) @@ -1658,7 +1660,7 @@ def has_batched(self): @node_property def symbolic_random(self): - return tt.concatenate(self.collect("symbolic_random2d"), axis=-1) + return aet.concatenate(self.collect("symbolic_random2d"), axis=-1) def __str__(self): if len(self.groups) < 5: @@ -1679,7 +1681,7 @@ def any_histograms(self): def joint_histogram(self): if not self.all_histograms: raise VariationalInferenceError("%s does not consist of all Empirical approximations") - return tt.concatenate(self.collect("histogram"), axis=-1) + return aet.concatenate(self.collect("histogram"), axis=-1) @property def params(self): diff --git a/pymc3/variational/stein.py b/pymc3/variational/stein.py index ca9a9249106..79a7d78183c 100644 --- a/pymc3/variational/stein.py +++ b/pymc3/variational/stein.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import theano -import theano.tensor as tt +import aesara +import aesara.tensor as aet +from pymc3.aesaraf import floatX from pymc3.memoize import WithMemoization, memoize -from pymc3.theanof import floatX from pymc3.variational.opvi import node_property from pymc3.variational.test_functions import rbf @@ -46,12 +46,12 @@ def approx_symbolic_matrices(self): @node_property def dlogp(self): - grad = tt.grad(self.logp_norm.sum(), self.approx_symbolic_matrices) + grad = aet.grad(self.logp_norm.sum(), self.approx_symbolic_matrices) def flatten2(tensor): return tensor.flatten(2) - return tt.concatenate(list(map(flatten2, grad)), -1) + return aet.concatenate(list(map(flatten2, grad)), -1) @node_property def grad(self): @@ -64,7 +64,7 @@ def grad(self): def density_part_grad(self): Kxy = self.Kxy dlogpdx = self.dlogp - return tt.dot(Kxy, dlogpdx) + return aet.dot(Kxy, dlogpdx) @node_property def repulsive_part_grad(self): @@ -84,13 +84,13 @@ def dxkxy(self): def logp_norm(self): sized_symbolic_logp = self.approx.sized_symbolic_logp if self.use_histogram: - sized_symbolic_logp = theano.clone( + sized_symbolic_logp = aesara.clone_replace( sized_symbolic_logp, dict(zip(self.approx.symbolic_randoms, self.approx.collect("histogram"))), ) return sized_symbolic_logp / self.approx.symbolic_normalizing_constant @memoize - @theano.config.change_flags(compute_test_value="off") + @aesara.config.change_flags(compute_test_value="off") def _kernel(self): return self._kernel_f(self.input_joint_matrix) diff --git a/pymc3/variational/test_functions.py b/pymc3/variational/test_functions.py index 8f95abd4e18..3380ed27b85 100644 --- a/pymc3/variational/test_functions.py +++ b/pymc3/variational/test_functions.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from theano import tensor as tt +from aesara import tensor as aet -from pymc3.theanof import floatX +from pymc3.aesaraf import floatX from pymc3.variational.opvi import TestFunction __all__ = ["rbf"] @@ -34,30 +34,30 @@ class Kernel(TestFunction): class RBF(Kernel): def __call__(self, X): XY = X.dot(X.T) - x2 = tt.sum(X ** 2, axis=1).dimshuffle(0, "x") - X2e = tt.repeat(x2, X.shape[0], axis=1) + x2 = aet.sum(X ** 2, axis=1).dimshuffle(0, "x") + X2e = aet.repeat(x2, X.shape[0], axis=1) H = X2e + X2e.T - 2.0 * XY - V = tt.sort(H.flatten()) + V = aet.sort(H.flatten()) length = V.shape[0] # median distance - m = tt.switch( - tt.eq((length % 2), 0), + m = aet.switch( + aet.eq((length % 2), 0), # if even vector - tt.mean(V[((length // 2) - 1) : ((length // 2) + 1)]), + aet.mean(V[((length // 2) - 1) : ((length // 2) + 1)]), # if odd vector V[length // 2], ) - h = 0.5 * m / tt.log(floatX(H.shape[0]) + floatX(1)) + h = 0.5 * m / aet.log(floatX(H.shape[0]) + floatX(1)) # RBF - Kxy = tt.exp(-H / h / 2.0) + Kxy = aet.exp(-H / h / 2.0) # Derivative - dxkxy = -tt.dot(Kxy, X) - sumkxy = tt.sum(Kxy, axis=-1, keepdims=True) - dxkxy = tt.add(dxkxy, tt.mul(X, sumkxy)) / h + dxkxy = -aet.dot(Kxy, X) + sumkxy = aet.sum(Kxy, axis=-1, keepdims=True) + dxkxy = aet.add(dxkxy, aet.mul(X, sumkxy)) / h return Kxy, dxkxy diff --git a/pymc3/variational/updates.py b/pymc3/variational/updates.py index a2baa462c5e..62776f48ad1 100755 --- a/pymc3/variational/updates.py +++ b/pymc3/variational/updates.py @@ -44,7 +44,7 @@ # SOFTWARE. """ -Functions to generate Theano update dictionaries for training. +Functions to generate Aesara update dictionaries for training. 
The update functions implement different methods to control the learning rate for use with stochastic gradient descent. @@ -88,21 +88,20 @@ Examples -------- >>> import lasagne ->>> import theano.tensor as T ->>> import theano +>>> import aesara >>> from lasagne.nonlinearities import softmax >>> from lasagne.layers import InputLayer, DenseLayer, get_output >>> from lasagne.updates import sgd, apply_momentum >>> l_in = InputLayer((100, 20)) >>> l1 = DenseLayer(l_in, num_units=3, nonlinearity=softmax) ->>> x = tt.matrix('x') # shp: num_batch x num_features ->>> y = tt.ivector('y') # shp: num_batch +>>> x = aet.matrix('x') # shp: num_batch x num_features +>>> y = aet.ivector('y') # shp: num_batch >>> l_out = get_output(l1, x) >>> params = lasagne.layers.get_all_params(l1) ->>> loss = tt.mean(tt.nnet.categorical_crossentropy(l_out, y)) +>>> loss = aet.mean(aet.nnet.categorical_crossentropy(l_out, y)) >>> updates_sgd = sgd(loss, params, learning_rate=0.0001) >>> updates = apply_momentum(updates_sgd, params, momentum=0.9) ->>> train_function = theano.function([x, y], updates=updates) +>>> train_function = aesara.function([x, y], updates=updates) Notes ----- @@ -112,9 +111,9 @@ from collections import OrderedDict from functools import partial +import aesara +import aesara.tensor as aet import numpy as np -import theano -import theano.tensor as tt import pymc3 as pm @@ -152,7 +151,7 @@ def get_or_compute_grads(loss_or_grads, params): gradients and returned as is, unless it does not match the length of `params`, in which case a `ValueError` is raised. Otherwise, `loss_or_grads` is assumed to be a cost expression and - the function returns `theano.grad(loss_or_grads, params)`. + the function returns `aesara.grad(loss_or_grads, params)`. Raises ------ @@ -161,7 +160,7 @@ def get_or_compute_grads(loss_or_grads, params): any element of `params` is not a shared variable (while we could still compute its gradient, we can never update it and want to fail early). """ - if any(not isinstance(p, theano.compile.SharedVariable) for p in params): + if any(not isinstance(p, aesara.compile.SharedVariable) for p in params): raise ValueError( "params must contain shared variables only. If it " "contains arbitrary parameter expressions, then " @@ -174,7 +173,7 @@ def get_or_compute_grads(loss_or_grads, params): ) return loss_or_grads else: - return theano.grad(loss_or_grads, params) + return aesara.grad(loss_or_grads, params) def _get_call_kwargs(_locals_): @@ -212,7 +211,7 @@ def sgd(loss_or_grads=None, params=None, learning_rate=1e-3): Examples -------- - >>> a = theano.shared(1.) + >>> a = aesara.shared(1.) >>> b = a*2 >>> updates = sgd(b, [a], learning_rate=.01) >>> isinstance(updates, dict) @@ -276,7 +275,7 @@ def apply_momentum(updates, params=None, momentum=0.9): for param in params: value = param.get_value(borrow=True) - velocity = theano.shared( + velocity = aesara.shared( np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable ) x = momentum * velocity + updates[param] @@ -326,7 +325,7 @@ def momentum(loss_or_grads=None, params=None, learning_rate=1e-3, momentum=0.9): Examples -------- - >>> a = theano.shared(1.) + >>> a = aesara.shared(1.) 
>>> b = a*2 >>> updates = momentum(b, [a], learning_rate=.01) >>> isinstance(updates, dict) @@ -391,7 +390,7 @@ def apply_nesterov_momentum(updates, params=None, momentum=0.9): for param in params: value = param.get_value(borrow=True) - velocity = theano.shared( + velocity = aesara.shared( np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable ) x = momentum * velocity + updates[param] - param @@ -446,7 +445,7 @@ def nesterov_momentum(loss_or_grads=None, params=None, learning_rate=1e-3, momen Examples -------- - >>> a = theano.shared(1.) + >>> a = aesara.shared(1.) >>> b = a*2 >>> updates = nesterov_momentum(b, [a], learning_rate=.01) >>> isinstance(updates, dict) @@ -514,7 +513,7 @@ def adagrad(loss_or_grads=None, params=None, learning_rate=1.0, epsilon=1e-6): Examples -------- - >>> a = theano.shared(1.) + >>> a = aesara.shared(1.) >>> b = a*2 >>> updates = adagrad(b, [a], learning_rate=.01) >>> isinstance(updates, dict) @@ -535,12 +534,12 @@ def adagrad(loss_or_grads=None, params=None, learning_rate=1.0, epsilon=1e-6): for param, grad in zip(params, grads): value = param.get_value(borrow=True) - accu = theano.shared( + accu = aesara.shared( np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable ) accu_new = accu + grad ** 2 updates[accu] = accu_new - updates[param] = param - (learning_rate * grad / tt.sqrt(accu_new + epsilon)) + updates[param] = param - (learning_rate * grad / aet.sqrt(accu_new + epsilon)) return updates @@ -574,19 +573,19 @@ def adagrad_window(loss_or_grads=None, params=None, learning_rate=0.001, epsilon grads = get_or_compute_grads(loss_or_grads, params) updates = OrderedDict() for param, grad in zip(params, grads): - i = theano.shared(pm.floatX(0)) + i = aesara.shared(pm.floatX(0)) i_int = i.astype("int32") value = param.get_value(borrow=True) - accu = theano.shared(np.zeros(value.shape + (n_win,), dtype=value.dtype)) + accu = aesara.shared(np.zeros(value.shape + (n_win,), dtype=value.dtype)) # Append squared gradient vector to accu_new - accu_new = tt.set_subtensor(accu[..., i_int], grad ** 2) - i_new = tt.switch((i + 1) < n_win, i + 1, 0) + accu_new = aet.set_subtensor(accu[..., i_int], grad ** 2) + i_new = aet.switch((i + 1) < n_win, i + 1, 0) updates[accu] = accu_new updates[i] = i_new accu_sum = accu_new.sum(axis=-1) - updates[param] = param - (learning_rate * grad / tt.sqrt(accu_sum + epsilon)) + updates[param] = param - (learning_rate * grad / aet.sqrt(accu_sum + epsilon)) return updates @@ -633,13 +632,13 @@ def rmsprop(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.9, epsilon References ---------- - .. [1] Tieleman, tt. and Hinton, G. (2012): + .. [1] Tieleman, aet. and Hinton, G. (2012): Neural Networks for Machine Learning, Lecture 6.5 - rmsprop. Coursera. http://www.youtube.com/watch?v=O3sxAc4hxZU (formula @5:20) Examples -------- - >>> a = theano.shared(1.) + >>> a = aesara.shared(1.) 
>>> b = a*2 >>> updates = rmsprop(b, [a], learning_rate=.01) >>> isinstance(updates, dict) @@ -658,17 +657,17 @@ def rmsprop(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.9, epsilon grads = get_or_compute_grads(loss_or_grads, params) updates = OrderedDict() - # Using theano constant to prevent upcasting of float32 - one = tt.constant(1) + # Using aesara constant to prevent upcasting of float32 + one = aet.constant(1) for param, grad in zip(params, grads): value = param.get_value(borrow=True) - accu = theano.shared( + accu = aesara.shared( np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable ) accu_new = rho * accu + (one - rho) * grad ** 2 updates[accu] = accu_new - updates[param] = param - (learning_rate * grad / tt.sqrt(accu_new + epsilon)) + updates[param] = param - (learning_rate * grad / aet.sqrt(accu_new + epsilon)) return updates @@ -731,7 +730,7 @@ def adadelta(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.95, epsil Examples -------- - >>> a = theano.shared(1.) + >>> a = aesara.shared(1.) >>> b = a*2 >>> updates = adadelta(b, [a], learning_rate=.01) >>> isinstance(updates, dict) @@ -750,17 +749,17 @@ def adadelta(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.95, epsil grads = get_or_compute_grads(loss_or_grads, params) updates = OrderedDict() - # Using theano constant to prevent upcasting of float32 - one = tt.constant(1) + # Using aesara constant to prevent upcasting of float32 + one = aet.constant(1) for param, grad in zip(params, grads): value = param.get_value(borrow=True) # accu: accumulate gradient magnitudes - accu = theano.shared( + accu = aesara.shared( np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable ) # delta_accu: accumulate update magnitudes (recursively!) - delta_accu = theano.shared( + delta_accu = aesara.shared( np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable ) @@ -769,7 +768,7 @@ def adadelta(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.95, epsil updates[accu] = accu_new # compute parameter update, using the 'old' delta_accu - update = grad * tt.sqrt(delta_accu + epsilon) / tt.sqrt(accu_new + epsilon) + update = grad * aet.sqrt(delta_accu + epsilon) / aet.sqrt(accu_new + epsilon) updates[param] = param - learning_rate * update # update delta_accu (as accu, but accumulating updates) @@ -823,7 +822,7 @@ def adam( Examples -------- - >>> a = theano.shared(1.) + >>> a = aesara.shared(1.) 
>>> b = a*2 >>> updates = adam(b, [a], learning_rate=.01) >>> isinstance(updates, dict) @@ -840,27 +839,27 @@ def adam( elif loss_or_grads is None or params is None: raise ValueError("Please provide both `loss_or_grads` and `params` to get updates") all_grads = get_or_compute_grads(loss_or_grads, params) - t_prev = theano.shared(pm.theanof.floatX(0.0)) + t_prev = aesara.shared(pm.aesaraf.floatX(0.0)) updates = OrderedDict() - # Using theano constant to prevent upcasting of float32 - one = tt.constant(1) + # Using aesara constant to prevent upcasting of float32 + one = aet.constant(1) t = t_prev + 1 - a_t = learning_rate * tt.sqrt(one - beta2 ** t) / (one - beta1 ** t) + a_t = learning_rate * aet.sqrt(one - beta2 ** t) / (one - beta1 ** t) for param, g_t in zip(params, all_grads): value = param.get_value(borrow=True) - m_prev = theano.shared( + m_prev = aesara.shared( np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable ) - v_prev = theano.shared( + v_prev = aesara.shared( np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable ) m_t = beta1 * m_prev + (one - beta1) * g_t v_t = beta2 * v_prev + (one - beta2) * g_t ** 2 - step = a_t * m_t / (tt.sqrt(v_t) + epsilon) + step = a_t * m_t / (aet.sqrt(v_t) + epsilon) updates[m_prev] = m_t updates[v_prev] = v_t @@ -911,7 +910,7 @@ def adamax( Examples -------- - >>> a = theano.shared(1.) + >>> a = aesara.shared(1.) >>> b = a*2 >>> updates = adamax(b, [a], learning_rate=.01) >>> isinstance(updates, dict) @@ -928,26 +927,26 @@ def adamax( elif loss_or_grads is None or params is None: raise ValueError("Please provide both `loss_or_grads` and `params` to get updates") all_grads = get_or_compute_grads(loss_or_grads, params) - t_prev = theano.shared(pm.theanof.floatX(0.0)) + t_prev = aesara.shared(pm.aesaraf.floatX(0.0)) updates = OrderedDict() - # Using theano constant to prevent upcasting of float32 - one = tt.constant(1) + # Using aesara constant to prevent upcasting of float32 + one = aet.constant(1) t = t_prev + 1 a_t = learning_rate / (one - beta1 ** t) for param, g_t in zip(params, all_grads): value = param.get_value(borrow=True) - m_prev = theano.shared( + m_prev = aesara.shared( np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable ) - u_prev = theano.shared( + u_prev = aesara.shared( np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable ) m_t = beta1 * m_prev + (one - beta1) * g_t - u_t = tt.maximum(beta2 * u_prev, abs(g_t)) + u_t = aet.maximum(beta2 * u_prev, abs(g_t)) step = a_t * m_t / (u_t + epsilon) updates[m_prev] = m_t @@ -968,7 +967,7 @@ def norm_constraint(tensor_var, max_norm, norm_axes=None, epsilon=1e-7): Parameters ---------- tensor_var: TensorVariable - Theano expression for update, gradient, or other quantity. + Aesara expression for update, gradient, or other quantity. max_norm: scalar This value sets the maximum allowed value of any norm in `tensor_var`. @@ -993,11 +992,11 @@ def norm_constraint(tensor_var, max_norm, norm_axes=None, epsilon=1e-7): Examples -------- - >>> param = theano.shared( - ... np.random.randn(100, 200).astype(theano.config.floatX)) + >>> param = aesara.shared( + ... 
np.random.randn(100, 200).astype(aesara.config.floatX)) >>> update = param + 100 >>> update = norm_constraint(update, 10) - >>> func = theano.function([], [], updates=[(param, update)]) + >>> func = aesara.function([], [], updates=[(param, update)]) >>> # Apply constrained update >>> _ = func() >>> from lasagne.utils import compute_norms @@ -1028,9 +1027,9 @@ def norm_constraint(tensor_var, max_norm, norm_axes=None, epsilon=1e-7): "Unsupported tensor dimensionality {}." "Must specify `norm_axes`".format(ndim) ) - dtype = np.dtype(theano.config.floatX).type - norms = tt.sqrt(tt.sum(tt.sqr(tensor_var), axis=sum_over, keepdims=True)) - target_norms = tt.clip(norms, 0, dtype(max_norm)) + dtype = np.dtype(aesara.config.floatX).type + norms = aet.sqrt(aet.sum(aet.sqr(tensor_var), axis=sum_over, keepdims=True)) + target_norms = aet.clip(norms, 0, dtype(max_norm)) constrained_output = tensor_var * (target_norms / (dtype(epsilon) + norms)) return constrained_output @@ -1061,7 +1060,7 @@ def total_norm_constraint(tensor_vars, max_norm, epsilon=1e-7, return_norm=False ------- tensor_vars_scaled: list of TensorVariables The scaled tensor variables. - norm: Theano scalar + norm: Aesara scalar The combined norms of the input variables prior to rescaling, only returned if ``return_norms=True``. @@ -1070,14 +1069,14 @@ def total_norm_constraint(tensor_vars, max_norm, epsilon=1e-7, return_norm=False >>> from lasagne.layers import InputLayer, DenseLayer >>> import lasagne >>> from lasagne.updates import sgd, total_norm_constraint - >>> x = tt.matrix() - >>> y = tt.ivector() + >>> x = aet.matrix() + >>> y = aet.ivector() >>> l_in = InputLayer((5, 10)) - >>> l1 = DenseLayer(l_in, num_units=7, nonlinearity=tt.nnet.softmax) + >>> l1 = DenseLayer(l_in, num_units=7, nonlinearity=aet.nnet.softmax) >>> output = lasagne.layers.get_output(l1, x) - >>> cost = tt.mean(tt.nnet.categorical_crossentropy(output, y)) + >>> cost = aet.mean(aet.nnet.categorical_crossentropy(output, y)) >>> all_params = lasagne.layers.get_all_params(l1) - >>> all_grads = tt.grad(cost, all_params) + >>> all_grads = aet.grad(cost, all_params) >>> scaled_grads = total_norm_constraint(all_grads, 5) >>> updates = sgd(scaled_grads, all_params, learning_rate=0.1) @@ -1091,9 +1090,9 @@ def total_norm_constraint(tensor_vars, max_norm, epsilon=1e-7, return_norm=False learning with neural networks. In Advances in Neural Information Processing Systems (pp. 3104-3112). """ - norm = tt.sqrt(sum(tt.sum(tensor ** 2) for tensor in tensor_vars)) - dtype = np.dtype(theano.config.floatX).type - target_norm = tt.clip(norm, 0, dtype(max_norm)) + norm = aet.sqrt(sum(aet.sum(tensor ** 2) for tensor in tensor_vars)) + dtype = np.dtype(aesara.config.floatX).type + target_norm = aet.clip(norm, 0, dtype(max_norm)) multiplier = target_norm / (dtype(epsilon) + norm) tensor_vars_scaled = [step * multiplier for step in tensor_vars] diff --git a/pymc3/vartypes.py b/pymc3/vartypes.py index 2469036f312..8cb61333aba 100644 --- a/pymc3/vartypes.py +++ b/pymc3/vartypes.py @@ -12,9 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from theano.graph.basic import Constant as graph_constant -from theano.tensor import Constant as tensor_constant - __all__ = [ "bool_types", "int_types", @@ -24,7 +21,6 @@ "discrete_types", "typefilter", "isgenerator", - "theano_constant", ] bool_types = {"int8"} @@ -45,6 +41,3 @@ def typefilter(vars, types): def isgenerator(obj): return hasattr(obj, "__next__") - - -theano_constant = (tensor_constant, graph_constant) diff --git a/requirements.txt b/requirements.txt index 93cb80ebc13..9ec84e75387 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ -arviz>=0.11.0 +aesara>=2.0.1 +arviz>=0.11.1 dill fastprogress>=0.2.0 numpy>=1.15.0 pandas>=0.24.0 patsy>=0.5.1 scipy>=1.2.0 -theano-pymc==1.1.2 typing-extensions>=3.7.4 diff --git a/scripts/test.sh b/scripts/test.sh index f9ae8111f7b..9045f8df509 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -3,4 +3,4 @@ set -e _FLOATX=${FLOATX:=float64} -THEANO_FLAGS="floatX=${_FLOATX},gcc__cxxflags='-march=core2'" pytest -v --cov=pymc3 --cov-report=xml "$@" --cov-report term +AESARA_FLAGS="floatX=${_FLOATX},gcc__cxxflags='-march=core2'" pytest -v --cov=pymc3 --cov-report=xml "$@" --cov-report term diff --git a/setup.py b/setup.py index 9b8091ba1bb..c7ccef4c071 100755 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ from setuptools import find_packages, setup DISTNAME = "pymc3" -DESCRIPTION = "Probabilistic Programming in Python: Bayesian Modeling and Probabilistic Machine Learning with Theano" +DESCRIPTION = "Probabilistic Programming in Python: Bayesian Modeling and Probabilistic Machine Learning with Aesara" AUTHOR = "PyMC Developers" AUTHOR_EMAIL = "pymc.devs@gmail.com" URL = "http://github.com/pymc-devs/pymc3"
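For reference, the hunks above apply one consistent mapping: theano -> aesara, "from theano import tensor as tt" -> "from aesara import tensor as aet", theano.clone -> aesara.clone_replace, pymc3.theanof -> pymc3.aesaraf, and the THEANO_FLAGS environment variable -> AESARA_FLAGS. The short sketch below only illustrates that convention for downstream code; it is not part of the patch itself, and it assumes aesara>=2.0.1 (the pin added to requirements.txt above) plus numpy are installed.

    # Illustrative only: the renamed imports and calls used throughout this patch.
    import numpy as np

    import aesara
    import aesara.tensor as aet              # was: import theano.tensor as tt

    floatX = aesara.config.floatX            # was: theano.config.floatX

    a = aesara.shared(np.asarray(1.0, dtype=floatX), name="a")   # was: theano.shared(...)
    b = aet.log(aet.abs_(a) + 1.0)
    f = aesara.function([], b)               # was: theano.function(...)
    print(f())                               # log(2.0)

    # theano.clone(...) is renamed to aesara.clone_replace(...)
    b2 = aesara.clone_replace(b, {a: aet.constant(2.0, dtype=floatX)})
    print(b2.eval())                         # log(3.0)

    # In shell/CI configuration the flags variable is renamed accordingly, e.g.
    #   AESARA_FLAGS="floatX=float64" pytest -v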