Fix broadcastable -> shape deprecations
Armavica authored and ricardoV94 committed Aug 25, 2022
1 parent 33c8004 commit 7af102d
Showing 7 changed files with 19 additions and 33 deletions.
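For context, the deprecation being addressed: recent Aesara releases replace the `broadcastable=` keyword of `aesara.shared` and `at.TensorType` with `shape=`. A minimal before/after sketch of the spelling used throughout this commit, assuming an Aesara version where the boolean broadcast patterns appear to be accepted by `shape` and treated as broadcast flags (`True` marking a length-1 broadcastable axis):

import aesara
import aesara.tensor as at
import numpy as np

value = np.atleast_2d(1.0)  # shape (1, 1)

# Deprecated spelling (warns on recent Aesara):
# G_old = aesara.shared(value, name="G", broadcastable=(True, False))

# Spelling adopted in this commit: the boolean pattern is passed to ``shape``
G_new = aesara.shared(value, name="G", shape=(True, False))

# The same switch applies to TensorType, e.g. a 0-d int8 output type
scalar_type = at.TensorType(dtype="int8", shape=[])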
2 changes: 1 addition & 1 deletion pymc/aesaraf.py
@@ -619,7 +619,7 @@ def make_shared_replacements(point, vars, model):
     """
     othervars = set(model.value_vars) - set(vars)
     return {
-        var: aesara.shared(point[var.name], var.name + "_shared", broadcastable=var.broadcastable)
+        var: aesara.shared(point[var.name], var.name + "_shared", shape=var.broadcastable)
         for var in othervars
     }

6 changes: 3 additions & 3 deletions pymc/distributions/continuous.py
@@ -3976,9 +3976,9 @@ def make_node(self, x, h, z):
         x = at.as_tensor_variable(floatX(x))
         h = at.as_tensor_variable(floatX(h))
         z = at.as_tensor_variable(floatX(z))
-        shape = broadcast_shape(x, h, z)
-        broadcastable = [] if not shape else [False] * len(shape)
-        return Apply(self, [x, h, z], [at.TensorType(aesara.config.floatX, broadcastable)()])
+        bshape = broadcast_shape(x, h, z)
+        shape = [False] * len(bshape)
+        return Apply(self, [x, h, z], [at.TensorType(aesara.config.floatX, shape=shape)()])

     def perform(self, node, ins, outs):
         x, h, z = ins[0], ins[1], ins[2]
2 changes: 1 addition & 1 deletion pymc/distributions/multivariate.py
@@ -834,7 +834,7 @@ class PosDefMatrix(Op):
     def make_node(self, x):
         x = at.as_tensor_variable(x)
         assert x.ndim == 2
-        o = TensorType(dtype="int8", broadcastable=[])()
+        o = TensorType(dtype="int8", shape=[])()
         return Apply(self, [x], [o])

     # Python implementation:
2 changes: 1 addition & 1 deletion pymc/model.py
@@ -362,7 +362,7 @@ def __init__(
         self._extra_vars_shared = {}
         for var, value in extra_vars_and_values.items():
             shared = aesara.shared(
-                value, var.name + "_shared__", broadcastable=[s == 1 for s in value.shape]
+                value, var.name + "_shared__", shape=[s == 1 for s in value.shape]
             )
             self._extra_vars_shared[var.name] = shared
             givens.append((var, shared))
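A small worked example of the pattern computed in the hunk above (the `value` here is hypothetical; the comprehension flags length-1 axes as broadcastable):

import numpy as np

value = np.zeros((1, 3))                 # hypothetical extra-var value
pattern = [s == 1 for s in value.shape]
print(pattern)                           # [True, False]: only the first axis is broadcastable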
2 changes: 1 addition & 1 deletion pymc/smc/smc.py
@@ -565,7 +565,7 @@ def _logp_forw(point, out_vars, in_vars, shared):
     new_in_vars = []
     for in_var in in_vars:
         if in_var.dtype in discrete_types:
-            float_var = at.TensorType("floatX", in_var.broadcastable)(in_var.name)
+            float_var = at.TensorType("floatX", in_var.shape)(in_var.name)
             new_in_vars.append(float_var)
             replace_int_input[in_var] = at.round(float_var).astype(in_var.dtype)
         else:
4 changes: 2 additions & 2 deletions pymc/tests/test_sampling.py
@@ -474,7 +474,7 @@ def test_choose_chains(n_points, tune, expected_length, expected_n_traces):
 @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32")
 class TestNamedSampling(SeededTest):
     def test_shared_named(self):
-        G_var = shared(value=np.atleast_2d(1.0), broadcastable=(True, False), name="G")
+        G_var = shared(value=np.atleast_2d(1.0), shape=(True, False), name="G")

         with pm.Model():
             theta0 = pm.Normal(
@@ -491,7 +491,7 @@ def test_shared_named(self):
         assert np.isclose(res, 0.0)

     def test_shared_unnamed(self):
-        G_var = shared(value=np.atleast_2d(1.0), broadcastable=(True, False))
+        G_var = shared(value=np.atleast_2d(1.0), shape=(True, False))
         with pm.Model():
             theta0 = pm.Normal(
                 "theta0",
34 changes: 10 additions & 24 deletions pymc/variational/updates.py
@@ -276,7 +276,7 @@ def apply_momentum(updates, params=None, momentum=0.9):
     for param in params:
         value = param.get_value(borrow=True)
         velocity = aesara.shared(
-            np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
+            np.zeros(value.shape, dtype=value.dtype), shape=param.broadcastable
         )
         x = momentum * velocity + updates[param]
         updates[velocity] = x - param
@@ -391,7 +391,7 @@ def apply_nesterov_momentum(updates, params=None, momentum=0.9):
     for param in params:
         value = param.get_value(borrow=True)
         velocity = aesara.shared(
-            np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
+            np.zeros(value.shape, dtype=value.dtype), shape=param.broadcastable
         )
         x = momentum * velocity + updates[param] - param
         updates[velocity] = x
@@ -534,9 +534,7 @@ def adagrad(loss_or_grads=None, params=None, learning_rate=1.0, epsilon=1e-6):

     for param, grad in zip(params, grads):
         value = param.get_value(borrow=True)
-        accu = aesara.shared(
-            np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
-        )
+        accu = aesara.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.broadcastable)
         accu_new = accu + grad**2
         updates[accu] = accu_new
         updates[param] = param - (learning_rate * grad / at.sqrt(accu_new + epsilon))
@@ -662,9 +660,7 @@ def rmsprop(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.9, epsilon

     for param, grad in zip(params, grads):
         value = param.get_value(borrow=True)
-        accu = aesara.shared(
-            np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
-        )
+        accu = aesara.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.broadcastable)
         accu_new = rho * accu + (one - rho) * grad**2
         updates[accu] = accu_new
         updates[param] = param - (learning_rate * grad / at.sqrt(accu_new + epsilon))
@@ -755,12 +751,10 @@ def adadelta(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.95, epsil
     for param, grad in zip(params, grads):
         value = param.get_value(borrow=True)
         # accu: accumulate gradient magnitudes
-        accu = aesara.shared(
-            np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
-        )
+        accu = aesara.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.broadcastable)
         # delta_accu: accumulate update magnitudes (recursively!)
         delta_accu = aesara.shared(
-            np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
+            np.zeros(value.shape, dtype=value.dtype), shape=param.broadcastable
         )

         # update accu (as in rmsprop)
@@ -850,12 +844,8 @@ def adam(

     for param, g_t in zip(params, all_grads):
         value = param.get_value(borrow=True)
-        m_prev = aesara.shared(
-            np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
-        )
-        v_prev = aesara.shared(
-            np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
-        )
+        m_prev = aesara.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.broadcastable)
+        v_prev = aesara.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.broadcastable)

         m_t = beta1 * m_prev + (one - beta1) * g_t
         v_t = beta2 * v_prev + (one - beta2) * g_t**2
@@ -938,12 +928,8 @@ def adamax(

     for param, g_t in zip(params, all_grads):
         value = param.get_value(borrow=True)
-        m_prev = aesara.shared(
-            np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
-        )
-        u_prev = aesara.shared(
-            np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable
-        )
+        m_prev = aesara.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.broadcastable)
+        u_prev = aesara.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.broadcastable)

         m_t = beta1 * m_prev + (one - beta1) * g_t
         u_t = at.maximum(beta2 * u_prev, abs(g_t))
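The same one-liner recurs in every optimizer hunk above. A minimal sketch of the repeated pattern; `_zeros_like_shared` is a hypothetical helper name, not part of this commit:

import aesara
import numpy as np

def _zeros_like_shared(param):
    # Hypothetical helper, not in the commit: a zero-filled shared variable
    # matching param's value in shape and dtype, forwarding its broadcast
    # pattern through the new ``shape`` keyword.
    value = param.get_value(borrow=True)
    return aesara.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.broadcastable)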
