From e6b39a51666b90c064daf46e8821b9dcac69671a Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Thu, 20 Oct 2022 18:32:31 -0500 Subject: [PATCH 1/6] Remove TensorType.value_zeros --- aesara/compile/debugmode.py | 7 ++++--- aesara/scan/op.py | 12 +++++++----- aesara/sparse/type.py | 8 -------- aesara/tensor/type.py | 7 ------- tests/compile/test_debugmode.py | 4 ++-- tests/tensor/test_type.py | 1 - 6 files changed, 13 insertions(+), 26 deletions(-) diff --git a/aesara/compile/debugmode.py b/aesara/compile/debugmode.py index 6cd1cf4cfc..39d3cf4c3b 100644 --- a/aesara/compile/debugmode.py +++ b/aesara/compile/debugmode.py @@ -807,7 +807,7 @@ def _get_preallocated_maps( for r in considered_outputs: if isinstance(r.type, TensorType): # Build a C-contiguous buffer - new_buf = r.type.value_zeros(r_vals[r].shape) + new_buf = np.empty(r_vals[r].shape, dtype=r.type.dtype) assert new_buf.flags["C_CONTIGUOUS"] new_buf[...] = np.asarray(def_val).astype(r.type.dtype) @@ -875,7 +875,8 @@ def _get_preallocated_maps( buf_shape.append(s) else: buf_shape.append(s * 2) - new_buf = r.type.value_zeros(buf_shape) + + new_buf = np.empty(buf_shape, dtype=r.type.dtype) new_buf[...] = np.asarray(def_val).astype(r.type.dtype) init_strided[r] = new_buf @@ -950,7 +951,7 @@ def _get_preallocated_maps( max((s + sd), 0) for s, sd in zip(r_vals[r].shape, r_shape_diff) ] - new_buf = r.type.value_zeros(out_shape) + new_buf = np.empty(out_shape, dtype=r.type.dtype) new_buf[...] = np.asarray(def_val).astype(r.type.dtype) wrong_size[r] = new_buf diff --git a/aesara/scan/op.py b/aesara/scan/op.py index 65c62808fc..c1016ddf4d 100644 --- a/aesara/scan/op.py +++ b/aesara/scan/op.py @@ -1380,11 +1380,13 @@ def prepare_fgraph(self, fgraph): # the output value, possibly inplace, at the end of the # function execution. Also, since an update is defined, # a default value must also be (this is verified by - # DebugMode). Use an array of size 0 with the correct - # ndim and dtype (use a shape of 1 on broadcastable - # dimensions, and 0 on the others). - default_shape = [1 if _b else 0 for _b in inp.broadcastable] - default_val = inp.type.value_zeros(default_shape) + # DebugMode). + # TODO FIXME: Why do we need a "default value" here? + # This sounds like a serious design issue. + default_shape = tuple( + s if s is not None else 0 for s in inp.type.shape + ) + default_val = np.empty(default_shape, dtype=inp.type.dtype) wrapped_inp = In( variable=inp, value=default_val, diff --git a/aesara/sparse/type.py b/aesara/sparse/type.py index e2ce91d64c..40602d1cd9 100644 --- a/aesara/sparse/type.py +++ b/aesara/sparse/type.py @@ -224,14 +224,6 @@ def get_size(self, shape_info): + (shape_info[2] + shape_info[3]) * np.dtype("int32").itemsize ) - def value_zeros(self, shape): - matrix_constructor = self.format_cls.get(self.format) - - if matrix_constructor is None: - raise ValueError(f"Sparse matrix type {self.format} not found in SciPy") - - return matrix_constructor(shape, dtype=self.dtype) - def __eq__(self, other): res = super().__eq__(other) diff --git a/aesara/tensor/type.py b/aesara/tensor/type.py index 06cf964142..df6fafa983 100644 --- a/aesara/tensor/type.py +++ b/aesara/tensor/type.py @@ -331,13 +331,6 @@ def convert_variable(self, var): # `specify_shape` will combine the more precise shapes of the two types return aesara.tensor.specify_shape(var, self.shape) - def value_zeros(self, shape): - """Create an numpy ndarray full of 0 values. - - TODO: Remove this trivial method. 
- """ - return np.zeros(shape, dtype=self.dtype) - @staticmethod def values_eq(a, b, force_same_dtype=True): # TODO: check to see if the shapes must match; for now, we err on safe diff --git a/tests/compile/test_debugmode.py b/tests/compile/test_debugmode.py index e724cfd50a..c9c59a463f 100644 --- a/tests/compile/test_debugmode.py +++ b/tests/compile/test_debugmode.py @@ -725,10 +725,10 @@ def perform(self, node, inp, out): r, c = out lv = v.shape[0] if (r[0] is None) or (r[0].shape != (1, lv)): - r[0] = node.outputs[0].type.value_zeros((1, lv)) + r[0] = np.empty((1, lv), dtype=node.outputs[0].type.dtype) if (c[0] is None) or (c[0].shape != (lv, 1)): - c[0] = node.outputs[1].type.value_zeros((lv, 1)) + c[0] = np.empty((lv, 1), dtype=node.outputs[0].type.dtype) for i in range(lv): r[0][0, i] = v[i] diff --git a/tests/tensor/test_type.py b/tests/tensor/test_type.py index eed93e2527..3c8a5194ef 100644 --- a/tests/tensor/test_type.py +++ b/tests/tensor/test_type.py @@ -234,7 +234,6 @@ def test_fixed_shape_basic(): t1 = TensorType("float64", (2, 3)) assert t1.shape == (2, 3) assert t1.broadcastable == (False, False) - assert t1.value_zeros(t1.shape).shape == t1.shape assert str(t1) == "TensorType(float64, (2, 3))" From 69b80f7650e1ce6ed129c03617291c4e68801c30 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Wed, 12 Oct 2022 21:20:34 -0500 Subject: [PATCH 2/6] Use Composite graphs in aesara.tensor.extra_ops.broadcast_shape_iter --- aesara/tensor/extra_ops.py | 43 +++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/aesara/tensor/extra_ops.py b/aesara/tensor/extra_ops.py index f3d18335c6..e2287204b4 100644 --- a/aesara/tensor/extra_ops.py +++ b/aesara/tensor/extra_ops.py @@ -23,6 +23,7 @@ from aesara.raise_op import Assert from aesara.scalar import int32 as int_t from aesara.scalar import upcast +from aesara.scalar.basic import Composite from aesara.tensor import basic as at from aesara.tensor import get_vector_length from aesara.tensor.exceptions import NotScalarConstantError @@ -1552,16 +1553,32 @@ def broadcast_shape_iter( # be broadcastable or equal to the one non-broadcastable # constant `const_nt_shape_var`. 
assert_dim = Assert("Could not broadcast dimensions") + + scalar_nonconst_nb_shapes = [ + at.scalar_from_tensor(s) + if isinstance(s.type, TensorType) + else s + for s in nonconst_nb_shapes + ] + + dummy_nonconst_nb_shapes = [ + aes.get_scalar_type(dtype=v.dtype)() + for v in scalar_nonconst_nb_shapes + ] assert_cond = reduce( aes.and_, ( aes.or_( aes.eq(nbv, one_at), aes.eq(nbv, const_nt_shape_var) ) - for nbv in nonconst_nb_shapes + for nbv in dummy_nonconst_nb_shapes ), ) - bcast_dim = assert_dim(const_nt_shape_var, assert_cond) + assert_cond_op = Composite(dummy_nonconst_nb_shapes, [assert_cond]) + + bcast_dim = assert_dim( + const_nt_shape_var, assert_cond_op(*scalar_nonconst_nb_shapes) + ) else: bcast_dim = const_nt_shape_var else: @@ -1579,21 +1596,37 @@ def broadcast_shape_iter( result_dims.append(maybe_non_bcast_shapes[0]) continue + scalar_maybe_non_bcast_shapes = [ + at.scalar_from_tensor(s) if isinstance(s.type, TensorType) else s + for s in maybe_non_bcast_shapes + ] + dummy_maybe_non_bcast_shapes = [ + aes.get_scalar_type(dtype=v.dtype)() + for v in scalar_maybe_non_bcast_shapes + ] non_bcast_vec = [ aes.switch(aes.eq(nbv, 1), -one_at, nbv) - for nbv in maybe_non_bcast_shapes + for nbv in dummy_maybe_non_bcast_shapes ] dim_max = aes.abs(reduce(aes.scalar_maximum, non_bcast_vec)) + dim_max_op = Composite(dummy_maybe_non_bcast_shapes, [dim_max]) + + dummy_dim_max = dim_max_op(*dummy_maybe_non_bcast_shapes) assert_dim = Assert("Could not broadcast dimensions") assert_cond = reduce( aes.and_, ( - aes.or_(aes.eq(nbv, -one_at), aes.eq(nbv, dim_max)) + aes.or_(aes.eq(nbv, -one_at), aes.eq(nbv, dummy_dim_max)) for nbv in non_bcast_vec ), ) - bcast_dim = assert_dim(dim_max, assert_cond) + assert_cond_op = Composite(dummy_maybe_non_bcast_shapes, [assert_cond]) + + bcast_dim = assert_dim( + dim_max_op(*scalar_maybe_non_bcast_shapes), + assert_cond_op(*scalar_maybe_non_bcast_shapes), + ) result_dims.append(bcast_dim) From b11a9a0a8c553491b57bf8b35dffdb35a1b792f1 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Wed, 12 Oct 2022 19:06:14 -0500 Subject: [PATCH 3/6] Change infer_broadcastable to infer_static_shape --- aesara/tensor/basic.py | 43 ++++++++++++++++++++-------- aesara/tensor/extra_ops.py | 13 +++++---- aesara/tensor/random/op.py | 6 ++-- tests/tensor/rewriting/test_basic.py | 5 ++-- tests/tensor/test_basic.py | 12 ++++---- 5 files changed, 51 insertions(+), 28 deletions(-) diff --git a/aesara/tensor/basic.py b/aesara/tensor/basic.py index 83a127b3c5..f629bfb095 100644 --- a/aesara/tensor/basic.py +++ b/aesara/tensor/basic.py @@ -10,7 +10,7 @@ from collections.abc import Sequence from functools import partial from numbers import Number -from typing import Optional +from typing import TYPE_CHECKING, Optional from typing import Sequence as TypeSequence from typing import Tuple, Union from typing import cast as type_cast @@ -68,6 +68,10 @@ from aesara.tensor.var import TensorConstant, TensorVariable, get_unique_value +if TYPE_CHECKING: + from aesara.tensor import TensorLike + + def __oplist_tag(thing, tag): tags = getattr(thing, "__oplist_tags", []) tags.append(tag) @@ -1334,11 +1338,25 @@ def identity_like(x, dtype: Optional[Union[str, np.generic, np.dtype]] = None): return eye(_x.shape[0], _x.shape[1], k=0, dtype=dtype) -def infer_broadcastable(shape): - """Infer the broadcastable dimensions for `shape`. 
+def infer_static_shape( + shape: Union[Variable, TypeSequence[Union[Variable, int]]] +) -> Tuple[TypeSequence["TensorLike"], TypeSequence[Optional[int]]]: + """Infer the static shapes implied by the potentially symbolic elements in `shape`. + + `shape` will be validated and constant folded. As a result, this function + can be expensive and shouldn't be used unless absolutely necessary. + + It mostly exists as a hold-over from pre-static shape times, when it was + required in order to produce correct broadcastable arrays and prevent + some graphs from being unusable. Now, it is no longer strictly required, + so don't use it unless you want the same shape graphs to be rewritten + multiple times during graph construction. + + Returns + ------- + A validated sequence of symbolic shape values, and a sequence of + ``None``/``int`` values that can be used as `TensorType.shape` values. - `shape` will be validated and constant folded in order to determine - which dimensions are broadcastable (i.e. equal to ``1``). """ from aesara.tensor.rewriting.basic import topo_constant_folding from aesara.tensor.rewriting.shape import ShapeFeature @@ -1362,9 +1380,10 @@ def check_type(s): clone=True, ) folded_shape = rewrite_graph(shape_fg, custom_rewrite=topo_constant_folding).outputs - - bcast = tuple(getattr(s, "data", s) == 1 for s in folded_shape) - return sh, bcast + static_shape = tuple( + s.data.item() if isinstance(s, Constant) else None for s in folded_shape + ) + return sh, static_shape class Alloc(COp): @@ -1394,7 +1413,7 @@ class Alloc(COp): def make_node(self, value, *shape): v = as_tensor_variable(value) - sh, bcast = infer_broadcastable(shape) + sh, static_shape = infer_static_shape(shape) if v.ndim > len(sh): raise TypeError( "The Alloc value to use has more dimensions" @@ -1402,7 +1421,7 @@ def make_node(self, value, *shape): v.ndim, len(sh), ) - otype = TensorType(dtype=v.dtype, shape=bcast) + otype = TensorType(dtype=v.dtype, shape=static_shape) return Apply(self, [v] + sh, [otype()]) def perform(self, node, inputs, out_): @@ -3823,8 +3842,8 @@ def typecode(self): return np.dtype(self.dtype).num def make_node(self, *_shape): - _shape, bcast = infer_broadcastable(_shape) - otype = TensorType(dtype=self.dtype, shape=bcast) + _shape, static_shape = infer_static_shape(_shape) + otype = TensorType(dtype=self.dtype, shape=static_shape) output = otype() output.tag.values_eq_approx = values_eq_approx_always_true diff --git a/aesara/tensor/extra_ops.py b/aesara/tensor/extra_ops.py index e2287204b4..ca3e720339 100644 --- a/aesara/tensor/extra_ops.py +++ b/aesara/tensor/extra_ops.py @@ -1646,9 +1646,9 @@ def __call__(self, a, shape, **kwargs): def make_node(self, a, *shape): a = at.as_tensor_variable(a) - shape, bcast = at.infer_broadcastable(shape) + shape, static_shape = at.infer_static_shape(shape) - out = TensorType(dtype=a.type.dtype, shape=bcast)() + out = TensorType(dtype=a.type.dtype, shape=static_shape)() # Attempt to prevent in-place operations on this view-based output out.tag.indestructible = True @@ -1670,11 +1670,14 @@ def grad(self, inputs, outputs_gradients): d_wrt_a = broadcast_to(dout, shape).sum(axis=new_dims) # Determine the dimensions that were broadcast - _, shape_bcast = at.infer_broadcastable(shape) + _, static_shape = at.infer_static_shape(shape) + + # TODO: This needs to be performed at run-time when static shape + # information isn't available. 
bcast_sums = [ i - for i, (a_b, s_b) in enumerate(zip(a.broadcastable, shape_bcast[-a.ndim :])) - if a_b and not s_b + for i, (a_s, s_s) in enumerate(zip(a.type.shape, static_shape[-a.ndim :])) + if a_s == 1 and s_s != 1 ] if bcast_sums: diff --git a/aesara/tensor/random/op.py b/aesara/tensor/random/op.py index 6f4ab98ca6..ad37008ba7 100644 --- a/aesara/tensor/random/op.py +++ b/aesara/tensor/random/op.py @@ -14,7 +14,7 @@ constant, get_scalar_constant_value, get_vector_length, - infer_broadcastable, + infer_static_shape, ) from aesara.tensor.random.type import RandomGeneratorType, RandomStateType, RandomType from aesara.tensor.random.utils import normalize_size_param, params_broadcast_shapes @@ -322,7 +322,7 @@ def make_node(self, rng, size, dtype, *dist_params): ) shape = self._infer_shape(size, dist_params) - _, bcast = infer_broadcastable(shape) + _, static_shape = infer_static_shape(shape) dtype = self.dtype or dtype if dtype == "floatX": @@ -336,7 +336,7 @@ def make_node(self, rng, size, dtype, *dist_params): dtype_idx = constant(dtype, dtype="int64") dtype = all_dtypes[dtype_idx.data] - outtype = TensorType(dtype=dtype, shape=bcast) + outtype = TensorType(dtype=dtype, shape=static_shape) out_var = outtype() inputs = (rng, size, dtype_idx) + dist_params outputs = (rng.type(), out_var) diff --git a/tests/tensor/rewriting/test_basic.py b/tests/tensor/rewriting/test_basic.py index 0301909c82..b835fe79ec 100644 --- a/tests/tensor/rewriting/test_basic.py +++ b/tests/tensor/rewriting/test_basic.py @@ -276,8 +276,9 @@ def test_inconsistent_constant(self): assert a.owner and isinstance(a.owner.op, Alloc) - # `local_useless_alloc` should replace the `Alloc` with an `Assert` - with pytest.raises(AssertionError): + # `local_useless_alloc` should attempt to replace the `Alloc` with an + # `Assert` and fail when the static shape information conflicts. + with pytest.raises(TypeError): f = function([], a, mode=rewrite_mode) x = at.as_tensor(self.rng.standard_normal((6, 7))) diff --git a/tests/tensor/test_basic.py b/tests/tensor/test_basic.py index 509b651085..9c123f4c3a 100644 --- a/tests/tensor/test_basic.py +++ b/tests/tensor/test_basic.py @@ -55,7 +55,7 @@ get_vector_length, horizontal_stack, identity_like, - infer_broadcastable, + infer_static_shape, inverse_permutation, join, make_vector, @@ -796,20 +796,20 @@ def test_full(self): def test_infer_broadcastable(): with pytest.raises(TypeError, match="^Shapes must be scalar integers.*"): - infer_broadcastable([constant(1.0)]) + infer_static_shape([constant(1.0)]) with config.change_flags(exception_verbosity="high"), pytest.raises( TypeError, match=r"A\. x" ): - infer_broadcastable([dscalar("x")]) + infer_static_shape([dscalar("x")]) with pytest.raises(ValueError, match=".*could not be cast to have 0 dimensions"): - infer_broadcastable((as_tensor_variable([[1, 2]]),)) + infer_static_shape((as_tensor_variable([[1, 2]]),)) constant_size = constant([1]) specify_size = specify_shape(constant_size, [1]) - sh, bcast = infer_broadcastable(specify_size) - assert bcast == (True,) + sh, static_shape = infer_static_shape(specify_size) + assert static_shape == (1,) # This is slow for the ('int8', 3) version. From 97954597ca29c8ed9423f61c0c678d97ed96be1e Mon Sep 17 00:00:00 2001 From: "Brandon T. 
Willard" Date: Thu, 20 Oct 2022 18:42:32 -0500 Subject: [PATCH 4/6] Fix shape/type mismatch in TestScanInplaceOptimizer.test_inplace3 --- tests/scan/test_rewriting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scan/test_rewriting.py b/tests/scan/test_rewriting.py index eacde13a66..1a6dbf9cf0 100644 --- a/tests/scan/test_rewriting.py +++ b/tests/scan/test_rewriting.py @@ -1101,7 +1101,7 @@ def test_inplace3(self): outputs, updates = scan( lambda x, y: (x + asarrayX(1), y + asarrayX(1)), [], [x0, x1], n_steps=3 ) - x0 = asarrayX(np.zeros((3,))) + x0 = asarrayX(np.zeros((4,))) x0[0] = vx0 x0 = at.constant(x0) From 6c978d8eedd979f319c8e46b9d96c9ed14fdebd5 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Thu, 20 Oct 2022 18:54:13 -0500 Subject: [PATCH 5/6] Use static shape information instead of broadcastable in Scan --- aesara/scan/op.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/aesara/scan/op.py b/aesara/scan/op.py index c1016ddf4d..f0d5b2a4da 100644 --- a/aesara/scan/op.py +++ b/aesara/scan/op.py @@ -200,7 +200,7 @@ def copy_var_format(var, as_var): rval = as_var.type.filter_variable(rval) else: tmp = as_var.type.clone( - shape=(tuple(var.broadcastable[:1]) + tuple(as_var.broadcastable)) + shape=(tuple(var.type.shape[:1]) + tuple(as_var.type.shape)) ) rval = tmp.filter_variable(rval) return rval @@ -805,7 +805,9 @@ def tensorConstructor(shape, dtype): # output sequence o = outputs[idx] self.output_types.append( - typeConstructor((False,) + o.type.broadcastable, o.type.dtype) + # TODO: What can we actually say about the shape of this + # added dimension? + typeConstructor((None,) + o.type.shape, o.type.dtype) ) idx += len(info.mit_mot_out_slices[jdx]) @@ -816,7 +818,9 @@ def tensorConstructor(shape, dtype): for o in outputs[idx:end]: self.output_types.append( - typeConstructor((False,) + o.type.broadcastable, o.type.dtype) + # TODO: What can we actually say about the shape of this + # added dimension? + typeConstructor((None,) + o.type.shape, o.type.dtype) ) # shared outputs + possibly the ending condition @@ -2320,8 +2324,8 @@ def infer_shape(self, fgraph, node, input_shapes): # equivalent (if False). Here, we only need the variable. v_shp_i = validator.check(shp_i) if v_shp_i is None: - if hasattr(r, "broadcastable") and r.broadcastable[i]: - shp.append(1) + if r.type.shape[i] is not None: + shp.append(r.type.shape[i]) else: shp.append(Shape_i(i)(r)) else: From 272880e64ae13b4fc2652510e9fc8d1e0d6fca90 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Thu, 20 Oct 2022 18:55:04 -0500 Subject: [PATCH 6/6] Clean up some usage of the TensorType interface in Scan --- aesara/scan/op.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/aesara/scan/op.py b/aesara/scan/op.py index f0d5b2a4da..481eaf971d 100644 --- a/aesara/scan/op.py +++ b/aesara/scan/op.py @@ -161,8 +161,9 @@ def check_broadcast(v1, v2): which may wrongly be interpreted as broadcastable. """ - if not hasattr(v1, "broadcastable") and not hasattr(v2, "broadcastable"): + if not isinstance(v1.type, TensorType) and not isinstance(v2.type, TensorType): return + msg = ( "The broadcast pattern of the output of scan (%s) is " "inconsistent with the one provided in `output_info` " @@ -173,13 +174,13 @@ def check_broadcast(v1, v2): "them consistent, e.g. using aesara.tensor." "{unbroadcast, specify_broadcastable}." 
) - size = min(len(v1.broadcastable), len(v2.broadcastable)) + size = min(v1.type.ndim, v2.type.ndim) for n, (b1, b2) in enumerate( - zip(v1.broadcastable[-size:], v2.broadcastable[-size:]) + zip(v1.type.broadcastable[-size:], v2.type.broadcastable[-size:]) ): if b1 != b2: - a1 = n + size - len(v1.broadcastable) + 1 - a2 = n + size - len(v2.broadcastable) + 1 + a1 = n + size - v1.type.ndim + 1 + a2 = n + size - v2.type.ndim + 1 raise TypeError(msg % (v1.type, v2.type, a1, b1, b2, a2)) @@ -628,6 +629,7 @@ def validate_inner_graph(self): type_input = self.inner_inputs[inner_iidx].type type_output = self.inner_outputs[inner_oidx].type if ( + # TODO: Use the `Type` interface for this type_input.dtype != type_output.dtype or type_input.broadcastable != type_output.broadcastable ):
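
For downstream code that still called the removed `TensorType.value_zeros` / `SparseTensorType.value_zeros`, a minimal migration sketch along the lines of the DebugMode hunks in patch 1; the buffer shape, dtype, fill value, and the CSR format are illustrative choices, not values taken from the patches:

import numpy as np
import scipy.sparse as sp

# Dense case: allocate the buffer directly with NumPy, as the patched
# DebugMode code now does, and fill it explicitly with a default value.
dtype = "float64"
buf_shape = (2, 3)
def_val = 0

new_buf = np.empty(buf_shape, dtype=dtype)
new_buf[...] = np.asarray(def_val).astype(dtype)
assert new_buf.flags["C_CONTIGUOUS"]

# Sparse case: the removed SparseTensorType.value_zeros returned an empty
# SciPy matrix in the type's format; construct that matrix directly instead.
empty_csr = sp.csr_matrix(buf_shape, dtype=dtype)
assert empty_csr.nnz == 0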
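
The `broadcast_shape_iter` change in patch 2 builds each per-dimension check as a scalar graph on dummy inputs and wraps it in a single `Composite` `Op` before applying it to the actual shape scalars. A stripped-down sketch of that pattern, assuming the standard `aesara.scalar` API and using illustrative variable names rather than the ones in the patch:

import aesara.scalar as aes
from aesara.scalar.basic import Composite

# Dummy scalar inputs play the role of `dummy_nonconst_nb_shapes`: they only
# define the Composite's signature.
x = aes.int64("x")
y = aes.int64("y")

# The condition is built once on the dummies ...
cond = aes.and_(aes.eq(x, 1), aes.eq(y, 1))
cond_op = Composite([x, y], [cond])

# ... and the resulting Op is then applied to the real scalar shape values,
# mirroring `assert_cond_op(*scalar_nonconst_nb_shapes)` in the patched code.
s1 = aes.int64("s1")
s2 = aes.int64("s2")
check = cond_op(s1, s2)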
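
A short usage sketch for the helper renamed in patch 3, following the updated test in `tests/tensor/test_basic.py`; it assumes the patched `aesara.tensor.basic.infer_static_shape` is importable:

import aesara.tensor as at
from aesara.tensor.basic import infer_static_shape

# A symbolic shape whose single entry can be constant-folded to 1.
constant_size = at.constant([1])
specify_size = at.specify_shape(constant_size, [1])

sh, static_shape = infer_static_shape(specify_size)
# `sh` holds the validated symbolic shape values; `static_shape` holds the
# folded ints, with None wherever no constant could be recovered.
assert static_shape == (1,)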
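
Patches 5 and 6 lean on the correspondence between static shapes and broadcast flags: a static shape entry of `1` means broadcastable, and `None` means unknown at compile time. A small sketch of that relationship, assuming the `TensorType` constructor behaves as in the test updated by patch 1:

from aesara.tensor.type import TensorType

t = TensorType("float64", shape=(1, None, 3))
# A static 1 maps to a broadcastable dimension; None and other ints do not.
assert t.shape == (1, None, 3)
assert t.broadcastable == (True, False, False)

# This is why, e.g., Scan.infer_shape can return `r.type.shape[i]` directly
# whenever it is not None, instead of special-casing `r.broadcastable[i]`.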