From 6617b691326e71f3b925c708cce925664699ba6c Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Wed, 29 Jun 2022 22:30:43 +0800 Subject: [PATCH 01/20] added Ops for solve_discrete_lyapunov and solve_continuous_lyapunov --- aesara/tensor/slinalg.py | 211 ++++++++++++++++++++++++++++++----- tests/tensor/test_slinalg.py | 54 +++++++-- 2 files changed, 232 insertions(+), 33 deletions(-) diff --git a/aesara/tensor/slinalg.py b/aesara/tensor/slinalg.py index 0adba5cbef..c4081a03df 100644 --- a/aesara/tensor/slinalg.py +++ b/aesara/tensor/slinalg.py @@ -1,9 +1,10 @@ import logging import warnings -from typing import Union +from typing import Union, Optional import numpy as np import scipy.linalg +from numpy.typing._array_like import ArrayLike import aesara.tensor from aesara.graph.basic import Apply @@ -14,7 +15,6 @@ from aesara.tensor.type import matrix, tensor, vector from aesara.tensor.var import TensorVariable - logger = logging.getLogger(__name__) @@ -140,7 +140,7 @@ def make_node(self, x, l, dz): assert l.ndim == 2 assert dz.ndim == 2 assert ( - l.owner.op.lower == self.lower + l.owner.op.lower == self.lower ), "lower/upper mismatch between Cholesky op and CholeskyGrad op" return Apply(self, [x, l, dz], [x.type()]) @@ -191,13 +191,12 @@ def infer_shape(self, fgraph, node, shapes): class CholeskySolve(Op): - __props__ = ("lower", "check_finite") def __init__( - self, - lower=True, - check_finite=True, + self, + lower=True, + check_finite=True, ): self.lower = lower self.check_finite = check_finite @@ -270,9 +269,9 @@ class SolveBase(Op): ) def __init__( - self, - lower=False, - check_finite=True, + self, + lower=False, + check_finite=True, ): self.lower = lower self.check_finite = check_finite @@ -352,11 +351,11 @@ class SolveTriangular(SolveBase): ) def __init__( - self, - trans=0, - lower=False, - unit_diagonal=False, - check_finite=True, + self, + trans=0, + lower=False, + unit_diagonal=False, + check_finite=True, ): super().__init__(lower=lower, check_finite=check_finite) self.trans = trans @@ -388,12 +387,12 @@ def L_op(self, inputs, outputs, output_gradients): def solve_triangular( - a: TensorVariable, - b: TensorVariable, - trans: Union[int, str] = 0, - lower: bool = False, - unit_diagonal: bool = False, - check_finite: bool = True, + a: TensorVariable, + b: TensorVariable, + trans: Union[int, str] = 0, + lower: bool = False, + unit_diagonal: bool = False, + check_finite: bool = True, ) -> TensorVariable: """Solve the equation `a x = b` for `x`, assuming `a` is a triangular matrix. 
@@ -438,10 +437,10 @@ class Solve(SolveBase): ) def __init__( - self, - assume_a="gen", - lower=False, - check_finite=True, + self, + assume_a="gen", + lower=False, + check_finite=True, ): if assume_a not in ("gen", "sym", "her", "pos"): raise ValueError(f"{assume_a} is not a recognized matrix structure") @@ -512,6 +511,7 @@ def solve(a, b, assume_a="gen", lower=False, check_finite=True): solve_upper_triangular = SolveTriangular(lower=False) solve_symmetric = Solve(assume_a="sym") + # TODO: Optimizations to replace multiplication by matrix inverse # with solve() Op (still unwritten) @@ -745,6 +745,165 @@ def perform(self, node, inputs, outputs): expm = Expm() + +class SolveContinuousLyapunov(at.Op): + __props__ = () + + def make_node(self, A, B): + A = as_tensor_variable(A) + B = as_tensor_variable(B) + + out_dtype = aesara.scalar.upcast(A.dtype, B.dtype) + X = aesara.tensor.matrix(dtype=out_dtype) + + return aesara.graph.basic.Apply(self, [A, B], [X]) + + def perform(self, node, inputs, output_storage): + (A, B) = inputs + X = output_storage[0] + + X[0] = scipy.linalg.solve_continuous_lyapunov(A, B) + + def infer_shape(self, fgraph, node, shapes): + return [shapes[0]] + + def grad(self, inputs, output_grads): + # Gradient computations come from Kao and Hennequin (2020), https://arxiv.org/pdf/2011.11430.pdf + # Note that they write the equation as AX + XA.H + Q = 0, while scipy uses AX + XA^H = Q, + # so minor adjustments need to be made. + A, Q = inputs + (dX,) = output_grads + + X = self(A, Q) + S = self(A.conj().T, -dX) # Eq 31, adjusted + + A_bar = S.dot(X.conj().T) + S.conj().T.dot(X) + Q_bar = -S # Eq 29, adjusted + + return [A_bar, Q_bar] + + def _check_input_dims(self, A, B): + if A.ndim != 2: + raise ValueError(f'solve_discrete_lyapunov only valid for 2d matrices, matrix A has {A.ndim} dimensions') + if B.ndim != 2: + raise ValueError(f'solve_discrete_lyapunov only valid for 2d matrices, matrix A has {B.ndim} dimensions') + + AN, AM = A.shape + BN, BM = B.shape + + if AN != AM: + raise ValueError(f'Matrix A should be square, the provided matrix has shape {AN} x {AM}') + if BN != BM: + raise ValueError(f'Matrix B should be square, the provided matrix has shape {BN} x {BM}') + + if AN != BN: + raise ValueError(f'Matrices A and B should have the same size, but matrix A has shape {AN} x {AN}, and' + f'matrix B has shape {BN} x {BM}') + + +class SolveDiscreteLyapunov(at.Op): + __props__ = () + + def __init__(self, method=None): + self.method = method + + def make_node(self, A, B): + A = as_tensor_variable(A) + B = as_tensor_variable(B) + + out_dtype = aesara.scalar.upcast(A.dtype, B.dtype) + X = aesara.tensor.matrix(dtype=out_dtype) + + return aesara.graph.basic.Apply(self, [A, B], [X]) + + def perform(self, node, inputs, output_storage): + (A, B) = inputs + self._check_input_dims(A, B) + + X = output_storage[0] + + X[0] = scipy.linalg.solve_discrete_lyapunov(A, B, method=self.method) + + def infer_shape(self, fgraph, node, shapes): + return [shapes[0]] + + def grad(self, inputs, output_grads): + # Gradient computations come from Kao and Hennequin (2020), https://arxiv.org/pdf/2011.11430.pdf + A, Q = inputs + (dX,) = output_grads + + X = self(A, Q) + S = self(A.conj().T, dX) # Eq 41, note that it is not written as a proper Lyapunov equation + + A_bar = aesara.tensor.linalg.matrix_dot(S, A, X.conj().T) + aesara.tensor.linalg.matrix_dot(S.conj().T, A, X) + Q_bar = S + return [A_bar, Q_bar] + + def _check_input_dims(self, A, B): + if A.ndim != 2: + raise 
ValueError(f'solve_continuous_lyapunov only valid for 2d matrices, matrix A has {A.ndim} dimensions')
+        if B.ndim != 2:
+            raise ValueError(f'solve_continuous_lyapunov only valid for 2d matrices, matrix A has {B.ndim} dimensions')
+
+        AN, AM = A.shape
+        BN, BM = B.shape
+
+        if AN != AM:
+            raise ValueError(f'Matrix A should be square, the provided matrix has shape {AN} x {AM}')
+        if BN != BM:
+            raise ValueError(f'Matrix B should be square, the provided matrix has shape {BN} x {BM}')
+
+        if AN != BN:
+            raise ValueError(f'Matrices A and B should have the same size, but matrix A has shape {AN} x {AN}, and'
+                             f'matrix B has shape {BN} x {BM}')
+
+
+_solve_continuous_lyapunov = SolveContinuousLyapunov()
+
+
+def solve_discrete_lyapunov(A, Q, method: Optional[str] = None) -> at.Op:
+    """
+    Solve the discrete Lyapunov equation :math:`AXA^H - X = Q`.
+    Parameters
+    ----------
+    A: ArrayLike
+        Square matrix of shape N x N; must have the same shape as Q
+    Q: ArrayLike
+        Square matrix of shape N x N; must have the same shape as A
+    method: Optional, string
+        Solver method passed to scipy.linalg.solve_discrete_lyapunov, either "bilinear", "direct", or None. "direct"
+        scales poorly with size. If None, uses "direct" if N < 10, else "bilinear".
+
+    Returns
+    -------
+    X: at.matrix
+        Square matrix of shape N x N, representing the solution to the Lyapunov equation
+    """
+
+    return SolveDiscreteLyapunov(method)(A, Q)
+
+
+def solve_continuous_lyapunov(A, Q) -> at.Op:
+    """
+    Solve the continuous Lyapunov equation :math:`AX + XA^H = Q`
+
+    Parameters
+    ----------
+    A: ArrayLike
+        Square matrix of shape N x N; must have the same shape as Q
+    Q: ArrayLike
+        Square matrix of shape N x N; must have the same shape as A
+
+    Returns
+    -------
+    X: at.matrix
+        Square matrix of shape N x N, representing the solution to the Lyapunov equation
+
+    """
+
+    return _solve_continuous_lyapunov(A, Q)
+
+
 __all__ = [
     "cholesky",
     "solve",
@@ -754,4 +913,6 @@ def perform(self, node, inputs, outputs):
     "eigvalsh",
     "kron",
     "expm",
+    "solve_discrete_lyapunov",
+    "solve_continuous_lyapunov"
 ]
diff --git a/tests/tensor/test_slinalg.py b/tests/tensor/test_slinalg.py
index 13acf1febc..0fec3f9e76 100644
--- a/tests/tensor/test_slinalg.py
+++ b/tests/tensor/test_slinalg.py
@@ -24,9 +24,12 @@
     kron,
     solve,
     solve_triangular,
+    solve_discrete_lyapunov,
+    solve_continuous_lyapunov
 )
 from aesara.tensor.type import dmatrix, matrix, tensor, vector
 from tests import unittest_tools as utt
+from tests.unittest_tools import InferShapeTester
 
 
 def check_lower_triangular(pd, ch_f):
@@ -178,14 +181,14 @@ class TestSolveBase(utt.InferShapeTester):
         [
             (vector, matrix, "`A` must be a matrix.*"),
             (
-                functools.partial(tensor, dtype="floatX", shape=(False,) * 3),
-                matrix,
-                "`A` must be a matrix.*",
+                    functools.partial(tensor, dtype="floatX", shape=(False,) * 3),
+                    matrix,
+                    "`A` must be a matrix.*",
             ),
             (
-                matrix,
-                functools.partial(tensor, dtype="floatX", shape=(False,) * 3),
-                "`b` must be a matrix or a vector.*",
+                    matrix,
+                    functools.partial(tensor, dtype="floatX", shape=(False,) * 3),
+                    "`b` must be a matrix or a vector.*",
             ),
         ],
     )
@@ -498,7 +501,7 @@ def test_expm_grad_2():
    # Always test in float64 for better numerical stability.
A = rng.standard_normal((5, 5)) w = rng.standard_normal((5)) ** 2 - A = (np.diag(w**0.5)).dot(A + A.T).dot(np.diag(w ** (-0.5))) + A = (np.diag(w ** 0.5)).dot(A + A.T).dot(np.diag(w ** (-0.5))) assert not np.allclose(A, A.T) utt.verify_grad(expm, [A], rng=rng) @@ -514,7 +517,6 @@ def test_expm_grad_3(): class TestKron(utt.InferShapeTester): - rng = np.random.default_rng(43) def setup_method(self): @@ -552,3 +554,39 @@ def test_numpy_2d(self): b = self.rng.random(shp1).astype(config.floatX) out = f(a, b) assert np.allclose(out, np.kron(a, b)) + + +def test_solve_discrete_lyapunov(): + N = 5 + rng = np.random.default_rng(utt.fetch_seed()) + a = at.dmatrix() + q = at.dmatrix() + f = function([a, q], [solve_discrete_lyapunov(a, q)]) + + A = rng.normal(size=(N, N)) + Q = rng.normal(size=(N, N)) + + X = f(A, Q) + assert np.allclose(A @ X @ A.conj().T - X + Q, 0.0) + + utt.verify_grad(solve_discrete_lyapunov, + pt=[A, Q], + rng=rng) + + +def test_solve_continuous_lyapunov(): + N = 5 + rng = np.random.default_rng(utt.fetch_seed()) + a = at.dmatrix() + q = at.dmatrix() + f = function([a, q], [solve_continuous_lyapunov(a, q)]) + + A = rng.normal(size=(N, N)) + Q = rng.normal(size=(N, N)) + X = f(A, Q) + + assert np.allclose(A @ X + X @ A.conj().T, Q) + + utt.verify_grad(solve_continuous_lyapunov, + pt=[A, Q], + rng=rng) \ No newline at end of file From 34acf8f0b27997d60a894f19cbf03cc8e42aea04 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Wed, 29 Jun 2022 22:48:01 +0800 Subject: [PATCH 02/20] ran pre-commit --- aesara/tensor/slinalg.py | 102 +++++++++++++++++++++-------------- tests/tensor/test_slinalg.py | 27 ++++------ 2 files changed, 74 insertions(+), 55 deletions(-) diff --git a/aesara/tensor/slinalg.py b/aesara/tensor/slinalg.py index c4081a03df..c005d32b27 100644 --- a/aesara/tensor/slinalg.py +++ b/aesara/tensor/slinalg.py @@ -1,10 +1,9 @@ import logging import warnings -from typing import Union, Optional +from typing import Optional, Union import numpy as np import scipy.linalg -from numpy.typing._array_like import ArrayLike import aesara.tensor from aesara.graph.basic import Apply @@ -15,6 +14,7 @@ from aesara.tensor.type import matrix, tensor, vector from aesara.tensor.var import TensorVariable + logger = logging.getLogger(__name__) @@ -140,7 +140,7 @@ def make_node(self, x, l, dz): assert l.ndim == 2 assert dz.ndim == 2 assert ( - l.owner.op.lower == self.lower + l.owner.op.lower == self.lower ), "lower/upper mismatch between Cholesky op and CholeskyGrad op" return Apply(self, [x, l, dz], [x.type()]) @@ -194,9 +194,9 @@ class CholeskySolve(Op): __props__ = ("lower", "check_finite") def __init__( - self, - lower=True, - check_finite=True, + self, + lower=True, + check_finite=True, ): self.lower = lower self.check_finite = check_finite @@ -269,9 +269,9 @@ class SolveBase(Op): ) def __init__( - self, - lower=False, - check_finite=True, + self, + lower=False, + check_finite=True, ): self.lower = lower self.check_finite = check_finite @@ -351,11 +351,11 @@ class SolveTriangular(SolveBase): ) def __init__( - self, - trans=0, - lower=False, - unit_diagonal=False, - check_finite=True, + self, + trans=0, + lower=False, + unit_diagonal=False, + check_finite=True, ): super().__init__(lower=lower, check_finite=check_finite) self.trans = trans @@ -387,12 +387,12 @@ def L_op(self, inputs, outputs, output_gradients): def solve_triangular( - a: TensorVariable, - b: TensorVariable, - trans: Union[int, str] = 0, - lower: bool = False, - unit_diagonal: bool = False, - check_finite: bool = True, + a: 
TensorVariable, + b: TensorVariable, + trans: Union[int, str] = 0, + lower: bool = False, + unit_diagonal: bool = False, + check_finite: bool = True, ) -> TensorVariable: """Solve the equation `a x = b` for `x`, assuming `a` is a triangular matrix. @@ -437,10 +437,10 @@ class Solve(SolveBase): ) def __init__( - self, - assume_a="gen", - lower=False, - check_finite=True, + self, + assume_a="gen", + lower=False, + check_finite=True, ): if assume_a not in ("gen", "sym", "her", "pos"): raise ValueError(f"{assume_a} is not a recognized matrix structure") @@ -784,21 +784,31 @@ def grad(self, inputs, output_grads): def _check_input_dims(self, A, B): if A.ndim != 2: - raise ValueError(f'solve_discrete_lyapunov only valid for 2d matrices, matrix A has {A.ndim} dimensions') + raise ValueError( + f"solve_discrete_lyapunov only valid for 2d matrices, matrix A has {A.ndim} dimensions" + ) if B.ndim != 2: - raise ValueError(f'solve_discrete_lyapunov only valid for 2d matrices, matrix A has {B.ndim} dimensions') + raise ValueError( + f"solve_discrete_lyapunov only valid for 2d matrices, matrix A has {B.ndim} dimensions" + ) AN, AM = A.shape BN, BM = B.shape if AN != AM: - raise ValueError(f'Matrix A should be square, the provided matrix has shape {AN} x {AM}') + raise ValueError( + f"Matrix A should be square, the provided matrix has shape {AN} x {AM}" + ) if BN != BM: - raise ValueError(f'Matrix B should be square, the provided matrix has shape {BN} x {BM}') + raise ValueError( + f"Matrix B should be square, the provided matrix has shape {BN} x {BM}" + ) if AN != BN: - raise ValueError(f'Matrices A and B should have the same size, but matrix A has shape {AN} x {AN}, and' - f'matrix B has shape {BN} x {BM}') + raise ValueError( + f"Matrices A and B should have the same size, but matrix A has shape {AN} x {AN}, and" + f"matrix B has shape {BN} x {BM}" + ) class SolveDiscreteLyapunov(at.Op): @@ -833,29 +843,43 @@ def grad(self, inputs, output_grads): (dX,) = output_grads X = self(A, Q) - S = self(A.conj().T, dX) # Eq 41, note that it is not written as a proper Lyapunov equation + S = self( + A.conj().T, dX + ) # Eq 41, note that it is not written as a proper Lyapunov equation - A_bar = aesara.tensor.linalg.matrix_dot(S, A, X.conj().T) + aesara.tensor.linalg.matrix_dot(S.conj().T, A, X) + A_bar = aesara.tensor.linalg.matrix_dot( + S, A, X.conj().T + ) + aesara.tensor.linalg.matrix_dot(S.conj().T, A, X) Q_bar = S return [A_bar, Q_bar] def _check_input_dims(self, A, B): if A.ndim != 2: - raise ValueError(f'solve_continuous_lyapunov only valid for 2d matrices, matrix A has {A.ndim} dimensions') + raise ValueError( + f"solve_continuous_lyapunov only valid for 2d matrices, matrix A has {A.ndim} dimensions" + ) if B.ndim != 2: - raise ValueError(f'solve_continuous_lyapunov only valid for 2d matrices, matrix A has {B.ndim} dimensions') + raise ValueError( + f"solve_continuous_lyapunov only valid for 2d matrices, matrix A has {B.ndim} dimensions" + ) AN, AM = A.shape BN, BM = B.shape if AN != AM: - raise ValueError(f'Matrix A should be square, the provided matrix has shape {AN} x {AM}') + raise ValueError( + f"Matrix A should be square, the provided matrix has shape {AN} x {AM}" + ) if BN != BM: - raise ValueError(f'Matrix B should be square, the provided matrix has shape {BN} x {BM}') + raise ValueError( + f"Matrix B should be square, the provided matrix has shape {BN} x {BM}" + ) if AN != BN: - raise ValueError(f'Matrices A and B should have the same size, but matrix A has shape {AN} x {AN}, and' - f'matrix B has 
shape {BN} x {BM}') + raise ValueError( + f"Matrices A and B should have the same size, but matrix A has shape {AN} x {AN}, and" + f"matrix B has shape {BN} x {BM}" + ) _solve_continuous_lyapunov = SolveContinuousLyapunov() @@ -914,5 +938,5 @@ def solve_continuous_lyapunov(A, Q) -> at.Op: "kron", "expm", "solve_discrete_lyapunov", - "solve_continuous_lyapunov" + "solve_continuous_lyapunov", ] diff --git a/tests/tensor/test_slinalg.py b/tests/tensor/test_slinalg.py index 0fec3f9e76..8f6876cd1b 100644 --- a/tests/tensor/test_slinalg.py +++ b/tests/tensor/test_slinalg.py @@ -23,13 +23,12 @@ expm, kron, solve, - solve_triangular, + solve_continuous_lyapunov, solve_discrete_lyapunov, - solve_continuous_lyapunov + solve_triangular, ) from aesara.tensor.type import dmatrix, matrix, tensor, vector from tests import unittest_tools as utt -from tests.unittest_tools import InferShapeTester def check_lower_triangular(pd, ch_f): @@ -181,14 +180,14 @@ class TestSolveBase(utt.InferShapeTester): [ (vector, matrix, "`A` must be a matrix.*"), ( - functools.partial(tensor, dtype="floatX", shape=(False,) * 3), - matrix, - "`A` must be a matrix.*", + functools.partial(tensor, dtype="floatX", shape=(False,) * 3), + matrix, + "`A` must be a matrix.*", ), ( - matrix, - functools.partial(tensor, dtype="floatX", shape=(False,) * 3), - "`b` must be a matrix or a vector.*", + matrix, + functools.partial(tensor, dtype="floatX", shape=(False,) * 3), + "`b` must be a matrix or a vector.*", ), ], ) @@ -501,7 +500,7 @@ def test_expm_grad_2(): # Always test in float64 for better numerical stability. A = rng.standard_normal((5, 5)) w = rng.standard_normal((5)) ** 2 - A = (np.diag(w ** 0.5)).dot(A + A.T).dot(np.diag(w ** (-0.5))) + A = (np.diag(w**0.5)).dot(A + A.T).dot(np.diag(w ** (-0.5))) assert not np.allclose(A, A.T) utt.verify_grad(expm, [A], rng=rng) @@ -569,9 +568,7 @@ def test_solve_discrete_lyapunov(): X = f(A, Q) assert np.allclose(A @ X @ A.conj().T - X + Q, 0.0) - utt.verify_grad(solve_discrete_lyapunov, - pt=[A, Q], - rng=rng) + utt.verify_grad(solve_discrete_lyapunov, pt=[A, Q], rng=rng) def test_solve_continuous_lyapunov(): @@ -587,6 +584,4 @@ def test_solve_continuous_lyapunov(): assert np.allclose(A @ X + X @ A.conj().T, Q) - utt.verify_grad(solve_continuous_lyapunov, - pt=[A, Q], - rng=rng) \ No newline at end of file + utt.verify_grad(solve_continuous_lyapunov, pt=[A, Q], rng=rng) From f28cee46d4205ee179084e83762dcef986eb9237 Mon Sep 17 00:00:00 2001 From: jessegrabowski <48652735+jessegrabowski@users.noreply.github.com> Date: Thu, 30 Jun 2022 13:37:14 +0800 Subject: [PATCH 03/20] add `method` to `SolveDiscreteLyapunov` `__props__` Co-authored-by: Ricardo Vieira <28983449+ricardoV94@users.noreply.github.com> --- aesara/tensor/slinalg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aesara/tensor/slinalg.py b/aesara/tensor/slinalg.py index c005d32b27..8b584849e2 100644 --- a/aesara/tensor/slinalg.py +++ b/aesara/tensor/slinalg.py @@ -812,7 +812,7 @@ def _check_input_dims(self, A, B): class SolveDiscreteLyapunov(at.Op): - __props__ = () + __props__ = ("method",) def __init__(self, method=None): self.method = method From 10472ba56d1479c96f98152005f4f0dc5a9a95a3 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Thu, 30 Jun 2022 15:02:31 +0800 Subject: [PATCH 04/20] Remove shape checks from `SolveDiscreteLyapunov` and `SolveContinuousLyapunov` --- aesara/tensor/slinalg.py | 58 ---------------------------------------- 1 file changed, 58 deletions(-) diff --git a/aesara/tensor/slinalg.py 
b/aesara/tensor/slinalg.py index c005d32b27..56d75b67c7 100644 --- a/aesara/tensor/slinalg.py +++ b/aesara/tensor/slinalg.py @@ -782,34 +782,6 @@ def grad(self, inputs, output_grads): return [A_bar, Q_bar] - def _check_input_dims(self, A, B): - if A.ndim != 2: - raise ValueError( - f"solve_discrete_lyapunov only valid for 2d matrices, matrix A has {A.ndim} dimensions" - ) - if B.ndim != 2: - raise ValueError( - f"solve_discrete_lyapunov only valid for 2d matrices, matrix A has {B.ndim} dimensions" - ) - - AN, AM = A.shape - BN, BM = B.shape - - if AN != AM: - raise ValueError( - f"Matrix A should be square, the provided matrix has shape {AN} x {AM}" - ) - if BN != BM: - raise ValueError( - f"Matrix B should be square, the provided matrix has shape {BN} x {BM}" - ) - - if AN != BN: - raise ValueError( - f"Matrices A and B should have the same size, but matrix A has shape {AN} x {AN}, and" - f"matrix B has shape {BN} x {BM}" - ) - class SolveDiscreteLyapunov(at.Op): __props__ = () @@ -828,8 +800,6 @@ def make_node(self, A, B): def perform(self, node, inputs, output_storage): (A, B) = inputs - self._check_input_dims(A, B) - X = output_storage[0] X[0] = scipy.linalg.solve_discrete_lyapunov(A, B, method=self.method) @@ -853,34 +823,6 @@ def grad(self, inputs, output_grads): Q_bar = S return [A_bar, Q_bar] - def _check_input_dims(self, A, B): - if A.ndim != 2: - raise ValueError( - f"solve_continuous_lyapunov only valid for 2d matrices, matrix A has {A.ndim} dimensions" - ) - if B.ndim != 2: - raise ValueError( - f"solve_continuous_lyapunov only valid for 2d matrices, matrix A has {B.ndim} dimensions" - ) - - AN, AM = A.shape - BN, BM = B.shape - - if AN != AM: - raise ValueError( - f"Matrix A should be square, the provided matrix has shape {AN} x {AM}" - ) - if BN != BM: - raise ValueError( - f"Matrix B should be square, the provided matrix has shape {BN} x {BM}" - ) - - if AN != BN: - raise ValueError( - f"Matrices A and B should have the same size, but matrix A has shape {AN} x {AN}, and" - f"matrix B has shape {BN} x {BM}" - ) - _solve_continuous_lyapunov = SolveContinuousLyapunov() From bf445abd96b2fb221970078b39c4b4f0f6d4a6c9 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Thu, 30 Jun 2022 15:06:51 +0800 Subject: [PATCH 05/20] Update signature of `solve_discrete_lyapunov` and `solve_continuous_lyapunov` to correctly show a TensorVariable is returned --- test.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 test.py diff --git a/test.py b/test.py new file mode 100644 index 0000000000..e69de29bb2 From 8a9da628b11977d5c423330ad1cdb70f7c946ad1 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Thu, 30 Jun 2022 15:07:21 +0800 Subject: [PATCH 06/20] Update signature of `solve_discrete_lyapunov` and `solve_continuous_lyapunov` to correctly show a TensorVariable is returned --- aesara/tensor/slinalg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aesara/tensor/slinalg.py b/aesara/tensor/slinalg.py index c9e255549f..a2a1c68b32 100644 --- a/aesara/tensor/slinalg.py +++ b/aesara/tensor/slinalg.py @@ -827,7 +827,7 @@ def grad(self, inputs, output_grads): _solve_continuous_lyapunov = SolveContinuousLyapunov() -def solve_discrete_lyapunov(A, Q, method: Optional[str] = None) -> at.Op: +def solve_discrete_lyapunov(A, Q, method: Optional[str] = None) -> TensorVariable: """ Solve the discrete Lyapunov equation :math:`AXA^H - X = Q`. 
     Parameters
@@ -849,7 +849,7 @@ def solve_discrete_lyapunov(A, Q, method: Optional[str] = None) -> at.Op:
     return SolveDiscreteLyapunov(method)(A, Q)
 
 
-def solve_continuous_lyapunov(A, Q) -> at.Op:
+def solve_continuous_lyapunov(A, Q) -> TensorVariable:
     """
     Solve the continuous Lyapunov equation :math:`AX + XA^H = Q`

From 1738058e2ff15eab6c5664ec875b42f6f0c9f056 Mon Sep 17 00:00:00 2001
From: jessegrabowski
Date: Thu, 30 Jun 2022 15:08:59 +0800
Subject: [PATCH 07/20] Delete a scratchpad file

---
 test.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 test.py

diff --git a/test.py b/test.py
deleted file mode 100644
index e69de29bb2..0000000000

From 5c2442b7ba273b867858c179412e6823d40c3b19 Mon Sep 17 00:00:00 2001
From: jessegrabowski
Date: Tue, 27 Sep 2022 19:24:13 +0200
Subject: [PATCH 08/20] Add a direct aesara solution via `at.solve` for
 `solve_discrete_lyapunov`. Add a complex check in
 `_direct_solve_discrete_lyapunov` to try to avoid calling `.conj()`, allowing
 for a symbolic gradient. Change default `method` parameter in
 `solve_discrete_lyapunov` from `None` to `direct` Update docstring of
 `solve_discrete_lyapunov` to explain that `method=direct` should be preferred
 for compatibility reasons. Add tests for `_direct_solve_discrete_lyapunov` in
 the real and complex case.

---
 aesara/tensor/slinalg.py     | 56 +++++++++++++++++++++++++-----------
 tests/tensor/test_slinalg.py | 37 ++++++++++++++++++++++--
 2 files changed, 73 insertions(+), 20 deletions(-)

diff --git a/aesara/tensor/slinalg.py b/aesara/tensor/slinalg.py
index a2a1c68b32..6c3cebb537 100644
--- a/aesara/tensor/slinalg.py
+++ b/aesara/tensor/slinalg.py
@@ -1,6 +1,6 @@
 import logging
 import warnings
-from typing import Optional, Union
+from typing import Union
 
 import numpy as np
 import scipy.linalg
@@ -11,6 +11,7 @@
 from aesara.tensor import as_tensor_variable
 from aesara.tensor import basic as at
 from aesara.tensor import math as atm
+from aesara.tensor.shape import reshape
 from aesara.tensor.type import matrix, tensor, vector
 from aesara.tensor.var import TensorVariable
 
@@ -783,12 +784,7 @@ def grad(self, inputs, output_grads):
         return [A_bar, Q_bar]
 
 
-class SolveDiscreteLyapunov(at.Op):
-    __props__ = ("method",)
-
-    def __init__(self, method=None):
-        self.method = method
-
+class BilinearSolveDiscreteLyapunov(at.Op):
     def make_node(self, A, B):
         A = as_tensor_variable(A)
         B = as_tensor_variable(B)
@@ -802,7 +798,7 @@ def perform(self, node, inputs, output_storage):
         (A, B) = inputs
         X = output_storage[0]
 
-        X[0] = scipy.linalg.solve_discrete_lyapunov(A, B, method=self.method)
+        X[0] = scipy.linalg.solve_discrete_lyapunov(A, B, method="bilinear")
 
     def infer_shape(self, fgraph, node, shapes):
         return [shapes[0]]
@@ -813,9 +809,9 @@ def grad(self, inputs, output_grads):
         (dX,) = output_grads
 
         X = self(A, Q)
-        S = self(
-            A.conj().T, dX
-        )  # Eq 41, note that it is not written as a proper Lyapunov equation
+
+        # Eq 41, note that it is not written as a proper Lyapunov equation
+        S = self(A.conj().T, dX)
 
         A_bar = aesara.tensor.linalg.matrix_dot(
             S, A, X.conj().T
@@ -825,9 +821,26 @@ def grad(self, inputs, output_grads):
 
 
 _solve_continuous_lyapunov = SolveContinuousLyapunov()
+_solve_bilinear_direct_lyapunov = BilinearSolveDiscreteLyapunov()
+
+
+def iscomplexobj(x):
+    type_ = x.type
+    dtype = type_.dtype
+    return "complex" in dtype
 
 
-def solve_discrete_lyapunov(A, Q, method: Optional[str] = None) -> TensorVariable:
+def _direct_solve_discrete_lyapunov(A, Q):
+    if iscomplexobj(A):
+        AA = kron(A, A.conj())
+    else:
+        AA = kron(A, A)
+
+    X = solve(at.eye(AA.shape[0]) - AA, Q.ravel())
+    return reshape(X, Q.shape)
+
+
+def solve_discrete_lyapunov(A, Q, method="direct") -> TensorVariable:
     """
     Solve the discrete Lyapunov equation :math:`AXA^H - X = Q`.
     Parameters
@@ -836,22 +849,31 @@ def solve_discrete_lyapunov(A, Q, method: Optional[str] = None) -> TensorVariabl
         Square matrix of shape N x N; must have the same shape as Q
     Q: ArrayLike
         Square matrix of shape N x N; must have the same shape as A
-    method: Optional, string
-        Solver method passed to scipy.linalg.solve_discrete_lyapunov, either "bilinear", "direct", or None. "direct"
-        scales poorly with size. If None, uses "direct" if N < 10, else "bilinear".
+    method: str, default "direct"
+        Solver method used, one of "direct" or "bilinear". "direct" solves the problem directly via matrix inversion.
+        This has a pure Aesara implementation and can thus be cross-compiled to supported backends, and should be
+        preferred when N is not large. The direct method scales poorly with the size of N, and the bilinear can be
+        used in these cases.
 
     Returns
     -------
     X: at.matrix
         Square matrix of shape N x N, representing the solution to the Lyapunov equation
     """
+    if method not in ["direct", "bilinear"]:
+        raise ValueError(
+            f'Parameter "method" must be one of "direct" or "bilinear", found {method}'
+        )
 
-    return SolveDiscreteLyapunov(method)(A, Q)
+    if method == "direct":
+        return _direct_solve_discrete_lyapunov(A, Q)
+    if method == "bilinear":
+        return _solve_bilinear_direct_lyapunov(A, Q)
 
 
 def solve_continuous_lyapunov(A, Q) -> TensorVariable:
     """
-    Solve the continuous Lyapunov equation :math:`AX + XA^H = Q`
+    Solve the continuous Lyapunov equation :math:`AX + XA^H + Q = 0`
 
     Parameters
     ----------
diff --git a/tests/tensor/test_slinalg.py b/tests/tensor/test_slinalg.py
index 8f6876cd1b..35d376a131 100644
--- a/tests/tensor/test_slinalg.py
+++ b/tests/tensor/test_slinalg.py
@@ -2,7 +2,6 @@
 import itertools
 
 import numpy as np
-import numpy.linalg
 import pytest
 import scipy
 
@@ -555,12 +554,44 @@ def test_numpy_2d(self):
         assert np.allclose(out, np.kron(a, b))
 
 
-def test_solve_discrete_lyapunov():
+def test_solve_discrete_lyapunov_via_direct_real():
     N = 5
     rng = np.random.default_rng(utt.fetch_seed())
     a = at.dmatrix()
     q = at.dmatrix()
-    f = function([a, q], [solve_discrete_lyapunov(a, q)])
+    f = function([a, q], [solve_discrete_lyapunov(a, q, method="direct")])
+
+    A = rng.normal(size=(N, N))
+    Q = rng.normal(size=(N, N))
+
+    X = f(A, Q)
+    assert np.allclose(A @ X @ A.T - X + Q, 0.0)
+
+    utt.verify_grad(solve_discrete_lyapunov, pt=[A, Q], rng=rng)
+
+
+def test_solve_discrete_lyapunov_via_direct_complex():
+    N = 5
+    rng = np.random.default_rng(utt.fetch_seed())
+    a = at.zmatrix()
+    q = at.zmatrix()
+    f = function([a, q], [solve_discrete_lyapunov(a, q, method="direct")])
+
+    A = rng.normal(size=(N, N)) + rng.normal(size=(N, N)) * 1j
+    Q = rng.normal(size=(N, N))
+    X = f(A, Q)
+    assert np.allclose(A @ X @ A.conj().T - X + Q, 0.0)
+
+    # TODO: the .conj() method currently does not have a gradient; add this test when gradients are implemented.
+    # utt.verify_grad(solve_discrete_lyapunov, pt=[A, Q], rng=rng)
+
+
+def test_solve_discrete_lyapunov_via_bilinear():
+    N = 5
+    rng = np.random.default_rng(utt.fetch_seed())
+    a = at.dmatrix()
+    q = at.dmatrix()
+    f = function([a, q], [solve_discrete_lyapunov(a, q, method="bilinear")])
 
     A = rng.normal(size=(N, N))
     Q = rng.normal(size=(N, N))

From e0b5d2e6cd347f509895bfeda27bc2031f1ce1bb Mon Sep 17 00:00:00 2001
From: jessegrabowski <48652735+jessegrabowski@users.noreply.github.com>
Date: Thu, 30 Jun 2022 13:37:14 +0800
Subject: [PATCH 09/20] add `method` to `SolveDiscreteLyapunov` `__props__`

Co-authored-by: Ricardo Vieira <28983449+ricardoV94@users.noreply.github.com>
---
 aesara/tensor/slinalg.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aesara/tensor/slinalg.py b/aesara/tensor/slinalg.py
index 56d75b67c7..c9e255549f 100644
--- a/aesara/tensor/slinalg.py
+++ b/aesara/tensor/slinalg.py
@@ -784,7 +784,7 @@ def grad(self, inputs, output_grads):
 
 
 class SolveDiscreteLyapunov(at.Op):
-    __props__ = ()
+    __props__ = ("method",)
 
     def __init__(self, method=None):
         self.method = method

From a14303a1de5526b40162eed43bc5a63b98b3bb84 Mon Sep 17 00:00:00 2001
From: jessegrabowski
Date: Thu, 30 Jun 2022 15:06:51 +0800
Subject: [PATCH 10/20] Update signature of `solve_discrete_lyapunov` and
 `solve_continuous_lyapunov` to correctly show a TensorVariable is returned

---
 test.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 test.py

diff --git a/test.py b/test.py
new file mode 100644
index 0000000000..e69de29bb2

From 4c095b68e157e58d0160db4a98d197056f0f833f Mon Sep 17 00:00:00 2001
From: jessegrabowski
Date: Thu, 30 Jun 2022 15:07:21 +0800
Subject: [PATCH 11/20] Update signature of `solve_discrete_lyapunov` and
 `solve_continuous_lyapunov` to correctly show a TensorVariable is returned

---
 aesara/tensor/slinalg.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/aesara/tensor/slinalg.py b/aesara/tensor/slinalg.py
index c9e255549f..a2a1c68b32 100644
--- a/aesara/tensor/slinalg.py
+++ b/aesara/tensor/slinalg.py
@@ -827,7 +827,7 @@ def grad(self, inputs, output_grads):
 _solve_continuous_lyapunov = SolveContinuousLyapunov()
 
 
-def solve_discrete_lyapunov(A, Q, method: Optional[str] = None) -> at.Op:
+def solve_discrete_lyapunov(A, Q, method: Optional[str] = None) -> TensorVariable:
     """
     Solve the discrete Lyapunov equation :math:`AXA^H - X = Q`.
     Parameters
@@ -849,7 +849,7 @@ def solve_discrete_lyapunov(A, Q, method: Optional[str] = None) -> at.Op:
     return SolveDiscreteLyapunov(method)(A, Q)
 
 
-def solve_continuous_lyapunov(A, Q) -> at.Op:
+def solve_continuous_lyapunov(A, Q) -> TensorVariable:
     """
     Solve the continuous Lyapunov equation :math:`AX + XA^H = Q`

From 810f5d965a3ddf83f850f6f5b5bfae261ed499ef Mon Sep 17 00:00:00 2001
From: jessegrabowski
Date: Thu, 30 Jun 2022 15:08:59 +0800
Subject: [PATCH 12/20] Delete a scratchpad file

---
 test.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 test.py

diff --git a/test.py b/test.py
deleted file mode 100644
index e69de29bb2..0000000000

From 837ec910821dd6e6a617e1a54840ef81503b866e Mon Sep 17 00:00:00 2001
From: jessegrabowski
Date: Tue, 27 Sep 2022 19:24:13 +0200
Subject: [PATCH 13/20] Add a direct aesara solution via `at.solve` for
 `solve_discrete_lyapunov`. Add a complex check in
 `_direct_solve_discrete_lyapunov` to try to avoid calling `.conj()`, allowing
 for a symbolic gradient.
 Change default `method` parameter in `solve_discrete_lyapunov` from `None` to
 `direct` Update docstring of `solve_discrete_lyapunov` to explain that
 `method=direct` should be preferred for compatibility reasons. Add tests for
 `_direct_solve_discrete_lyapunov` in the real and complex case.

---
 aesara/tensor/slinalg.py     | 56 +++++++++++++++++++++++++-----------
 tests/tensor/test_slinalg.py | 37 ++++++++++++++++++++++--
 2 files changed, 73 insertions(+), 20 deletions(-)

diff --git a/aesara/tensor/slinalg.py b/aesara/tensor/slinalg.py
index a2a1c68b32..6c3cebb537 100644
--- a/aesara/tensor/slinalg.py
+++ b/aesara/tensor/slinalg.py
@@ -1,6 +1,6 @@
 import logging
 import warnings
-from typing import Optional, Union
+from typing import Union
 
 import numpy as np
 import scipy.linalg
@@ -11,6 +11,7 @@
 from aesara.tensor import as_tensor_variable
 from aesara.tensor import basic as at
 from aesara.tensor import math as atm
+from aesara.tensor.shape import reshape
 from aesara.tensor.type import matrix, tensor, vector
 from aesara.tensor.var import TensorVariable
 
@@ -783,12 +784,7 @@ def grad(self, inputs, output_grads):
         return [A_bar, Q_bar]
 
 
-class SolveDiscreteLyapunov(at.Op):
-    __props__ = ("method",)
-
-    def __init__(self, method=None):
-        self.method = method
-
+class BilinearSolveDiscreteLyapunov(at.Op):
     def make_node(self, A, B):
         A = as_tensor_variable(A)
         B = as_tensor_variable(B)
@@ -802,7 +798,7 @@ def perform(self, node, inputs, output_storage):
         (A, B) = inputs
         X = output_storage[0]
 
-        X[0] = scipy.linalg.solve_discrete_lyapunov(A, B, method=self.method)
+        X[0] = scipy.linalg.solve_discrete_lyapunov(A, B, method="bilinear")
 
     def infer_shape(self, fgraph, node, shapes):
         return [shapes[0]]
@@ -813,9 +809,9 @@ def grad(self, inputs, output_grads):
         (dX,) = output_grads
 
         X = self(A, Q)
-        S = self(
-            A.conj().T, dX
-        )  # Eq 41, note that it is not written as a proper Lyapunov equation
+
+        # Eq 41, note that it is not written as a proper Lyapunov equation
+        S = self(A.conj().T, dX)
 
         A_bar = aesara.tensor.linalg.matrix_dot(
             S, A, X.conj().T
@@ -825,9 +821,26 @@ def grad(self, inputs, output_grads):
 
 
 _solve_continuous_lyapunov = SolveContinuousLyapunov()
+_solve_bilinear_direct_lyapunov = BilinearSolveDiscreteLyapunov()
+
+
+def iscomplexobj(x):
+    type_ = x.type
+    dtype = type_.dtype
+    return "complex" in dtype
 
 
-def solve_discrete_lyapunov(A, Q, method: Optional[str] = None) -> TensorVariable:
+def _direct_solve_discrete_lyapunov(A, Q):
+    if iscomplexobj(A):
+        AA = kron(A, A.conj())
+    else:
+        AA = kron(A, A)
+
+    X = solve(at.eye(AA.shape[0]) - AA, Q.ravel())
+    return reshape(X, Q.shape)
+
+
+def solve_discrete_lyapunov(A, Q, method="direct") -> TensorVariable:
     """
     Solve the discrete Lyapunov equation :math:`AXA^H - X = Q`.
     Parameters
@@ -836,22 +849,31 @@ def solve_discrete_lyapunov(A, Q, method: Optional[str] = None) -> TensorVariabl
         Square matrix of shape N x N; must have the same shape as Q
     Q: ArrayLike
         Square matrix of shape N x N; must have the same shape as A
-    method: Optional, string
-        Solver method passed to scipy.linalg.solve_discrete_lyapunov, either "bilinear", "direct", or None. "direct"
-        scales poorly with size. If None, uses "direct" if N < 10, else "bilinear".
+    method: str, default "direct"
+        Solver method used, one of "direct" or "bilinear". "direct" solves the problem directly via matrix inversion.
+        This has a pure Aesara implementation and can thus be cross-compiled to supported backends, and should be
+        preferred when N is not large. The direct method scales poorly with the size of N, and the bilinear can be
+        used in these cases.
 
     Returns
     -------
     X: at.matrix
         Square matrix of shape N x N, representing the solution to the Lyapunov equation
     """
+    if method not in ["direct", "bilinear"]:
+        raise ValueError(
+            f'Parameter "method" must be one of "direct" or "bilinear", found {method}'
+        )
 
-    return SolveDiscreteLyapunov(method)(A, Q)
+    if method == "direct":
+        return _direct_solve_discrete_lyapunov(A, Q)
+    if method == "bilinear":
+        return _solve_bilinear_direct_lyapunov(A, Q)
 
 
 def solve_continuous_lyapunov(A, Q) -> TensorVariable:
     """
-    Solve the continuous Lyapunov equation :math:`AX + XA^H = Q`
+    Solve the continuous Lyapunov equation :math:`AX + XA^H + Q = 0`
 
     Parameters
     ----------
diff --git a/tests/tensor/test_slinalg.py b/tests/tensor/test_slinalg.py
index 8f6876cd1b..35d376a131 100644
--- a/tests/tensor/test_slinalg.py
+++ b/tests/tensor/test_slinalg.py
@@ -2,7 +2,6 @@
 import itertools
 
 import numpy as np
-import numpy.linalg
 import pytest
 import scipy
 
@@ -555,12 +554,44 @@ def test_numpy_2d(self):
         assert np.allclose(out, np.kron(a, b))
 
 
-def test_solve_discrete_lyapunov():
+def test_solve_discrete_lyapunov_via_direct_real():
     N = 5
     rng = np.random.default_rng(utt.fetch_seed())
     a = at.dmatrix()
     q = at.dmatrix()
-    f = function([a, q], [solve_discrete_lyapunov(a, q)])
+    f = function([a, q], [solve_discrete_lyapunov(a, q, method="direct")])
+
+    A = rng.normal(size=(N, N))
+    Q = rng.normal(size=(N, N))
+
+    X = f(A, Q)
+    assert np.allclose(A @ X @ A.T - X + Q, 0.0)
+
+    utt.verify_grad(solve_discrete_lyapunov, pt=[A, Q], rng=rng)
+
+
+def test_solve_discrete_lyapunov_via_direct_complex():
+    N = 5
+    rng = np.random.default_rng(utt.fetch_seed())
+    a = at.zmatrix()
+    q = at.zmatrix()
+    f = function([a, q], [solve_discrete_lyapunov(a, q, method="direct")])
+
+    A = rng.normal(size=(N, N)) + rng.normal(size=(N, N)) * 1j
+    Q = rng.normal(size=(N, N))
+    X = f(A, Q)
+    assert np.allclose(A @ X @ A.conj().T - X + Q, 0.0)
+
+    # TODO: the .conj() method currently does not have a gradient; add this test when gradients are implemented.
+    # utt.verify_grad(solve_discrete_lyapunov, pt=[A, Q], rng=rng)
+
+
+def test_solve_discrete_lyapunov_via_bilinear():
+    N = 5
+    rng = np.random.default_rng(utt.fetch_seed())
+    a = at.dmatrix()
+    q = at.dmatrix()
+    f = function([a, q], [solve_discrete_lyapunov(a, q, method="bilinear")])
 
     A = rng.normal(size=(N, N))
     Q = rng.normal(size=(N, N))

From 2861abdfd21c9297a9b8df2d319689d91717dab6 Mon Sep 17 00:00:00 2001
From: jessegrabowski
Date: Tue, 27 Sep 2022 20:59:53 +0200
Subject: [PATCH 14/20] Rewrite function `eye` using `at.zeros` and
 `at.set_subtensor` Update `test_basic.test_eye` to reflect the changes to
 `at.eye`.

---
 aesara/tensor/basic.py     | 25 ++++++++++++++++--------
 tests/tensor/test_basic.py | 39 +++++++++++++++++---------------------
 2 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/aesara/tensor/basic.py b/aesara/tensor/basic.py
index 8a974ea670..ca12feddca 100644
--- a/aesara/tensor/basic.py
+++ b/aesara/tensor/basic.py
@@ -50,6 +50,7 @@
     shape_padright,
     shape_tuple,
 )
+
 from aesara.tensor.type import (
     TensorType,
     discrete_dtypes,
@@ -1409,18 +1410,26 @@ def eye(n, m=None, k=0, dtype=None):
 
     Returns
     -------
-    ndarray of shape (N,M)
-        An array where all elements are equal to zero, except for the `k`-th
-        diagonal, whose values are equal to one.
- + aesara tensor of shape (N,M) + A symbolic tensor representing a matrix where all elements are equal to zero, + except for the `k`-th diagonal, whose values are equal to one. """ - if dtype is None: - dtype = config.floatX + if m is None: m = n - localop = Eye(dtype) - return localop(n, m, k) + if dtype is None: + dtype = aesara.config.floatX + + n = as_tensor_variable(n) + m = as_tensor_variable(m) + k = as_tensor_variable(k) + + i = switch(k >= 0, k, -k * m) + eye = zeros(n * m, dtype=dtype) + eye = aesara.tensor.subtensor.set_subtensor(eye[i::m + 1], 1).reshape((n, m)) + eye = aesara.tensor.subtensor.set_subtensor(eye[m - k:, :], 0) + return eye def identity_like(x): return eye(x.shape[0], x.shape[1], k=0, dtype=x.dtype) diff --git a/tests/tensor/test_basic.py b/tests/tensor/test_basic.py index 9a291a3074..f6ed2c3288 100644 --- a/tests/tensor/test_basic.py +++ b/tests/tensor/test_basic.py @@ -30,7 +30,6 @@ ARange, Choose, ExtractDiag, - Eye, Join, MakeVector, PermuteRowElements, @@ -828,11 +827,8 @@ def check(dtype, N, M_=None, k=0): # allowed. if M is None and config.mode in ["DebugMode", "DEBUG_MODE"]: M = N - N_symb = iscalar() - M_symb = iscalar() - k_symb = iscalar() - f = function([N_symb, M_symb, k_symb], eye(N_symb, M_symb, k_symb, dtype=dtype)) - result = f(N, M, k) + + result = eye(N, M, k, dtype).eval() assert np.allclose(result, np.eye(N, M_, k, dtype=dtype)) assert result.dtype == np.dtype(dtype) @@ -851,7 +847,6 @@ def check(dtype, N, M_=None, k=0): check(dtype, 5, 3, 1) check(dtype, 5, 3, -1) - class TestTriangle: def test_tri(self): def check(dtype, N, M_=None, k=0): @@ -3861,21 +3856,21 @@ def test_Flatten(self): excluding=["local_useless_reshape"], ) - def test_Eye(self): - aiscal = iscalar() - biscal = iscalar() - ciscal = iscalar() - self._compile_and_check( - [aiscal, biscal, ciscal], [Eye()(aiscal, biscal, ciscal)], [4, 4, 0], Eye - ) - - self._compile_and_check( - [aiscal, biscal, ciscal], [Eye()(aiscal, biscal, ciscal)], [4, 5, 0], Eye - ) - - self._compile_and_check( - [aiscal, biscal, ciscal], [Eye()(aiscal, biscal, ciscal)], [3, 5, 0], Eye - ) + # def test_Eye(self): + # aiscal = iscalar() + # biscal = iscalar() + # ciscal = iscalar() + # self._compile_and_check( + # [aiscal, biscal, ciscal], [Eye()(aiscal, biscal, ciscal)], [4, 4, 0], Eye + # ) + # + # self._compile_and_check( + # [aiscal, biscal, ciscal], [Eye()(aiscal, biscal, ciscal)], [4, 5, 0], Eye + # ) + # + # self._compile_and_check( + # [aiscal, biscal, ciscal], [Eye()(aiscal, biscal, ciscal)], [3, 5, 0], Eye + # ) def test_Tri(self): aiscal = iscalar() From 4ac63b319669677366655a400d7c925563f99bc9 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Tue, 27 Sep 2022 21:04:40 +0200 Subject: [PATCH 15/20] remove changes unrelated to eye --- aesara/tensor/basic.py | 6 +- aesara/tensor/slinalg.py | 149 ----------------------------------- tests/tensor/test_basic.py | 1 + tests/tensor/test_slinalg.py | 66 ---------------- 4 files changed, 4 insertions(+), 218 deletions(-) diff --git a/aesara/tensor/basic.py b/aesara/tensor/basic.py index ca12feddca..aac7512068 100644 --- a/aesara/tensor/basic.py +++ b/aesara/tensor/basic.py @@ -50,7 +50,6 @@ shape_padright, shape_tuple, ) - from aesara.tensor.type import ( TensorType, discrete_dtypes, @@ -1426,11 +1425,12 @@ def eye(n, m=None, k=0, dtype=None): i = switch(k >= 0, k, -k * m) eye = zeros(n * m, dtype=dtype) - eye = aesara.tensor.subtensor.set_subtensor(eye[i::m + 1], 1).reshape((n, m)) - eye = aesara.tensor.subtensor.set_subtensor(eye[m - k:, :], 0) + eye 
= aesara.tensor.subtensor.set_subtensor(eye[i :: m + 1], 1).reshape((n, m)) + eye = aesara.tensor.subtensor.set_subtensor(eye[m - k :, :], 0) return eye + def identity_like(x): return eye(x.shape[0], x.shape[1], k=0, dtype=x.dtype) diff --git a/aesara/tensor/slinalg.py b/aesara/tensor/slinalg.py index 6c3cebb537..37ca3b96f6 100644 --- a/aesara/tensor/slinalg.py +++ b/aesara/tensor/slinalg.py @@ -11,7 +11,6 @@ from aesara.tensor import as_tensor_variable from aesara.tensor import basic as at from aesara.tensor import math as atm -from aesara.tensor.shape import reshape from aesara.tensor.type import matrix, tensor, vector from aesara.tensor.var import TensorVariable @@ -746,152 +745,6 @@ def perform(self, node, inputs, outputs): expm = Expm() - -class SolveContinuousLyapunov(at.Op): - __props__ = () - - def make_node(self, A, B): - A = as_tensor_variable(A) - B = as_tensor_variable(B) - - out_dtype = aesara.scalar.upcast(A.dtype, B.dtype) - X = aesara.tensor.matrix(dtype=out_dtype) - - return aesara.graph.basic.Apply(self, [A, B], [X]) - - def perform(self, node, inputs, output_storage): - (A, B) = inputs - X = output_storage[0] - - X[0] = scipy.linalg.solve_continuous_lyapunov(A, B) - - def infer_shape(self, fgraph, node, shapes): - return [shapes[0]] - - def grad(self, inputs, output_grads): - # Gradient computations come from Kao and Hennequin (2020), https://arxiv.org/pdf/2011.11430.pdf - # Note that they write the equation as AX + XA.H + Q = 0, while scipy uses AX + XA^H = Q, - # so minor adjustments need to be made. - A, Q = inputs - (dX,) = output_grads - - X = self(A, Q) - S = self(A.conj().T, -dX) # Eq 31, adjusted - - A_bar = S.dot(X.conj().T) + S.conj().T.dot(X) - Q_bar = -S # Eq 29, adjusted - - return [A_bar, Q_bar] - - -class BilinearSolveDiscreteLyapunov(at.Op): - def make_node(self, A, B): - A = as_tensor_variable(A) - B = as_tensor_variable(B) - - out_dtype = aesara.scalar.upcast(A.dtype, B.dtype) - X = aesara.tensor.matrix(dtype=out_dtype) - - return aesara.graph.basic.Apply(self, [A, B], [X]) - - def perform(self, node, inputs, output_storage): - (A, B) = inputs - X = output_storage[0] - - X[0] = scipy.linalg.solve_discrete_lyapunov(A, B, method="bilinear") - - def infer_shape(self, fgraph, node, shapes): - return [shapes[0]] - - def grad(self, inputs, output_grads): - # Gradient computations come from Kao and Hennequin (2020), https://arxiv.org/pdf/2011.11430.pdf - A, Q = inputs - (dX,) = output_grads - - X = self(A, Q) - - # Eq 41, note that it is not written as a proper Lyapunov equation - S = self(A.conj().T, dX) - - A_bar = aesara.tensor.linalg.matrix_dot( - S, A, X.conj().T - ) + aesara.tensor.linalg.matrix_dot(S.conj().T, A, X) - Q_bar = S - return [A_bar, Q_bar] - - -_solve_continuous_lyapunov = SolveContinuousLyapunov() -_solve_bilinear_direct_lyapunov = BilinearSolveDiscreteLyapunov() - - -def iscomplexobj(x): - type_ = x.type - dtype = type_.dtype - return "complex" in dtype - - -def _direct_solve_discrete_lyapunov(A, Q): - if iscomplexobj(A): - AA = kron(A, A.conj()) - else: - AA = kron(A, A) - - X = solve(at.eye(AA.shape[0]) - AA, Q.ravel()) - return reshape(X, Q.shape) - - -def solve_discrete_lyapunov(A, Q, method="direct") -> TensorVariable: - """ - Solve the discrete Lyapunov equation :math:`AXA^H - X = Q`. 
-    Parameters
-    ----------
-    A: ArrayLike
-        Square matrix of shape N x N; must have the same shape as Q
-    Q: ArrayLike
-        Square matrix of shape N x N; must have the same shape as A
-    method: str, default "direct"
-        Solver method used, one of "direct" or "bilinear". "direct" solves the problem directly via matrix inversion.
-        This has a pure Aesara implementation and can thus be cross-compiled to supported backends, and should be
-        preferred when N is not large. The direct method scales poorly with the size of N, and the bilinear can be
-        used in these cases.
-
-    Returns
-    -------
-    X: at.matrix
-        Square matrix of shape N x N, representing the solution to the Lyapunov equation
-    """
-    if method not in ["direct", "bilinear"]:
-        raise ValueError(
-            f'Parameter "method" must be one of "direct" or "bilinear", found {method}'
-        )
-
-    if method == "direct":
-        return _direct_solve_discrete_lyapunov(A, Q)
-    if method == "bilinear":
-        return _solve_bilinear_direct_lyapunov(A, Q)
-
-
-def solve_continuous_lyapunov(A, Q) -> TensorVariable:
-    """
-    Solve the continuous Lyapunov equation :math:`AX + XA^H + Q = 0`
-
-    Parameters
-    ----------
-    A: ArrayLike
-        Square matrix of shape N x N; must have the same shape as Q
-    Q: ArrayLike
-        Square matrix of shape N x N; must have the same shape as A
-
-    Returns
-    -------
-    X: at.matrix
-        Square matrix of shape N x N, representing the solution to the Lyapunov equation
-
-    """
-
-    return _solve_continuous_lyapunov(A, Q)
-
-
 __all__ = [
     "cholesky",
     "solve",
@@ -901,6 +754,4 @@ def solve_continuous_lyapunov(A, Q) -> TensorVariable:
     "eigvalsh",
     "kron",
     "expm",
-    "solve_discrete_lyapunov",
-    "solve_continuous_lyapunov",
 ]
diff --git a/tests/tensor/test_basic.py b/tests/tensor/test_basic.py
index f6ed2c3288..b70c434c24 100644
--- a/tests/tensor/test_basic.py
+++ b/tests/tensor/test_basic.py
@@ -847,6 +847,7 @@ def check(dtype, N, M_=None, k=0):
         check(dtype, 5, 3, 1)
         check(dtype, 5, 3, -1)
 
+
 class TestTriangle:
     def test_tri(self):
         def check(dtype, N, M_=None, k=0):
diff --git a/tests/tensor/test_slinalg.py b/tests/tensor/test_slinalg.py
index 35d376a131..a7524e6538 100644
--- a/tests/tensor/test_slinalg.py
+++ b/tests/tensor/test_slinalg.py
@@ -22,8 +22,6 @@
     expm,
     kron,
     solve,
-    solve_continuous_lyapunov,
-    solve_discrete_lyapunov,
     solve_triangular,
 )
 from aesara.tensor.type import dmatrix, matrix, tensor, vector
 from tests import unittest_tools as utt
@@ -552,67 +550,3 @@ def test_numpy_2d(self):
         b = self.rng.random(shp1).astype(config.floatX)
         out = f(a, b)
         assert np.allclose(out, np.kron(a, b))
-
-
-def test_solve_discrete_lyapunov_via_direct_real():
-    N = 5
-    rng = np.random.default_rng(utt.fetch_seed())
-    a = at.dmatrix()
-    q = at.dmatrix()
-    f = function([a, q], [solve_discrete_lyapunov(a, q, method="direct")])
-
-    A = rng.normal(size=(N, N))
-    Q = rng.normal(size=(N, N))
-
-    X = f(A, Q)
-    assert np.allclose(A @ X @ A.T - X + Q, 0.0)
-
-    utt.verify_grad(solve_discrete_lyapunov, pt=[A, Q], rng=rng)
-
-
-def test_solve_discrete_lyapunov_via_direct_complex():
-    N = 5
-    rng = np.random.default_rng(utt.fetch_seed())
-    a = at.zmatrix()
-    q = at.zmatrix()
-    f = function([a, q], [solve_discrete_lyapunov(a, q, method="direct")])
-
-    A = rng.normal(size=(N, N)) + rng.normal(size=(N, N)) * 1j
-    Q = rng.normal(size=(N, N))
-    X = f(A, Q)
-    assert np.allclose(A @ X @ A.conj().T - X + Q, 0.0)
-
-    # TODO: the .conj() method currently does not have a gradient; add this test when gradients are implemented.
- # utt.verify_grad(solve_discrete_lyapunov, pt=[A, Q], rng=rng) - - -def test_solve_discrete_lyapunov_via_bilinear(): - N = 5 - rng = np.random.default_rng(utt.fetch_seed()) - a = at.dmatrix() - q = at.dmatrix() - f = function([a, q], [solve_discrete_lyapunov(a, q, method="bilinear")]) - - A = rng.normal(size=(N, N)) - Q = rng.normal(size=(N, N)) - - X = f(A, Q) - assert np.allclose(A @ X @ A.conj().T - X + Q, 0.0) - - utt.verify_grad(solve_discrete_lyapunov, pt=[A, Q], rng=rng) - - -def test_solve_continuous_lyapunov(): - N = 5 - rng = np.random.default_rng(utt.fetch_seed()) - a = at.dmatrix() - q = at.dmatrix() - f = function([a, q], [solve_continuous_lyapunov(a, q)]) - - A = rng.normal(size=(N, N)) - Q = rng.normal(size=(N, N)) - X = f(A, Q) - - assert np.allclose(A @ X + X @ A.conj().T, Q) - - utt.verify_grad(solve_continuous_lyapunov, pt=[A, Q], rng=rng) From af2dc7d0e55afc0cba3603b209d40bd1c762103f Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Tue, 27 Sep 2022 21:07:43 +0200 Subject: [PATCH 16/20] remove changes unrelated to eye --- aesara/tensor/slinalg.py | 1 + tests/tensor/test_slinalg.py | 1 + 2 files changed, 2 insertions(+) diff --git a/aesara/tensor/slinalg.py b/aesara/tensor/slinalg.py index 37ca3b96f6..c2b6a78d66 100644 --- a/aesara/tensor/slinalg.py +++ b/aesara/tensor/slinalg.py @@ -191,6 +191,7 @@ def infer_shape(self, fgraph, node, shapes): class CholeskySolve(Op): + __props__ = ("lower", "check_finite") def __init__( diff --git a/tests/tensor/test_slinalg.py b/tests/tensor/test_slinalg.py index a7524e6538..13e7401871 100644 --- a/tests/tensor/test_slinalg.py +++ b/tests/tensor/test_slinalg.py @@ -2,6 +2,7 @@ import itertools import numpy as np +import numpy.linalg import pytest import scipy From 94ce70dc933f64fbdc1ca542feae725941a22201 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Tue, 27 Sep 2022 21:09:15 +0200 Subject: [PATCH 17/20] remove changes unrelated to eye --- aesara/tensor/slinalg.py | 1 - tests/tensor/test_slinalg.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/aesara/tensor/slinalg.py b/aesara/tensor/slinalg.py index c2b6a78d66..0adba5cbef 100644 --- a/aesara/tensor/slinalg.py +++ b/aesara/tensor/slinalg.py @@ -512,7 +512,6 @@ def solve(a, b, assume_a="gen", lower=False, check_finite=True): solve_upper_triangular = SolveTriangular(lower=False) solve_symmetric = Solve(assume_a="sym") - # TODO: Optimizations to replace multiplication by matrix inverse # with solve() Op (still unwritten) diff --git a/tests/tensor/test_slinalg.py b/tests/tensor/test_slinalg.py index 13e7401871..13acf1febc 100644 --- a/tests/tensor/test_slinalg.py +++ b/tests/tensor/test_slinalg.py @@ -514,6 +514,7 @@ def test_expm_grad_3(): class TestKron(utt.InferShapeTester): + rng = np.random.default_rng(43) def setup_method(self): From 29991e33adbbaeadb48c79f6daaafc1d15a1e210 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Tue, 27 Sep 2022 21:23:07 +0200 Subject: [PATCH 18/20] remove changes unrelated to eye --- tests/tensor/test_basic.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/tests/tensor/test_basic.py b/tests/tensor/test_basic.py index b70c434c24..2fc300718c 100644 --- a/tests/tensor/test_basic.py +++ b/tests/tensor/test_basic.py @@ -30,6 +30,7 @@ ARange, Choose, ExtractDiag, + Eye, Join, MakeVector, PermuteRowElements, @@ -3857,21 +3858,21 @@ def test_Flatten(self): excluding=["local_useless_reshape"], ) - # def test_Eye(self): - # aiscal = iscalar() - # biscal = iscalar() - # ciscal = 
iscalar() - # self._compile_and_check( - # [aiscal, biscal, ciscal], [Eye()(aiscal, biscal, ciscal)], [4, 4, 0], Eye - # ) - # - # self._compile_and_check( - # [aiscal, biscal, ciscal], [Eye()(aiscal, biscal, ciscal)], [4, 5, 0], Eye - # ) - # - # self._compile_and_check( - # [aiscal, biscal, ciscal], [Eye()(aiscal, biscal, ciscal)], [3, 5, 0], Eye - # ) + def test_Eye(self): + aiscal = iscalar() + biscal = iscalar() + ciscal = iscalar() + self._compile_and_check( + [aiscal, biscal, ciscal], [Eye()(aiscal, biscal, ciscal)], [4, 4, 0], Eye + ) + + self._compile_and_check( + [aiscal, biscal, ciscal], [Eye()(aiscal, biscal, ciscal)], [4, 5, 0], Eye + ) + + self._compile_and_check( + [aiscal, biscal, ciscal], [Eye()(aiscal, biscal, ciscal)], [3, 5, 0], Eye + ) def test_Tri(self): aiscal = iscalar() From 4c2a622368b27483179644656e525fc504cf1f90 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Tue, 27 Sep 2022 21:41:06 +0200 Subject: [PATCH 19/20] reverted unnecessary changes to `test_eye` --- tests/tensor/test_basic.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/tensor/test_basic.py b/tests/tensor/test_basic.py index 2fc300718c..fd36639a4e 100644 --- a/tests/tensor/test_basic.py +++ b/tests/tensor/test_basic.py @@ -829,7 +829,13 @@ def check(dtype, N, M_=None, k=0): if M is None and config.mode in ["DebugMode", "DEBUG_MODE"]: M = N - result = eye(N, M, k, dtype).eval() + N_symb = iscalar() + M_symb = iscalar() + k_symb = iscalar() + + f = function([N_symb, M_symb, k_symb], eye(N_symb, M_symb, k_symb, dtype=dtype)) + result = f(N, M, k) + assert np.allclose(result, np.eye(N, M_, k, dtype=dtype)) assert result.dtype == np.dtype(dtype) From 5ece74793df9081bfb552c4955ce57ed30248159 Mon Sep 17 00:00:00 2001 From: jessegrabowski Date: Sat, 1 Oct 2022 16:27:46 +0200 Subject: [PATCH 20/20] Add typing information to arguments in function signature Add typing information to return value Re-write function using scalar functions for optimization Fix bug where identity matrix incorrectly included 1's in the lower triangle when k > m --- aesara/tensor/basic.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/aesara/tensor/basic.py b/aesara/tensor/basic.py index 74d29ffae7..f3e559a18d 100644 --- a/aesara/tensor/basic.py +++ b/aesara/tensor/basic.py @@ -1284,7 +1284,7 @@ def grad(self, inp, grads): return [grad_undefined(self, i, inp[i]) for i in range(3)] -def eye(n, m=None, k=0, dtype=None): +def eye(n: int, m: int = None, k: int = 0, dtype=None) -> TensorVariable: """Return a 2-D array with ones on the diagonal and zeros elsewhere. 
Parameters @@ -1312,14 +1312,29 @@ def eye(n, m=None, k=0, dtype=None): if dtype is None: dtype = aesara.config.floatX - n = as_tensor_variable(n) - m = as_tensor_variable(m) - k = as_tensor_variable(k) + n = aesara.scalar.as_scalar(n) + m = aesara.scalar.as_scalar(m) + k = aesara.scalar.as_scalar(k) + + i = aesara.scalar.switch(k >= 0, k, -k * m) + i_comp_op = aesara.scalar.Composite([n, m, k], [i]) + i_comp = i_comp_op(n, m, k) + + mkm = (m - k) * m + mkm_comp_op = aesara.scalar.Composite([m, k], [mkm]) + mkm_comp = mkm_comp_op(m, k) + + last_row = aesara.scalar.switch(m - k > 0, m - k, 0) + last_row_op = aesara.scalar.Composite([m, k], [last_row]) + last_valid_row = last_row_op(m, k) - i = switch(k >= 0, k, -k * m) eye = zeros(n * m, dtype=dtype) - eye = aesara.tensor.subtensor.set_subtensor(eye[i :: m + 1], 1).reshape((n, m)) - eye = aesara.tensor.subtensor.set_subtensor(eye[m - k :, :], 0) + + ones_slice = slice(i_comp, mkm_comp, m + 1) + overflow_rows = slice(last_valid_row, None, None) + + eye = aesara.tensor.subtensor.set_subtensor(eye[ones_slice], 1).reshape((n, m)) + eye = aesara.tensor.subtensor.set_subtensor(eye[overflow_rows, :], 0) return eye
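
A minimal usage sketch of the Lyapunov solvers this series introduces. It assumes a
checkout at PATCH 13, since PATCH 15 later reverts the Lyapunov changes to keep the
branch focused on `eye`; the variable names below are hypothetical, and only
functions defined in the diffs above are used.

import numpy as np

import aesara
import aesara.tensor as at
from aesara.tensor.slinalg import solve_continuous_lyapunov, solve_discrete_lyapunov

# Symbolic inputs: A and Q are square matrices of the same shape.
A = at.dmatrix("A")
Q = at.dmatrix("Q")

# Discrete equation A X A^H - X + Q = 0. method="direct" stays inside Aesara
# (kron plus solve), so the result can be differentiated symbolically.
X_disc = solve_discrete_lyapunov(A, Q, method="direct")

# Continuous equation, wrapping scipy.linalg.solve_continuous_lyapunov.
X_cont = solve_continuous_lyapunov(A, Q)

f = aesara.function([A, Q], [X_disc, X_cont])

rng = np.random.default_rng(0)
A_val = rng.normal(size=(4, 4))
Q_val = rng.normal(size=(4, 4))
X_d, X_c = f(A_val, Q_val)

# Check each solution against its defining equation, mirroring the tests above.
assert np.allclose(A_val @ X_d @ A_val.T - X_d + Q_val, 0.0)
assert np.allclose(A_val @ X_c + X_c @ A_val.T, Q_val)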