Merge pull request #489 from shizejin/add_Markov_LQ
Add `LQMarkov`.
mmcky authored Jul 8, 2019
2 parents c96cae1 + 5fea2de commit 1da28de
Showing 4 changed files with 513 additions and 6 deletions.
2 changes: 1 addition & 1 deletion quantecon/__init__.py
@@ -29,7 +29,7 @@
from .kalman import Kalman
from .lae import LAE
from .arma import ARMA
-from .lqcontrol import LQ
+from .lqcontrol import LQ, LQMarkov
from .filter import hamilton_filter
from .lqnash import nnash
from .lss import LinearStateSpace
285 changes: 283 additions & 2 deletions quantecon/lqcontrol.py
@@ -1,14 +1,16 @@
"""
Provides a class called LQ for solving linear quadratic control
-problems.
+problems, and a class called LQMarkov for solving Markov jump
+linear quadratic control problems.
"""
from textwrap import dedent
import numpy as np
from numpy import dot
from scipy.linalg import solve
-from .matrix_eqn import solve_discrete_riccati
+from .matrix_eqn import solve_discrete_riccati, solve_discrete_riccati_system
from .util import check_random_state
+from .markov import MarkovChain


class LQ:
@@ -327,3 +329,282 @@ def compute_sequence(self, x0, ts_length=None, method='doubling',
x_path[:, T] = Ax + Bu + Cw_path[:, T]

return x_path, u_path, w_path


class LQMarkov:
r"""
    This class is for analyzing Markov jump linear quadratic optimal
    control problems of the infinite horizon form

    .. math::

        \min \mathbb{E}
        \Big[ \sum_{t=0}^{\infty} \beta^t r(x_t, s_t, u_t) \Big]

    with

    .. math::

        r(x_t, s_t, u_t) :=
        (x_t' R(s_t) x_t + u_t' Q(s_t) u_t + 2 u_t' N(s_t) x_t)

    subject to the law of motion

    .. math::

        x_{t+1} = A(s_t) x_t + B(s_t) u_t + C(s_t) w_{t+1}

    Here :math:`x` is n x 1, :math:`u` is k x 1, :math:`w` is j x 1 and the
    matrices are conformable for these dimensions. The sequence
    :math:`\{w_t\}` is assumed to be white noise, with zero mean and
    :math:`\mathbb{E} [ w_t w_t' ] = I`, the j x j identity.

    If :math:`C` is not supplied as a parameter, the model is assumed to be
    deterministic (and :math:`C` is set to a zero matrix of appropriate
    dimension).

    The optimal value function :math:`V(x_t, s_t)` takes the form

    .. math::

        x_t' P(s_t) x_t + d(s_t)

    and the optimal policy is of the form :math:`u_t = -F(s_t) x_t`.

    Parameters
    ----------
    Π : array_like(float, ndim=2)
        The Markov chain transition matrix, with dimension m x m.
    Qs : array_like(float)
        Consists of m symmetric and non-negative definite payoff
        matrices Q(s) with dimension k x k that correspond to
        the control variable u for each Markov state s.
    Rs : array_like(float)
        Consists of m symmetric and non-negative definite payoff
        matrices R(s) with dimension n x n that correspond to
        the state variable x for each Markov state s.
    As : array_like(float)
        Consists of m state transition matrices A(s) with dimension
        n x n for each Markov state s.
    Bs : array_like(float)
        Consists of m state transition matrices B(s) with dimension
        n x k for each Markov state s.
    Cs : array_like(float), optional(default=None)
        Consists of m state transition matrices C(s) with dimension
        n x j for each Markov state s. If the model is deterministic,
        then Cs should take the default value of None.
    Ns : array_like(float), optional(default=None)
        Consists of m cross product term matrices N(s) with dimension
        k x n for each Markov state s.
    beta : scalar(float), optional(default=1)
        The discount parameter.

    Attributes
    ----------
    Π, Qs, Rs, Ns, As, Bs, Cs, beta : see Parameters
    Ps : array_like(float)
        Ps is part of the value function representation of
        :math:`V(x, s) = x' P(s) x + d(s)`
    ds : array_like(float)
        ds is part of the value function representation of
        :math:`V(x, s) = x' P(s) x + d(s)`
    Fs : array_like(float)
        Fs is the policy rule that determines the choice of control in
        each period at each Markov state
    m : scalar(int)
        The number of Markov states
    k, n, j : scalar(int)
        The dimensions of the matrices as presented above
"""

def __init__(self, Π, Qs, Rs, As, Bs, Cs=None, Ns=None, beta=1):

# == Make sure all matrices for each state are 2D arrays == #
def converter(Xs):
return np.array([np.atleast_2d(np.asarray(X, dtype='float'))
for X in Xs])
self.As, self.Bs, self.Qs, self.Rs = list(map(converter,
(As, Bs, Qs, Rs)))

# == Record number of states == #
self.m = self.Qs.shape[0]
# == Record dimensions == #
self.k, self.n = self.Qs.shape[1], self.Rs.shape[1]

if Ns is None:
# == No cross product term in payoff. Set N=0. == #
Ns = [np.zeros((self.k, self.n)) for i in range(self.m)]

self.Ns = converter(Ns)

if Cs is None:
# == If C not given, then model is deterministic. Set C=0. == #
self.j = 1
Cs = [np.zeros((self.n, self.j)) for i in range(self.m)]

self.Cs = converter(Cs)
self.j = self.Cs.shape[2]

self.beta = beta

self.Π = np.asarray(Π, dtype='float')

self.Ps = None
self.ds = None
self.Fs = None

def __repr__(self):
return self.__str__()

def __str__(self):
m = """\
Markov Jump Linear Quadratic control system
- beta (discount parameter) : {b}
- T (time horizon) : {t}
- m (number of Markov states) : {m}
- n (number of state variables) : {n}
- k (number of control variables) : {k}
- j (number of shocks) : {j}
"""
t = "infinite"
return dedent(m.format(b=self.beta, m=self.m, n=self.n, k=self.k,
j=self.j, t=t))

def stationary_values(self):
"""
        Computes the matrices :math:`P(s)` and scalars :math:`d(s)` that
        represent the value function

        .. math::

            V(x, s) = x' P(s) x + d(s)

        in the infinite horizon case. Also computes the control matrices
        :math:`F(s)` from :math:`u = - F(s) x`.

        Returns
        -------
        Ps : array_like(float)
            Ps is part of the value function representation of
            :math:`V(x, s) = x' P(s) x + d(s)`
        ds : array_like(float)
            ds is part of the value function representation of
            :math:`V(x, s) = x' P(s) x + d(s)`
        Fs : array_like(float)
            Fs is the policy rule that determines the choice of control in
            each period at each Markov state
"""

# == Simplify notations == #
beta, Π = self.beta, self.Π
m, n, k = self.m, self.n, self.k
As, Bs, Cs = self.As, self.Bs, self.Cs
Qs, Rs, Ns = self.Qs, self.Rs, self.Ns

# == Solve for P(s) by iterating discrete riccati system== #
Ps = solve_discrete_riccati_system(Π, As, Bs, Cs, Qs, Rs, Ns, beta)
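        # The matrices Ps solve the coupled algebraic Riccati equations
        # (with EP(s) := Σ_{s'} Π[s, s'] P(s') the expected continuation
        # matrix):
        #
        #   P(s) = R(s) + β A(s)' EP(s) A(s)
        #          - (β B(s)' EP(s) A(s) + N(s))'
        #            (Q(s) + β B(s)' EP(s) B(s))^{-1}
        #            (β B(s)' EP(s) A(s) + N(s))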

# == calculate F and d == #
Fs = np.array([np.empty((k, n)) for i in range(m)])
X = np.empty((m, m))
sum1, sum2 = np.empty((k, k)), np.empty((k, n))
for i in range(m):
# CCi = C_i C_i'
CCi = Cs[i] @ Cs[i].T
sum1[:, :] = 0.
sum2[:, :] = 0.
for j in range(m):
# for F
sum1 += beta * Π[i, j] * Bs[i].T @ Ps[j] @ Bs[i]
sum2 += beta * Π[i, j] * Bs[i].T @ Ps[j] @ As[i]

# for d
X[j, i] = np.trace(Ps[j] @ CCi)

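            # F(s=i) = (Q(i) + β Σ_j Π[i, j] B(i)' P(j) B(i))^{-1}
            #          (β Σ_j Π[i, j] B(i)' P(j) A(i) + N(i))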
Fs[i][:, :] = solve(Qs[i] + sum1, sum2 + Ns[i])

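        # d solves the linear system
        # d(s) = β Σ_{s'} Π[s, s'] (tr(P(s') C(s) C(s)') + d(s')),
        # i.e. (I - β Π) d = diag(β Π X) with X[s', s] = tr(P(s') C(s) C(s)')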
ds = solve(np.eye(m) - beta * Π,
np.diag(beta * Π @ X).reshape((m, 1))).flatten()

self.Ps, self.ds, self.Fs = Ps, ds, Fs

return Ps, ds, Fs

def compute_sequence(self, x0, ts_length=None, random_state=None):
"""
        Compute and return the optimal state and control sequences
        :math:`x_0, ..., x_T` and :math:`u_0, ..., u_T` under the
        assumption that :math:`\{w_t\}` is iid and :math:`N(0, I)`,
        along with the simulated Markov state sequence
        :math:`s_0, ..., s_T`.

        Parameters
        ----------
        x0 : array_like(float)
            The initial state, a vector of length n
        ts_length : scalar(int), optional(default=None)
            Length of the simulation. If None, T is set to 100
        random_state : int or np.random.RandomState, optional
            Random seed (integer) or np.random.RandomState instance to set
            the initial state of the random number generator for
            reproducibility. If None, a randomly initialized RandomState is
            used.

        Returns
        -------
        x_path : array_like(float)
            An n x T+1 matrix, where the t-th column represents :math:`x_t`
        u_path : array_like(float)
            A k x T matrix, where the t-th column represents :math:`u_t`
        w_path : array_like(float)
            A j x T+1 matrix, where the t-th column represents :math:`w_t`
        state : array_like(int)
            Array containing the Markov state values :math:`s_t` of the
            sample path
"""

# === solve for optimal policies === #
self.stationary_values()

# === Simplify notation === #
As, Bs, Cs = self.As, self.Bs, self.Cs
Fs = self.Fs

random_state = check_random_state(random_state)
x0 = np.asarray(x0)
x0 = x0.reshape(self.n, 1)

T = ts_length if ts_length else 100

# == Simulate Markov states == #
chain = MarkovChain(self.Π)
state = chain.simulate_indices(ts_length=T+1,
random_state=random_state)
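        # state[t] is the regime index s_t; it selects which A, B, C and F
        # matrices apply in period t below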

# == Prepare storage arrays == #
x_path = np.empty((self.n, T+1))
u_path = np.empty((self.k, T))
w_path = random_state.randn(self.j, T+1)
Cw_path = np.empty((self.n, T+1))
for i in range(T+1):
Cw_path[:, i] = Cs[state[i]] @ w_path[:, i]

# == Use policy sequence to generate states and controls == #
x_path[:, 0] = x0.flatten()
u_path[:, 0] = - (Fs[state[0]] @ x0).flatten()
for t in range(1, T):
Ax = As[state[t]] @ x_path[:, t-1]
Bu = Bs[state[t]] @ u_path[:, t-1]
x_path[:, t] = Ax + Bu + Cw_path[:, t]
u_path[:, t] = - (Fs[state[t]] @ x_path[:, t])
Ax = As[state[T]] @ x_path[:, T-1]
Bu = Bs[state[T]] @ u_path[:, T-1]
        x_path[:, T] = Ax + Bu + Cw_path[:, T]

return x_path, u_path, w_path, state
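
As a quick illustration (not part of this commit), here is a minimal usage sketch with two hypothetical Markov regimes; all matrix values below are made up for the example:

import numpy as np
from quantecon import LQMarkov

# Two Markov states with a symmetric transition matrix
Π = np.array([[0.9, 0.1],
              [0.1, 0.9]])

# Scalar problem: n = k = j = 1, with regime-dependent matrices
Qs = np.array([[[1.0]], [[0.5]]])    # control penalties Q(s)
Rs = np.array([[[1.0]], [[2.0]]])    # state penalties R(s)
As = np.array([[[1.0]], [[0.95]]])   # state transitions A(s)
Bs = np.array([[[1.0]], [[1.0]]])    # control loadings B(s)
Cs = np.array([[[0.1]], [[0.2]]])    # shock loadings C(s)

lq = LQMarkov(Π, Qs, Rs, As, Bs, Cs=Cs, beta=0.95)

# Stationary value function components and policy for each regime
Ps, ds, Fs = lq.stationary_values()

# Simulate 50 periods from x0 = 1 with a fixed seed
x_path, u_path, w_path, state = lq.compute_sequence(
    np.array([1.0]), ts_length=50, random_state=1234)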