Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MarkovChain: Sparse matrix support #174

Merged
merged 3 commits into from
Sep 2, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions quantecon/graph_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,27 @@ def cyclic_components(self):
return [np.where(self._cyclic_components_proj == k)[0]
for k in range(self.period)]

def subgraph(self, nodes):
"""
Return the subgraph consisting of the given nodes and edges
between thses nodes.

Parameters
----------
nodes : array_like(int, ndim=1)
Array of nodes.

Returns
-------
DiGraph
A DiGraph representing the subgraph.

"""
adj_matrix = self.csgraph[nodes, :][:, nodes]

weighted = True # To copy the dtype
return DiGraph(adj_matrix, weighted=weighted)


def _csr_matrix_indices(S):
"""
Expand Down
131 changes: 116 additions & 15 deletions quantecon/markov/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
"""
from __future__ import division
import numpy as np
from scipy import sparse
from fractions import gcd
from .gth_solve import gth_solve
from ..graph_tools import DiGraph
Expand All @@ -103,12 +104,13 @@ class MarkovChain(object):

Parameters
----------
P : array_like(float, ndim=2)
P : array_like or scipy sparse matrix (float, ndim=2)
The transition matrix. Must be of shape n x n.

Attributes
----------
P : see Parameters
P : ndarray or scipy.sparse.csr_matrix (float, ndim=2)
See Parameters

stationary_distributions : array_like(float, ndim=2)
Array containing stationary distributions, one for each
Expand Down Expand Up @@ -139,32 +141,50 @@ class MarkovChain(object):
List of numpy arrays containing the cyclic classes. Defined only
when the Markov chain is irreducible.

Notes
-----
In computing stationary distributions, if the input matrix is a
sparse matrix, internally it is converted to a dense matrix.

"""

def __init__(self, P):
self.P = np.asarray(P)
if sparse.issparse(P): # Sparse matrix
self.P = sparse.csr_matrix(P)
self.is_sparse = True
else: # Dense matrix
self.P = np.asarray(P)
self.is_sparse = False

# Check Properties
# Double check that P is a square matrix
if len(self.P.shape) != 2 or self.P.shape[0] != self.P.shape[1]:
raise ValueError('P must be a square matrix')

# The number of states
self.n = self.P.shape[0]

# Double check that P is a nonnegative matrix
if not np.all(self.P >= 0):
if not self.is_sparse:
data_nonnegative = (self.P >= 0) # ndarray
else:
data_nonnegative = (self.P.data >= 0) # csr_matrx
if not np.all(data_nonnegative):
raise ValueError('P must be nonnegative')

# Double check that the rows of P sum to one
if not np.allclose(np.sum(self.P, axis=1), np.ones(self.P.shape[0])):
row_sums = self.P.sum(axis=1)
if self.is_sparse: # row_sums is np.matrix (ndim=2)
row_sums = row_sums.getA1()
if not np.allclose(row_sums, np.ones(self.n)):
raise ValueError('The rows of P must sum to 1')

# The number of states
self.n = self.P.shape[0]

# To analyze the structure of P as a directed graph
self._digraph = None

self._stationary_dists = None
self._cdfs = None
self._cdfs = None # For dense matrix
self._cdfs1d = None # For sparse matrix

def __repr__(self):
msg = "Markov chain with transition matrix \nP = \n{0}"
Expand Down Expand Up @@ -221,7 +241,7 @@ def period(self):
# Determine the period, the LCM of the periods of rec_classes
d = 1
for rec_class in rec_classes:
period = DiGraph(self.P[rec_class, :][:, rec_class]).period
period = self.digraph.subgraph(rec_class).period
d = (d * period) // gcd(d, period)

return d
Expand All @@ -241,13 +261,23 @@ def _compute_stationary(self):

"""
if self.is_irreducible:
stationary_dists = gth_solve(self.P).reshape(1, self.n)
if not self.is_sparse: # Dense
stationary_dists = gth_solve(self.P).reshape(1, self.n)
else: # Sparse
stationary_dists = \
gth_solve(self.P.toarray(),
overwrite=True).reshape(1, self.n)
else:
rec_classes = self.recurrent_classes
stationary_dists = np.zeros((len(rec_classes), self.n))
for i, rec_class in enumerate(rec_classes):
stationary_dists[i, rec_class] = \
gth_solve(self.P[rec_class, :][:, rec_class])
if not self.is_sparse: # Dense
stationary_dists[i, rec_class] = \
gth_solve(self.P[rec_class, :][:, rec_class])
else: # Sparse
stationary_dists[i, rec_class] = \
gth_solve(self.P[rec_class, :][:, rec_class].toarray(),
overwrite=True)

self._stationary_dists = stationary_dists

Expand All @@ -259,13 +289,27 @@ def stationary_distributions(self):

@property
def cdfs(self):
if self._cdfs is None:
if (self._cdfs is None) and not self.is_sparse:
# See issue #137#issuecomment-96128186
cdfs = np.empty((self.n, self.n), order='C')
np.cumsum(self.P, axis=-1, out=cdfs)
self._cdfs = cdfs
return self._cdfs

@property
def cdfs1d(self):
if (self._cdfs1d is None) and self.is_sparse:
data = self.P.data
indices = self.P.indices
indptr = self.P.indptr

cdfs1d = np.empty(self.P.nnz, order='C')
for i in range(self.n):
cdfs1d[indptr[i]:indptr[i+1]] = \
data[indptr[i]:indptr[i+1]].cumsum()
self._cdfs1d = cdfs1d
return self._cdfs1d

def simulate(self, ts_length, init=None, num_reps=None, random_state=None):
"""
Simulate time series of state transitions.
Expand Down Expand Up @@ -332,7 +376,15 @@ def simulate(self, ts_length, init=None, num_reps=None, random_state=None):
random_values = random_state.random_sample(size=(k, ts_length-1))

# Generate sample paths and store in X
_generate_sample_paths(self.cdfs, init_states, random_values, out=X)
if not self.is_sparse: # Dense
_generate_sample_paths(
self.cdfs, init_states, random_values, out=X
)
else: # Sparse
_generate_sample_paths_sparse(
self.cdfs1d, self.P.indices, self.P.indptr, init_states,
random_values, out=X
)

if dim == 1:
return X[0]
Expand Down Expand Up @@ -377,6 +429,55 @@ def _generate_sample_paths(P_cdfs, init_states, random_values, out):
_generate_sample_paths = jit(nopython=True)(_generate_sample_paths)


def _generate_sample_paths_sparse(P_cdfs1d, indices, indptr, init_states,
random_values, out):
"""
For sparse matrix.

Generate num_reps sample paths of length ts_length, where num_reps =
out.shape[0] and ts_length = out.shape[1].

Parameters
----------
P_cdfs1d : ndarray(float, ndim=1)
1D array containing the CDFs of the state transition.

indices : ndarray(int, ndim=1)
CSR format index array.

indptr : ndarray(int, ndim=1)
CSR format index pointer array.

init_states : array_like(int, ndim=1)
Array containing the initial states. Its length must be equal to
num_reps.

random_values : ndarray(float, ndim=2)
Array containing random values from [0, 1). Its shape must be
equal to (num_reps, ts_length-1)

out : ndarray(int, ndim=2)
Array to store the sample paths.

Notes
-----
This routine is jit-complied if the module Numba is vailable.

"""
num_reps, ts_length = out.shape

for i in range(num_reps):
out[i, 0] = init_states[i]
for t in range(ts_length-1):
k = searchsorted(P_cdfs1d[indptr[out[i, t]]:indptr[out[i, t]+1]],
random_values[i, t])
out[i, t+1] = indices[indptr[out[i, t]]+k]

if numba_installed:
_generate_sample_paths_sparse = \
jit(nopython=True)(_generate_sample_paths_sparse)


def mc_compute_stationary(P):
"""
Computes stationary distributions of P, one for each recurrent
Expand Down
7 changes: 5 additions & 2 deletions quantecon/markov/gth_solve.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
xrange = range


def gth_solve(A):
def gth_solve(A, overwrite=False):
r"""
This routine computes the stationary distribution of an irreducible
Markov transition matrix (stochastic matrix) or transition rate
Expand Down Expand Up @@ -51,6 +51,9 @@ def gth_solve(A):
x : numpy.ndarray(float, ndim=1)
Stationary distribution of `A`.

overwrite : bool, optional(default=False)
Whether to overwrite `A`.

References
----------
.. [1] W. K. Grassmann, M. I. Taksar and D. P. Heyman, "Regenerative
Expand All @@ -61,7 +64,7 @@ def gth_solve(A):
Simulation, Princeton University Press, 2009.

"""
A1 = np.array(A, dtype=float, copy=True, order='C')
A1 = np.array(A, dtype=float, copy=not overwrite, order='C')
# `order='C'` is for use with Numba <= 0.18.2
# See issue github.com/numba/numba/issues/1103

Expand Down
7 changes: 3 additions & 4 deletions quantecon/markov/random.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@
import numpy as np
import scipy.sparse

from ..util import check_random_state, numba_installed, jit
from ..util import check_random_state
from ..random import (
probvec, sample_without_replacement
)

from .core import MarkovChain


def random_markov_chain(n, k=None, sparse=False, random_state=None):
"""
Return a randomly sampled MarkovChain instance with n states, where
Expand All @@ -32,7 +33,7 @@ def random_markov_chain(n, k=None, sparse=False, random_state=None):

sparse : bool, optional(default=False)
Whether to store the transition probability matrix in sparse
matrix form. (Sparse format is not yet implemented.)
matrix form.

random_state : scalar(int) or np.random.RandomState,
optional(default=None)
Expand All @@ -59,8 +60,6 @@ def random_markov_chain(n, k=None, sparse=False, random_state=None):
[ 0.56227226, 0. , 0.43772774]])

"""
if sparse:
raise NotImplementedError
P = random_stochastic_matrix(n, k, sparse, format='csr',
random_state=random_state)
mc = MarkovChain(P)
Expand Down
Loading