Skip to content

CSG does not work with multivariate target #3233

Closed
@konstmish

Description

@konstmish

Description of your problem

Please provide a minimal, self-contained, and reproducible example.

# Most of the code is from the CSG example https://docs.pymc.io/notebooks/constant_stochastic_gradient.html

import theano
import pymc3 as pm
import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
raw_data = pd.read_csv('/tmp/CASP.csv', delimiter=',')
data = (raw_data - raw_data.mean())/raw_data.std()
q_size = data.shape[1]-1
q_name = ['F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9']

data_N = data.size // len(data.columns)
train_test_split = 0.3 # It is equal to 0.95 in the CSG example, but to make it faster we changed it to 0.3
ixs = rng.randint(data_N, size=int(data_N*train_test_split))
neg_ixs = list(set(range(data_N)) - set(ixs))
train_df = data.iloc[ixs]
test_df = data.iloc[neg_ixs]

N = train_df.size / len(train_df.columns)
n_test = test_df.size / len(test_df.columns)

train_X = train_df[q_name].as_matrix()
train_Y1 = train_df['RMSD'].as_matrix() # the name was changed from train_Y to train_Y1

test_X = test_df[q_name].as_matrix()
test_Y1 = test_df['RMSD'].as_matrix()

# These are extra variables that were not in the example
b2_true = np.random.random()
b3_true = np.random.random(train_X.shape[1])
train_Y2 = 0.01 * np.random.random(len(train_Y1)) + train_X @ b3_true + b2_true
test_Y2 = 0.01 * np.random.random(len(test_Y1)) + test_X @ b3_true + b2_true

Y_all = np.stack([train_Y1, train_Y2]).T

# Generator that returns mini-batches in each iteration
def create_minibatches(batch_size):
    while True:
        # Return random data samples of set size 100 each iteration
        ixs = rng.randint(N, size=batch_size)
        yield (train_X[ixs], Y_all[ixs])

# Tensors and RV that wil l be using mini-batches
batch_size = 50
minibatches = create_minibatches(batch_size)

model_input = theano.shared(train_X, name='X')
model_output = theano.shared(np.stack([train_Y1, train_Y2]).T, name='Y')
n_targets = 2

with pm.Model() as model:
    b00 = pm.Normal("Intercept0", mu=0.0, sd=1.0)
    b10 = pm.Normal("Slope0", mu=0.0, shape=(q_size,))
    mu0 = b00 + theano.dot(model_input, b10)
    
    b01 = pm.Normal("Intercept1", mu=0.0, sd=1.0)
    b11 = pm.Normal("Slope1", mu=0.0, shape=(q_size,))
    mu1 = b01 + theano.dot(model_input, b11)
    
    BoundedNormal = pm.Bound(pm.Normal, lower=0.0)
    sd_dist = BoundedNormal.dist('sd_obs', mu=0.1, sd=0.5, shape=n_targets)
    chol_packed = pm.LKJCholeskyCov('chol_packed', n=n_targets, eta=2, sd_dist=sd_dist)
    sigma = pm.expand_packed_triangular(n_targets, chol_packed)
    mus = theano.tensor.stack([mu0, mu1], axis=1)
    
    y = pm.MvNormal("train_obs", mu=mus, chol=sigma, observed=model_output)

minibatch_tensors = [model_input, model_output]

draws = 1000 * 5

use_csg = True

if use_csg:
    with model:
        csg_step_method = pm.step_methods.CSG(vars=model.vars,
                                              model=model,
                                              total_size=N, 
                                              batch_size=batch_size,
                                              minibatches=minibatches, 
                                              minibatch_tensors=minibatch_tensors) 
        csg_trace = pm.sample(draws=draws, n_init=200, step=csg_step_method, tune=500, init='map')
else:
    with model:
        trace = pm.sample(draws=200, tune=200, n_init=2000, init='map')

Please provide the full traceback.

Multiprocess sampling (2 chains in 2 jobs)
CSG: [chol_packed, Slope1, Intercept1, Slope0, Intercept0]
Sampling 2 chains:   0%|          | 2/11000 [00:22<34:07:37, 11.17s/draws]
---------------------------------------------------------------------------
RemoteTraceback                           Traceback (most recent call last)
RemoteTraceback: 
"""
Traceback (most recent call last):
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/theano/compile/function_module.py", line 903, in __call__
    self.fn() if output_subset is None else\
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/theano/gof/op.py", line 892, in rval
    r = p(n, [x[0] for x in i], o)
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/theano/tensor/slinalg.py", line 76, in perform
    z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype)
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/scipy/linalg/decomp_cholesky.py", line 91, in cholesky
    check_finite=check_finite)
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/scipy/linalg/decomp_cholesky.py", line 40, in _cholesky
    "definite" % info)
numpy.linalg.linalg.LinAlgError: 14-th leading minor of the array is not positive definite

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/parallel_sampling.py", line 73, in run
    self._start_loop()
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/parallel_sampling.py", line 113, in _start_loop
    point, stats = self._compute_point()
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/parallel_sampling.py", line 141, in _compute_point
    point = self._step_method.step(self._point)
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/step_methods/arraystep.py", line 178, in step
    apoint = self.astep(self.bij.map(point))
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/step_methods/sgmcmc.py", line 199, in astep
    return q0 + self.training_fn(q0, *next(self.minibatches))
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/theano/compile/function_module.py", line 917, in __call__
    storage_map=getattr(self.fn, 'storage_map', None))
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/theano/gof/link.py", line 325, in raise_with_op
    reraise(exc_type, exc_value, exc_trace)
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/six.py", line 692, in reraise
    raise value.with_traceback(tb)
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/theano/compile/function_module.py", line 903, in __call__
    self.fn() if output_subset is None else\
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/theano/gof/op.py", line 892, in rval
    r = p(n, [x[0] for x in i], o)
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/theano/tensor/slinalg.py", line 76, in perform
    z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype)
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/scipy/linalg/decomp_cholesky.py", line 91, in cholesky
    check_finite=check_finite)
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/scipy/linalg/decomp_cholesky.py", line 40, in _cholesky
    "definite" % info)
numpy.linalg.linalg.LinAlgError: 14-th leading minor of the array is not positive definite
Apply node that caused the error: Cholesky{lower=True, destructive=False, on_error='raise'}(Elemwise{Composite{((i0 - (i0 / i1)) + (i2 / i1))}}[(0, 0)].0)
Toposort index: 105
Inputs types: [TensorType(float64, matrix)]
Inputs shapes: [(23, 23)]
Inputs strides: [(184, 8)]
Inputs values: ['not shown']
Outputs clients: [[Elemwise{Switch}[(0, 2)](Elemwise{lt,no_inplace}.0, TensorConstant{[[1. 0. 0...0. 0. 1.]]}, Cholesky{lower=True, destructive=False, on_error='raise'}.0)]]

Backtrace when the node is created(use Theano flag traceback.limit=N to make it longer):
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3018, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3183, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3265, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-4-0b67b0bab7b3>", line 83, in <module>
    minibatch_tensors=minibatch_tensors)
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/step_methods/sgmcmc.py", line 350, in __init__
    super(CSG, self).__init__(vars, **kwargs)
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/step_methods/sgmcmc.py", line 169, in __init__
    self._initialize_values()
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/step_methods/sgmcmc.py", line 358, in _initialize_values
    self.training_fn = self.mk_training_fn()
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/step_methods/sgmcmc.py", line 384, in mk_training_fn
    B = tt.switch(t < 0, tt.eye(q_size), tt.slinalg.cholesky(C_t))

HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
"""

The above exception was the direct cause of the following exception:

LinAlgError                               Traceback (most recent call last)
LinAlgError: 14-th leading minor of the array is not positive definite
Apply node that caused the error: Cholesky{lower=True, destructive=False, on_error='raise'}(Elemwise{Composite{((i0 - (i0 / i1)) + (i2 / i1))}}[(0, 0)].0)
Toposort index: 105
Inputs types: [TensorType(float64, matrix)]
Inputs shapes: [(23, 23)]
Inputs strides: [(184, 8)]
Inputs values: ['not shown']
Outputs clients: [[Elemwise{Switch}[(0, 2)](Elemwise{lt,no_inplace}.0, TensorConstant{[[1. 0. 0...0. 0. 1.]]}, Cholesky{lower=True, destructive=False, on_error='raise'}.0)]]

Backtrace when the node is created(use Theano flag traceback.limit=N to make it longer):
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3018, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3183, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3265, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-4-0b67b0bab7b3>", line 83, in <module>
    minibatch_tensors=minibatch_tensors)
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/step_methods/sgmcmc.py", line 350, in __init__
    super(CSG, self).__init__(vars, **kwargs)
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/step_methods/sgmcmc.py", line 169, in __init__
    self._initialize_values()
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/step_methods/sgmcmc.py", line 358, in _initialize_values
    self.training_fn = self.mk_training_fn()
  File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/step_methods/sgmcmc.py", line 384, in mk_training_fn
    B = tt.switch(t < 0, tt.eye(q_size), tt.slinalg.cholesky(C_t))

HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.

The above exception was the direct cause of the following exception:

RuntimeError                              Traceback (most recent call last)
<ipython-input-4-0b67b0bab7b3> in <module>
     82                                               minibatches=minibatches,
     83                                               minibatch_tensors=minibatch_tensors) 
---> 84         csg_trace = pm.sample(draws=draws, n_init=200, step=csg_step_method, tune=500, init='map')
     85 else:
     86     with model:

~/miniconda3/lib/python3.6/site-packages/pymc3/sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, nuts_kwargs, step_kwargs, progressbar, model, random_seed, live_plot, discard_tuned_samples, live_plot_kwargs, compute_convergence_checks, use_mmap, **kwargs)
    447             _print_step_hierarchy(step)
    448             try:
--> 449                 trace = _mp_sample(**sample_args)
    450             except pickle.PickleError:
    451                 _log.warning("Could not pickle model, sampling singlethreaded.")

~/miniconda3/lib/python3.6/site-packages/pymc3/sampling.py in _mp_sample(draws, tune, step, chains, cores, chain, random_seed, start, progressbar, trace, model, use_mmap, **kwargs)
    997         try:
    998             with sampler:
--> 999                 for draw in sampler:
   1000                     trace = traces[draw.chain - chain]
   1001                     if trace.supports_sampler_stats and draw.stats is not None:

~/miniconda3/lib/python3.6/site-packages/pymc3/parallel_sampling.py in __iter__(self)
    303 
    304         while self._active:
--> 305             draw = ProcessAdapter.recv_draw(self._active)
    306             proc, is_last, draw, tuning, stats, warns = draw
    307             if self._progress is not None:

~/miniconda3/lib/python3.6/site-packages/pymc3/parallel_sampling.py in recv_draw(processes, timeout)
    221         if msg[0] == 'error':
    222             old = msg[1]
--> 223             six.raise_from(RuntimeError('Chain %s failed.' % proc.chain), old)
    224         elif msg[0] == 'writing_done':
    225             proc._readable = True

~/miniconda3/lib/python3.6/site-packages/six.py in raise_from(value, from_value)

RuntimeError: Chain 1 failed.

Please provide any additional information below.
I made the target 2-dimensional and added covariance matrix to the likelihood. The rest is mainly taken from the CSG example (https://docs.pymc.io/notebooks/constant_stochastic_gradient.html). Note that everything works perfectly with 1-D target. If I change use_csg to False, then the nuts sampler is used and chains do not fail, so the issue seems to be in CSG. It also breaks with: 3 targets; init='advi+adapt_diag'; two independent likelihoods for each target. If two likelihoods are used, the error is different, "DisconnectedInputError:". However, it works if we use a simpler covariance matrix:

    std = pm.HalfCauchy('std', beta=0.2)
    y = pm.MvNormal("train_obs", mu=mus, cov=np.eye(2) * std, observed=model_output)

We really hope that we will be able to use CSG as for 1-D target it's orders of magnitude faster.

Versions and main components

  • PyMC3 Version: 3.5
  • Theano Version: 1.0.3
  • Python Version: 3.6
  • Operating system: macos Sierra 10.12.6
  • How did you install PyMC3: conda

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions