Description
Description of your problem
Please provide a minimal, self-contained, and reproducible example.
# Most of the code is from the CSG example https://docs.pymc.io/notebooks/constant_stochastic_gradient.html
import theano
import pymc3 as pm
import numpy as np
import pandas as pd
rng = np.random.RandomState(0)
raw_data = pd.read_csv('/tmp/CASP.csv', delimiter=',')
data = (raw_data - raw_data.mean())/raw_data.std()
q_size = data.shape[1]-1
q_name = ['F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9']
data_N = data.size // len(data.columns)
train_test_split = 0.3 # It is equal to 0.95 in the CSG example, but to make it faster we changed it to 0.3
ixs = rng.randint(data_N, size=int(data_N*train_test_split))
neg_ixs = list(set(range(data_N)) - set(ixs))
train_df = data.iloc[ixs]
test_df = data.iloc[neg_ixs]
N = train_df.size / len(train_df.columns)
n_test = test_df.size / len(test_df.columns)
train_X = train_df[q_name].as_matrix()
train_Y1 = train_df['RMSD'].as_matrix() # the name was changed from train_Y to train_Y1
test_X = test_df[q_name].as_matrix()
test_Y1 = test_df['RMSD'].as_matrix()
# These are extra variables that were not in the example
b2_true = np.random.random()
b3_true = np.random.random(train_X.shape[1])
train_Y2 = 0.01 * np.random.random(len(train_Y1)) + train_X @ b3_true + b2_true
test_Y2 = 0.01 * np.random.random(len(test_Y1)) + test_X @ b3_true + b2_true
Y_all = np.stack([train_Y1, train_Y2]).T
# Generator that returns mini-batches in each iteration
def create_minibatches(batch_size):
while True:
# Return random data samples of set size 100 each iteration
ixs = rng.randint(N, size=batch_size)
yield (train_X[ixs], Y_all[ixs])
# Tensors and RV that wil l be using mini-batches
batch_size = 50
minibatches = create_minibatches(batch_size)
model_input = theano.shared(train_X, name='X')
model_output = theano.shared(np.stack([train_Y1, train_Y2]).T, name='Y')
n_targets = 2
with pm.Model() as model:
b00 = pm.Normal("Intercept0", mu=0.0, sd=1.0)
b10 = pm.Normal("Slope0", mu=0.0, shape=(q_size,))
mu0 = b00 + theano.dot(model_input, b10)
b01 = pm.Normal("Intercept1", mu=0.0, sd=1.0)
b11 = pm.Normal("Slope1", mu=0.0, shape=(q_size,))
mu1 = b01 + theano.dot(model_input, b11)
BoundedNormal = pm.Bound(pm.Normal, lower=0.0)
sd_dist = BoundedNormal.dist('sd_obs', mu=0.1, sd=0.5, shape=n_targets)
chol_packed = pm.LKJCholeskyCov('chol_packed', n=n_targets, eta=2, sd_dist=sd_dist)
sigma = pm.expand_packed_triangular(n_targets, chol_packed)
mus = theano.tensor.stack([mu0, mu1], axis=1)
y = pm.MvNormal("train_obs", mu=mus, chol=sigma, observed=model_output)
minibatch_tensors = [model_input, model_output]
draws = 1000 * 5
use_csg = True
if use_csg:
with model:
csg_step_method = pm.step_methods.CSG(vars=model.vars,
model=model,
total_size=N,
batch_size=batch_size,
minibatches=minibatches,
minibatch_tensors=minibatch_tensors)
csg_trace = pm.sample(draws=draws, n_init=200, step=csg_step_method, tune=500, init='map')
else:
with model:
trace = pm.sample(draws=200, tune=200, n_init=2000, init='map')
Please provide the full traceback.
Multiprocess sampling (2 chains in 2 jobs)
CSG: [chol_packed, Slope1, Intercept1, Slope0, Intercept0]
Sampling 2 chains: 0%| | 2/11000 [00:22<34:07:37, 11.17s/draws]
---------------------------------------------------------------------------
RemoteTraceback Traceback (most recent call last)
RemoteTraceback:
"""
Traceback (most recent call last):
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/theano/compile/function_module.py", line 903, in __call__
self.fn() if output_subset is None else\
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/theano/gof/op.py", line 892, in rval
r = p(n, [x[0] for x in i], o)
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/theano/tensor/slinalg.py", line 76, in perform
z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype)
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/scipy/linalg/decomp_cholesky.py", line 91, in cholesky
check_finite=check_finite)
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/scipy/linalg/decomp_cholesky.py", line 40, in _cholesky
"definite" % info)
numpy.linalg.linalg.LinAlgError: 14-th leading minor of the array is not positive definite
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/parallel_sampling.py", line 73, in run
self._start_loop()
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/parallel_sampling.py", line 113, in _start_loop
point, stats = self._compute_point()
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/parallel_sampling.py", line 141, in _compute_point
point = self._step_method.step(self._point)
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/step_methods/arraystep.py", line 178, in step
apoint = self.astep(self.bij.map(point))
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/step_methods/sgmcmc.py", line 199, in astep
return q0 + self.training_fn(q0, *next(self.minibatches))
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/theano/compile/function_module.py", line 917, in __call__
storage_map=getattr(self.fn, 'storage_map', None))
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/theano/gof/link.py", line 325, in raise_with_op
reraise(exc_type, exc_value, exc_trace)
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/six.py", line 692, in reraise
raise value.with_traceback(tb)
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/theano/compile/function_module.py", line 903, in __call__
self.fn() if output_subset is None else\
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/theano/gof/op.py", line 892, in rval
r = p(n, [x[0] for x in i], o)
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/theano/tensor/slinalg.py", line 76, in perform
z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype)
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/scipy/linalg/decomp_cholesky.py", line 91, in cholesky
check_finite=check_finite)
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/scipy/linalg/decomp_cholesky.py", line 40, in _cholesky
"definite" % info)
numpy.linalg.linalg.LinAlgError: 14-th leading minor of the array is not positive definite
Apply node that caused the error: Cholesky{lower=True, destructive=False, on_error='raise'}(Elemwise{Composite{((i0 - (i0 / i1)) + (i2 / i1))}}[(0, 0)].0)
Toposort index: 105
Inputs types: [TensorType(float64, matrix)]
Inputs shapes: [(23, 23)]
Inputs strides: [(184, 8)]
Inputs values: ['not shown']
Outputs clients: [[Elemwise{Switch}[(0, 2)](Elemwise{lt,no_inplace}.0, TensorConstant{[[1. 0. 0...0. 0. 1.]]}, Cholesky{lower=True, destructive=False, on_error='raise'}.0)]]
Backtrace when the node is created(use Theano flag traceback.limit=N to make it longer):
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3018, in run_cell_async
interactivity=interactivity, compiler=compiler, result=result)
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3183, in run_ast_nodes
if (yield from self.run_code(code, result)):
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3265, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-4-0b67b0bab7b3>", line 83, in <module>
minibatch_tensors=minibatch_tensors)
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/step_methods/sgmcmc.py", line 350, in __init__
super(CSG, self).__init__(vars, **kwargs)
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/step_methods/sgmcmc.py", line 169, in __init__
self._initialize_values()
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/step_methods/sgmcmc.py", line 358, in _initialize_values
self.training_fn = self.mk_training_fn()
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/step_methods/sgmcmc.py", line 384, in mk_training_fn
B = tt.switch(t < 0, tt.eye(q_size), tt.slinalg.cholesky(C_t))
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
"""
The above exception was the direct cause of the following exception:
LinAlgError Traceback (most recent call last)
LinAlgError: 14-th leading minor of the array is not positive definite
Apply node that caused the error: Cholesky{lower=True, destructive=False, on_error='raise'}(Elemwise{Composite{((i0 - (i0 / i1)) + (i2 / i1))}}[(0, 0)].0)
Toposort index: 105
Inputs types: [TensorType(float64, matrix)]
Inputs shapes: [(23, 23)]
Inputs strides: [(184, 8)]
Inputs values: ['not shown']
Outputs clients: [[Elemwise{Switch}[(0, 2)](Elemwise{lt,no_inplace}.0, TensorConstant{[[1. 0. 0...0. 0. 1.]]}, Cholesky{lower=True, destructive=False, on_error='raise'}.0)]]
Backtrace when the node is created(use Theano flag traceback.limit=N to make it longer):
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3018, in run_cell_async
interactivity=interactivity, compiler=compiler, result=result)
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3183, in run_ast_nodes
if (yield from self.run_code(code, result)):
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3265, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-4-0b67b0bab7b3>", line 83, in <module>
minibatch_tensors=minibatch_tensors)
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/step_methods/sgmcmc.py", line 350, in __init__
super(CSG, self).__init__(vars, **kwargs)
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/step_methods/sgmcmc.py", line 169, in __init__
self._initialize_values()
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/step_methods/sgmcmc.py", line 358, in _initialize_values
self.training_fn = self.mk_training_fn()
File "/Users/mishchk/miniconda3/lib/python3.6/site-packages/pymc3/step_methods/sgmcmc.py", line 384, in mk_training_fn
B = tt.switch(t < 0, tt.eye(q_size), tt.slinalg.cholesky(C_t))
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
The above exception was the direct cause of the following exception:
RuntimeError Traceback (most recent call last)
<ipython-input-4-0b67b0bab7b3> in <module>
82 minibatches=minibatches,
83 minibatch_tensors=minibatch_tensors)
---> 84 csg_trace = pm.sample(draws=draws, n_init=200, step=csg_step_method, tune=500, init='map')
85 else:
86 with model:
~/miniconda3/lib/python3.6/site-packages/pymc3/sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, nuts_kwargs, step_kwargs, progressbar, model, random_seed, live_plot, discard_tuned_samples, live_plot_kwargs, compute_convergence_checks, use_mmap, **kwargs)
447 _print_step_hierarchy(step)
448 try:
--> 449 trace = _mp_sample(**sample_args)
450 except pickle.PickleError:
451 _log.warning("Could not pickle model, sampling singlethreaded.")
~/miniconda3/lib/python3.6/site-packages/pymc3/sampling.py in _mp_sample(draws, tune, step, chains, cores, chain, random_seed, start, progressbar, trace, model, use_mmap, **kwargs)
997 try:
998 with sampler:
--> 999 for draw in sampler:
1000 trace = traces[draw.chain - chain]
1001 if trace.supports_sampler_stats and draw.stats is not None:
~/miniconda3/lib/python3.6/site-packages/pymc3/parallel_sampling.py in __iter__(self)
303
304 while self._active:
--> 305 draw = ProcessAdapter.recv_draw(self._active)
306 proc, is_last, draw, tuning, stats, warns = draw
307 if self._progress is not None:
~/miniconda3/lib/python3.6/site-packages/pymc3/parallel_sampling.py in recv_draw(processes, timeout)
221 if msg[0] == 'error':
222 old = msg[1]
--> 223 six.raise_from(RuntimeError('Chain %s failed.' % proc.chain), old)
224 elif msg[0] == 'writing_done':
225 proc._readable = True
~/miniconda3/lib/python3.6/site-packages/six.py in raise_from(value, from_value)
RuntimeError: Chain 1 failed.
Please provide any additional information below.
I made the target 2-dimensional and added covariance matrix to the likelihood. The rest is mainly taken from the CSG example (https://docs.pymc.io/notebooks/constant_stochastic_gradient.html). Note that everything works perfectly with 1-D target. If I change use_csg to False, then the nuts sampler is used and chains do not fail, so the issue seems to be in CSG. It also breaks with: 3 targets; init='advi+adapt_diag'; two independent likelihoods for each target. If two likelihoods are used, the error is different, "DisconnectedInputError:". However, it works if we use a simpler covariance matrix:
std = pm.HalfCauchy('std', beta=0.2)
y = pm.MvNormal("train_obs", mu=mus, cov=np.eye(2) * std, observed=model_output)
We really hope that we will be able to use CSG as for 1-D target it's orders of magnitude faster.
Versions and main components
- PyMC3 Version: 3.5
- Theano Version: 1.0.3
- Python Version: 3.6
- Operating system: macos Sierra 10.12.6
- How did you install PyMC3: conda