Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/issue 2814 warmup auto #2815

Open
wants to merge 29 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
1952caf
Added in switching adaptation
bbbales2 Apr 18, 2019
4dc0fff
Simplified code and added some comments
bbbales2 Apr 20, 2019
8000691
Changed 'switching' to 'auto'
bbbales2 Apr 20, 2019
b1205f3
Added try/catch so that if auto adaptation fails it falls back to dia…
bbbales2 Apr 22, 2019
a6ae242
Merge branch 'develop' of https://github.com/stan-dev/stan into exper…
bbbales2 Sep 4, 2019
baacc34
Fixed bug with regularization of automatically picked metric. Added t…
bbbales2 Sep 11, 2019
88225d0
Merge branch 'develop' of https://github.com/stan-dev/stan into exper…
bbbales2 Sep 11, 2019
1310988
Merge branch 'develop' into feature/issue-2814-warmup-auto
bbbales2 Oct 29, 2020
989b396
Updated test file with new model signature (Issue #2814)
bbbales2 Oct 29, 2020
0f36d29
Fix cpplint errors (Issue #2814)
bbbales2 Oct 29, 2020
5d37ce4
Moved format in front of lint
bbbales2 Oct 29, 2020
f5d06e3
Merge commit '84535f53ef5e75029273f063f753663aa74d85a9' into HEAD
yashikno Oct 29, 2020
70c5417
[Jenkins] auto-formatting by clang-format version 6.0.0-1ubuntu2~16.0…
stan-buildbot Oct 29, 2020
9213b08
cpplint fixes (Issue #2814)
bbbales2 Oct 29, 2020
fa17886
Merge branch 'feature/issue-2814-warmup-auto' of https://github.com/s…
bbbales2 Oct 29, 2020
fa20a9f
Cpplint fix (Issue #2814)
bbbales2 Oct 29, 2020
0a2f629
Updated tests (Issue #2814)
bbbales2 Oct 30, 2020
9021602
[Jenkins] auto-formatting by clang-format version 6.0.0-1ubuntu2~16.0…
stan-buildbot Oct 30, 2020
8cf629f
Switched strings for characters in state machine (Issue #2814)
bbbales2 Oct 31, 2020
ba03ec6
[Jenkins] auto-formatting by clang-format version 6.0.0-1ubuntu2~16.0…
stan-buildbot Oct 31, 2020
b870702
Updated tolerances on power method (Issue #2814)
bbbales2 Oct 31, 2020
584bffd
Merge branch 'feature/issue-2814-warmup-auto' of https://github.com/s…
bbbales2 Oct 31, 2020
e3d1221
Updated test to make randomness matter less (Issue #2814)
bbbales2 Oct 31, 2020
b07569a
[Jenkins] auto-formatting by clang-format version 6.0.0-1ubuntu2~16.0…
stan-buildbot Oct 31, 2020
9a3cb36
Fixed header problems (Issue #2814)
bbbales2 Oct 31, 2020
473a28c
Merge branch 'feature/issue-2814-warmup-auto' of https://github.com/s…
bbbales2 Oct 31, 2020
da00166
Merge remote-tracking branch 'origin/develop' into feature/issue-2814…
bbbales2 Nov 28, 2020
9f9f955
Merge commit 'f601195083da06dbcbf88271aa386e67520e10e9' into HEAD
yashikno Aug 3, 2022
c805e39
[Jenkins] auto-formatting by clang-format version 10.0.0-4ubuntu1
stan-buildbot Aug 3, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
314 changes: 314 additions & 0 deletions src/stan/mcmc/auto_adaptation.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,314 @@
#ifndef STAN_MCMC_AUTO_ADAPTATION_HPP
#define STAN_MCMC_AUTO_ADAPTATION_HPP

#include <stan/math.hpp>
#include <stan/mcmc/windowed_adaptation.hpp>
#include <algorithm>
#include <iostream>
#include <random>
#include <stdexcept>
#include <vector>

namespace stan {

namespace mcmc {
template <typename Model>
/**
 * Functor adapting a Stan model's log_prob to the callable-on-Eigen-vector
 * interface expected by stan::math::gradient.
 *
 * Holds a reference to the model; the model must outlive this wrapper.
 */
struct log_prob_wrapper_covar {
  const Model& model_;
  explicit log_prob_wrapper_covar(const Model& model) : model_(model) {}

  /**
   * Evaluate the log density (with jacobian and propto flags enabled) at q.
   *
   * @tparam T Scalar type (double or an autodiff type)
   * @param q Unconstrained parameter vector
   * @return log density at q
   */
  template <typename T>
  T operator()(const Eigen::Matrix<T, Eigen::Dynamic, 1>& q) const {
    // log_prob takes a non-const reference, so cast away const here;
    // presumably log_prob does not actually modify q -- TODO confirm
    return model_.template log_prob<true, true, T>(
        const_cast<Eigen::Matrix<T, Eigen::Dynamic, 1>&>(q), &std::cout);
  }
};

namespace internal {
/**
 * Compute the sample covariance of the data in Y.
 *
 * Columns of Y are different variables. Rows are different samples.
 *
 * When there is only one row in Y, return a covariance matrix of the expected
 * size filled with zeros (the centered matrix is identically zero).
 *
 * @param Y Data
 * @return Covariance of Y
 * @throw std::invalid_argument if Y has zero size
 */
inline Eigen::MatrixXd covariance(const Eigen::MatrixXd& Y) {
  // `inline` is required: this is a non-template function defined in a
  // header, so without it every translation unit including this header
  // would emit a definition and violate the one-definition rule.
  stan::math::check_nonzero_size("covariance", "Y", Y);

  Eigen::MatrixXd centered = Y.rowwise() - Y.colwise().mean();
  // Divisor max(rows - 1, 1) gives the unbiased estimator for rows > 1 and
  // avoids division by zero for a single row.
  return centered.transpose() * centered / std::max(centered.rows() - 1.0, 1.0);
}

/**
 * Compute the largest magnitude eigenvalue of a symmetric matrix using the
 * power method. The function f should return the product of that matrix with
 * an arbitrary vector.
 *
 * f should take one Eigen::VectorXd argument, x, and return the product of a
 * matrix with x as an Eigen::VectorXd argument of the same size.
 *
 * The eigenvalue is estimated iteratively. If the kth estimate is e_k, then
 * the function returns when either abs(e_{k + 1} - e_k) < tol * abs(e_k) or
 * the maximum number of iterations have been performed
 *
 * This means the returned eigenvalue might not be computed to full precision
 *
 * @param initial_guess Initial guess of the eigenvector of the largest
 * eigenvalue
 * @param[in,out] max_iterations Maximum number of power iterations, on return
 * number of iterations used
 * @param[in,out] tol Relative tolerance, on return the relative error in the
 * eigenvalue estimate
 * @return Largest magnitude eigenvalue of operator f
 */
template <typename F>
double power_method(F& f, const Eigen::VectorXd& initial_guess,
                    int& max_iterations, double& tol) {
  Eigen::VectorXd v = initial_guess;
  double eval = 0.0;
  // Av holds f applied to the current iterate; recomputed once per iteration
  Eigen::VectorXd Av = f(v);
  stan::math::check_matching_sizes("power_method", "matrix vector product", Av,
                                   "vector", v);

  int i = 0;
  for (; i < max_iterations; ++i) {
    // Rayleigh quotient v.Av / v.v estimates the dominant eigenvalue
    double v_norm = v.norm();
    double new_eval = v.dot(Av) / (v_norm * v_norm);
    // Stop on convergence, or report the final estimate on the last
    // allowed iteration; on exit tol and max_iterations are overwritten
    // with the achieved relative error and the iterations actually used
    if (i == max_iterations - 1
        || std::abs(new_eval - eval) <= tol * std::abs(eval)) {
      tol = std::abs(new_eval - eval) / std::abs(eval);
      eval = new_eval;
      max_iterations = i + 1;
      break;
    }

    eval = new_eval;
    // Normalize before the next multiply to avoid overflow/underflow
    v = Av / Av.norm();

    Av = f(v);
  }

  return eval;
}

/**
 * Compute the largest eigenvalue of the sample covariance rescaled by a
 * metric, that is, the largest eigenvalue of L^{-1} \Sigma L^{-T}
 *
 * @param L Cholesky decomposition of Metric
 * @param Sigma Sample covariance
 * @return Largest eigenvalue
 */
inline double eigenvalue_scaled_covariance(const Eigen::MatrixXd& L,
                                           const Eigen::MatrixXd& Sigma) {
  // `inline` is required: non-template function defined in a header; without
  // it, multiple translation units including this header violate the ODR.
  //
  // S = L^{-1} Sigma L^{-T}, built from two triangular solves so L is never
  // explicitly inverted.
  Eigen::MatrixXd S = L.template triangularView<Eigen::Lower>()
                          .solve(L.template triangularView<Eigen::Lower>()
                                     .solve(Sigma)
                                     .transpose())
                          .transpose();

  // Matrix-vector product operator for the power method; take x by const
  // reference to avoid copying the vector on every iteration.
  auto Sx = [&](const Eigen::VectorXd& x) -> Eigen::VectorXd { return S * x; };

  int max_iterations = 200;
  double tol = 1e-5;

  return internal::power_method(Sx, Eigen::VectorXd::Random(Sigma.cols()),
                                max_iterations, tol);
}

/**
 * Compute the largest eigenvalue of the Hessian of the log density rescaled
 * by a metric, that is, the largest eigenvalue of L^T \nabla^2_{qq} H(q) L
 *
 * @tparam Model Type of model
 * @param model Defines the log density
 * @param L Cholesky decomposition of Metric
 * @param q Point around which to compute the Hessian
 * @return Largest eigenvalue
 */
template <typename Model>
double eigenvalue_scaled_hessian(const Model& model, const Eigen::MatrixXd& L,
                                 const Eigen::VectorXd& q) {
  // Hessian-vector products are approximated with a central finite
  // difference of gradients: H(q) (L x) ~ (g(q + dr/2) - g(q - dr/2)) / dx
  // with dr = L x dx. An exact alternative would be
  // stan::math::hessian_times_vector(log_prob_wrapper_covar<Model>(model),
  // q, x, lp, Ax), which requires second-order autodiff support.
  auto hessian_vector = [&](const Eigen::VectorXd& x) -> Eigen::VectorXd {
    double lp;
    Eigen::VectorXd grad1;
    Eigen::VectorXd grad2;
    double dx = 1e-5;
    Eigen::VectorXd dr = L * x * dx;
    stan::math::gradient(log_prob_wrapper_covar<Model>(model), q + dr / 2.0, lp,
                         grad1);
    stan::math::gradient(log_prob_wrapper_covar<Model>(model), q - dr / 2.0, lp,
                         grad2);
    return L.transpose() * (grad1 - grad2) / dx;
  };

  int max_iterations = 200;
  double tol = 1e-5;

  return internal::power_method(
      hessian_vector, Eigen::VectorXd::Random(q.size()), max_iterations, tol);
}
} // namespace internal

class auto_adaptation : public windowed_adaptation {
 public:
  /**
   * Construct the windowed adaptation schedule.
   *
   * @param n Number of parameters (currently unused; kept for interface
   * compatibility with the other adaptation classes)
   */
  explicit auto_adaptation(int n) : windowed_adaptation("covariance") {}

  /**
   * Update the metric if at the end of an adaptation window.
   *
   * The draws collected in the window are split into train/test sets; a
   * dense and a diagonal candidate metric are built from the training draws
   * and the one with the smaller condition-number proxy on the test draws is
   * selected ('s' state), then refit on all draws ('r' state). If anything
   * throws, fall back to a regularized diagonal metric.
   *
   * @tparam Model Type of model
   * @param model Defines the log density
   * @param[out] covar New metric
   * @param[out] covar_is_diagonal Set to true if metric is diagonal, false
   * otherwise
   * @param q New MCMC draw
   * @return True if this was the end of an adaptation window, false otherwise
   */
  template <typename Model>
  bool learn_covariance(const Model& model, Eigen::MatrixXd& covar,
                        bool& covar_is_diagonal, const Eigen::VectorXd& q) {
    if (adaptation_window())
      qs_.push_back(q);

    if (end_adaptation_window()) {
      compute_next_window();

      int M = q.size();
      int N = qs_.size();

      // Copy the window's draws into the columns of Y in random order so
      // the train/test split below is not biased by draw order.
      Eigen::MatrixXd Y = Eigen::MatrixXd::Zero(M, N);
      std::vector<int> idxs(N);
      for (int i = 0; i < N; i++)
        idxs[i] = i;

      // std::random_shuffle was deprecated in C++14 and removed in C++17;
      // std::shuffle with an explicitly seeded engine replaces it and is
      // reproducible for a given adaptation window.
      std::shuffle(idxs.begin(), idxs.end(),
                   std::minstd_rand(adapt_window_counter_ + 1));
      for (int i = 0; i < N; i++)
        Y.block(0, i, M, 1) = qs_[idxs[i]];

      try {
        bool use_dense = false;
        // 's' stands for selection
        // 'r' stands for refinement
        for (char state : {'s', 'r'}) {
          Eigen::MatrixXd Ytrain;
          Eigen::MatrixXd Ytest;

          // If in selection state
          if (state == 's') {
            // Hold out ~20% of the draws (at least 5, since 5 test draws
            // are used in the Hessian comparison below).
            int Mtest;
            Mtest = static_cast<int>(0.2 * Y.cols());
            if (Mtest < 5) {
              Mtest = 5;
            }

            if (Y.cols() < 10) {
              throw std::runtime_error(
                  "Each warmup stage must have at least 10 samples");
            }

            Ytrain = Y.block(0, 0, M, Y.cols() - Mtest);
            Ytest = Y.block(0, Ytrain.cols(), M, Mtest);
          } else {
            // Refinement: refit the chosen metric shape on all draws.
            Ytrain = Y;
          }

          Eigen::MatrixXd cov_train
              = (Ytrain.cols() > 0) ? internal::covariance(Ytrain.transpose())
                                    : Eigen::MatrixXd::Zero(M, M);
          Eigen::MatrixXd cov_test
              = (Ytest.cols() > 0) ? internal::covariance(Ytest.transpose())
                                   : Eigen::MatrixXd::Zero(M, M);

          // Shrink the sample covariance toward a small multiple of the
          // identity (same regularization as Stan's other metric adaptation).
          Eigen::MatrixXd dense = (N / (N + 5.0)) * cov_train
                                  + 1e-3 * (5.0 / (N + 5.0))
                                        * Eigen::MatrixXd::Identity(
                                            cov_train.rows(), cov_train.cols());

          Eigen::MatrixXd diag = dense.diagonal().asDiagonal();

          covar = dense;

          // If in selection state
          if (state == 's') {
            Eigen::MatrixXd L_dense = dense.llt().matrixL();
            Eigen::MatrixXd L_diag
                = diag.diagonal().array().sqrt().matrix().asDiagonal();

            // For each candidate metric, compare the largest test-set
            // covariance eigenvalue against the largest Hessian eigenvalue
            // at test draws -- a proxy for the sampler's condition number.
            double low_eigenvalue_dense
                = -1.0
                  / internal::eigenvalue_scaled_covariance(L_dense, cov_test);
            double low_eigenvalue_diag
                = -1.0
                  / internal::eigenvalue_scaled_covariance(L_diag, cov_test);

            double c_dense = 0.0;
            double c_diag = 0.0;
            for (int i = 0; i < 5; i++) {
              double high_eigenvalue_dense
                  = internal::eigenvalue_scaled_hessian(
                      model, L_dense, Ytest.block(0, i, M, 1));
              double high_eigenvalue_diag = internal::eigenvalue_scaled_hessian(
                  model, L_diag, Ytest.block(0, i, M, 1));

              c_dense = std::max(c_dense, std::sqrt(high_eigenvalue_dense
                                                    / low_eigenvalue_dense));
              c_diag = std::max(c_diag, std::sqrt(high_eigenvalue_diag
                                                  / low_eigenvalue_diag));
            }

            // NOTE(review): diagnostics go to std::cout rather than a
            // callbacks::logger -- consider routing through the logger.
            std::cout << "adapt: " << adapt_window_counter_
                      << ", which: dense, max: " << c_dense << std::endl;
            std::cout << "adapt: " << adapt_window_counter_
                      << ", which: diag, max: " << c_diag << std::endl;

            if (c_dense < c_diag) {
              use_dense = true;
            } else {
              use_dense = false;
            }
          } else {
            if (use_dense) {
              covar = dense;
              covar_is_diagonal = false;
            } else {
              covar = diag;
              covar_is_diagonal = true;
            }
          }
        }
      } catch (const std::exception& e) {
        // Auto selection failed (e.g. too few draws, non-PD covariance);
        // fall back to the standard regularized diagonal metric.
        std::cout << e.what() << std::endl;
        std::cout
            << "Exception while using auto adaptation, falling back to diagonal"
            << std::endl;
        Eigen::MatrixXd cov = internal::covariance(Y.transpose());
        covar = ((N / (N + 5.0)) * cov.diagonal()
                 + 1e-3 * (5.0 / (N + 5.0)) * Eigen::VectorXd::Ones(cov.cols()))
                    .asDiagonal();
        covar_is_diagonal = true;
      }

      ++adapt_window_counter_;
      qs_.clear();

      return true;
    }

    ++adapt_window_counter_;
    return false;
  }

 protected:
  // Draws collected during the current adaptation window
  std::vector<Eigen::VectorXd> qs_;
};

} // namespace mcmc

} // namespace stan

#endif
70 changes: 70 additions & 0 deletions src/stan/mcmc/hmc/hamiltonians/auto_e_metric.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#ifndef STAN_MCMC_HMC_HAMILTONIANS_AUTO_E_METRIC_HPP
#define STAN_MCMC_HMC_HAMILTONIANS_AUTO_E_METRIC_HPP

#include <stan/callbacks/logger.hpp>
#include <stan/math/prim.hpp>
#include <stan/mcmc/hmc/hamiltonians/base_hamiltonian.hpp>
#include <stan/mcmc/hmc/hamiltonians/auto_e_point.hpp>
#include <boost/random/variate_generator.hpp>
#include <boost/random/normal_distribution.hpp>
#include <cmath>

namespace stan {
namespace mcmc {

// Euclidean manifold whose metric is selected automatically during warmup:
// dense or diagonal, as recorded in the point's is_diagonal_ flag.
// (The original comment said "dense metric" -- copy-paste from dense_e_metric.)
template <class Model, class BaseRNG>
class auto_e_metric : public base_hamiltonian<Model, auto_e_point, BaseRNG> {
 public:
  explicit auto_e_metric(const Model& model)
      : base_hamiltonian<Model, auto_e_point, BaseRNG>(model) {}

  // Kinetic energy T(p) = 0.5 * p^T M^{-1} p
  double T(auto_e_point& z) {
    return 0.5 * z.p.transpose() * z.inv_e_metric_ * z.p;
  }

  double tau(auto_e_point& z) { return T(z); }

  double phi(auto_e_point& z) { return this->V(z); }

  double dG_dt(auto_e_point& z, callbacks::logger& logger) {
    return 2 * T(z) - z.q.dot(z.g);
  }

  // The kinetic energy of a Euclidean metric has no position dependence
  Eigen::VectorXd dtau_dq(auto_e_point& z, callbacks::logger& logger) {
    return Eigen::VectorXd::Zero(this->model_.num_params_r());
  }

  Eigen::VectorXd dtau_dp(auto_e_point& z) {
    if (z.is_diagonal_) {
      // Diagonal metric: elementwise product avoids a dense matrix-vector
      // multiply
      return z.inv_e_metric_.diagonal().cwiseProduct(z.p);
    } else {
      return z.inv_e_metric_ * z.p;
    }
  }

  Eigen::VectorXd dphi_dq(auto_e_point& z, callbacks::logger& logger) {
    return z.g;
  }

  // Sample momentum p ~ N(0, M), where M is the inverse of inv_e_metric_
  void sample_p(auto_e_point& z, BaseRNG& rng) {
    typedef typename stan::math::index_type<Eigen::VectorXd>::type idx_t;
    boost::variate_generator<BaseRNG&, boost::normal_distribution<> > rand_gaus(
        rng, boost::normal_distribution<>());

    if (z.is_diagonal_) {
      // std::sqrt rather than unqualified sqrt: don't rely on the C global
      // namespace being populated by transitive includes
      for (int i = 0; i < z.p.size(); ++i)
        z.p(i) = rand_gaus() / std::sqrt(z.inv_e_metric_(i, i));
    } else {
      Eigen::VectorXd u(z.p.size());

      for (idx_t i = 0; i < u.size(); ++i)
        u(i) = rand_gaus();

      // With M^{-1} = L L^T, solving L^T p = u gives Cov(p) = (L L^T)^{-1} = M
      z.p = z.inv_e_metric_.llt().matrixU().solve(u);
    }
  }
};

} // namespace mcmc
} // namespace stan
#endif
Loading