Skip to content

Commit

Permalink
Merge pull request #684 from stan-dev/feature/659-diagnose-verbose
Browse files Browse the repository at this point in the history
Feature/659 diagnose verbose
  • Loading branch information
mitzimorris authored Apr 24, 2019
2 parents 1de114b + 3c19775 commit fb118b0
Show file tree
Hide file tree
Showing 8 changed files with 167 additions and 87 deletions.
113 changes: 68 additions & 45 deletions src/cmdstan/diagnose.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#include <cmdstan/stansummary_helper.hpp>
#include <fstream>

double RHAT_MAX = 1.05;

void diagnose_usage() {
std::cout << "USAGE: diagnose <filename 1> [<filename 2> ... <filename N>]"
<< std::endl
Expand Down Expand Up @@ -34,7 +36,6 @@ int main(int argc, const char* argv[]) {

for (int i = 1; i < argc; ++i) {
ifstream.open(argv[i]);

if (ifstream.good()) {
filenames.push_back(argv[i]);
ifstream.close();
Expand All @@ -49,120 +50,129 @@ int main(int argc, const char* argv[]) {
}

// Parse specified files
std::cout << "Processing csv files: " << filenames[0];
ifstream.open(filenames[0].c_str());

stan::io::stan_csv stan_csv
= stan::io::stan_csv_reader::parse(ifstream, &std::cout);
stan::mcmc::chains<> chains(stan_csv);

ifstream.close();

if (filenames.size() > 1) std::cout << ", ";
else std::cout << std::endl << std::endl;

for (std::vector<std::string>::size_type chain = 1;
chain < filenames.size(); ++chain) {
std::cout << filenames[chain];
ifstream.open(filenames[chain].c_str());
stan_csv = stan::io::stan_csv_reader::parse(ifstream, &std::cout);
chains.add(stan_csv);
ifstream.close();
if (chain < filenames.size()-1) std::cout << ", ";
else std::cout << std::endl << std::endl;
}

int num_samples = chains.num_samples();

std::vector<std::string> bad_n_eff_names;
std::vector<std::string> bad_rhat_names;
bool has_errors = false;

for (int i = 0; i < chains.num_params(); ++i) {
int max_limit = 10;

if (chains.param_name(i) == std::string("treedepth__")) {
std::cout << "Checking sampler transitions treedepth." << std::endl;
int max_limit = stan_csv.metadata.max_depth;

long n_max = 0;
Eigen::VectorXd t_samples = chains.samples(i);
for (long n = 0; n < t_samples.size(); ++n) {
if (t_samples(n) >= max_limit) {
++n_max;
}
}

if (n_max > 0) {
std::cout << n_max << " of " << num_samples << " ("
<< std::setprecision(2)
<< 100 * static_cast<double>(n_max) / num_samples
<< "%) transitions hit the maximum treedepth limit of "
has_errors = true;
double pct = 100 * static_cast<double>(n_max) / num_samples;
std::cout << n_max << " of " << num_samples
<< " (" << std::setprecision(2) << pct << "%)"
<< " transitions hit the maximum treedepth limit of "
<< max_limit << ", or 2^" << max_limit << " leapfrog steps."
<< " Trajectories that are prematurely terminated due to this"
<< " limit will result in slow exploration and you should"
<< " increase the limit to ensure optimal performance."
<< std::endl
<< "Trajectories that are prematurely terminated due to this"
<< " limit will result in slow exploration."
<< std::endl
<< "For optimal performance, increase this limit."
<< std::endl << std::endl;
} else {
std::cout << "Treedepth satisfactory for all transitions." << std::endl << std::endl;
}

} else if (chains.param_name(i) == std::string("divergent__")) {

std::cout << "Checking sampler transitions for divergences." << std::endl;
int n_divergent = chains.samples(i).sum();
if (n_divergent > 0)
if (n_divergent > 0) {
has_errors = true;
std::cout << n_divergent << " of " << num_samples << " ("
<< std::setprecision(2)
<< 100 * static_cast<double>(n_divergent) / num_samples
<< "%) transitions ended with a divergence. These divergent"
<< " transitions indicate that HMC is not fully able to"
<< " explore the posterior distribution. Try rerunning with"
<< " adapt delta set to a larger value and see if the"
<< " divergences vanish. If increasing adapt delta towards"
<< " 1 does not remove the divergences then you will likely"
<< " need to reparameterize your model."
<< "%) transitions ended with a divergence."
<< std::endl
<< "These divergent transitions indicate that HMC is not fully able to"
<< " explore the posterior distribution."
<< std::endl
<< "Try increasing adapt delta closer to 1."
<< std::endl
<< "If this doesn't remove all"
<< " divergences, try to reparameterize the model."
<< std::endl << std::endl;

} else {
std::cout << "No divergent transitions found." << std::endl << std::endl;
}
} else if (chains.param_name(i) == std::string("energy__")) {

std::cout << "Checking E-BFMI - sampler transitions HMC potential energy." << std::endl;
Eigen::VectorXd e_samples = chains.samples(i);

double delta_e_sq_mean = 0;
double e_mean = 0;
double e_var = 0;

e_mean += e_samples(0);
e_var += e_samples(0) * (e_samples(0) - e_mean);

for (long n = 1; n < e_samples.size(); ++n) {
double e = e_samples(n);

double delta_e_sq = (e - e_samples(n - 1)) * (e - e_samples(n - 1));
double d = delta_e_sq - delta_e_sq_mean;
delta_e_sq_mean += d / n;

d = e - e_mean;
e_mean += d / (n + 1);
e_var += d * (e - e_mean);
}

e_var /= static_cast<double>(e_samples.size() - 1);

double e_bfmi = delta_e_sq_mean / e_var;

double e_bfmi_threshold = 0.3;
if (e_bfmi < e_bfmi_threshold)
if (e_bfmi < e_bfmi_threshold) {
has_errors = true;
std::cout << "The E-BFMI, " << e_bfmi << ", is below the nominal"
<< " threshold of " << e_bfmi_threshold << " which suggests"
<< " that HMC may have trouble exploring the target"
<< " distribution. You should consider any"
<< " reparameterizations if possible."
<< " distribution."
<< std::endl
<< "If possible, try to reparameterize the model."
<< std::endl << std::endl;

} else {
std::cout << "E-BFMI satisfactory for all transitions." << std::endl << std::endl;
}
} else if (chains.param_name(i).find("__") == std::string::npos) {

double n_eff = chains.effective_sample_size(i);
if (n_eff / num_samples < 0.001)
bad_n_eff_names.push_back(chains.param_name(i));

double split_rhat = chains.split_potential_scale_reduction(i);
if (split_rhat > 1.1)
if (split_rhat > RHAT_MAX)
bad_rhat_names.push_back(chains.param_name(i));
}
}

if (bad_n_eff_names.size() > 0) {
has_errors = true;
std::cout << "The following parameters had fewer than 0.001 effective"
<< " samples per transition:" << std::endl;
<< " draws per transition:" << std::endl;
std::cout << " ";
for (size_t n = 0; n < bad_n_eff_names.size() - 1; ++n)
std::cout << bad_n_eff_names.at(n) << ", ";
Expand All @@ -172,23 +182,36 @@ int main(int argc, const char* argv[]) {
<< " estimators may be biased high and actual performance"
<< " may be substantially lower than quoted."
<< std::endl << std::endl;
} else {
std::cout << "Effective sample size satisfactory."
<< std::endl << std::endl;
}

if (bad_rhat_names.size() > 0) {
std::cout << "The following parameters had split R-hat greater than 1.1:"
<< std::endl;
has_errors = true;
std::cout << "The following parameters had split R-hat greater than "
<< RHAT_MAX << ":" << std::endl;
std::cout << " ";
for (size_t n = 0; n < bad_rhat_names.size() - 1; ++n)
std::cout << bad_rhat_names.at(n) << ", ";
std::cout << bad_rhat_names.back() << std::endl;

std::cout << "Such high values indicate incomplete mixing and biased"
<< "estimation. You should consider regularization your model"
<< " with additional prior information or looking for a more"
<< "estimation."
<< std::endl
<< "You should consider regularizating your model"
<< " with additional prior information or a more"
<< " effective parameterization."
<< std::endl << std::endl;
} else {
std::cout << "Split R-hat values satisfactory all parameters."
<< std::endl << std::endl;
}

if (!has_errors)
std::cout << "Processing complete, no problems detected." << std::endl;
else
std::cout << "Processing complete." << std::endl;

return 0;

}
39 changes: 19 additions & 20 deletions src/docs/cmdstan-guide/diagnose.tex
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,15 @@ \section{Building the {\tt\bfseries diagnose} Command}
\section{Running the {\tt\bfseries diagnose} Command}

The \code{diagnose} command is executed on one or more output files,
which are provided as command-line arguments separated by spaces. As
an example, the ``eight schools'' model from Stan's example models and
its corresponding data will be used.%
which are provided as command-line arguments separated by spaces.
If there are no apparent problems with the output files passed to
\code{bin/diagnose}, it outputs a message that all transitions
are within treedepth limit and that no divergent transitions were found.

It problems are detected, it outputs a summary of the problem along with
possible ways to mitigate it. As an example, we use the
the ``eight schools'' model from Stan's example models and
its corresponding data.%
%
\footnote{The model and associated data files are here:
\begin{itemize}
Expand Down Expand Up @@ -85,22 +91,20 @@ \section{Running the {\tt\bfseries diagnose} Command}
\end{Verbatim}
\end{quote}

If there are no apparent problems with the output files passed to
\code{bin/diagnose}, it prints nothing.

\begin{figure}
\begin{Verbatim}[fontsize=\footnotesize]
95 of 4000 (2.4%) transitions ended with a divergence. These
divergent transitions indicate that HMC is not fully able to explore
the posterior distribution. Try rerunning with adapt delta set to a
larger value and see if the divergences vanish. If increasing adapt
delta towards 1 does not remove the divergences then you will likely
need to reparameterize your model.
Checking sampler transitions for divergences.
95 of 4000 (2.4%) transitions ended with a divergence.
These divergent transitions indicate that HMC is not fully
able to explore the posterior distribution.
Try increasing adapt delta closer to 1.
If this doesn't remove all divergences, try to reparameterize the model.
The E-BFMI, 0.27, is below the nominal threshold of 0.3 which
suggests that HMC may have trouble exploring the target
distribution. You should consider any reparameterizations if
possible.
Checking E-BFMI - sampler transitions HMC potential energy.
The E-BFMI, 0.27, is below the nominal threshold of 0.3 which suggests
that HMC may have trouble exploring the target distribution.
If possible, try to reparameterize the model.
\end{Verbatim}
\caption{Example output from \code{bin/diagnose}.}
\label{bin-diagnose-eg.figure}
Expand All @@ -110,8 +114,3 @@ \section{Running the {\tt\bfseries diagnose} Command}
and \url{https://arxiv.org/abs/1701.02434}) contain suggestions for
other diagnostic warning; however the correct resolution is
necessarily model specific, hence all suggestions general guidelines only.

%%% Local Variables:
%%% mode: latex
%%% TeX-master: "cmdstan-guide"
%%% End:
21 changes: 6 additions & 15 deletions src/test/interface/diagnose_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <gtest/gtest.h>
#include <test/utility.hpp>

using cmdstan::test::count_matches;
using cmdstan::test::get_path_separator;
using cmdstan::test::run_command;
using cmdstan::test::run_command_output;
Expand All @@ -19,15 +20,13 @@ TEST(CommandDiagnose, corr_gauss) {
+ "corr_gauss_output.csv";

run_command_output out = run_command(command + " " + csv_file);

ASSERT_FALSE(out.hasError)
<< "\"" << out.command << "\" quit with an error";

std::ifstream expected_output("src/test/interface/example_output/corr_gauss.nom");
std::stringstream ss;
ss << expected_output.rdbuf();

EXPECT_EQ(ss.str(), out.output);
EXPECT_EQ(1, count_matches(ss.str(), out.output));
}

TEST(CommandDiagnose, corr_gauss_depth8) {
Expand All @@ -42,15 +41,13 @@ TEST(CommandDiagnose, corr_gauss_depth8) {
+ "corr_gauss_output_depth8.csv";

run_command_output out = run_command(command + " " + csv_file);

ASSERT_FALSE(out.hasError)
<< "\"" << out.command << "\" quit with an error";

std::ifstream expected_output("src/test/interface/example_output/corr_gauss_depth8.nom");
std::stringstream ss;
ss << expected_output.rdbuf();

EXPECT_EQ(ss.str(), out.output);
EXPECT_EQ(1, count_matches(ss.str(), out.output));
}

TEST(CommandDiagnose, corr_gauss_depth15) {
Expand All @@ -65,15 +62,13 @@ TEST(CommandDiagnose, corr_gauss_depth15) {
+ "corr_gauss_output_depth15.csv";

run_command_output out = run_command(command + " " + csv_file);

ASSERT_FALSE(out.hasError)
<< "\"" << out.command << "\" quit with an error";

std::ifstream expected_output("src/test/interface/example_output/corr_gauss_depth15.nom");
std::stringstream ss;
ss << expected_output.rdbuf();

EXPECT_EQ(ss.str(), out.output);
EXPECT_EQ(1, count_matches(ss.str(), out.output));
}

TEST(CommandDiagnose, eight_schools) {
Expand All @@ -88,15 +83,13 @@ TEST(CommandDiagnose, eight_schools) {
+ "eight_schools_output.csv";

run_command_output out = run_command(command + " " + csv_file);

ASSERT_FALSE(out.hasError)
<< "\"" << out.command << "\" quit with an error";

std::ifstream expected_output("src/test/interface/example_output/eight_schools.nom");
std::stringstream ss;
ss << expected_output.rdbuf();

EXPECT_EQ(ss.str(), out.output);
EXPECT_EQ(1, count_matches(ss.str(), out.output));
}

TEST(CommandDiagnose, mix) {
Expand All @@ -111,13 +104,11 @@ TEST(CommandDiagnose, mix) {
+ "mix_output.*";

run_command_output out = run_command(command + " " + csv_file);

ASSERT_FALSE(out.hasError)
<< "\"" << out.command << "\" quit with an error";

std::ifstream expected_output("src/test/interface/example_output/mix.nom");
std::stringstream ss;
ss << expected_output.rdbuf();

EXPECT_EQ(ss.str(), out.output);
EXPECT_EQ(1, count_matches(ss.str(), out.output));
}
16 changes: 15 additions & 1 deletion src/test/interface/example_output/corr_gauss.nom
Original file line number Diff line number Diff line change
@@ -1,2 +1,16 @@
5 of 1000 (0.5%) transitions hit the maximum treedepth limit of 10, or 2^10 leapfrog steps. Trajectories that are prematurely terminated due to this limit will result in slow exploration and you should increase the limit to ensure optimal performance.
Checking sampler transitions treedepth.
5 of 1000 (0.5%) transitions hit the maximum treedepth limit of 10, or 2^10 leapfrog steps.
Trajectories that are prematurely terminated due to this limit will result in slow exploration.
For optimal performance, increase this limit.

Checking sampler transitions for divergences.
No divergent transitions found.

Checking E-BFMI - sampler transitions HMC potential energy.
E-BFMI satisfactory for all transitions.

Effective sample size satisfactory.

Split R-hat values satisfactory all parameters.

Processing complete.
Loading

0 comments on commit fb118b0

Please sign in to comment.