diff --git a/src/cmdstan/diagnose.cpp b/src/cmdstan/diagnose.cpp index 8d57bc69b0..edd0143e07 100644 --- a/src/cmdstan/diagnose.cpp +++ b/src/cmdstan/diagnose.cpp @@ -6,6 +6,8 @@ #include #include +double RHAT_MAX = 1.05; + void diagnose_usage() { std::cout << "USAGE: diagnose [ ... ]" << std::endl @@ -34,7 +36,6 @@ int main(int argc, const char* argv[]) { for (int i = 1; i < argc; ++i) { ifstream.open(argv[i]); - if (ifstream.good()) { filenames.push_back(argv[i]); ifstream.close(); @@ -49,33 +50,38 @@ int main(int argc, const char* argv[]) { } // Parse specified files + std::cout << "Processing csv files: " << filenames[0]; ifstream.open(filenames[0].c_str()); stan::io::stan_csv stan_csv = stan::io::stan_csv_reader::parse(ifstream, &std::cout); stan::mcmc::chains<> chains(stan_csv); - ifstream.close(); + if (filenames.size() > 1) std::cout << ", "; + else std::cout << std::endl << std::endl; + for (std::vector::size_type chain = 1; chain < filenames.size(); ++chain) { + std::cout << filenames[chain]; ifstream.open(filenames[chain].c_str()); stan_csv = stan::io::stan_csv_reader::parse(ifstream, &std::cout); chains.add(stan_csv); ifstream.close(); + if (chain < filenames.size()-1) std::cout << ", "; + else std::cout << std::endl << std::endl; } int num_samples = chains.num_samples(); - std::vector bad_n_eff_names; std::vector bad_rhat_names; + bool has_errors = false; for (int i = 0; i < chains.num_params(); ++i) { int max_limit = 10; - if (chains.param_name(i) == std::string("treedepth__")) { + std::cout << "Checking sampler transitions treedepth." << std::endl; int max_limit = stan_csv.metadata.max_depth; - long n_max = 0; Eigen::VectorXd t_samples = chains.samples(i); for (long n = 0; n < t_samples.size(); ++n) { @@ -83,86 +89,90 @@ int main(int argc, const char* argv[]) { ++n_max; } } - if (n_max > 0) { - std::cout << n_max << " of " << num_samples << " (" - << std::setprecision(2) - << 100 * static_cast(n_max) / num_samples - << "%) transitions hit the maximum treedepth limit of " + has_errors = true; + double pct = 100 * static_cast(n_max) / num_samples; + std::cout << n_max << " of " << num_samples + << " (" << std::setprecision(2) << pct << "%)" + << " transitions hit the maximum treedepth limit of " << max_limit << ", or 2^" << max_limit << " leapfrog steps." - << " Trajectories that are prematurely terminated due to this" - << " limit will result in slow exploration and you should" - << " increase the limit to ensure optimal performance." + << std::endl + << "Trajectories that are prematurely terminated due to this" + << " limit will result in slow exploration." + << std::endl + << "For optimal performance, increase this limit." << std::endl << std::endl; + } else { + std::cout << "Treedepth satisfactory for all transitions." << std::endl << std::endl; } - } else if (chains.param_name(i) == std::string("divergent__")) { - + std::cout << "Checking sampler transitions for divergences." << std::endl; int n_divergent = chains.samples(i).sum(); - if (n_divergent > 0) + if (n_divergent > 0) { + has_errors = true; std::cout << n_divergent << " of " << num_samples << " (" << std::setprecision(2) << 100 * static_cast(n_divergent) / num_samples - << "%) transitions ended with a divergence. These divergent" - << " transitions indicate that HMC is not fully able to" - << " explore the posterior distribution. Try rerunning with" - << " adapt delta set to a larger value and see if the" - << " divergences vanish. If increasing adapt delta towards" - << " 1 does not remove the divergences then you will likely" - << " need to reparameterize your model." + << "%) transitions ended with a divergence." + << std::endl + << "These divergent transitions indicate that HMC is not fully able to" + << " explore the posterior distribution." + << std::endl + << "Try increasing adapt delta closer to 1." + << std::endl + << "If this doesn't remove all" + << " divergences, try to reparameterize the model." << std::endl << std::endl; - + } else { + std::cout << "No divergent transitions found." << std::endl << std::endl; + } } else if (chains.param_name(i) == std::string("energy__")) { - + std::cout << "Checking E-BFMI - sampler transitions HMC potential energy." << std::endl; Eigen::VectorXd e_samples = chains.samples(i); - double delta_e_sq_mean = 0; double e_mean = 0; double e_var = 0; - e_mean += e_samples(0); e_var += e_samples(0) * (e_samples(0) - e_mean); - for (long n = 1; n < e_samples.size(); ++n) { double e = e_samples(n); - double delta_e_sq = (e - e_samples(n - 1)) * (e - e_samples(n - 1)); double d = delta_e_sq - delta_e_sq_mean; delta_e_sq_mean += d / n; - d = e - e_mean; e_mean += d / (n + 1); e_var += d * (e - e_mean); } e_var /= static_cast(e_samples.size() - 1); - double e_bfmi = delta_e_sq_mean / e_var; - double e_bfmi_threshold = 0.3; - if (e_bfmi < e_bfmi_threshold) + if (e_bfmi < e_bfmi_threshold) { + has_errors = true; std::cout << "The E-BFMI, " << e_bfmi << ", is below the nominal" << " threshold of " << e_bfmi_threshold << " which suggests" << " that HMC may have trouble exploring the target" - << " distribution. You should consider any" - << " reparameterizations if possible." + << " distribution." + << std::endl + << "If possible, try to reparameterize the model." << std::endl << std::endl; - + } else { + std::cout << "E-BFMI satisfactory for all transitions." << std::endl << std::endl; + } } else if (chains.param_name(i).find("__") == std::string::npos) { - double n_eff = chains.effective_sample_size(i); if (n_eff / num_samples < 0.001) bad_n_eff_names.push_back(chains.param_name(i)); double split_rhat = chains.split_potential_scale_reduction(i); - if (split_rhat > 1.1) + if (split_rhat > RHAT_MAX) bad_rhat_names.push_back(chains.param_name(i)); } } - if (bad_n_eff_names.size() > 0) { + has_errors = true; std::cout << "The following parameters had fewer than 0.001 effective" - << " samples per transition:" << std::endl; + << " draws per transition:" << std::endl; std::cout << " "; for (size_t n = 0; n < bad_n_eff_names.size() - 1; ++n) std::cout << bad_n_eff_names.at(n) << ", "; @@ -172,23 +182,36 @@ int main(int argc, const char* argv[]) { << " estimators may be biased high and actual performance" << " may be substantially lower than quoted." << std::endl << std::endl; + } else { + std::cout << "Effective sample size satisfactory." + << std::endl << std::endl; } if (bad_rhat_names.size() > 0) { - std::cout << "The following parameters had split R-hat greater than 1.1:" - << std::endl; + has_errors = true; + std::cout << "The following parameters had split R-hat greater than " + << RHAT_MAX << ":" << std::endl; std::cout << " "; for (size_t n = 0; n < bad_rhat_names.size() - 1; ++n) std::cout << bad_rhat_names.at(n) << ", "; std::cout << bad_rhat_names.back() << std::endl; std::cout << "Such high values indicate incomplete mixing and biased" - << "estimation. You should consider regularization your model" - << " with additional prior information or looking for a more" + << "estimation." + << std::endl + << "You should consider regularizating your model" + << " with additional prior information or a more" << " effective parameterization." << std::endl << std::endl; + } else { + std::cout << "Split R-hat values satisfactory all parameters." + << std::endl << std::endl; } - + if (!has_errors) + std::cout << "Processing complete, no problems detected." << std::endl; + else + std::cout << "Processing complete." << std::endl; + return 0; } diff --git a/src/docs/cmdstan-guide/diagnose.tex b/src/docs/cmdstan-guide/diagnose.tex index 1c940c2b51..c8f9b7c3f2 100644 --- a/src/docs/cmdstan-guide/diagnose.tex +++ b/src/docs/cmdstan-guide/diagnose.tex @@ -35,9 +35,15 @@ \section{Building the {\tt\bfseries diagnose} Command} \section{Running the {\tt\bfseries diagnose} Command} The \code{diagnose} command is executed on one or more output files, -which are provided as command-line arguments separated by spaces. As -an example, the ``eight schools'' model from Stan's example models and -its corresponding data will be used.% +which are provided as command-line arguments separated by spaces. +If there are no apparent problems with the output files passed to +\code{bin/diagnose}, it outputs a message that all transitions +are within treedepth limit and that no divergent transitions were found. + +It problems are detected, it outputs a summary of the problem along with +possible ways to mitigate it. As an example, we use the +the ``eight schools'' model from Stan's example models and +its corresponding data.% % \footnote{The model and associated data files are here: \begin{itemize} @@ -85,22 +91,20 @@ \section{Running the {\tt\bfseries diagnose} Command} \end{Verbatim} \end{quote} -If there are no apparent problems with the output files passed to -\code{bin/diagnose}, it prints nothing. \begin{figure} \begin{Verbatim}[fontsize=\footnotesize] - 95 of 4000 (2.4%) transitions ended with a divergence. These - divergent transitions indicate that HMC is not fully able to explore - the posterior distribution. Try rerunning with adapt delta set to a - larger value and see if the divergences vanish. If increasing adapt - delta towards 1 does not remove the divergences then you will likely - need to reparameterize your model. + Checking sampler transitions for divergences. + 95 of 4000 (2.4%) transitions ended with a divergence. + These divergent transitions indicate that HMC is not fully + able to explore the posterior distribution. + Try increasing adapt delta closer to 1. + If this doesn't remove all divergences, try to reparameterize the model. - The E-BFMI, 0.27, is below the nominal threshold of 0.3 which - suggests that HMC may have trouble exploring the target - distribution. You should consider any reparameterizations if - possible. + Checking E-BFMI - sampler transitions HMC potential energy. + The E-BFMI, 0.27, is below the nominal threshold of 0.3 which suggests + that HMC may have trouble exploring the target distribution. + If possible, try to reparameterize the model. \end{Verbatim} \caption{Example output from \code{bin/diagnose}.} \label{bin-diagnose-eg.figure} @@ -110,8 +114,3 @@ \section{Running the {\tt\bfseries diagnose} Command} and \url{https://arxiv.org/abs/1701.02434}) contain suggestions for other diagnostic warning; however the correct resolution is necessarily model specific, hence all suggestions general guidelines only. - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "cmdstan-guide" -%%% End: diff --git a/src/test/interface/diagnose_test.cpp b/src/test/interface/diagnose_test.cpp index 58bedcf2bc..f7335761ca 100644 --- a/src/test/interface/diagnose_test.cpp +++ b/src/test/interface/diagnose_test.cpp @@ -3,6 +3,7 @@ #include #include +using cmdstan::test::count_matches; using cmdstan::test::get_path_separator; using cmdstan::test::run_command; using cmdstan::test::run_command_output; @@ -19,15 +20,13 @@ TEST(CommandDiagnose, corr_gauss) { + "corr_gauss_output.csv"; run_command_output out = run_command(command + " " + csv_file); - ASSERT_FALSE(out.hasError) << "\"" << out.command << "\" quit with an error"; std::ifstream expected_output("src/test/interface/example_output/corr_gauss.nom"); std::stringstream ss; ss << expected_output.rdbuf(); - - EXPECT_EQ(ss.str(), out.output); + EXPECT_EQ(1, count_matches(ss.str(), out.output)); } TEST(CommandDiagnose, corr_gauss_depth8) { @@ -42,15 +41,13 @@ TEST(CommandDiagnose, corr_gauss_depth8) { + "corr_gauss_output_depth8.csv"; run_command_output out = run_command(command + " " + csv_file); - ASSERT_FALSE(out.hasError) << "\"" << out.command << "\" quit with an error"; std::ifstream expected_output("src/test/interface/example_output/corr_gauss_depth8.nom"); std::stringstream ss; ss << expected_output.rdbuf(); - - EXPECT_EQ(ss.str(), out.output); + EXPECT_EQ(1, count_matches(ss.str(), out.output)); } TEST(CommandDiagnose, corr_gauss_depth15) { @@ -65,15 +62,13 @@ TEST(CommandDiagnose, corr_gauss_depth15) { + "corr_gauss_output_depth15.csv"; run_command_output out = run_command(command + " " + csv_file); - ASSERT_FALSE(out.hasError) << "\"" << out.command << "\" quit with an error"; std::ifstream expected_output("src/test/interface/example_output/corr_gauss_depth15.nom"); std::stringstream ss; ss << expected_output.rdbuf(); - - EXPECT_EQ(ss.str(), out.output); + EXPECT_EQ(1, count_matches(ss.str(), out.output)); } TEST(CommandDiagnose, eight_schools) { @@ -88,15 +83,13 @@ TEST(CommandDiagnose, eight_schools) { + "eight_schools_output.csv"; run_command_output out = run_command(command + " " + csv_file); - ASSERT_FALSE(out.hasError) << "\"" << out.command << "\" quit with an error"; std::ifstream expected_output("src/test/interface/example_output/eight_schools.nom"); std::stringstream ss; ss << expected_output.rdbuf(); - - EXPECT_EQ(ss.str(), out.output); + EXPECT_EQ(1, count_matches(ss.str(), out.output)); } TEST(CommandDiagnose, mix) { @@ -111,13 +104,11 @@ TEST(CommandDiagnose, mix) { + "mix_output.*"; run_command_output out = run_command(command + " " + csv_file); - ASSERT_FALSE(out.hasError) << "\"" << out.command << "\" quit with an error"; std::ifstream expected_output("src/test/interface/example_output/mix.nom"); std::stringstream ss; ss << expected_output.rdbuf(); - - EXPECT_EQ(ss.str(), out.output); + EXPECT_EQ(1, count_matches(ss.str(), out.output)); } diff --git a/src/test/interface/example_output/corr_gauss.nom b/src/test/interface/example_output/corr_gauss.nom index f4f7df0f7a..e48822b4ee 100644 --- a/src/test/interface/example_output/corr_gauss.nom +++ b/src/test/interface/example_output/corr_gauss.nom @@ -1,2 +1,16 @@ -5 of 1000 (0.5%) transitions hit the maximum treedepth limit of 10, or 2^10 leapfrog steps. Trajectories that are prematurely terminated due to this limit will result in slow exploration and you should increase the limit to ensure optimal performance. +Checking sampler transitions treedepth. +5 of 1000 (0.5%) transitions hit the maximum treedepth limit of 10, or 2^10 leapfrog steps. +Trajectories that are prematurely terminated due to this limit will result in slow exploration. +For optimal performance, increase this limit. +Checking sampler transitions for divergences. +No divergent transitions found. + +Checking E-BFMI - sampler transitions HMC potential energy. +E-BFMI satisfactory for all transitions. + +Effective sample size satisfactory. + +Split R-hat values satisfactory all parameters. + +Processing complete. diff --git a/src/test/interface/example_output/corr_gauss_depth15.nom b/src/test/interface/example_output/corr_gauss_depth15.nom index e69de29bb2..cf0149c4af 100644 --- a/src/test/interface/example_output/corr_gauss_depth15.nom +++ b/src/test/interface/example_output/corr_gauss_depth15.nom @@ -0,0 +1,14 @@ +Checking sampler transitions treedepth. +Treedepth satisfactory for all transitions. + +Checking sampler transitions for divergences. +No divergent transitions found. + +Checking E-BFMI - sampler transitions HMC potential energy. +E-BFMI satisfactory for all transitions. + +Effective sample size satisfactory. + +Split R-hat values satisfactory all parameters. + +Processing complete, no problems detected. diff --git a/src/test/interface/example_output/corr_gauss_depth8.nom b/src/test/interface/example_output/corr_gauss_depth8.nom index 96882fdb76..ce4a1d2b70 100644 --- a/src/test/interface/example_output/corr_gauss_depth8.nom +++ b/src/test/interface/example_output/corr_gauss_depth8.nom @@ -1,2 +1,16 @@ -424 of 1000 (42%) transitions hit the maximum treedepth limit of 8, or 2^8 leapfrog steps. Trajectories that are prematurely terminated due to this limit will result in slow exploration and you should increase the limit to ensure optimal performance. +Checking sampler transitions treedepth. +424 of 1000 (42%) transitions hit the maximum treedepth limit of 8, or 2^8 leapfrog steps. +Trajectories that are prematurely terminated due to this limit will result in slow exploration. +For optimal performance, increase this limit. +Checking sampler transitions for divergences. +No divergent transitions found. + +Checking E-BFMI - sampler transitions HMC potential energy. +E-BFMI satisfactory for all transitions. + +Effective sample size satisfactory. + +Split R-hat values satisfactory all parameters. + +Processing complete. diff --git a/src/test/interface/example_output/eight_schools.nom b/src/test/interface/example_output/eight_schools.nom index d51cf0762c..53b3f89b3d 100644 --- a/src/test/interface/example_output/eight_schools.nom +++ b/src/test/interface/example_output/eight_schools.nom @@ -1,4 +1,18 @@ -7 of 1000 (0.7%) transitions ended with a divergence. These divergent transitions indicate that HMC is not fully able to explore the posterior distribution. Try rerunning with adapt delta set to a larger value and see if the divergences vanish. If increasing adapt delta towards 1 does not remove the divergences then you will likely need to reparameterize your model. +Checking sampler transitions treedepth. +Treedepth satisfactory for all transitions. -The E-BFMI, 0.26, is below the nominal threshold of 0.3 which suggests that HMC may have trouble exploring the target distribution. You should consider any reparameterizations if possible. +Checking sampler transitions for divergences. +7 of 1000 (0.7%) transitions ended with a divergence. +These divergent transitions indicate that HMC is not fully able to explore the posterior distribution. +Try increasing adapt delta closer to 1. +If this doesn't remove all divergences, try to reparameterize the model. +Checking E-BFMI - sampler transitions HMC potential energy. +The E-BFMI, 0.26, is below the nominal threshold of 0.3 which suggests that HMC may have trouble exploring the target distribution. +If possible, try to reparameterize the model. + +Effective sample size satisfactory. + +Split R-hat values satisfactory all parameters. + +Processing complete. diff --git a/src/test/interface/example_output/mix.nom b/src/test/interface/example_output/mix.nom index 965e0b6db7..9121c81b73 100644 --- a/src/test/interface/example_output/mix.nom +++ b/src/test/interface/example_output/mix.nom @@ -1,8 +1,19 @@ -The following parameters had fewer than 0.001 effective samples per transition: +Checking sampler transitions treedepth. +Treedepth satisfactory for all transitions. + +Checking sampler transitions for divergences. +No divergent transitions found. + +Checking E-BFMI - sampler transitions HMC potential energy. +E-BFMI satisfactory for all transitions. + +The following parameters had fewer than 0.001 effective draws per transition: mu[1], mu[2], theta Such low values indicate that the effective sample size estimators may be biased high and actual performance may be substantially lower than quoted. -The following parameters had split R-hat greater than 1.1: +The following parameters had split R-hat greater than 1.05: mu[1], mu[2], theta -Such high values indicate incomplete mixing and biasedestimation. You should consider regularization your model with additional prior information or looking for a more effective parameterization. +Such high values indicate incomplete mixing and biasedestimation. +You should consider regularizating your model with additional prior information or a more effective parameterization. +Processing complete.