diff --git a/src/cmdstan/arguments/arg_pathfinder.hpp b/src/cmdstan/arguments/arg_pathfinder.hpp index e5dda99ad0..1999807462 100644 --- a/src/cmdstan/arguments/arg_pathfinder.hpp +++ b/src/cmdstan/arguments/arg_pathfinder.hpp @@ -18,6 +18,9 @@ class arg_pathfinder : public arg_lbfgs { "num_psis_draws", "Number of draws from PSIS sample", 1000)); _subarguments.push_back( new arg_single_int_pos("num_paths", "Number of single pathfinders", 4)); + _subarguments.push_back(new arg_single_bool( + "save_single_paths", "Output single-path pathfinder draws as CSV", + false)); _subarguments.push_back(new arg_single_int_pos( "max_lbfgs_iters", "Maximum number of LBFGS iterations", 1000)); _subarguments.push_back(new arg_single_int_pos( diff --git a/src/cmdstan/command.hpp b/src/cmdstan/command.hpp index 8c2047275f..0addc5021c 100644 --- a/src/cmdstan/command.hpp +++ b/src/cmdstan/command.hpp @@ -195,6 +195,7 @@ int command(int argc, const char *argv[]) { diagnostic_csv_writers; std::vector> diagnostic_json_writers; + init_callbacks(parser, sample_writers, diagnostic_csv_writers, diagnostic_json_writers); @@ -207,9 +208,11 @@ int command(int argc, const char *argv[]) { init = ""; } catch (const std::logic_error &e) { } + std::vector> init_contexts = get_vec_var_context(init, num_chains); std::vector model_compile_info = model.model_compile_info(); + for (int i = 0; i < num_chains; ++i) { write_stan(sample_writers[i]); write_model(sample_writers[i], model.model_name()); @@ -253,8 +256,8 @@ int command(int argc, const char *argv[]) { = get_arg_val(*pathfinder_arg, "num_psis_draws"); int num_paths = get_arg_val(*pathfinder_arg, "num_paths"); bool save_iterations - = !get_arg_val(parser, "output", "diagnostic_file") - .empty(); + = get_arg_val(*pathfinder_arg, "save_single_paths"); + if (num_paths == 1) { return_code = stan::services::pathfinder::pathfinder_lbfgs_single< false, stan::model::model_base>( @@ -264,16 +267,13 @@ int command(int argc, const char *argv[]) { save_iterations, refresh, interrupt, logger, init_writer, sample_writers[0], diagnostic_json_writers[0]); } else { - std::string output_file - = get_arg_val(parser, "output", "file"); - auto base_sfx = get_basename_suffix(output_file); - if (base_sfx.second.empty()) - base_sfx.second = ".csv"; - auto ofs = std::make_unique(base_sfx.first + "_pathfinder" - + base_sfx.second); - if (sig_figs > -1) { + auto ofs = std::make_unique( + get_basename_suffix( + get_arg_val(parser, "output", "file")) + .first + + ".csv"); + if (sig_figs > -1) ofs->precision(sig_figs); - } stan::callbacks::unique_stream_writer pathfinder_writer( std::move(ofs), "# "); write_stan(pathfinder_writer); diff --git a/src/cmdstan/command_helper.hpp b/src/cmdstan/command_helper.hpp index f9dd83a953..3077db5de5 100644 --- a/src/cmdstan/command_helper.hpp +++ b/src/cmdstan/command_helper.hpp @@ -738,6 +738,7 @@ void check_file_config(argument_parser &parser) { } std::vector make_filenames(const std::string &filename, + const std::string &tag, const std::string &type, unsigned int num_chains, unsigned int id) { @@ -754,7 +755,7 @@ std::vector make_filenames(const std::string &filename, } }; for (int i = 0; i < num_chains; ++i) { - names[i] = base_sfx.first + name_iterator(i) + base_sfx.second; + names[i] = base_sfx.first + tag + name_iterator(i) + base_sfx.second; } return names; } @@ -771,48 +772,92 @@ void init_callbacks( unsigned int num_chains = get_num_chains(parser); unsigned int id = get_arg_val(parser, "id"); int sig_figs = get_arg_val(parser, "output", "sig_figs"); - + bool save_single_paths + = user_method->arg("pathfinder") + && get_arg_val(parser, "method", "pathfinder", + "save_single_paths"); + std::string output_file + = get_arg_val(parser, "output", "file"); + std::string diagnostic_file + = get_arg_val(parser, "output", "diagnostic_file"); + std::vector output_filenames; + std::vector diagnostic_filenames; sample_writers.reserve(num_chains); - std::vector output_filenames - = make_filenames(get_arg_val(parser, "output", "file"), - ".csv", num_chains, id); - for (int i = 0; i < num_chains; ++i) { - auto ofs = std::make_unique(output_filenames[i]); - if (sig_figs > -1) - ofs->precision(sig_figs); - sample_writers.emplace_back(std::move(ofs), "# "); - } - - diag_json_writers.reserve(num_chains); diag_csv_writers.reserve(num_chains); - // create no-op writers by default + diag_json_writers.reserve(num_chains); + + // default - no diagnostics for (int i = 0; i < num_chains; ++i) { + diag_csv_writers.emplace_back(nullptr, "# "); diag_json_writers.emplace_back( stan::callbacks::json_writer()); } - for (int i = 0; i < num_chains; ++i) { - diag_csv_writers.emplace_back(nullptr, "# "); - } - // create json, csv writers as needed. - std::string diagnostic_file - = get_arg_val(parser, "output", "diagnostic_file"); - if (!diagnostic_file.empty()) { - std::vector diag_filenames; - if (user_method->arg("pathfinder")) { - diag_json_writers.clear(); - diag_filenames = make_filenames(diagnostic_file, ".json", num_chains, id); + + if (user_method->arg("pathfinder")) { + std::string basename = get_basename_suffix(output_file).first; + std::string diag_basename = get_basename_suffix(diagnostic_file).first; + bool inst_writers = true; + bool inst_diags = true; + if (num_chains == 1) { + output_filenames.emplace_back(basename + ".csv"); + if (!diag_basename.empty()) { + diagnostic_filenames.emplace_back(diag_basename + ".json"); + } else if (save_single_paths) { + diagnostic_filenames.emplace_back(basename + ".json"); + } else { + inst_diags = false; + } + } else if (save_single_paths) { // filenames for single-path outputs + output_filenames + = make_filenames(basename, "_path", ".csv", num_chains, id); + diagnostic_filenames + = make_filenames(basename, "_path", ".json", num_chains, id); + } else { // multi-path default: don't save single-path outputs + inst_writers = false; + inst_diags = false; for (int i = 0; i < num_chains; ++i) { - auto ofs = std::make_unique(diag_filenames[i]); - if (sig_figs > -1) + sample_writers.emplace_back(nullptr, "# "); + } + } + // allocate writers + if (inst_writers) { + for (int i = 0; i < num_chains; ++i) { + auto ofs = std::make_unique(output_filenames[i]); + if (sig_figs > -1) { ofs->precision(sig_figs); - stan::callbacks::json_writer jwriter(std::move(ofs)); + } + sample_writers.emplace_back(std::move(ofs), "# "); + } + } + if (inst_diags) { + diag_json_writers.clear(); + for (int i = 0; i < num_chains; ++i) { + auto ofs_diag + = std::make_unique(diagnostic_filenames[i]); + if (sig_figs > -1) { + ofs_diag->precision(sig_figs); + } + stan::callbacks::json_writer jwriter( + std::move(ofs_diag)); diag_json_writers.emplace_back(std::move(jwriter)); } - } else { + } + } else { // not pathfinder + output_filenames + = make_filenames(get_arg_val(parser, "output", "file"), + "", ".csv", num_chains, id); + for (int i = 0; i < num_chains; ++i) { + auto ofs = std::make_unique(output_filenames[i]); + if (sig_figs > -1) + ofs->precision(sig_figs); + sample_writers.emplace_back(std::move(ofs), "# "); + } + if (!diagnostic_file.empty()) { diag_csv_writers.clear(); - diag_filenames = make_filenames(diagnostic_file, ".csv", num_chains, id); + diagnostic_filenames + = make_filenames(diagnostic_file, "", ".csv", num_chains, id); for (int i = 0; i < num_chains; ++i) { - auto ofs = std::make_unique(diag_filenames[i]); + auto ofs = std::make_unique(diagnostic_filenames[i]); if (sig_figs > -1) ofs->precision(sig_figs); diag_csv_writers.emplace_back(std::move(ofs), "# "); diff --git a/src/test/interface/pathfinder_test.cpp b/src/test/interface/pathfinder_test.cpp index 819bcabe82..ab164f55dc 100644 --- a/src/test/interface/pathfinder_test.cpp +++ b/src/test/interface/pathfinder_test.cpp @@ -1,9 +1,11 @@ #include #include +#include #include using cmdstan::test::convert_model_path; using cmdstan::test::count_matches; +using cmdstan::test::file_exists; using cmdstan::test::parse_sample; using cmdstan::test::run_command; using cmdstan::test::run_command_output; @@ -16,32 +18,45 @@ class CmdStan : public testing::Test { eight_schools_model = {"src", "test", "test-models", "eight_schools"}; eight_schools_data = {"src", "test", "test-models", "eight_schools.data.json"}; - test_arg_output = {"test", "tmp_pf"}; - test_arg_diags = {"test", "tmp_pf"}; - test_result_draws = {"test", "tmp_pf_pathfinder.csv"}; - test_result_single = {"test", "tmp_pf_1.csv"}; - test_result_diags = {"test", "tmp_pf_1.json"}; + arg_output = {"test", "output"}; + arg_diags = {"test", "diagnostics"}; + output_csv = {"test", "output.csv"}; + output_json = {"test", "output.json"}; + output_diags = {"test", "diagnostics.json"}; + output_single_csv = {"test", "output_path_1.csv"}; + output_single_json = {"test", "output_path_1.json"}; } + + void TearDown() { + std::remove(convert_model_path(output_csv).c_str()); + std::remove(convert_model_path(output_json).c_str()); + std::remove(convert_model_path(output_diags).c_str()); + std::remove(convert_model_path(output_single_csv).c_str()); + std::remove(convert_model_path(output_single_json).c_str()); + } + std::vector dev_null_path; std::vector multi_normal_model; std::vector eight_schools_model; std::vector eight_schools_data; - std::vector test_arg_output; - std::vector test_arg_diags; - std::vector test_result_draws; - std::vector test_result_single; - std::vector test_result_diags; + std::vector arg_output; + std::vector arg_diags; + std::vector output_csv; + std::vector output_json; + std::vector output_diags; + std::vector output_single_csv; + std::vector output_single_json; }; -TEST_F(CmdStan, pathfinder_good) { +TEST_F(CmdStan, pathfinder_defaults) { std::stringstream ss; ss << convert_model_path(multi_normal_model) - << " output refresh=0 file=" << convert_model_path(test_arg_output) - << " method=pathfinder num_psis_draws=40"; + << " output refresh=0 file=" << convert_model_path(arg_output) + << " method=pathfinder"; run_command_output out = run_command(ss.str()); ASSERT_FALSE(out.hasError); - std::fstream result_stream(convert_model_path(test_result_draws)); + std::fstream result_stream(convert_model_path(output_csv)); std::stringstream result_sstream; result_sstream << result_stream.rdbuf(); result_stream.close(); @@ -50,112 +65,158 @@ TEST_F(CmdStan, pathfinder_good) { EXPECT_EQ(1, count_matches(" seconds (Pathfinders)", output)); EXPECT_EQ(1, count_matches(" seconds (PSIS)", output)); EXPECT_EQ(1, count_matches(" seconds (Total)", output)); - EXPECT_EQ(1, count_matches(" seconds (Total)", output)); - EXPECT_EQ(1, count_matches("num_psis_draws = 40", output)); - - result_sstream.str(std::string()); - std::fstream single_stream(convert_model_path(test_result_single)); - result_sstream << single_stream.rdbuf(); - single_stream.close(); - output = result_sstream.str(); - EXPECT_EQ(1, count_matches("# Elapsed Time:", output)); - EXPECT_EQ(1, count_matches(" seconds (Pathfinder)", output)); + EXPECT_EQ(1, count_matches("save_single_paths = 0 (Default)", output)); + EXPECT_EQ(1, count_matches("num_paths = 4 (Default)", output)); } -TEST_F(CmdStan, pathfinder_single_good) { +TEST_F(CmdStan, pathfinder_40_draws) { std::stringstream ss; ss << convert_model_path(multi_normal_model) - << " output refresh=0 file=" << convert_model_path(test_arg_output) - << " method=pathfinder" - << " num_paths=1"; + << " output refresh=0 file=" << convert_model_path(arg_output) + << " method=pathfinder num_psis_draws=40"; run_command_output out = run_command(ss.str()); ASSERT_FALSE(out.hasError); - std::vector test_result_1path = {"test", "tmp_pf.csv"}; - std::fstream result_stream(convert_model_path(test_result_1path)); + std::fstream result_stream(convert_model_path(output_csv)); std::stringstream result_sstream; result_sstream << result_stream.rdbuf(); result_stream.close(); std::string output = result_sstream.str(); EXPECT_EQ(1, count_matches("# Elapsed Time:", output)); - EXPECT_EQ(1, count_matches(" seconds (Pathfinder)", output)); + EXPECT_EQ(1, count_matches(" seconds (Pathfinders)", output)); + EXPECT_EQ(1, count_matches(" seconds (PSIS)", output)); + EXPECT_EQ(1, count_matches(" seconds (Total)", output)); + EXPECT_EQ(1, count_matches("num_psis_draws = 40", output)); + EXPECT_EQ(1, count_matches("num_paths = 4 (Default)", output)); + EXPECT_EQ(1, count_matches("save_single_paths = 0 (Default)", output)); } -TEST_F(CmdStan, pathfinder_multi_good) { +TEST_F(CmdStan, pathfinder_single) { std::stringstream ss; ss << convert_model_path(multi_normal_model) - << " output refresh=0 file=" << convert_model_path(test_arg_output) + << " output refresh=0 file=" << convert_model_path(arg_output) << " method=pathfinder" - << " num_paths=8"; + << " num_paths=1"; run_command_output out = run_command(ss.str()); ASSERT_FALSE(out.hasError); - std::vector test_result_8path = {"test", "tmp_pf_8.csv"}; - std::fstream result_stream(convert_model_path(test_result_8path)); + ASSERT_FALSE(file_exists(convert_model_path(output_json))); + + std::fstream result_stream(convert_model_path(output_csv)); std::stringstream result_sstream; result_sstream << result_stream.rdbuf(); result_stream.close(); std::string output = result_sstream.str(); EXPECT_EQ(1, count_matches("# Elapsed Time:", output)); EXPECT_EQ(1, count_matches(" seconds (Pathfinder)", output)); + EXPECT_EQ(1, count_matches("num_paths = 1", output)); + EXPECT_EQ(1, count_matches("save_single_paths = 0 (Default)", output)); } -TEST_F(CmdStan, pathfinder_diagnostic_json) { +TEST_F(CmdStan, pathfinder_save_single_default_num_paths) { std::stringstream ss; ss << convert_model_path(multi_normal_model) - << " output refresh=0 file=" << convert_model_path(test_arg_output) - << " diagnostic_file=" << convert_model_path(test_arg_diags) - << " method=pathfinder"; + << " output refresh=0 file=" << convert_model_path(arg_output) + << " method=pathfinder save_single_paths=1"; run_command_output out = run_command(ss.str()); ASSERT_FALSE(out.hasError); + ASSERT_TRUE(file_exists(convert_model_path(output_csv))); + ASSERT_FALSE(file_exists(convert_model_path(output_json))); + ASSERT_TRUE(file_exists(convert_model_path(output_single_csv))); + ASSERT_TRUE(file_exists(convert_model_path(output_single_json))); - std::fstream result_stream(convert_model_path(test_result_diags)); + std::fstream single_csv_stream(convert_model_path(output_single_csv)); std::stringstream result_sstream; - result_sstream << result_stream.rdbuf(); - result_stream.close(); - std::string output = result_sstream.str(); - ASSERT_FALSE(output.empty()); + result_sstream << single_csv_stream.rdbuf(); + single_csv_stream.close(); + std::string single_csv = result_sstream.str(); + EXPECT_EQ(1, count_matches("# Elapsed Time:", single_csv)); + EXPECT_EQ(1, count_matches(" seconds (Pathfinder)", single_csv)); + EXPECT_EQ(1, count_matches("save_single_paths = 1", single_csv)); + + std::fstream single_json_stream(convert_model_path(output_single_json)); + std::stringstream result_json_sstream; + result_json_sstream << single_json_stream.rdbuf(); + single_json_stream.close(); + std::string single_json = result_json_sstream.str(); + ASSERT_FALSE(single_json.empty()); + rapidjson::Document document; - ASSERT_FALSE(document.Parse<0>(output.c_str()).HasParseError()); + ASSERT_FALSE(document.Parse<0>(single_json.c_str()).HasParseError()); + EXPECT_EQ(1, count_matches("\"1\" : {\"iter\" : 1,", single_json)); } -TEST_F(CmdStan, pathfinder_lbfgs_iterations) { +TEST_F(CmdStan, pathfinder_save_single_num_paths_1) { std::stringstream ss; - ss << convert_model_path(eight_schools_model) - << " data file=" << convert_model_path(eight_schools_data) - << " random seed=12345" - << " output refresh=0 file=" << convert_model_path(test_arg_output) - << " diagnostic_file=" << convert_model_path(test_arg_diags) - << " method=pathfinder max_lbfgs_iters=3"; + ss << convert_model_path(multi_normal_model) + << " output refresh=0 file=" << convert_model_path(arg_output) + << " method=pathfinder" + << " num_paths=1 save_single_paths=1"; + run_command_output out = run_command(ss.str()); + ASSERT_FALSE(out.hasError); + ASSERT_TRUE(file_exists(convert_model_path(output_csv))); + ASSERT_TRUE(file_exists(convert_model_path(output_json))); + ASSERT_FALSE(file_exists(convert_model_path(output_single_csv))); + ASSERT_FALSE(file_exists(convert_model_path(output_single_json))); +} + +TEST_F(CmdStan, pathfinder_save_single_num_paths_1_diag_file_arg) { + std::stringstream ss; + ss << convert_model_path(multi_normal_model) + << " output refresh=0 file=" << convert_model_path(arg_output) + << " diagnostic_file=" << convert_model_path(arg_diags) + << " method=pathfinder" + << " num_paths=1 save_single_paths=1"; run_command_output out = run_command(ss.str()); ASSERT_FALSE(out.hasError); + ASSERT_TRUE(file_exists(convert_model_path(output_csv))); + ASSERT_TRUE(file_exists(convert_model_path(output_diags))); + ASSERT_FALSE(file_exists(convert_model_path(output_json))); + ASSERT_FALSE(file_exists(convert_model_path(output_single_csv))); + ASSERT_FALSE(file_exists(convert_model_path(output_single_json))); +} - std::fstream result_stream(convert_model_path(test_result_diags)); +TEST_F(CmdStan, pathfinder_num_paths_8) { + std::stringstream ss; + ss << convert_model_path(multi_normal_model) + << " output refresh=0 file=" << convert_model_path(arg_output) + << " method=pathfinder" + << " num_paths=8"; + run_command_output out = run_command(ss.str()); + ASSERT_FALSE(out.hasError); + ASSERT_TRUE(file_exists(convert_model_path(output_csv))); + ASSERT_FALSE(file_exists(convert_model_path(output_single_csv))); + + std::fstream result_stream(convert_model_path(output_csv)); std::stringstream result_sstream; result_sstream << result_stream.rdbuf(); result_stream.close(); std::string output = result_sstream.str(); - ASSERT_FALSE(output.empty()); - rapidjson::Document document; - ASSERT_FALSE(document.Parse<0>(output.c_str()).HasParseError()); - EXPECT_EQ(1, count_matches("\"3\" : {\"iter\" : 3,", output)); - EXPECT_EQ(0, count_matches("\"4\" : {\"iter\" : 4,", output)); + EXPECT_EQ(1, count_matches(" seconds (Pathfinders)", output)); + EXPECT_EQ(1, count_matches(" seconds (PSIS)", output)); + EXPECT_EQ(1, count_matches("num_paths = 8", output)); } -TEST_F(CmdStan, pathfinder_num_paths_draws) { +TEST_F(CmdStan, pathfinder_lbfgs_iterations) { std::stringstream ss; ss << convert_model_path(eight_schools_model) << " data file=" << convert_model_path(eight_schools_data) - << " output refresh=0 file=" << convert_model_path(test_arg_output) - << " method=pathfinder num_draws=10 num_paths=2"; + << " random seed=12345" + << " output refresh=0 file=" << convert_model_path(arg_output) + << " method=pathfinder max_lbfgs_iters=3" + << " save_single_paths=1"; run_command_output out = run_command(ss.str()); ASSERT_FALSE(out.hasError); + ASSERT_TRUE(file_exists(convert_model_path(output_csv))); + ASSERT_TRUE(file_exists(convert_model_path(output_single_json))); - std::fstream result_stream(convert_model_path(test_result_draws)); + std::fstream result_stream(convert_model_path(output_single_json)); std::stringstream result_sstream; result_sstream << result_stream.rdbuf(); result_stream.close(); std::string output = result_sstream.str(); ASSERT_FALSE(output.empty()); - EXPECT_EQ(1, count_matches("num_paths = 2", output)); - EXPECT_EQ(1, count_matches("num_draws = 10", output)); + rapidjson::Document document; + ASSERT_FALSE(document.Parse<0>(output.c_str()).HasParseError()); + EXPECT_EQ(1, count_matches("\"3\" : {\"iter\" : 3,", output)); + EXPECT_EQ(0, count_matches("\"4\" : {\"iter\" : 4,", output)); } diff --git a/src/test/utility.hpp b/src/test/utility.hpp index 7bd6834a46..004e9e516d 100644 --- a/src/test/utility.hpp +++ b/src/test/utility.hpp @@ -9,6 +9,8 @@ #include #include +#include + namespace cmdstan { namespace test { @@ -281,6 +283,11 @@ int idx_first_match(const std::vector &lines, return idx; } +bool file_exists(const std::string &filename) { + struct stat buffer; + return (stat(filename.c_str(), &buffer) == 0); +} + } // namespace test } // namespace cmdstan #endif