From 0c586d196f020c028beae41bc0afa6873d86921c Mon Sep 17 00:00:00 2001
From: kleeman
Date: Tue, 3 Jul 2018 11:37:15 -0700
Subject: [PATCH] Add tests to ensure predict, predict_marginal and
 predict_mean are consistent for Gaussian Processes.

---
 albatross/core/distribution.h | 10 +++++++++-
 albatross/evaluate.h          |  5 ++---
 albatross/models/gp.h         |  5 +++--
 tests/test_models.cc          | 18 ++++++++++++++++++
 tests/test_utils.h            |  5 +++--
 5 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/albatross/core/distribution.h b/albatross/core/distribution.h
index 060c1a73..db5b4651 100644
--- a/albatross/core/distribution.h
+++ b/albatross/core/distribution.h
@@ -85,9 +85,17 @@ template <typename CovarianceType> struct Distribution {
   }
 };
 
+// A JointDistribution has a dense covariance matrix, which
+// contains the covariance between each variable and all others.
+using JointDistribution = Distribution<Eigen::MatrixXd>;
+
+// We use a wrapper around DiagonalMatrix in order to make
+// the resulting distribution serializable
 using DiagonalMatrixXd =
     Eigen::SerializableDiagonalMatrix<double, Eigen::Dynamic>;
-using JointDistribution = Distribution<Eigen::MatrixXd>;
+// A MarginalDistribution has only a diagonal covariance
+// matrix, so in turn only describes the variance of each
+// variable independent of all others.
 using MarginalDistribution = Distribution<DiagonalMatrixXd>;
 
 template <typename FeatureType>
diff --git a/albatross/evaluate.h b/albatross/evaluate.h
index 52db6426..db807e6e 100644
--- a/albatross/evaluate.h
+++ b/albatross/evaluate.h
@@ -105,9 +105,8 @@ negative_log_likelihood(const Eigen::VectorXd &deviation,
  */
 
 namespace evaluation_metrics {
 
-static inline double
-root_mean_square_error(const JointDistribution &prediction,
-                       const MarginalDistribution &truth) {
+static inline double root_mean_square_error(const JointDistribution &prediction,
+                                            const MarginalDistribution &truth) {
   const Eigen::VectorXd error = prediction.mean - truth.mean;
   double mse = error.dot(error) / static_cast<double>(error.size());
   return sqrt(mse);
diff --git a/albatross/models/gp.h b/albatross/models/gp.h
index edb7b54f..99d52475 100644
--- a/albatross/models/gp.h
+++ b/albatross/models/gp.h
@@ -127,8 +127,9 @@ class GaussianProcessRegression
   }
 
 protected:
-  FitType serializable_fit_(const std::vector<FeatureType> &features,
-                            const MarginalDistribution &targets) const override {
+  FitType
+  serializable_fit_(const std::vector<FeatureType> &features,
+                    const MarginalDistribution &targets) const override {
     Eigen::MatrixXd cov = symmetric_covariance(covariance_function_, features);
     FitType model_fit;
     model_fit.train_features = features;
diff --git a/tests/test_models.cc b/tests/test_models.cc
index d5a3ec5b..95eda9c3 100644
--- a/tests/test_models.cc
+++ b/tests/test_models.cc
@@ -90,4 +90,22 @@ TEST(test_models, test_with_target_distribution) {
 
   EXPECT_LE(scores.mean(), scores_without_variance.mean());
 }
+
+TEST(test_models, test_predict_variants) {
+  auto dataset = make_heteroscedastic_toy_linear_data();
+
+  auto model = MakeGaussianProcess().create();
+  model->fit(dataset);
+  const auto joint_predictions = model->predict(dataset.features);
+  const auto marginal_predictions = model->predict_marginal(dataset.features);
+  const auto mean_predictions = model->predict_mean(dataset.features);
+
+  for (Eigen::Index i = 0; i < joint_predictions.mean.size(); i++) {
+    EXPECT_NEAR(joint_predictions.mean[i], mean_predictions[i], 1e-6);
+    EXPECT_NEAR(joint_predictions.mean[i], marginal_predictions.mean[i], 1e-6);
+    EXPECT_NEAR(joint_predictions.covariance(i, i),
+                marginal_predictions.covariance.diagonal()[i], 1e-6);
+  }
+}
+
 } // namespace albatross
diff --git a/tests/test_utils.h b/tests/test_utils.h
index e3405605..6b114f6d 100644
--- a/tests/test_utils.h
+++ b/tests/test_utils.h
@@ -73,8 +73,9 @@ class MockModel : public SerializableRegressionModel<MockPredictor, MockFit> {
 
 protected:
   // builds the map from int to value
-  MockFit serializable_fit_(const std::vector<MockPredictor> &features,
-                            const MarginalDistribution &targets) const override {
+  MockFit
+  serializable_fit_(const std::vector<MockPredictor> &features,
+                    const MarginalDistribution &targets) const override {
     int n = static_cast<int>(features.size());
     Eigen::VectorXd predictions(n);