Skip to content

Commit

Permalink
Remove PredictDistribution and TargetDistribution aliases in
Browse files Browse the repository at this point in the history
favor of more descriptive JointDistribution and MarginalDistribution.
  • Loading branch information
akleeman committed Jul 3, 2018
1 parent 310b4e7 commit f96fa21
Show file tree
Hide file tree
Showing 13 changed files with 76 additions and 79 deletions.
4 changes: 2 additions & 2 deletions albatross/core/distribution.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ template <typename CovarianceType> struct Distribution {

using DiagonalMatrixXd =
Eigen::SerializableDiagonalMatrix<double, Eigen::Dynamic>;
using DenseDistribution = Distribution<Eigen::MatrixXd>;
using DiagonalDistribution = Distribution<DiagonalMatrixXd>;
using JointDistribution = Distribution<Eigen::MatrixXd>;
using MarginalDistribution = Distribution<DiagonalMatrixXd>;

template <typename CovarianceType, typename SizeType>
Distribution<CovarianceType> subset(const std::vector<SizeType> &indices,
Expand Down
37 changes: 17 additions & 20 deletions albatross/core/model.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,6 @@

namespace albatross {

using TargetDistribution = DiagonalDistribution;
using PredictDistribution = DenseDistribution;

/*
* A RegressionDataset holds two vectors of data, the features
* where a single feature can be any class that contains the information used
Expand All @@ -36,12 +33,12 @@ using PredictDistribution = DenseDistribution;
*/
template <typename FeatureType> struct RegressionDataset {
std::vector<FeatureType> features;
TargetDistribution targets;
MarginalDistribution targets;

RegressionDataset(){};

RegressionDataset(const std::vector<FeatureType> &features_,
const TargetDistribution &targets_)
const MarginalDistribution &targets_)
: features(features_), targets(targets_) {
// If the two inputs aren't the same size they clearly aren't
// consistent.
Expand All @@ -51,7 +48,7 @@ template <typename FeatureType> struct RegressionDataset {

RegressionDataset(const std::vector<FeatureType> &features_,
const Eigen::VectorXd &targets_)
: RegressionDataset(features_, TargetDistribution(targets_)) {}
: RegressionDataset(features_, MarginalDistribution(targets_)) {}

template <class Archive>
typename std::enable_if<valid_in_out_serializer<FeatureType, Archive>::value,
Expand Down Expand Up @@ -127,7 +124,7 @@ class RegressionModel : public ParameterHandlingMixin {
* predict.
*/
void fit(const std::vector<FeatureType> &features,
const TargetDistribution &targets) {
const MarginalDistribution &targets) {
assert(features.size() > 0);
assert(static_cast<s32>(features.size()) ==
static_cast<s32>(targets.size()));
Expand All @@ -140,7 +137,7 @@ class RegressionModel : public ParameterHandlingMixin {
*/
void fit(const std::vector<FeatureType> &features,
const Eigen::VectorXd &targets) {
fit(features, TargetDistribution(targets));
fit(features, MarginalDistribution(targets));
}

/*
Expand All @@ -155,23 +152,23 @@ class RegressionModel : public ParameterHandlingMixin {
* and makes simple checks to confirm the implementation is returning
* properly sized Distribution.
*/
PredictDistribution predict(const std::vector<FeatureType> &features) const {
JointDistribution predict(const std::vector<FeatureType> &features) const {
assert(has_been_fit());
PredictDistribution preds = predict_(features);
JointDistribution preds = predict_(features);
assert(static_cast<s32>(preds.mean.size()) ==
static_cast<s32>(features.size()));
return preds;
}

PredictDistribution predict(const FeatureType &feature) const {
JointDistribution predict(const FeatureType &feature) const {
std::vector<FeatureType> features = {feature};
return predict(features);
}

DiagonalDistribution
MarginalDistribution
predict_marginal(const std::vector<FeatureType> &features) const {
assert(has_been_fit());
DiagonalDistribution preds = predict_marginal_(features);
MarginalDistribution preds = predict_marginal_(features);
assert(static_cast<s32>(preds.mean.size()) ==
static_cast<s32>(features.size()));
return preds;
Expand All @@ -195,9 +192,9 @@ class RegressionModel : public ParameterHandlingMixin {
 * followed by predict but overriding this method may speed up computation for
* some models.
*/
PredictDistribution
JointDistribution
fit_and_predict(const std::vector<FeatureType> &train_features,
const TargetDistribution &train_targets,
const MarginalDistribution &train_targets,
const std::vector<FeatureType> &test_features) {
// Fit using the training data, then predict with the test.
fit(train_features, train_targets);
Expand All @@ -208,7 +205,7 @@ class RegressionModel : public ParameterHandlingMixin {
* A convenience wrapper around fit_and_predict which uses the entries
* in a RegressionFold struct
*/
PredictDistribution fit_and_predict(const RegressionFold<FeatureType> &fold) {
JointDistribution fit_and_predict(const RegressionFold<FeatureType> &fold) {
return fit_and_predict(fold.train.features, fold.train.targets,
fold.test.features);
}
Expand Down Expand Up @@ -249,17 +246,17 @@ class RegressionModel : public ParameterHandlingMixin {

protected:
virtual void fit_(const std::vector<FeatureType> &features,
const TargetDistribution &targets) = 0;
const MarginalDistribution &targets) = 0;

virtual PredictDistribution
virtual JointDistribution
predict_(const std::vector<FeatureType> &features) const = 0;

virtual DiagonalDistribution
virtual MarginalDistribution
predict_marginal_(const std::vector<FeatureType> &features) const {
std::cout << "WARNING: A marginal prediction is being made, but in a "
"horribly inefficient way.";
const auto full_distribution = predict_(features);
return DiagonalDistribution(
return MarginalDistribution(
full_distribution.mean,
full_distribution.covariance.diagonal().asDiagonal());
}
Expand Down
8 changes: 4 additions & 4 deletions albatross/core/model_adapter.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ class AdaptedRegressionModel

protected:
void fit_(const std::vector<FeatureType> &features,
const TargetDistribution &targets) override {
const MarginalDistribution &targets) override {
this->sub_model_.fit(convert_features(features), targets);
}

Expand All @@ -130,19 +130,19 @@ class AdaptedRegressionModel
*/
fit_type_if_serializable<RegressionModelImplementation>
serializable_fit_(const std::vector<FeatureType> &features,
const TargetDistribution &targets) const override {
const MarginalDistribution &targets) const override {
assert(false &&
"serializable_fit_ for an adapted model should never be called");
typename fit_type_or_void<RegressionModelImplementation>::type dummy;
return dummy;
}

PredictDistribution
JointDistribution
predict_(const std::vector<FeatureType> &features) const override {
return sub_model_.predict(convert_features(features));
}

virtual DiagonalDistribution
virtual MarginalDistribution
predict_marginal_(const std::vector<FeatureType> &features) const override {
return sub_model_.predict_marginal(convert_features(features));
}
Expand Down
4 changes: 2 additions & 2 deletions albatross/core/serialize.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,13 @@ class SerializableRegressionModel : public RegressionModel<FeatureType> {

protected:
void fit_(const std::vector<FeatureType> &features,
const TargetDistribution &targets) {
const MarginalDistribution &targets) {
model_fit_ = serializable_fit_(features, targets);
}

virtual ModelFit
serializable_fit_(const std::vector<FeatureType> &features,
const TargetDistribution &targets) const = 0;
const MarginalDistribution &targets) const = 0;

ModelFit model_fit_;
};
Expand Down
24 changes: 12 additions & 12 deletions albatross/crossvalidation.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace albatross {
* the quality of the prediction.
*/
using EvaluationMetric = std::function<double(
const PredictDistribution &prediction, const TargetDistribution &targets)>;
const JointDistribution &prediction, const MarginalDistribution &targets)>;

inline FoldIndices get_train_indices(const FoldIndices &test_indices,
const int n) {
Expand Down Expand Up @@ -68,11 +68,11 @@ folds_from_fold_indexer(const RegressionDataset<FeatureType> &dataset,

std::vector<FeatureType> train_features =
subset(train_indices, dataset.features);
TargetDistribution train_targets = subset(train_indices, dataset.targets);
MarginalDistribution train_targets = subset(train_indices, dataset.targets);

std::vector<FeatureType> test_features =
subset(test_indices, dataset.features);
TargetDistribution test_targets = subset(test_indices, dataset.targets);
MarginalDistribution test_targets = subset(test_indices, dataset.targets);

assert(train_features.size() == train_targets.size());
assert(test_features.size() == test_targets.size());
Expand Down Expand Up @@ -151,17 +151,17 @@ static inline std::vector<RegressionFold<FeatureType>> leave_one_group_out(
}

/*
* Computes a PredictDistribution for each fold in set of cross validation
* folds. The resulting vector of PredictDistributions can then be used
* Computes a JointDistribution for each fold in set of cross validation
* folds. The resulting vector of JointDistributions can then be used
* for things like computing an EvaluationMetric for each fold, or assembling
 * all the predictions into a single cross validated JointDistribution.
*/
template <typename FeatureType>
static inline std::vector<PredictDistribution> cross_validated_predictions(
static inline std::vector<JointDistribution> cross_validated_predictions(
const std::vector<RegressionFold<FeatureType>> &folds,
RegressionModel<FeatureType> *model) {
// Iteratively make predictions and assemble the output vector
std::vector<PredictDistribution> predictions;
std::vector<JointDistribution> predictions;
for (std::size_t i = 0; i < folds.size(); i++) {
predictions.push_back(model->fit_and_predict(
folds[i].train_dataset.features, folds[i].train_dataset.targets,
Expand All @@ -178,7 +178,7 @@ template <class FeatureType>
static inline Eigen::VectorXd
compute_scores(const EvaluationMetric &metric,
const std::vector<RegressionFold<FeatureType>> &folds,
const std::vector<PredictDistribution> &predictions) {
const std::vector<JointDistribution> &predictions) {
// Create a vector of metrics, one for each fold.
Eigen::VectorXd metrics(static_cast<s32>(folds.size()));
// Loop over each fold, making predictions then evaluating them
Expand All @@ -200,7 +200,7 @@ cross_validated_scores(const EvaluationMetric &metric,
const std::vector<RegressionFold<FeatureType>> &folds,
RegressionModel<FeatureType> *model) {
// Create a vector of predictions.
std::vector<PredictDistribution> predictions =
std::vector<JointDistribution> predictions =
cross_validated_predictions<FeatureType>(folds, model);
return compute_scores(metric, folds, predictions);
}
Expand All @@ -216,13 +216,13 @@ cross_validated_scores(const EvaluationMetric &metric,
* unknown.
*/
template <typename FeatureType>
static inline PredictDistribution
static inline JointDistribution
cross_validated_predict(const std::vector<RegressionFold<FeatureType>> &folds,
RegressionModel<FeatureType> *model) {
// Get the cross validated predictions, note however that
// depending on the type of folds, these predictions may
// be shuffled.
const std::vector<PredictDistribution> predictions =
const std::vector<JointDistribution> predictions =
cross_validated_predictions<FeatureType>(folds, model);
// Create a new prediction mean that will eventually contain
// the ordered concatenation of each fold's predictions.
Expand All @@ -240,7 +240,7 @@ cross_validated_predict(const std::vector<RegressionFold<FeatureType>> &folds,
pred.mean[i];
}
}
return PredictDistribution(mean);
return JointDistribution(mean);
}

} // namespace albatross
Expand Down
12 changes: 6 additions & 6 deletions albatross/evaluate.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ negative_log_likelihood(const Eigen::VectorXd &deviation,
namespace evaluation_metrics {

static inline double
root_mean_square_error(const PredictDistribution &prediction,
const TargetDistribution &truth) {
root_mean_square_error(const JointDistribution &prediction,
const MarginalDistribution &truth) {
const Eigen::VectorXd error = prediction.mean - truth.mean;
double mse = error.dot(error) / static_cast<double>(error.size());
return sqrt(mse);
Expand All @@ -117,8 +117,8 @@ root_mean_square_error(const PredictDistribution &prediction,
 * Takes output from a model (JointDistribution)
* and the corresponding truth and uses them to compute the stddev.
*/
static inline double standard_deviation(const PredictDistribution &prediction,
const TargetDistribution &truth) {
static inline double standard_deviation(const JointDistribution &prediction,
const MarginalDistribution &truth) {
Eigen::VectorXd error = prediction.mean - truth.mean;
const auto n_elements = static_cast<double>(error.size());
const double mean_error = error.sum() / n_elements;
Expand All @@ -131,8 +131,8 @@ static inline double standard_deviation(const PredictDistribution &prediction,
* distribution is multivariate normal.
*/
static inline double
negative_log_likelihood(const PredictDistribution &prediction,
const TargetDistribution &truth) {
negative_log_likelihood(const JointDistribution &prediction,
const MarginalDistribution &truth) {
const Eigen::VectorXd mean = prediction.mean - truth.mean;
Eigen::MatrixXd covariance(prediction.covariance);
if (truth.has_covariance()) {
Expand Down
12 changes: 6 additions & 6 deletions albatross/models/gp.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

namespace albatross {

using InspectionDistribution = PredictDistribution;
using InspectionDistribution = JointDistribution;

template <typename FeatureType> struct GaussianProcessFit {
std::vector<FeatureType> train_features;
Expand Down Expand Up @@ -128,7 +128,7 @@ class GaussianProcessRegression

protected:
FitType serializable_fit_(const std::vector<FeatureType> &features,
const TargetDistribution &targets) const override {
const MarginalDistribution &targets) const override {
Eigen::MatrixXd cov = symmetric_covariance(covariance_function_, features);
FitType model_fit;
model_fit.train_features = features;
Expand All @@ -141,7 +141,7 @@ class GaussianProcessRegression
return model_fit;
}

PredictDistribution
JointDistribution
predict_(const std::vector<FeatureType> &features) const override {
const auto cross_cov = asymmetric_covariance(
covariance_function_, features, this->model_fit_.train_features);
Expand All @@ -151,10 +151,10 @@ class GaussianProcessRegression
symmetric_covariance(covariance_function_, features);
auto ldlt = this->model_fit_.train_ldlt;
pred_cov -= cross_cov * ldlt.solve(cross_cov.transpose());
return PredictDistribution(pred, pred_cov);
return JointDistribution(pred, pred_cov);
}

virtual DiagonalDistribution
virtual MarginalDistribution
predict_marginal_(const std::vector<FeatureType> &features) const override {
const auto cross_cov = asymmetric_covariance(
covariance_function_, features, this->model_fit_.train_features);
Expand All @@ -169,7 +169,7 @@ class GaussianProcessRegression
marginal_variance[i] += covariance_function_(features[i], features[i]);
}

return DiagonalDistribution(pred, marginal_variance.asDiagonal());
return MarginalDistribution(pred, marginal_variance.asDiagonal());
}

virtual Eigen::VectorXd
Expand Down
6 changes: 3 additions & 3 deletions albatross/models/least_squares.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class LeastSquaresRegression

LeastSquaresFit
serializable_fit_(const std::vector<Eigen::VectorXd> &features,
const TargetDistribution &targets) const override {
const MarginalDistribution &targets) const override {
// The way this is currently implemented we assume all targets have the same
// variance (or zero variance).
assert(!targets.has_covariance());
Expand All @@ -68,7 +68,7 @@ class LeastSquaresRegression
}

protected:
PredictDistribution
JointDistribution
predict_(const std::vector<Eigen::VectorXd> &features) const override {
int n = static_cast<s32>(features.size());
Eigen::VectorXd predictions(n);
Expand All @@ -77,7 +77,7 @@ class LeastSquaresRegression
features[static_cast<std::size_t>(i)].dot(this->model_fit_.coefs);
}

return PredictDistribution(predictions);
return JointDistribution(predictions);
}

/*
Expand Down
2 changes: 1 addition & 1 deletion tests/test_core_distribution.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ void expect_subset_equal(const Eigen::DiagonalMatrix<Scalar, Size> &original,
template <typename DistributionType>
class PolymorphicDistributionTest : public ::testing::Test {};

typedef ::testing::Types<TargetDistribution, PredictDistribution>
typedef ::testing::Types<MarginalDistribution, JointDistribution>
DistributionsToTest;
TYPED_TEST_CASE(PolymorphicDistributionTest, DistributionsToTest);

Expand Down
2 changes: 1 addition & 1 deletion tests/test_core_model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ TEST(test_core_model, test_fit_predict) {
MockModel m;
m.fit(dataset);
// We should be able to perfectly predict in this case.
PredictDistribution predictions = m.predict(dataset.features);
JointDistribution predictions = m.predict(dataset.features);
EXPECT_LT((predictions.mean - dataset.targets.mean).norm(), 1e-10);
}

Expand Down
Loading

0 comments on commit f96fa21

Please sign in to comment.