Skip to content

Commit

Permalink
Merge pull request #98 from akleeman/keep_fold_names_with_cv_predictions
Browse files (browse the repository at this point in the history)
Return Maps from Cross Validation
  • Loading branch information
akleeman authored Mar 26, 2019
2 parents 8946817 + f4c79bf commit 313c9bf
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 72 deletions.
24 changes: 12 additions & 12 deletions albatross/evaluation/cross_validation.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class Prediction<CrossValidation<ModelType>, FeatureType, FoldIndexer> {
typename DummyType = ModelType,
typename std::enable_if<has_valid_cv_mean<DummyType, FeatureType>::value,
int>::type = 0>
std::vector<Eigen::VectorXd> means() const {
std::map<std::string, Eigen::VectorXd> means() const {
return model_.cross_validated_predictions(
dataset_, indexer_, PredictTypeIdentity<Eigen::VectorXd>());
}
Expand All @@ -48,7 +48,7 @@ class Prediction<CrossValidation<ModelType>, FeatureType, FoldIndexer> {
typename fit_type<DummyType, FeatureType>::type>>::value &&
!has_valid_cv_mean<DummyType, FeatureType>::value,
int>::type = 0>
std::vector<Eigen::VectorXd> means() const {
std::map<std::string, Eigen::VectorXd> means() const {
const auto folds = folds_from_fold_indexer(dataset_, indexer_);
const auto predictions = albatross::get_predictions(model_, folds);
return get_means(predictions);
Expand All @@ -62,7 +62,7 @@ class Prediction<CrossValidation<ModelType>, FeatureType, FoldIndexer> {
typename fit_type<DummyType, FeatureType>::type>>::value &&
!has_valid_cv_mean<DummyType, FeatureType>::value,
int>::type = 0>
std::vector<Eigen::VectorXd> means() const = delete;
std::map<std::string, Eigen::VectorXd> means() const = delete;

template <typename DummyType = ModelType,
typename std::enable_if<
Expand Down Expand Up @@ -94,7 +94,7 @@ class Prediction<CrossValidation<ModelType>, FeatureType, FoldIndexer> {
typename DummyType = ModelType,
typename std::enable_if<
has_valid_cv_marginal<DummyType, FeatureType>::value, int>::type = 0>
std::vector<MarginalDistribution> marginals() const {
std::map<std::string, MarginalDistribution> marginals() const {
return model_.cross_validated_predictions(
dataset_, indexer_, PredictTypeIdentity<MarginalDistribution>());
}
Expand All @@ -107,7 +107,7 @@ class Prediction<CrossValidation<ModelType>, FeatureType, FoldIndexer> {
typename fit_type<DummyType, FeatureType>::type>>::value &&
!has_valid_cv_marginal<DummyType, FeatureType>::value,
int>::type = 0>
std::vector<MarginalDistribution> marginals() const {
std::map<std::string, MarginalDistribution> marginals() const {
const auto folds = folds_from_fold_indexer(dataset_, indexer_);
const auto predictions = albatross::get_predictions(model_, folds);
return get_marginals(predictions);
Expand Down Expand Up @@ -153,7 +153,7 @@ class Prediction<CrossValidation<ModelType>, FeatureType, FoldIndexer> {
typename DummyType = ModelType,
typename std::enable_if<has_valid_cv_joint<DummyType, FeatureType>::value,
int>::type = 0>
std::vector<JointDistribution> joints() const {
std::map<std::string, JointDistribution> joints() const {
return model_.cross_validated_predictions(
dataset_, indexer_, PredictTypeIdentity<JointDistribution>());
}
Expand All @@ -166,7 +166,7 @@ class Prediction<CrossValidation<ModelType>, FeatureType, FoldIndexer> {
typename fit_type<DummyType, FeatureType>::type>>::value &&
!has_valid_cv_joint<DummyType, FeatureType>::value,
int>::type = 0>
std::vector<JointDistribution> joints() const {
std::map<std::string, JointDistribution> joints() const {
const auto folds = folds_from_fold_indexer(dataset_, indexer_);
const auto predictions = albatross::get_predictions(model_, folds);
return get_joints(predictions);
Expand All @@ -180,7 +180,7 @@ class Prediction<CrossValidation<ModelType>, FeatureType, FoldIndexer> {
typename fit_type<DummyType, FeatureType>::type>>::value &&
!has_valid_cv_joint<DummyType, FeatureType>::value,
int>::type = 0>
std::vector<JointDistribution> joints() const = delete;
std::map<std::string, JointDistribution> joints() const = delete;

template <typename DummyType = ModelType>
JointDistribution joint() const =
Expand All @@ -197,19 +197,19 @@ class Prediction<CrossValidation<ModelType>, FeatureType, FoldIndexer> {

auto get(get_type<Eigen::VectorXd>) const { return this->mean(); }

auto get(get_type<std::vector<Eigen::VectorXd>>) const {
auto get(get_type<std::map<std::string, Eigen::VectorXd>>) const {
return this->means();
}

auto get(get_type<MarginalDistribution>) const { return this->marginal(); }

auto get(get_type<std::vector<MarginalDistribution>>) const {
auto get(get_type<std::map<std::string, MarginalDistribution>>) const {
return this->marginals();
}

auto get(get_type<JointDistribution>) const { return this->joint(); }

auto get(get_type<std::vector<JointDistribution>>) const {
auto get(get_type<std::map<std::string, JointDistribution>>) const {
return this->joints();
}

Expand Down Expand Up @@ -281,7 +281,7 @@ template <typename ModelType> class CrossValidation {
const auto folds = folds_from_fold_indexer(dataset, indexer);
const auto prediction = predict(dataset, indexer);
const auto predictions =
prediction.template get<std::vector<RequiredPredictType>>();
prediction.template get<std::map<std::string, RequiredPredictType>>();
return cross_validated_scores(metric, folds, predictions);
}

Expand Down
57 changes: 30 additions & 27 deletions albatross/evaluation/cross_validation_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@ get_predictions(const ModelType &model,
const std::vector<RegressionFold<FeatureType>> &folds) {

using FitType = typename fit_type<ModelType, FeatureType>::type;
std::vector<Prediction<ModelType, FeatureType, FitType>> predictions;
std::map<std::string, Prediction<ModelType, FeatureType, FitType>>
predictions;
for (const auto &fold : folds) {
predictions.emplace_back(
predictions.emplace(
fold.name,
model.fit(fold.train_dataset).predict(fold.test_dataset.features));
}

Expand All @@ -32,47 +34,47 @@ get_predictions(const ModelType &model,

template <typename PredictType, typename Prediction>
inline auto get_predict_types(
const std::vector<Prediction> &prediction_classes,
const std::map<std::string, Prediction> &prediction_classes,
PredictTypeIdentity<PredictType> = PredictTypeIdentity<PredictType>()) {
std::vector<PredictType> predictions;
std::map<std::string, PredictType> predictions;
for (const auto &pred : prediction_classes) {
predictions.emplace_back(pred.template get<PredictType>());
predictions.emplace(pred.first, pred.second.template get<PredictType>());
}
return predictions;
}

template <typename PredictionType>
inline std::vector<Eigen::VectorXd>
get_means(const std::vector<PredictionType> &predictions) {
inline std::map<std::string, Eigen::VectorXd>
get_means(const std::map<std::string, PredictionType> &predictions) {
return get_predict_types<Eigen::VectorXd>(predictions);
}

template <typename PredictionType>
inline std::vector<MarginalDistribution>
get_marginals(const std::vector<PredictionType> &predictions) {
inline std::map<std::string, MarginalDistribution>
get_marginals(const std::map<std::string, PredictionType> &predictions) {
return get_predict_types<MarginalDistribution>(predictions);
}

template <typename PredictionType>
inline std::vector<JointDistribution>
get_joints(const std::vector<PredictionType> &predictions) {
inline std::map<std::string, JointDistribution>
get_joints(const std::map<std::string, PredictionType> &predictions) {
return get_predict_types<JointDistribution>(predictions);
}

template <typename FeatureType>
inline Eigen::VectorXd concatenate_mean_predictions(
const std::vector<RegressionFold<FeatureType>> &folds,
const std::vector<Eigen::VectorXd> &means) {
const std::map<std::string, Eigen::VectorXd> &means) {
assert(folds.size() == means.size());

Eigen::Index n = static_cast<Eigen::Index>(dataset_size_from_folds(folds));
Eigen::VectorXd pred(n);
Eigen::Index number_filled = 0;
// Put all the predicted means back in order.
for (std::size_t i = 0; i < folds.size(); ++i) {
assert(means[i].size() ==
assert(means.at(folds[i].name).size() ==
static_cast<Eigen::Index>(folds[i].test_dataset.size()));
set_subset(means[i], folds[i].test_indices, &pred);
set_subset(means.at(folds[i].name), folds[i].test_indices, &pred);
number_filled += static_cast<Eigen::Index>(folds[i].test_indices.size());
}
assert(number_filled == n);
Expand All @@ -82,14 +84,14 @@ inline Eigen::VectorXd concatenate_mean_predictions(
template <typename FeatureType, typename PredType>
inline Eigen::VectorXd concatenate_mean_predictions(
const std::vector<RegressionFold<FeatureType>> &folds,
const std::vector<PredType> &predictions) {
const std::map<std::string, PredType> &predictions) {
return concatenate_mean_predictions(folds, get_means(predictions));
}

template <typename FeatureType>
inline MarginalDistribution concatenate_marginal_predictions(
const std::vector<RegressionFold<FeatureType>> &folds,
const std::vector<MarginalDistribution> &marginals) {
const std::map<std::string, MarginalDistribution> &marginals) {
assert(folds.size() == marginals.size());

Eigen::Index n = static_cast<Eigen::Index>(dataset_size_from_folds(folds));
Expand All @@ -99,10 +101,10 @@ inline MarginalDistribution concatenate_marginal_predictions(
Eigen::Index number_filled = 0;
// Put all the predicted means back in order.
for (std::size_t i = 0; i < folds.size(); ++i) {
assert(marginals[i].size() == folds[i].test_dataset.size());
set_subset(marginals[i].mean, folds[i].test_indices, &mean);
set_subset(marginals[i].covariance.diagonal(), folds[i].test_indices,
&variance);
assert(marginals.at(folds[i].name).size() == folds[i].test_dataset.size());
set_subset(marginals.at(folds[i].name).mean, folds[i].test_indices, &mean);
set_subset(marginals.at(folds[i].name).covariance.diagonal(),
folds[i].test_indices, &variance);
number_filled += static_cast<Eigen::Index>(folds[i].test_indices.size());
}
assert(number_filled == n);
Expand All @@ -112,23 +114,24 @@ inline MarginalDistribution concatenate_marginal_predictions(
template <typename FeatureType, typename PredType>
inline MarginalDistribution concatenate_marginal_predictions(
const std::vector<RegressionFold<FeatureType>> &folds,
const std::vector<PredType> &predictions) {
const std::map<std::string, PredType> &predictions) {
return concatenate_marginal_predictions(folds, get_marginals(predictions));
}

template <typename EvaluationMetricType, typename FeatureType,
typename PredictionType>
Eigen::VectorXd
cross_validated_scores(const EvaluationMetricType &metric,
const std::vector<RegressionFold<FeatureType>> &folds,
const std::vector<PredictionType> &predictions) {
Eigen::VectorXd cross_validated_scores(
const EvaluationMetricType &metric,
const std::vector<RegressionFold<FeatureType>> &folds,
const std::map<std::string, PredictionType> &predictions) {
assert(folds.size() == predictions.size());
Eigen::Index n = static_cast<Eigen::Index>(predictions.size());
Eigen::VectorXd output(n);
for (Eigen::Index i = 0; i < n; ++i) {
assert(static_cast<std::size_t>(folds[i].test_dataset.size()) ==
static_cast<std::size_t>(predictions[i].size()));
output[i] = metric(predictions[i], folds[i].test_dataset.targets);
static_cast<std::size_t>(predictions.at(folds[i].name).size()));
output[i] =
metric(predictions.at(folds[i].name), folds[i].test_dataset.targets);
}
return output;
}
Expand Down
2 changes: 1 addition & 1 deletion albatross/evaluation/traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class has_valid_cross_validated_predictions {
std::declval<const FoldIndexer &>(),
std::declval<PredictTypeIdentity<PredictType>>()))>
static typename std::enable_if<
std::is_same<std::vector<PredictType>, ReturnType>::value,
std::is_same<std::map<std::string, PredictType>, ReturnType>::value,
std::true_type>::type
test(C *);
template <typename> static std::false_type test(...);
Expand Down
24 changes: 13 additions & 11 deletions albatross/models/gp.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ class GaussianProcessBase
delete; // Covariance Function isn't defined for FeatureType.

template <typename FeatureType>
std::vector<JointDistribution>
std::map<std::string, JointDistribution>
cross_validated_predictions(const RegressionDataset<FeatureType> &dataset,
const FoldIndexer &fold_indexer,
PredictTypeIdentity<JointDistribution>) const {
Expand All @@ -283,20 +283,21 @@ class GaussianProcessBase
const auto gp_fit = fit_model.get_fit();

const std::vector<FoldIndices> indices = map_values(fold_indexer);
const std::vector<std::string> fold_names = map_keys(fold_indexer);
const auto inverse_blocks = gp_fit.train_ldlt.inverse_blocks(indices);

std::vector<JointDistribution> output;
std::map<std::string, JointDistribution> output;
for (std::size_t i = 0; i < inverse_blocks.size(); i++) {
Eigen::VectorXd yi = subset(dataset.targets.mean, indices[i]);
Eigen::VectorXd vi = subset(gp_fit.information, indices[i]);
const auto A_inv = inverse_blocks[i].inverse();
output.push_back(JointDistribution(yi - A_inv * vi, A_inv));
output[fold_names[i]] = JointDistribution(yi - A_inv * vi, A_inv);
}
return output;
}

template <typename FeatureType>
std::vector<MarginalDistribution>
std::map<std::string, MarginalDistribution>
cross_validated_predictions(const RegressionDataset<FeatureType> &dataset,
const FoldIndexer &fold_indexer,
PredictTypeIdentity<MarginalDistribution>) const {
Expand All @@ -305,36 +306,37 @@ class GaussianProcessBase
const auto gp_fit = fit_model.get_fit();

const std::vector<FoldIndices> indices = map_values(fold_indexer);
const std::vector<std::string> fold_names = map_keys(fold_indexer);
const auto inverse_blocks = gp_fit.train_ldlt.inverse_blocks(indices);

std::vector<MarginalDistribution> output;
std::map<std::string, MarginalDistribution> output;
for (std::size_t i = 0; i < inverse_blocks.size(); i++) {
Eigen::VectorXd yi = subset(dataset.targets.mean, indices[i]);
Eigen::VectorXd vi = subset(gp_fit.information, indices[i]);
const auto A_ldlt = Eigen::SerializableLDLT(inverse_blocks[i].ldlt());

output.push_back(MarginalDistribution(
yi - A_ldlt.solve(vi), A_ldlt.inverse_diagonal().asDiagonal()));
output[fold_names[i]] = MarginalDistribution(
yi - A_ldlt.solve(vi), A_ldlt.inverse_diagonal().asDiagonal());
}
return output;
}

template <typename FeatureType>
std::vector<Eigen::VectorXd>
std::map<std::string, Eigen::VectorXd>
cross_validated_predictions(const RegressionDataset<FeatureType> &dataset,
const FoldIndexer &fold_indexer,
PredictTypeIdentity<Eigen::VectorXd>) const {
const auto fit_model = impl().fit(dataset);
const auto gp_fit = fit_model.get_fit();
const std::vector<FoldIndices> indices = map_values(fold_indexer);
const std::vector<std::string> fold_names = map_keys(fold_indexer);
const auto inverse_blocks = gp_fit.train_ldlt.inverse_blocks(indices);

std::vector<Eigen::VectorXd> output;
std::map<std::string, Eigen::VectorXd> output;
for (std::size_t i = 0; i < inverse_blocks.size(); i++) {
Eigen::VectorXd yi = subset(dataset.targets.mean, indices[i]);
Eigen::VectorXd vi = subset(gp_fit.information, indices[i]);
const auto A_ldlt = Eigen::SerializableLDLT(inverse_blocks[i].ldlt());
output.push_back(yi - A_ldlt.solve(vi));
output[fold_names[i]] = yi - A_ldlt.solve(vi);
}
return output;
}
Expand Down
2 changes: 1 addition & 1 deletion tests/test_cross_validation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ TYPED_TEST_P(RegressionModelTester, test_loo_get_predictions) {
model.cross_validate().get_predictions(dataset, leave_one_out);

for (const auto &pred : predictions) {
expect_predict_variants_consistent(pred);
expect_predict_variants_consistent(pred.second);
}
}

Expand Down
Loading

0 comments on commit 313c9bf

Please sign in to comment.