Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Return Maps from Cross Validation #98

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions albatross/evaluation/cross_validation.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class Prediction<CrossValidation<ModelType>, FeatureType, FoldIndexer> {
typename DummyType = ModelType,
typename std::enable_if<has_valid_cv_mean<DummyType, FeatureType>::value,
int>::type = 0>
std::vector<Eigen::VectorXd> means() const {
std::map<std::string, Eigen::VectorXd> means() const {
return model_.cross_validated_predictions(
dataset_, indexer_, PredictTypeIdentity<Eigen::VectorXd>());
}
Expand All @@ -48,7 +48,7 @@ class Prediction<CrossValidation<ModelType>, FeatureType, FoldIndexer> {
typename fit_type<DummyType, FeatureType>::type>>::value &&
!has_valid_cv_mean<DummyType, FeatureType>::value,
int>::type = 0>
std::vector<Eigen::VectorXd> means() const {
std::map<std::string, Eigen::VectorXd> means() const {
const auto folds = folds_from_fold_indexer(dataset_, indexer_);
const auto predictions = albatross::get_predictions(model_, folds);
return get_means(predictions);
Expand All @@ -62,7 +62,7 @@ class Prediction<CrossValidation<ModelType>, FeatureType, FoldIndexer> {
typename fit_type<DummyType, FeatureType>::type>>::value &&
!has_valid_cv_mean<DummyType, FeatureType>::value,
int>::type = 0>
std::vector<Eigen::VectorXd> means() const = delete;
std::map<std::string, Eigen::VectorXd> means() const = delete;

template <typename DummyType = ModelType,
typename std::enable_if<
Expand Down Expand Up @@ -94,7 +94,7 @@ class Prediction<CrossValidation<ModelType>, FeatureType, FoldIndexer> {
typename DummyType = ModelType,
typename std::enable_if<
has_valid_cv_marginal<DummyType, FeatureType>::value, int>::type = 0>
std::vector<MarginalDistribution> marginals() const {
std::map<std::string, MarginalDistribution> marginals() const {
return model_.cross_validated_predictions(
dataset_, indexer_, PredictTypeIdentity<MarginalDistribution>());
}
Expand All @@ -107,7 +107,7 @@ class Prediction<CrossValidation<ModelType>, FeatureType, FoldIndexer> {
typename fit_type<DummyType, FeatureType>::type>>::value &&
!has_valid_cv_marginal<DummyType, FeatureType>::value,
int>::type = 0>
std::vector<MarginalDistribution> marginals() const {
std::map<std::string, MarginalDistribution> marginals() const {
const auto folds = folds_from_fold_indexer(dataset_, indexer_);
const auto predictions = albatross::get_predictions(model_, folds);
return get_marginals(predictions);
Expand Down Expand Up @@ -153,7 +153,7 @@ class Prediction<CrossValidation<ModelType>, FeatureType, FoldIndexer> {
typename DummyType = ModelType,
typename std::enable_if<has_valid_cv_joint<DummyType, FeatureType>::value,
int>::type = 0>
std::vector<JointDistribution> joints() const {
std::map<std::string, JointDistribution> joints() const {
return model_.cross_validated_predictions(
dataset_, indexer_, PredictTypeIdentity<JointDistribution>());
}
Expand All @@ -166,7 +166,7 @@ class Prediction<CrossValidation<ModelType>, FeatureType, FoldIndexer> {
typename fit_type<DummyType, FeatureType>::type>>::value &&
!has_valid_cv_joint<DummyType, FeatureType>::value,
int>::type = 0>
std::vector<JointDistribution> joints() const {
std::map<std::string, JointDistribution> joints() const {
const auto folds = folds_from_fold_indexer(dataset_, indexer_);
const auto predictions = albatross::get_predictions(model_, folds);
return get_joints(predictions);
Expand All @@ -180,7 +180,7 @@ class Prediction<CrossValidation<ModelType>, FeatureType, FoldIndexer> {
typename fit_type<DummyType, FeatureType>::type>>::value &&
!has_valid_cv_joint<DummyType, FeatureType>::value,
int>::type = 0>
std::vector<JointDistribution> joints() const = delete;
std::map<std::string, JointDistribution> joints() const = delete;

template <typename DummyType = ModelType>
JointDistribution joint() const =
Expand All @@ -197,19 +197,19 @@ class Prediction<CrossValidation<ModelType>, FeatureType, FoldIndexer> {

// Accessors selected by the type of the unnamed get_type<T> argument. Each
// overload forwards to the member accessor for T: mean()/means(),
// marginal()/marginals(), joint()/joints().
//
// NOTE(review): this is a diff rendering, so the signature of each plural
// overload appears twice: first the pre-change form taking
// get_type<std::vector<T>>, then the post-change form taking
// get_type<std::map<std::string, T>>. The body is shared by both.

// Single (whole-dataset) mean.
auto get(get_type<Eigen::VectorXd>) const { return this->mean(); }

// Collection of means, as returned by means().
auto get(get_type<std::vector<Eigen::VectorXd>>) const {
auto get(get_type<std::map<std::string, Eigen::VectorXd>>) const {
return this->means();
}

// Single marginal distribution.
auto get(get_type<MarginalDistribution>) const { return this->marginal(); }

// Collection of marginal distributions, as returned by marginals().
auto get(get_type<std::vector<MarginalDistribution>>) const {
auto get(get_type<std::map<std::string, MarginalDistribution>>) const {
return this->marginals();
}

// Single joint distribution.
auto get(get_type<JointDistribution>) const { return this->joint(); }

// Collection of joint distributions, as returned by joints().
auto get(get_type<std::vector<JointDistribution>>) const {
auto get(get_type<std::map<std::string, JointDistribution>>) const {
return this->joints();
}

Expand Down Expand Up @@ -281,7 +281,7 @@ template <typename ModelType> class CrossValidation {
const auto folds = folds_from_fold_indexer(dataset, indexer);
const auto prediction = predict(dataset, indexer);
const auto predictions =
prediction.template get<std::vector<RequiredPredictType>>();
prediction.template get<std::map<std::string, RequiredPredictType>>();
return cross_validated_scores(metric, folds, predictions);
}

Expand Down
57 changes: 30 additions & 27 deletions albatross/evaluation/cross_validation_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@ get_predictions(const ModelType &model,
const std::vector<RegressionFold<FeatureType>> &folds) {

using FitType = typename fit_type<ModelType, FeatureType>::type;
std::vector<Prediction<ModelType, FeatureType, FitType>> predictions;
std::map<std::string, Prediction<ModelType, FeatureType, FitType>>
predictions;
for (const auto &fold : folds) {
predictions.emplace_back(
predictions.emplace(
fold.name,
model.fit(fold.train_dataset).predict(fold.test_dataset.features));
}

Expand All @@ -32,47 +34,47 @@ get_predictions(const ModelType &model,

// Converts a collection of prediction objects into a collection of
// PredictType by calling `get<PredictType>()` on every element.
//
// NOTE(review): this is a diff rendering, so each changed statement appears
// twice: the std::vector line is the pre-change version and the
// std::map<std::string, ...> line is the post-change version. In the
// post-change version every result is stored under the same key as the
// input entry it came from (pred.first).
//
// prediction_classes: the prediction objects to convert.
// (unnamed)           PredictTypeIdentity<PredictType>, defaulted and never
//                     read in the body; presumably only there so PredictType
//                     can be selected by passing a tag -- confirm at callers.
// Returns the converted collection, by value.
template <typename PredictType, typename Prediction>
inline auto get_predict_types(
const std::vector<Prediction> &prediction_classes,
const std::map<std::string, Prediction> &prediction_classes,
PredictTypeIdentity<PredictType> = PredictTypeIdentity<PredictType>()) {
std::vector<PredictType> predictions;
std::map<std::string, PredictType> predictions;
for (const auto &pred : prediction_classes) {
predictions.emplace_back(pred.template get<PredictType>());
predictions.emplace(pred.first, pred.second.template get<PredictType>());
}
return predictions;
}

// Extracts the mean (Eigen::VectorXd) from every prediction by delegating to
// get_predict_types<Eigen::VectorXd>.
//
// NOTE(review): diff rendering -- the std::vector signature is the
// pre-change version, the std::map<std::string, ...> signature is the
// post-change version; the body line is common to both.
template <typename PredictionType>
inline std::vector<Eigen::VectorXd>
get_means(const std::vector<PredictionType> &predictions) {
inline std::map<std::string, Eigen::VectorXd>
get_means(const std::map<std::string, PredictionType> &predictions) {
return get_predict_types<Eigen::VectorXd>(predictions);
}

// Extracts the MarginalDistribution from every prediction by delegating to
// get_predict_types<MarginalDistribution>.
//
// NOTE(review): diff rendering -- the std::vector signature is the
// pre-change version, the std::map<std::string, ...> signature is the
// post-change version; the body line is common to both.
template <typename PredictionType>
inline std::vector<MarginalDistribution>
get_marginals(const std::vector<PredictionType> &predictions) {
inline std::map<std::string, MarginalDistribution>
get_marginals(const std::map<std::string, PredictionType> &predictions) {
return get_predict_types<MarginalDistribution>(predictions);
}

// Extracts the JointDistribution from every prediction by delegating to
// get_predict_types<JointDistribution>.
//
// NOTE(review): diff rendering -- the std::vector signature is the
// pre-change version, the std::map<std::string, ...> signature is the
// post-change version; the body line is common to both.
template <typename PredictionType>
inline std::vector<JointDistribution>
get_joints(const std::vector<PredictionType> &predictions) {
inline std::map<std::string, JointDistribution>
get_joints(const std::map<std::string, PredictionType> &predictions) {
return get_predict_types<JointDistribution>(predictions);
}

template <typename FeatureType>
inline Eigen::VectorXd concatenate_mean_predictions(
const std::vector<RegressionFold<FeatureType>> &folds,
const std::vector<Eigen::VectorXd> &means) {
const std::map<std::string, Eigen::VectorXd> &means) {
assert(folds.size() == means.size());

Eigen::Index n = static_cast<Eigen::Index>(dataset_size_from_folds(folds));
Eigen::VectorXd pred(n);
Eigen::Index number_filled = 0;
// Put all the predicted means back in order.
for (std::size_t i = 0; i < folds.size(); ++i) {
assert(means[i].size() ==
assert(means.at(folds[i].name).size() ==
static_cast<Eigen::Index>(folds[i].test_dataset.size()));
set_subset(means[i], folds[i].test_indices, &pred);
set_subset(means.at(folds[i].name), folds[i].test_indices, &pred);
number_filled += static_cast<Eigen::Index>(folds[i].test_indices.size());
}
assert(number_filled == n);
Expand All @@ -82,14 +84,14 @@ inline Eigen::VectorXd concatenate_mean_predictions(
// Convenience overload: pulls the means out of `predictions` with get_means
// and forwards them, together with `folds`, to the overload of
// concatenate_mean_predictions that takes the means directly.
//
// NOTE(review): diff rendering -- the `predictions` parameter appears twice:
// std::vector<PredType> is the pre-change line and
// std::map<std::string, PredType> is the post-change line.
template <typename FeatureType, typename PredType>
inline Eigen::VectorXd concatenate_mean_predictions(
const std::vector<RegressionFold<FeatureType>> &folds,
const std::vector<PredType> &predictions) {
const std::map<std::string, PredType> &predictions) {
return concatenate_mean_predictions(folds, get_means(predictions));
}

template <typename FeatureType>
inline MarginalDistribution concatenate_marginal_predictions(
const std::vector<RegressionFold<FeatureType>> &folds,
const std::vector<MarginalDistribution> &marginals) {
const std::map<std::string, MarginalDistribution> &marginals) {
assert(folds.size() == marginals.size());

Eigen::Index n = static_cast<Eigen::Index>(dataset_size_from_folds(folds));
Expand All @@ -99,10 +101,10 @@ inline MarginalDistribution concatenate_marginal_predictions(
Eigen::Index number_filled = 0;
// Put all the predicted means back in order.
for (std::size_t i = 0; i < folds.size(); ++i) {
assert(marginals[i].size() == folds[i].test_dataset.size());
set_subset(marginals[i].mean, folds[i].test_indices, &mean);
set_subset(marginals[i].covariance.diagonal(), folds[i].test_indices,
&variance);
assert(marginals.at(folds[i].name).size() == folds[i].test_dataset.size());
set_subset(marginals.at(folds[i].name).mean, folds[i].test_indices, &mean);
set_subset(marginals.at(folds[i].name).covariance.diagonal(),
folds[i].test_indices, &variance);
number_filled += static_cast<Eigen::Index>(folds[i].test_indices.size());
}
assert(number_filled == n);
Expand All @@ -112,23 +114,24 @@ inline MarginalDistribution concatenate_marginal_predictions(
// Convenience overload: pulls the marginals out of `predictions` with
// get_marginals and forwards them, together with `folds`, to the overload of
// concatenate_marginal_predictions that takes the marginals directly.
//
// NOTE(review): diff rendering -- the `predictions` parameter appears twice:
// std::vector<PredType> is the pre-change line and
// std::map<std::string, PredType> is the post-change line.
template <typename FeatureType, typename PredType>
inline MarginalDistribution concatenate_marginal_predictions(
const std::vector<RegressionFold<FeatureType>> &folds,
const std::map<std::string, PredType> &predictions) {
const std::vector<PredType> &predictions) {
return concatenate_marginal_predictions(folds, get_marginals(predictions));
}

// Evaluates `metric` once per fold and returns the scores in a vector with
// one entry per fold, in the order the folds appear in `folds`.
//
// NOTE(review): this hunk shows both sides of a diff. The pre-change lines
// take `predictions` as a std::vector and pair it with `folds` by position
// (predictions[i]); the post-change lines take a std::map and pair by name
// (predictions.at(folds[i].name)).
//
// metric:      callable invoked as metric(prediction, targets).
// folds:       the folds; folds[i].test_dataset.targets is what the i-th
//              prediction is scored against.
// predictions: one prediction per fold.
//
// The matching fold/prediction counts and the matching per-fold sizes are
// checked with assert only. In the map form, std::map::at throws
// std::out_of_range when no entry exists for a fold's name.
template <typename EvaluationMetricType, typename FeatureType,
typename PredictionType>
Eigen::VectorXd
cross_validated_scores(const EvaluationMetricType &metric,
const std::vector<RegressionFold<FeatureType>> &folds,
const std::vector<PredictionType> &predictions) {
Eigen::VectorXd cross_validated_scores(
const EvaluationMetricType &metric,
const std::vector<RegressionFold<FeatureType>> &folds,
const std::map<std::string, PredictionType> &predictions) {
assert(folds.size() == predictions.size());
Eigen::Index n = static_cast<Eigen::Index>(predictions.size());
Eigen::VectorXd output(n);
for (Eigen::Index i = 0; i < n; ++i) {
assert(static_cast<std::size_t>(folds[i].test_dataset.size()) ==
static_cast<std::size_t>(predictions[i].size()));
output[i] = metric(predictions[i], folds[i].test_dataset.targets);
static_cast<std::size_t>(predictions.at(folds[i].name).size()));
output[i] =
metric(predictions.at(folds[i].name), folds[i].test_dataset.targets);
}
return output;
}
Expand Down
2 changes: 1 addition & 1 deletion albatross/evaluation/traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class has_valid_cross_validated_predictions {
std::declval<const FoldIndexer &>(),
std::declval<PredictTypeIdentity<PredictType>>()))>
static typename std::enable_if<
std::is_same<std::vector<PredictType>, ReturnType>::value,
std::is_same<std::map<std::string, PredictType>, ReturnType>::value,
std::true_type>::type
test(C *);
template <typename> static std::false_type test(...);
Expand Down
24 changes: 13 additions & 11 deletions albatross/models/gp.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ class GaussianProcessBase
delete; // Covariance Function isn't defined for FeatureType.

template <typename FeatureType>
std::vector<JointDistribution>
std::map<std::string, JointDistribution>
cross_validated_predictions(const RegressionDataset<FeatureType> &dataset,
const FoldIndexer &fold_indexer,
PredictTypeIdentity<JointDistribution>) const {
Expand All @@ -283,20 +283,21 @@ class GaussianProcessBase
const auto gp_fit = fit_model.get_fit();

const std::vector<FoldIndices> indices = map_values(fold_indexer);
const std::vector<std::string> fold_names = map_keys(fold_indexer);
const auto inverse_blocks = gp_fit.train_ldlt.inverse_blocks(indices);

std::vector<JointDistribution> output;
std::map<std::string, JointDistribution> output;
for (std::size_t i = 0; i < inverse_blocks.size(); i++) {
Eigen::VectorXd yi = subset(dataset.targets.mean, indices[i]);
Eigen::VectorXd vi = subset(gp_fit.information, indices[i]);
const auto A_inv = inverse_blocks[i].inverse();
output.push_back(JointDistribution(yi - A_inv * vi, A_inv));
output[fold_names[i]] = JointDistribution(yi - A_inv * vi, A_inv);
}
return output;
}

template <typename FeatureType>
std::vector<MarginalDistribution>
std::map<std::string, MarginalDistribution>
cross_validated_predictions(const RegressionDataset<FeatureType> &dataset,
const FoldIndexer &fold_indexer,
PredictTypeIdentity<MarginalDistribution>) const {
Expand All @@ -305,36 +306,37 @@ class GaussianProcessBase
const auto gp_fit = fit_model.get_fit();

const std::vector<FoldIndices> indices = map_values(fold_indexer);
const std::vector<std::string> fold_names = map_keys(fold_indexer);
const auto inverse_blocks = gp_fit.train_ldlt.inverse_blocks(indices);

std::vector<MarginalDistribution> output;
std::map<std::string, MarginalDistribution> output;
for (std::size_t i = 0; i < inverse_blocks.size(); i++) {
Eigen::VectorXd yi = subset(dataset.targets.mean, indices[i]);
Eigen::VectorXd vi = subset(gp_fit.information, indices[i]);
const auto A_ldlt = Eigen::SerializableLDLT(inverse_blocks[i].ldlt());

output.push_back(MarginalDistribution(
yi - A_ldlt.solve(vi), A_ldlt.inverse_diagonal().asDiagonal()));
output[fold_names[i]] = MarginalDistribution(
yi - A_ldlt.solve(vi), A_ldlt.inverse_diagonal().asDiagonal());
}
return output;
}

template <typename FeatureType>
std::vector<Eigen::VectorXd>
std::map<std::string, Eigen::VectorXd>
cross_validated_predictions(const RegressionDataset<FeatureType> &dataset,
const FoldIndexer &fold_indexer,
PredictTypeIdentity<Eigen::VectorXd>) const {
const auto fit_model = impl().fit(dataset);
const auto gp_fit = fit_model.get_fit();
const std::vector<FoldIndices> indices = map_values(fold_indexer);
const std::vector<std::string> fold_names = map_keys(fold_indexer);
const auto inverse_blocks = gp_fit.train_ldlt.inverse_blocks(indices);

std::vector<Eigen::VectorXd> output;
std::map<std::string, Eigen::VectorXd> output;
for (std::size_t i = 0; i < inverse_blocks.size(); i++) {
Eigen::VectorXd yi = subset(dataset.targets.mean, indices[i]);
Eigen::VectorXd vi = subset(gp_fit.information, indices[i]);
const auto A_ldlt = Eigen::SerializableLDLT(inverse_blocks[i].ldlt());
output.push_back(yi - A_ldlt.solve(vi));
output[fold_names[i]] = yi - A_ldlt.solve(vi);
}
return output;
}
Expand Down
2 changes: 1 addition & 1 deletion tests/test_cross_validation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ TYPED_TEST_P(RegressionModelTester, test_loo_get_predictions) {
model.cross_validate().get_predictions(dataset, leave_one_out);

for (const auto &pred : predictions) {
expect_predict_variants_consistent(pred);
expect_predict_variants_consistent(pred.second);
}
}

Expand Down
Loading