From ec39aaf753b1d7c43b8de42b6632ca317b2844e5 Mon Sep 17 00:00:00 2001 From: jgabry Date: Mon, 22 Jan 2024 10:37:26 -0700 Subject: [PATCH] register internal S3 methods --- DESCRIPTION | 2 +- NAMESPACE | 9 ++++++++ R/E_loo.R | 4 ++-- R/effective_sample_sizes.R | 4 ++++ R/elpd.R | 2 +- R/loo_subsample.R | 7 +++--- man/loo-package.Rd | 37 ++++++++++++++++++++++++++++++-- man/loo_approximate_posterior.Rd | 6 ++++-- man/loo_compare.Rd | 17 +++++++-------- man/loo_subsample.Rd | 24 ++++++++++++++------- man/update.psis_loo_ss.Rd | 2 +- 11 files changed, 85 insertions(+), 29 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6229b79f..9bbe68bc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -53,5 +53,5 @@ Suggests: VignetteBuilder: knitr Encoding: UTF-8 SystemRequirements: pandoc (>= 1.12.3), pandoc-citeproc -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.0 Roxygen: list(markdown = TRUE) diff --git a/NAMESPACE b/NAMESPACE index 932dbc4d..ce14d72d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,9 +5,15 @@ S3method("[",loo) S3method("[[",loo) S3method(E_loo,default) S3method(E_loo,matrix) +S3method(E_loo_khat,default) +S3method(E_loo_khat,matrix) S3method(ap_psis,array) S3method(ap_psis,default) S3method(ap_psis,matrix) +S3method(as.psis_loo,psis_loo) +S3method(as.psis_loo,psis_loo_ss) +S3method(as.psis_loo_ss,psis_loo) +S3method(as.psis_loo_ss,psis_loo_ss) S3method(crps,matrix) S3method(crps,numeric) S3method(dim,importance_sampling) @@ -50,6 +56,7 @@ S3method(print,psis_loo) S3method(print,psis_loo_ap) S3method(print,stacking_weights) S3method(print,waic) +S3method(print_dims,elpd_generic) S3method(print_dims,importance_sampling) S3method(print_dims,importance_sampling_loo) S3method(print_dims,kfold) @@ -59,6 +66,8 @@ S3method(print_dims,waic) S3method(psis,array) S3method(psis,default) S3method(psis,matrix) +S3method(psis_n_eff,default) +S3method(psis_n_eff,matrix) S3method(relative_eff,"function") S3method(relative_eff,array) S3method(relative_eff,default) diff --git a/R/E_loo.R b/R/E_loo.R index a12fe839..c4716f71 100644 --- a/R/E_loo.R +++ b/R/E_loo.R @@ -240,11 +240,11 @@ E_loo.matrix <- E_loo_khat <- function(x, psis_object, log_ratios, ...) { UseMethod("E_loo_khat") } - +#' @export E_loo_khat.default <- function(x, psis_object, log_ratios, ...) { .E_loo_khat_i(x, log_ratios, attr(psis_object, "tail_len")) } - +#' @export E_loo_khat.matrix <- function(x, psis_object, log_ratios, ...) { tail_lengths <- attr(psis_object, "tail_len") sapply(seq_len(ncol(x)), function(i) { diff --git a/R/effective_sample_sizes.R b/R/effective_sample_sizes.R index 8dd78da7..b4ae47cf 100644 --- a/R/effective_sample_sizes.R +++ b/R/effective_sample_sizes.R @@ -170,6 +170,8 @@ relative_eff.importance_sampling <- function(x, ...) { psis_n_eff <- function(w, ...) { UseMethod("psis_n_eff") } + +#' @export psis_n_eff.default <- function(w, r_eff = NULL, ...) { ss <- sum(w^2) if (is.null(r_eff)) { @@ -179,6 +181,8 @@ psis_n_eff.default <- function(w, r_eff = NULL, ...) { stopifnot(length(r_eff) == 1) 1 / ss * r_eff } + +#' @export psis_n_eff.matrix <- function(w, r_eff = NULL, ...) { ss <- colSums(w^2) if (is.null(r_eff)) { diff --git a/R/elpd.R b/R/elpd.R index 4927b201..20724e71 100644 --- a/R/elpd.R +++ b/R/elpd.R @@ -65,7 +65,7 @@ elpd_object <- function(pointwise, dims) { class = c("elpd_generic", "loo") ) } - +#' @export print_dims.elpd_generic <- function(x, ...) { cat( "Computed from", diff --git a/R/loo_subsample.R b/R/loo_subsample.R index 8f7f83fd..fce6ab9d 100644 --- a/R/loo_subsample.R +++ b/R/loo_subsample.R @@ -884,11 +884,11 @@ psis_loo_ss_object <- function(x, as.psis_loo_ss <- function(x) { UseMethod("as.psis_loo_ss") } - +#' @export as.psis_loo_ss.psis_loo_ss <- function(x) { x } - +#' @export as.psis_loo_ss.psis_loo <- function(x) { class(x) <- c("psis_loo_ss", class(x)) x$estimates <- cbind(x$estimates, matrix(0, nrow = nrow(x$estimates))) @@ -913,10 +913,11 @@ as.psis_loo <- function(x) { UseMethod("as.psis_loo") } +#' @export as.psis_loo.psis_loo <- function(x) { x } - +#' @export as.psis_loo.psis_loo_ss <- function(x) { if (x$loo_subsampling$data_dim[1] == nrow(x$pointwise)) { x$estimates <- x$estimates[, 1:2] diff --git a/man/loo-package.Rd b/man/loo-package.Rd index d17a5607..6668ce82 100644 --- a/man/loo-package.Rd +++ b/man/loo-package.Rd @@ -58,11 +58,13 @@ stacking to average Bayesian predictive distributions. Magnusson, M., Riis Andersen, M., Jonasson, J. and Vehtari, A. (2019). Leave-One-Out Cross-Validation for Large Data. -In \emph{International Conference on Machine Learning} +In \emph{Thirty-sixth International Conference on Machine Learning}, +PMLR 97:4244-4253. Magnusson, M., Riis Andersen, M., Jonasson, J. and Vehtari, A. (2020). Leave-One-Out Cross-Validation for Model Comparison in Large Data. -In \emph{International Conference on Artificial Intelligence and Statistics (AISTATS)} +In \emph{Proceedings of the 23rd International Conference on Artificial +Intelligence and Statistics (AISTATS)}, PMLR 108:341-351. Epifani, I., MacEachern, S. N., and Peruggia, M. (2008). Case-deletion importance sampling estimators: Central limit theorems and related results. @@ -105,3 +107,34 @@ Zhang, J., and Stephens, M. A. (2009). A new and efficient estimation method for the generalized Pareto distribution. \emph{Technometrics} \strong{51}, 316-325. } +\seealso{ +Useful links: +\itemize{ + \item \url{https://mc-stan.org/loo/} + \item \url{https://discourse.mc-stan.org} + \item Report bugs at \url{https://github.com/stan-dev/loo/issues} +} + +} +\author{ +\strong{Maintainer}: Jonah Gabry \email{jsg2201@columbia.edu} + +Authors: +\itemize{ + \item Aki Vehtari \email{Aki.Vehtari@aalto.fi} + \item Mans Magnusson + \item Yuling Yao + \item Paul-Christian Bürkner + \item Topi Paananen + \item Andrew Gelman +} + +Other contributors: +\itemize{ + \item Ben Goodrich [contributor] + \item Juho Piironen [contributor] + \item Bruno Nicenboim [contributor] + \item Leevi Lindgren [contributor] +} + +} diff --git a/man/loo_approximate_posterior.Rd b/man/loo_approximate_posterior.Rd index 8d2662b1..a17d59a2 100644 --- a/man/loo_approximate_posterior.Rd +++ b/man/loo_approximate_posterior.Rd @@ -140,11 +140,13 @@ are recycled for each observation. \references{ Magnusson, M., Riis Andersen, M., Jonasson, J. and Vehtari, A. (2019). Leave-One-Out Cross-Validation for Large Data. -In \emph{International Conference on Machine Learning} +In \emph{Thirty-sixth International Conference on Machine Learning}, +PMLR 97:4244-4253. Magnusson, M., Riis Andersen, M., Jonasson, J. and Vehtari, A. (2020). Leave-One-Out Cross-Validation for Model Comparison in Large Data. -In \emph{International Conference on Artificial Intelligence and Statistics (AISTATS)} +In \emph{Proceedings of the 23rd International Conference on Artificial +Intelligence and Statistics (AISTATS)}, PMLR 108:341-351. } \seealso{ \code{\link[=loo]{loo()}}, \code{\link[=psis]{psis()}}, \code{\link[=loo_compare]{loo_compare()}} diff --git a/man/loo_compare.Rd b/man/loo_compare.Rd index 85f876b3..580acdbf 100644 --- a/man/loo_compare.Rd +++ b/man/loo_compare.Rd @@ -67,15 +67,14 @@ standard approach of comparing differences of deviances to a Chi-squared distribution, a practice derived for Gaussian linear models or asymptotically, and which only applies to nested models in any case. -If more than \eqn{11} models are compared, then the median model by elpd is -taken as the baseline model, and we recompute (internally) the model -differences to this baseline. We then estimate whether the difference in -predictive performances is potentially due to chance as described by -McLatchie and Vehtari (2023). This will flag a warning if it is deemed that -there is a risk of over-fitting due to the selection process, and users -are recommended to avoid model selection based on LOO-CV, and -instead to favour of model averaging/stacking or projection predictive -inference. +If more than \eqn{11} models are compared, we internally recompute the model +differences using the median model by ELPD as the baseline model. We then +estimate whether the differences in predictive performance are potentially +due to chance as described by McLatchie and Vehtari (2023). This will flag +a warning if it is deemed that there is a risk of over-fitting due to the +selection process. In that case users are recommended to avoid model +selection based on LOO-CV, and instead to favor model averaging/stacking or +projection predictive inference. } \examples{ # very artificial example, just for demonstration! diff --git a/man/loo_subsample.Rd b/man/loo_subsample.Rd index 4a2ce59c..87135469 100644 --- a/man/loo_subsample.Rd +++ b/man/loo_subsample.Rd @@ -3,7 +3,9 @@ \name{loo_subsample} \alias{loo_subsample} \alias{loo_subsample.function} -\title{Efficient approximate leave-one-out cross-validation (LOO) using subsampling} +\title{Efficient approximate leave-one-out cross-validation (LOO) using subsampling, +so that less costly and more approximate computation is made for all LOO-fold, +and more costly and accurate computations are made only for m