Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unstructured (auto)correlation terms #1435

Merged
merged 24 commits into from
Dec 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
2e9234a
start working on unstructure autocorrelation matrix feature
paul-buerkner Dec 4, 2022
87d2436
continue working on UNSTR models
paul-buerkner Dec 6, 2022
f7bde90
enable posterior predictions with UNSTR models
paul-buerkner Dec 6, 2022
d006e84
rename Iobs to Jtime
paul-buerkner Dec 6, 2022
57435be
add doc for the new feature
paul-buerkner Dec 7, 2022
559d21f
add tests
paul-buerkner Dec 7, 2022
5d5fce7
fix failing tests
paul-buerkner Dec 7, 2022
d199552
Merge branch 'master' into unstr-corr
paul-buerkner Dec 7, 2022
7364922
Merge branch 'master' into unstr-corr
paul-buerkner Dec 7, 2022
7f8558d
minor changes
paul-buerkner Dec 8, 2022
4f77f77
Merge branch 'master' into unstr-corr
paul-buerkner Dec 8, 2022
5b69f17
add TODO comment
paul-buerkner Dec 8, 2022
9a9b579
fix some doc typos
paul-buerkner Dec 8, 2022
fb1394c
Merge branch 'master' into unstr-corr
paul-buerkner Dec 8, 2022
d8b718f
fix issue #1437
paul-buerkner Dec 12, 2022
750a7d8
start using 'add_diag'
paul-buerkner Dec 12, 2022
166dff8
Revert "start using 'add_diag'"
paul-buerkner Dec 12, 2022
8b8f76a
improve efficiency of normal_flex models
paul-buerkner Dec 13, 2022
8e18d8e
fix problem with non-linear models
paul-buerkner Dec 13, 2022
594216c
some efficiency improvements to Stan code
paul-buerkner Dec 14, 2022
678fa4b
fix and further optimize Stan code
paul-buerkner Dec 14, 2022
1b36f71
more work on Stan code
paul-buerkner Dec 14, 2022
d34651a
correctly add latent residuals to the linear predictor
paul-buerkner Dec 14, 2022
8e5da2e
update NEWS
paul-buerkner Dec 14, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ Package: brms
Encoding: UTF-8
Type: Package
Title: Bayesian Regression Models using 'Stan'
Version: 2.18.4
Date: 2022-11-24
Version: 2.18.5
Date: 2022-12-13
Authors@R:
c(person("Paul-Christian", "Bürkner", email = "paul.buerkner@gmail.com",
role = c("aut", "cre")),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,7 @@ export(t2)
export(theme_black)
export(theme_default)
export(threading)
export(unstr)
export(update_adterms)
export(validate_newdata)
export(validate_prior)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

### New Features

* Model unstructured autocorrelation matrices via the `unstr` term
thanks to the help of Sebastian Weber. (#1435)
* Improve user control over model recompilation via argument `recompile`
in post-processing methods that require a compiled Stan model.
* Extend control over the `point_estimate` feature in `prepare_predictions`
Expand Down
17 changes: 16 additions & 1 deletion R/brmsfit-helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,9 @@ get_cor_matrix <- function(cor, size = NULL, ndraws = NULL) {
# compute covariance matrices of autocor structures
# @param prep a brmsprep object
# @param obs observations for which to compute the covariance matrix
# @param Jtime vector indicating to which time points obs belong
# @param latent compute covariance matrix for latent residuals?
get_cov_matrix_ac <- function(prep, obs = NULL, latent = FALSE) {
get_cov_matrix_ac <- function(prep, obs = NULL, Jtime = NULL, latent = FALSE) {
if (is.null(obs)) {
obs <- seq_len(prep$nobs)
}
Expand All @@ -238,6 +239,9 @@ get_cov_matrix_ac <- function(prep, obs = NULL, latent = FALSE) {
} else if (has_ac_class(acef, "cosy")) {
cosy <- as.numeric(prep$ac$cosy)
cor <- get_cor_matrix_cosy(cosy, nobs)
} else if (has_ac_class(acef, "unstr")) {
cortime <- prep$ac$cortime
cor <- get_cor_matrix_unstr(cortime, Jtime)
} else if (has_ac_class(acef, "fcor")) {
cor <- get_cor_matrix_fcor(prep$ac$Mfcor, ndraws)
} else {
Expand Down Expand Up @@ -347,6 +351,17 @@ get_cor_matrix_cosy <- function(cosy, nobs) {
out
}

# compute unstructured time correlation matrices
# @param cortime time correlation draws
# @param Jtime indictor of rows/cols to consider in cortime
# @return a numeric 'ndraws' x 'nobs' x 'nobs' array
# where nobs = length(Jtime[Jtime > 0])
get_cor_matrix_unstr <- function(cortime, Jtime) {
stopifnot(length(Jtime) > 0L)
Jtime <- Jtime[Jtime > 0]
get_cor_matrix(cortime)[, Jtime, Jtime, drop = FALSE]
}

# prepare a fixed correlation matrix
# @param Mfcor correlation matrix to be prepared
# @param ndraws number of posterior draws
Expand Down
4 changes: 2 additions & 2 deletions R/brmsterms.R
Original file line number Diff line number Diff line change
Expand Up @@ -903,7 +903,7 @@ get_effect.btl <- function(x, target = "fe", ...) {

#' @export
get_effect.btnl <- function(x, target = "fe", ...) {
NULL
x[[target]]
}

all_terms <- function(x) {
Expand All @@ -924,7 +924,7 @@ regex_sp <- function(type = "all") {
funs <- c(
sm = "(s|(t2)|(te)|(ti))",
gp = "gp", cs = "cse?", mmc = "mmc",
ac = "((arma)|(ar)|(ma)|(cosy)|(sar)|(car)|(fcor))"
ac = "((arma)|(ar)|(ma)|(cosy)|(unstr)|(sar)|(car)|(fcor))"
)
funs[all_sp_types()] <- all_sp_types()
if ("sp" %in% type) {
Expand Down
30 changes: 27 additions & 3 deletions R/data-predictor.R
Original file line number Diff line number Diff line change
Expand Up @@ -631,14 +631,13 @@ data_gp <- function(bterms, data, internal = FALSE, basis = NULL, ...) {
}

# data for autocorrelation variables
# @param locations optional original locations for CAR models
data_ac <- function(bterms, data, data2, basis = NULL, ...) {
out <- list()
N <- nrow(data)
acef <- tidy_acef(bterms)
if (has_ac_subset(bterms, dim = "time")) {
gr <- subset2(acef, dim = "time")$gr
if (gr != "NA") {
gr <- get_ac_vars(acef, "gr", dim = "time")
if (isTRUE(nzchar(gr))) {
tgroup <- as.numeric(factor(data[[gr]]))
} else {
tgroup <- rep(1, N)
Expand All @@ -662,12 +661,37 @@ data_ac <- function(bterms, data, data2, basis = NULL, ...) {
}
if (use_ac_cov_time(acef)) {
# data for the 'covariance' versions of time-series structures
# TODO: change begin[i]:end[i] notation to slice[i]:(slice[i+1] - 1)
# see comment on PR #1435
out$N_tg <- length(unique(tgroup))
out$begin_tg <- as.array(ulapply(unique(tgroup), match, tgroup))
out$nobs_tg <- as.array(with(out,
c(if (N_tg > 1L) begin_tg[2:N_tg], N + 1) - begin_tg
))
out$end_tg <- with(out, begin_tg + nobs_tg - 1)
if (has_ac_class(acef, "unstr")) {
time <- get_ac_vars(bterms, "time", dim = "time")
time_data <- get(time, data)
new_times <- extract_levels(time_data)
if (length(basis)) {
times <- basis$times
# unstr estimates correlations only for given time points
invalid_times <- setdiff(new_times, times)
if (length(invalid_times)) {
stop2("Cannot handle new time points in UNSTR models.")
}
} else {
times <- new_times
}
out$n_unique_t <- length(times)
out$n_unique_cortime <- out$n_unique_t * (out$n_unique_t - 1) / 2
Jtime <- match(time_data, times)
out$Jtime_tg <- matrix(0L, out$N_tg, max(out$nobs_tg))
for (i in seq_len(out$N_tg)) {
out$Jtime_tg[i, seq_len(out$nobs_tg[i])] <-
Jtime[out$begin_tg[i]:out$end_tg[i]]
}
}
}
if (has_ac_class(acef, "sar")) {
acef_sar <- subset2(acef, class = "sar")
Expand Down
8 changes: 6 additions & 2 deletions R/exclude_pars.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,13 @@ exclude_pars.mvbrmsterms <- function(x, save_pars, ...) {
exclude_pars.brmsterms <- function(x, data, save_pars, ...) {
resp <- usc(combine_prefix(x))
data <- subset_data(data, x)
out <- "Lncor"
par_classes <- c("Lncor", "Cortime")
out <- paste0(par_classes, resp)
if (!save_pars$all) {
par_classes <- c("ordered_Intercept", "fixed_Intercept", "theta", "Llncor")
par_classes <- c(
"ordered_Intercept", "fixed_Intercept",
"theta", "Llncor", "Lcortime"
)
c(out) <- paste0(par_classes, resp)
}
for (dp in names(x$dpars)) {
Expand Down
62 changes: 53 additions & 9 deletions R/formula-ac.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
#'
#' Specify autocorrelation terms in \pkg{brms} models. Currently supported terms
#' are \code{\link{arma}}, \code{\link{ar}}, \code{\link{ma}},
#' \code{\link{cosy}}, \code{\link{sar}}, \code{\link{car}}, and
#' \code{\link{fcor}}. Terms can be directly specified within the formula, or
#' passed to the \code{autocor} argument of \code{\link{brmsformula}} in the
#' form of a one-sided formula. For deprecated ways of specifying
#' autocorrelation terms, see \code{\link{cor_brms}}.
#' \code{\link{cosy}}, \code{\link{unstr}}, \code{\link{sar}},
#' \code{\link{car}}, and \code{\link{fcor}}. Terms can be directly specified
#' within the formula, or passed to the \code{autocor} argument of
#' \code{\link{brmsformula}} in the form of a one-sided formula. For deprecated
#' ways of specifying autocorrelation terms, see \code{\link{cor_brms}}.
#'
#' @name autocor-terms
#'
Expand All @@ -17,8 +17,8 @@
#'
#' @seealso \code{\link{brmsformula}}, \code{\link{acformula}},
#' \code{\link{arma}}, \code{\link{ar}}, \code{\link{ma}},
#' \code{\link{cosy}}, \code{\link{sar}}, \code{\link{car}},
#' \code{\link{fcor}}
#' \code{\link{cosy}}, \code{\link{unstr}}, \code{\link{sar}},
#' \code{\link{car}}, \code{\link{fcor}}
#'
#' @examples
#' # specify autocor terms within the formula
Expand Down Expand Up @@ -188,7 +188,6 @@ ma <- function(time = NA, gr = NA, q = 1, cov = FALSE) {
#' }
#'
#' @export
#' @export
cosy <- function(time = NA, gr = NA) {
label <- deparse(match.call())
time <- deparse(substitute(time))
Expand All @@ -200,6 +199,39 @@ cosy <- function(time = NA, gr = NA) {
out
}

#' Set up UNSTR correlation structures
#'
#' Set up an unstructured (UNSTR) correlation term in \pkg{brms}. The function does
#' not evaluate its arguments -- it exists purely to help set up a model with
#' UNSTR terms.
#'
#' @inheritParams arma
#'
#' @return An object of class \code{'unstr_term'}, which is a list
#' of arguments to be interpreted by the formula
#' parsing functions of \pkg{brms}.
#'
#' @seealso \code{\link{autocor-terms}}
#'
#' @examples
#' \dontrun{
#' # add an unstructured correlation matrix for visits within the same patient
#' fit <- brm(count ~ Trt + unstr(visit, patient), data = epilepsy)
#' summary(fit)
#' }
#'
#' @export
unstr <- function(time, gr) {
label <- deparse(match.call())
time <- deparse(substitute(time))
time <- as_one_variable(time)
gr <- deparse(substitute(gr))
stopif_illegal_group(gr)
out <- nlist(time, gr, label)
class(out) <- c("unstr_term", "ac_term")
out
}

#' Spatial simultaneous autoregressive (SAR) structures
#'
#' Set up an spatial simultaneous autoregressive (SAR) term in \pkg{brms}. The
Expand Down Expand Up @@ -443,7 +475,7 @@ tidy_acef.brmsterms <- function(x, ...) {
}

#' @export
tidy_acef.btl <- function(x, data = NULL, ...) {
tidy_acef.btl <- function(x, ...) {
form <- x[["ac"]]
if (!is.formula(form)) {
return(empty_acef())
Expand Down Expand Up @@ -476,6 +508,13 @@ tidy_acef.btl <- function(x, data = NULL, ...) {
out$gr[i] <- ac$gr
out$cov[i] <- TRUE
}
if (is.unstr_term(ac)) {
out$class[i] <- "unstr"
out$dim[i] <- "time"
out$time[i] <- ac$time
out$gr[i] <- ac$gr
out$cov[i] <- TRUE
}
if (is.sar_term(ac)) {
out$class[i] <- "sar"
out$dim[i] <- "space"
Expand Down Expand Up @@ -638,6 +677,7 @@ validate_fcor_matrix <- function(M) {

# regex to extract all parameter names of autocorrelation structures
regex_autocor_pars <- function() {
# cortime is ignored here to allow custom renaming in summary.brmsfit
p <- c("ar", "ma", "sderr", "cosy", "lagsar", "errorsar",
"car", "sdcar", "rhocar")
p <- paste0("(", p, ")", collapse = "|")
Expand All @@ -656,6 +696,10 @@ is.cosy_term <- function(x) {
inherits(x, "cosy_term")
}

is.unstr_term <- function(x) {
inherits(x, "unstr_term")
}

is.sar_term <- function(x) {
inherits(x, "sar_term")
}
Expand Down
5 changes: 4 additions & 1 deletion R/formula-re.R
Original file line number Diff line number Diff line change
Expand Up @@ -796,7 +796,10 @@ get_levels <- function(...) {

extract_levels <- function(x) {
# do not check for NAs according to #1355
levels(factor(x))
if (!is.factor(x)) {
x <- factor(x)
}
levels(x)
}

# extract names of group-level effects
Expand Down
6 changes: 4 additions & 2 deletions R/log_lik.R
Original file line number Diff line number Diff line change
Expand Up @@ -245,9 +245,10 @@ log_lik_student_mv <- function(i, prep) {

log_lik_gaussian_time <- function(i, prep) {
obs <- with(prep$ac, begin_tg[i]:end_tg[i])
Jtime <- prep$ac$Jtime_tg[i, ]
Y <- as.numeric(prep$data$Y[obs])
mu <- as.matrix(get_dpar(prep, "mu", i = obs))
Sigma <- get_cov_matrix_ac(prep, obs)
Sigma <- get_cov_matrix_ac(prep, obs, Jtime = Jtime)
.log_lik <- function(s) {
C <- as.matrix(Sigma[s, , ])
Cinv <- solve(C)
Expand All @@ -264,10 +265,11 @@ log_lik_gaussian_time <- function(i, prep) {

log_lik_student_time <- function(i, prep) {
obs <- with(prep$ac, begin_tg[i]:end_tg[i])
Jtime <- prep$ac$Jtime_tg[i, ]
Y <- as.numeric(prep$data$Y[obs])
nu <- as.matrix(get_dpar(prep, "nu", i = obs))
mu <- as.matrix(get_dpar(prep, "mu", i = obs))
Sigma <- get_cov_matrix_ac(prep, obs)
Sigma <- get_cov_matrix_ac(prep, obs, Jtime = Jtime)
.log_lik <- function(s) {
df <- nu[s, ]
C <- as.matrix(Sigma[s, , ])
Expand Down
5 changes: 4 additions & 1 deletion R/make_standata.R
Original file line number Diff line number Diff line change
Expand Up @@ -283,13 +283,16 @@ standata_basis_ac <- function(x, data, ...) {
out <- list()
if (has_ac_class(x, "car")) {
gr <- get_ac_vars(x, "gr", class = "car")
stopifnot(length(gr) <= 1L)
if (isTRUE(nzchar(gr))) {
out$locations <- extract_levels(get(gr, data))
} else {
out$locations <- NA
}
}
if (has_ac_class(x, "unstr")) {
time <- get_ac_vars(x, "time", dim = "time")
out$times <- extract_levels(get(time, data))
}
out
}

Expand Down
6 changes: 4 additions & 2 deletions R/posterior_predict.R
Original file line number Diff line number Diff line change
Expand Up @@ -389,8 +389,9 @@ posterior_predict_student_mv <- function(i, prep, ...) {

posterior_predict_gaussian_time <- function(i, prep, ...) {
obs <- with(prep$ac, begin_tg[i]:end_tg[i])
Jtime <- prep$ac$Jtime_tg[i, ]
mu <- as.matrix(get_dpar(prep, "mu", i = obs))
Sigma <- get_cov_matrix_ac(prep, obs)
Sigma <- get_cov_matrix_ac(prep, obs, Jtime = Jtime)
.predict <- function(s) {
rmulti_normal(1, mu = mu[s, ], Sigma = Sigma[s, , ])
}
Expand All @@ -399,9 +400,10 @@ posterior_predict_gaussian_time <- function(i, prep, ...) {

posterior_predict_student_time <- function(i, prep, ...) {
obs <- with(prep$ac, begin_tg[i]:end_tg[i])
Jtime <- prep$ac$Jtime_tg[i, ]
nu <- as.matrix(get_dpar(prep, "nu", i = obs))
mu <- as.matrix(get_dpar(prep, "mu", i = obs))
Sigma <- get_cov_matrix_ac(prep, obs)
Sigma <- get_cov_matrix_ac(prep, obs, Jtime = Jtime)
.predict <- function(s) {
rmulti_student_t(1, df = nu[s, ], mu = mu[s, ], Sigma = Sigma[s, , ])
}
Expand Down
26 changes: 12 additions & 14 deletions R/predictor.R
Original file line number Diff line number Diff line change
Expand Up @@ -440,21 +440,19 @@ predictor_offset <- function(prep, i, nobs) {
# @note eta has to be passed to this function in
# order for ARMA structures to work correctly
predictor_ac <- function(eta, prep, i, fprep = NULL) {
if (has_ac_class(prep$ac$acef, "arma")) {
if (!is.null(prep$ac$err)) {
# ARMA correlations via latent residuals
eta <- eta + p(prep$ac$err, i, row = FALSE)
} else {
# ARMA correlations via explicit natural residuals
if (!is.null(i)) {
stop2("Pointwise evaluation is not possible for ARMA models.")
}
eta <- .predictor_arma(
eta, ar = prep$ac$ar, ma = prep$ac$ma,
Y = prep$ac$Y, J_lag = prep$ac$J_lag,
fprep = fprep
)
if (!is.null(prep$ac[["err"]])) {
# auto-correlations via latent residuals
eta <- eta + p(prep$ac$err, i, row = FALSE)
} else if (has_ac_class(prep$ac$acef, "arma")) {
# ARMA correlations via explicit natural residuals
if (!is.null(i)) {
stop2("Pointwise evaluation is not possible for ARMA models.")
}
eta <- .predictor_arma(
eta, ar = prep$ac$ar, ma = prep$ac$ma,
Y = prep$ac$Y, J_lag = prep$ac$J_lag,
fprep = fprep
)
}
if (has_ac_class(prep$ac$acef, "car")) {
eta <- eta + .predictor_re(Z = p(prep$ac$Zcar, i), r = prep$ac$rcar)
Expand Down
Loading